def extract_all_attachments(parsed_eml: Message, path: str or None):
    """Write every attachment of the e-mail into a directory.

    A MIME part is treated as an attachment when ``get_filename()`` returns
    a value. Attachments are numbered in walk order, starting at 1; parts
    that cannot be written still consume a number.

    :param parsed_eml: parsed e-mail whose MIME parts are walked
    :param path: output directory; ``None`` selects ``'eml_attachments'``.
        The directory is created when it does not exist yet.
    """
    print_headline_banner('Attachment Extracting')

    # if no output directory is given then a default directory with the name 'eml_attachments' is used
    if path is None:
        path = 'eml_attachments'

    # exist_ok avoids the race between an "exists" check and the creation
    os.makedirs(path, exist_ok=True)

    counter = 0
    for child in parsed_eml.walk():
        if child.get_filename() is None:
            continue

        counter += 1
        attachment_filename = _get_printable_attachment_filename(attachment=child)
        output_path = os.path.join(path, attachment_filename)

        payload = child.get_payload(decode=True)
        if payload is None:
            # get_payload(decode=True) yields None for multipart parts; there
            # are no bytes to write, so skip instead of failing with a
            # TypeError and leaving an empty file behind
            warning('Attachment [{}] "{}" has no decodable payload and was skipped'.format(
                counter, attachment_filename))
            continue

        # write attachment to disk; the context manager closes the handle
        # even when the write fails
        with open(output_path, mode='wb') as output_file:
            output_file.write(payload)

        info('Attachment [{}] "{}" extracted to {}'.format(counter, attachment_filename, output_path))
def show_attachments(parsed_eml: Message):
    """Print a numbered table of all attachments in the e-mail.

    Each MIME part for which ``get_filename()`` returns a value produces one
    row holding its printable filename, content type and content
    disposition. The filename and content-type columns are padded to the
    widest entry plus 7 characters.

    :param parsed_eml: parsed e-mail whose MIME parts are walked
    """
    print_headline_banner(headline='Attachments')

    rows = [
        (
            _get_printable_attachment_filename(attachment=part),
            str(part.get_content_type()),
            str(part.get_content_disposition()),
        )
        for part in parsed_eml.walk()
        if part.get_filename() is not None
    ]

    if not rows:
        info('E-Mail contains no attachments')
    else:
        name_width = max(len(row[0]) for row in rows) + 7
        type_width = max(len(row[1]) for row in rows) + 7

        for number, (name, mime_type, disposition) in enumerate(rows, start=1):
            label = '[' + colorize_string(text=str(number), color=Color.CYAN) + ']'
            print(label, name.ljust(name_width), mime_type.ljust(type_width), disposition)

    print()
def extract_attachment(parsed_eml: Message, attachment_number: int, output_path: str or None):
    """Write a single attachment, selected by its number, to disk.

    Attachments are the MIME parts for which ``get_filename()`` returns a
    value, numbered in walk order starting at 1.

    :param parsed_eml: parsed e-mail whose MIME parts are walked
    :param attachment_number: 1-based number of the attachment to extract
    :param output_path: target location. ``None`` writes to the attachment's
        own filename; an existing directory gets the printable filename
        appended; any other value is used as the file path as given.
    """
    print_headline_banner('Attachment Extracting')

    named_parts = (part for part in parsed_eml.walk() if part.get_filename() is not None)
    attachment = None
    for number, part in enumerate(named_parts, start=1):
        if number == attachment_number:
            attachment = part
            break

    # Check if attachment was found
    if attachment is None:
        error('Attachment {} could not be found'.format(attachment_number))
        return

    attachment_filename = _get_printable_attachment_filename(attachment=attachment)
    info('Found attachment [{}] "{}"'.format(attachment_number, attachment_filename))

    payload = attachment.get_payload(decode=True)
    if payload is None:
        # get_payload(decode=True) yields None for multipart parts; bail out
        # before opening the target so no empty file is created
        error('Attachment {} has no decodable payload and cannot be extracted'.format(attachment_number))
        return

    if output_path is None:
        # NOTE(review): this branch uses the raw get_filename() value while the
        # directory branch uses the printable name - confirm this is intended
        output_path = attachment.get_filename()
    elif os.path.isdir(output_path):
        output_path = os.path.join(output_path, attachment_filename)

    # the context manager closes the handle even when the write fails
    with open(output_path, mode='wb') as output_file:
        output_file.write(payload)

    info('Attachment extracted to {}'.format(output_path))
def show_html(parsed_eml: Message):
    """Print the decoded ``text/html`` payload of the e-mail.

    When no such payload is returned, an info message is emitted instead.
    A blank line is printed at the end in both cases.

    :param parsed_eml: parsed e-mail to read the HTML part from
    """
    print_headline_banner('HTML')

    html_body = __get_decoded_payload(parsed_eml=parsed_eml, content_type='text/html')
    if html_body is not None:
        print(html_body)
    else:
        info('Email contains no HTML')

    print()
def show_text(parsed_eml: Message):
    """Print the decoded ``text/plain`` payload of the e-mail.

    When no such payload is returned, an info message is emitted instead.
    A blank line is printed at the end in both cases.

    :param parsed_eml: parsed e-mail to read the plaintext part from
    """
    print_headline_banner('Plaintext')

    plain_body = __get_decoded_payload(parsed_eml=parsed_eml, content_type='text/plain')
    if plain_body is not None:
        print(plain_body)
    else:
        info('Email contains no plaintext')

    print()
def show_header(parsed_eml: Message):
    """Print all header fields as an aligned ``key.....value`` listing.

    Header names are colorized and padded with dots so that all values start
    in the same column. Continuation lines of a multi-line value are
    stripped of tabs, carriage returns and surrounding whitespace and
    printed indented to that column.

    :param parsed_eml: parsed e-mail whose headers are listed
    """
    print_headline_banner(headline='Header')

    headers = parsed_eml.items()

    # default=0 keeps a message without any header from raising ValueError
    max_key_width = max((len(key) for key, _ in headers), default=0)
    continuation_indent = (max_key_width + 5) * ' '

    for key, value in headers:
        # str() is a no-op for plain strings; it guards against header values
        # that are not str (presumably Header objects for some inputs - TODO confirm)
        first_value, *continuation_lines = str(value).split('\n')

        # drop the '\r' that a CRLF line ending leaves on the first line,
        # matching the cleanup applied to the continuation lines below
        first_value = first_value.rstrip('\r')

        print(colorize_string(text=key, color=Color.CYAN)
              + (max_key_width - len(key) + 5) * '.'
              + first_value)

        for x in continuation_lines:
            x = x.replace('\t', '').strip().replace('\r', '').strip(' ')
            print(continuation_indent + x)

    print()
def show_urls(parsed_eml: Message):
    """Print the distinct ``href`` targets found in the HTML part.

    The decoded ``text/html`` payload is scanned for double- and
    single-quoted ``href`` attribute values. A warning is emitted when there
    is no HTML payload, and an info message when no URL was collected.

    :param parsed_eml: parsed e-mail whose HTML part is scanned
    """
    print_headline_banner('URLs in HTML part')

    html_body = __get_decoded_payload(parsed_eml=parsed_eml, content_type='text/html')
    found_urls = set()

    if html_body is not None:
        for href_regex in (r'href="(.+?)"', r"href='(.+?)'"):
            found_urls.update(hit.group(1) for hit in re.finditer(href_regex, html_body))
    else:
        warning('Email contains no HTML')

    if not found_urls:
        info('No URLs found in the html')

    for url in found_urls:
        print(' - ' + colorize_string(text=url, color=Color.MAGENTA))

    print()
def check_tracking(parsed_eml: Message):
    """Print the resources the HTML part references outside the e-mail.

    The decoded ``text/html`` payload is scanned for double- and
    single-quoted ``src`` and ``background`` attribute values. Values
    starting with ``cid:`` are left out. A warning is emitted when there is
    no HTML payload, and an info message when nothing was collected.

    :param parsed_eml: parsed e-mail whose HTML part is scanned
    """
    print_headline_banner('Reloaded Content (aka. Tracking Pixels)')

    html_body = __get_decoded_payload(parsed_eml=parsed_eml, content_type='text/html')
    external_sources = set()

    if html_body is None:
        warning('Email contains no HTML')
    else:
        attribute_regexes = (
            r'src="(.+?)"',
            r"src='(.+?)'",
            r'background="(.+?)"',
            r"background='(.+?)'",
        )
        for attribute_regex in attribute_regexes:
            for hit in re.finditer(attribute_regex, html_body):
                target = hit.group(1)
                if target.startswith('cid:'):
                    continue
                external_sources.add(target)

    if not external_sources:
        info('No content found which will be reloaded from external resources')

    for source in external_sources:
        print(' - ' + colorize_string(text=source, color=Color.MAGENTA))

    print()
def show_structure(parsed_eml: Message):
    """Print the 'Structure' section for the e-mail.

    Emits the headline banner, delegates the actual output to
    ``__show_structure`` and finishes with a blank line.

    :param parsed_eml: parsed e-mail handed on to ``__show_structure``
    """
    print_headline_banner('Structure')
    __show_structure(parsed_eml=parsed_eml)
    print()