def parse_email_file(self, file_id, file_extension):
    """Parse a stored ``.eml`` or ``.msg`` file and return it serialized.

    The file is fetched with ``self.get_file(file_id)``. Depending on
    ``file_extension`` (case-insensitive) the ``data`` entry of the fetched
    file is decoded with ``eml_parser`` (``eml``) or ``MsOxMessage`` (``msg``).

    Returns:
        A JSON string (``eml``), a jsonpickle string (``msg``), or a dict
        describing the failure. Note the two failure shapes differ:
        ``{"success": False, "reason": ...}`` when the file cannot be
        fetched, ``{"Success": "False", "Message": ...}`` otherwise.
    """
    file_path = self.get_file(file_id)
    # Deliberately an equality test: only an explicit False/0 counts as failure.
    if file_path["success"] == False:
        return {
            "success": False,
            "reason": "Couldn't get file with ID %s" % file_id,
        }

    print("File: %s" % file_path)
    extension = file_extension.lower()

    if extension == 'eml':
        print('working with .eml file')
        decoder = eml_parser.EmlParser()
        try:
            return json.dumps(
                decoder.decode_email_bytes(file_path['data']),
                default=json_serial,
            )
        except Exception as exc:
            return {"Success": "False", "Message": f"Exception occured: {exc}"}

    if extension == 'msg':
        print('working with .msg file')
        try:
            properties = MsOxMessage(file_path['data']).get_properties()
            print(properties)
            return jsonpickle.encode(properties)
        except Exception as exc:
            return {"Success": "False", "Message": f"Exception occured: {exc}"}

    return {
        "Success": "False",
        "Message": f"No file handler for file extension {file_extension}",
    }
async def parse_email_file(self, file_id, file_extension):
    """Parse a stored ``.eml`` or ``.msg`` file and return it serialized.

    The file is only fetched (``self.get_file``) when the extension is one of
    the two handled ones. Any other extension falls through and the coroutine
    returns ``None``.

    Returns:
        A JSON string for ``eml``, a jsonpickle string for ``msg``, or a
        ``{"Success": "False", "Message": ...}`` dict when parsing raised.
    """
    kind = file_extension.lower()

    if kind == 'eml':
        print('working with .eml file')
        fetched = self.get_file(file_id)
        print(fetched['data'])
        decoder = eml_parser.EmlParser()
        try:
            decoded = decoder.decode_email_bytes(fetched['data'])
            return json.dumps(decoded, default=json_serial)
        except Exception as exc:
            return {
                "Success": "False",
                "Message": f"Exception occured: {exc}"
            }
    elif kind == 'msg':
        print('working with .msg file')
        fetched = self.get_file(file_id)
        print(fetched['data'])
        try:
            properties = MsOxMessage(fetched['data']).get_properties()
            print(properties)
            return jsonpickle.encode(properties)
        except Exception as exc:
            return {
                "Success": "False",
                "Message": f"Exception occured: {exc}"
            }
def analyse(self, sample, samplename):
    """Parse the e-mail file ``sample`` and pass the result to ``build_report``.

    The file is read once. Its bytes are decoded with ``eml_parser`` and
    round-tripped through JSON (datetimes become ISO strings), and the
    text/plain body is extracted with the standard ``email`` package.

    Args:
        sample: Path of the e-mail file to analyse.
        samplename: Not used by this method.

    Any exception is forwarded to ``self.unexpectedError``.
    """
    def json_serial(obj):
        # Only datetimes are converted; anything else serializes as null.
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()

    try:
        with open(sample, 'rb') as fhdl:
            raw_email = fhdl.read()
            # Rewind and reuse the same handle instead of reopening the file.
            fhdl.seek(0)
            msg = BytesParser(policy=policy.default).parse(fhdl)

        # Grab all headers
        ep = eml_parser.EmlParser()
        parsed_eml = ep.decode_email_bytes(raw_email)
        jsonEML = json.loads(json.dumps(parsed_eml, default=json_serial))

        # Grab the email body and pass it into the report.
        # A one-element tuple is required here: ('plain') is just a string.
        body_part = msg.get_body(preferencelist=('plain',))
        # get_body returns None when the message has no text/plain part.
        text = body_part.get_content() if body_part is not None else ''

        self.build_report(jsonEML, text)
    except Exception as e:
        self.unexpectedError(e)
def to_json(self, filepath):
    """Read the file at ``filepath`` as bytes and return eml_parser's result.

    Despite the name, the value returned is whatever
    ``EmlParser.decode_email_bytes`` yields; no JSON encoding is done here.
    """
    with open(filepath, 'rb') as handle:
        payload = handle.read()
    return eml_parser.EmlParser().decode_email_bytes(payload)
def EML_Parsing(raw_email):
    """
    Decode raw EML bytes and flag whether the mail has attachments or URLs.

    :param raw_email: Raw email data
    :return: list ``[parsed_eml, url, attachment]`` where ``url`` and
        ``attachment`` are booleans
    """
    decoder = eml_parser.EmlParser(
        include_raw_body=True,
        include_attachment_data=False,
    )
    parsed_eml = decoder.decode_email_bytes(raw_email)

    # An 'attachment' key in the parsed result means the mail carries one.
    attachment = 'attachment' in parsed_eml.keys()

    # A URL counts only if the second body entry exposes a 'uri' key;
    # a missing key or missing second entry both mean "no URL".
    try:
        parsed_eml['body'][1]['uri']
    except (KeyError, IndexError):
        url = False
    else:
        url = True

    return [parsed_eml, url, attachment]
def parse_email_headers(self, email_headers):
    """Decode header text with eml_parser and return the result as JSON.

    Args:
        email_headers: Header text; it is encoded to UTF-8 bytes first.

    Returns:
        JSON string of the parsed result (``json_serial`` is the fallback
        serializer).

    Raises:
        Exception: any failure is re-raised wrapped in a plain ``Exception``.
    """
    try:
        header_bytes = bytes(email_headers, 'utf-8')
        decoded = eml_parser.EmlParser().decode_email_bytes(header_bytes)
        return json.dumps(decoded, default=json_serial)
    except Exception as e:
        raise Exception(e)
def read_eml_file(self, path='sample_file.eml'):
    """Parse an EML file and pass its JSON form to ``self.get_information``.

    Args:
        path: File to read. Defaults to ``'sample_file.eml'``, the path that
            was previously hard-coded, so existing callers are unaffected.

    The parsed result is dumped with ``indent=4``, sorted keys and
    ``default=str`` (non-JSON values are stringified).
    """
    with open(path, 'rb') as email:
        raw_email = email.read()

    ep = eml_parser.EmlParser()
    parsed_eml = ep.decode_email_bytes(raw_email)
    self.get_information(
        json.dumps(parsed_eml, indent=4, sort_keys=True, default=str))
def extract_body_and_subject_single(email_file):
    """Return subject, first body content and second body content of a mail.

    ``email_file`` is decoded with ``EmlParser.decode_email`` (raw bodies
    included). Each value is looked up via ``extract_if_possible`` using the
    key path shown below.
    """
    parsed = eml_parser.EmlParser(include_raw_body=True).decode_email(email_file)

    subject = extract_if_possible(email_file, parsed, 'header', 'subject')
    first_body = extract_if_possible(email_file, parsed, 'body', 0, 'content')
    second_body = extract_if_possible(email_file, parsed, 'body', 1, 'content')

    return {
        'subject': subject,
        'body': first_body,
        'body_html': second_body,
    }
def extractEventDetailsFromEmail(emailFile):
    """Parse an open e-mail file and extract event details from its body.

    ``emailFile`` must provide ``read()`` returning the raw message bytes.
    The subject is printed, and the content of the first body entry is
    handed to ``extractEventDetails``, whose result is returned.
    """
    raw_bytes = emailFile.read()
    parser = eml_parser.EmlParser(include_raw_body=True)
    email = parser.decode_email_bytes(raw_bytes)

    # Email subject
    print('Subject:', email['header']['subject'])

    # Content of the first body entry
    # NOTE(review): treated as HTML downstream - confirm body[0] is the HTML part.
    first_body = email['body'][0]
    return extractEventDetails(first_body['content'])
def extraire_contenu_mail(mail):
    """Turn an EML file into a parsable dict using the eml_parser library.

    :param mail: path of the mail to parse (EML format)
    :type mail: str
    :return parsed_eml: the parsed mail
    :rtype parsed_eml: dict
    """
    parser = eml_parser.EmlParser(include_raw_body=True,
                                  include_attachment_data=False)
    with open(mail, 'rb') as handle:
        contents = handle.read()
    return parser.decode_email_bytes(contents)
def main():
    """Decode the "Base64 EML Blob" action parameter and publish it as JSON.

    The blob is base64-decoded, parsed with eml_parser (raw bodies and
    attachment data included) and attached to the action result as a JSON
    string, using ``json_serial`` as the fallback serializer.
    """
    siemplify = SiemplifyAction()
    base64_blob = siemplify.parameters.get("Base64 EML Blob")
    eml_content = base64.b64decode(base64_blob)

    ep = eml_parser.EmlParser(include_raw_body=True,
                              include_attachment_data=True)
    parsed_eml = ep.decode_email_bytes(eml_content)

    siemplify.result.add_result_json(
        json.dumps(parsed_eml, default=json_serial))
    # Message previously read "Prased EML"; corrected spelling.
    siemplify.end("Parsed EML. See JSON.", "True")
def get_email_data(m_path):
    """Return body text and a few headers of the e-mail at ``m_path``.

    The file is decoded with ``EmlParser.decode_email``; the values are then
    read from the parser's ``msg`` attribute.

    Returns:
        dict with ``text`` (element ``[0][1]`` of ``get_raw_body_text``),
        ``date``, ``deliver_to`` and ``reply_to``.
    """
    parser = eml_parser.EmlParser()
    parser.decode_email(m_path)

    body_parts = parser.get_raw_body_text(parser.msg)
    sent_on = parser.msg['Date']
    delivered = parser.msg.get('Delivered-To')
    answer_to = parser.msg.get('Reply-To')

    return {
        'text': body_parts[0][1],
        'date': sent_on,
        'deliver_to': delivered,
        'reply_to': answer_to,
    }
def main():
    """Command-line entry point: parse one e-mail file and print it as JSON.

    Options select the input file, toggle the parser's ``include_raw_body``
    (``-r``) and ``include_attachment_data`` (``-d``) settings, and override
    the default whitelist / by-host entries passed to the parser as ``pconf``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-i', dest='msgfile',
                        help='input file', required=True)
    parser.add_argument('-d', dest='debug', action='store_true',
                        help='debug (no hashing)')
    parser.add_argument('-r', dest='fulldata', action='store_true',
                        help='includes raw data of attachments')
    parser.add_argument('-w', dest='whitelist_ip',
                        help='whitelist IPv4 or IPv6 ip from parsing; comma-separated list of IPs, no spaces !')
    parser.add_argument('-f', dest='whitelist_email',
                        help='whitelist an email in routing headers "For"; comma-separated list of e-mail addresses, no spaces !')
    parser.add_argument('-b', dest='byhostentry',
                        help='collect the smtp injector IP using the "by" "host" in routing headers; comma-separated list of IPs, no spaces !')
    parser.add_argument('--email-force-tld', dest='email_force_tld', action='store_true',
                        help='Only parse e-mail addresses which include a tld.')

    options = parser.parse_args()
    msgfile = options.msgfile

    # Defaults, each replaced when the matching option was given.
    pconf = {
        'whiteip': ['192.168.1.1'],
        'whitefor': ['*****@*****.**'],
        'byhostentry': ['example.com'],
    }
    overrides = (
        ('whiteip', options.whitelist_ip),
        ('whitefor', options.whitelist_email),
        ('byhostentry', options.byhostentry),
    )
    for key, raw_value in overrides:
        if raw_value is not None:
            pconf[key] = raw_value.split(',')

    with open(msgfile, 'rb') as fhdl:
        raw_email = fhdl.read()

    ep = eml_parser.EmlParser(include_raw_body=options.fulldata,
                              include_attachment_data=options.debug,
                              pconf=pconf,
                              email_force_tld=options.email_force_tld)
    print(json.dumps(ep.decode_email_bytes(raw_email), default=json_serial))
def main():
    """Command-line entry point: save attachments of every ``.eml`` in a directory.

    ``-p`` names the directory to scan (non-recursive, via ``iterdir``) and
    ``-o`` the directory that receives the attachments; it is created,
    including missing parents, when absent.

    Raises:
        SystemExit: when the scan path is not a directory.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-p', dest='path',
                        help='Path to scan for EML files.', required=True)
    parser.add_argument(
        '-o', dest='outpath', default='.',
        help='Path where to save attachments in (default is current directory).'
    )

    options = parser.parse_args()

    scan_path = pathlib.Path(options.path)
    out_path = pathlib.Path(options.outpath)

    if not scan_path.is_dir():
        raise SystemExit('Specified path is not accessible')

    if not out_path.is_dir():
        out_path.mkdir(parents=True)

    ep = eml_parser.EmlParser(include_attachment_data=True)

    for k in scan_path.iterdir():
        if k.suffix != '.eml':
            continue

        print(f'Parsing: {str(k)}')
        m = ep.decode_email(k)

        if 'attachment' in m:
            for index, a in enumerate(m['attachment']):
                # The filename comes from the (untrusted) mail: keep only its
                # final component so it cannot escape out_path via "../" or
                # an absolute path. Backslashes are treated as separators too.
                safe_name = pathlib.PurePath(
                    str(a['filename']).replace('\\', '/')).name
                if safe_name in ('', '.', '..'):
                    safe_name = f'attachment_{index}'

                out_filepath = out_path / safe_name
                print(f'\tWriting attachment: {out_filepath}')
                with out_filepath.open('wb') as a_out:
                    a_out.write(base64.b64decode(a['raw']))

        print()
def EML_Parsing(raw_email):
    """Decode raw EML bytes with eml_parser and return the parsed result.

    Raw bodies are included; attachment data is not.
    """
    settings = {
        'include_raw_body': True,
        'include_attachment_data': False,
    }
    return eml_parser.EmlParser(**settings).decode_email_bytes(raw_email)
async def get_emails_imap(
    self,
    username,
    password,
    imap_server,
    foldername,
    amount,
    unread,
    fields,
    include_raw_body,
    include_attachment_data,
):
    """Fetch the newest messages of an IMAP folder and return them as JSON.

    Args:
        username, password: IMAP credentials.
        imap_server: Host to connect to (SSL first, then plain + STARTTLS
            if the SSL connection is refused).
        foldername: Mailbox to select.
        amount: Maximum number of messages (int or numeric string).
        unread: ``"true"`` to search only ``UNSEEN`` messages.
        fields: Optional comma-separated glom paths to keep from each mail.
        include_raw_body, include_attachment_data: ``"true"``/``"false"``
            strings forwarded to ``EmlParser``.

    Returns:
        A JSON string with one entry per message (the parsed mail, or
        ``{"id": ..., "error": ...}`` for messages that failed), or a plain
        error string when connecting, logging in or searching fails.
    """
    if isinstance(amount, str):
        try:
            amount = int(amount)
        except ValueError:
            return "Amount needs to be a number, not %s" % amount

    try:
        email = imaplib.IMAP4_SSL(imap_server)
    except ConnectionRefusedError:
        # SSL port refused: retry unencrypted and upgrade with STARTTLS.
        try:
            email = imaplib.IMAP4(imap_server)
            email.starttls()
        except socket.gaierror as error:
            return "Can't connect to IMAP server %s: %s" % (imap_server, error)
    except socket.gaierror as error:
        return "Can't connect to IMAP server %s: %s" % (imap_server, error)

    try:
        email.login(username, password)
    except imaplib.IMAP4.error as error:
        return "Failed to log into %s: %s" % (username, error)

    email.select(foldername)
    unread = unread.lower().strip() == "true"

    try:
        # IMAP search queries, e.g. "seen" or "read"
        # https://www.rebex.net/secure-mail.net/features/imap-search.aspx
        mode = "(UNSEEN)" if unread else "ALL"
        thistype, data = email.search(None, mode)
    except imaplib.IMAP4.error:
        return "Couldn't find folder %s." % (foldername)

    email_ids = data[0]
    # Check before splitting: calling .split() on None would raise.
    if email_ids is None:
        return "Couldn't retrieve email. Data: %s" % data

    id_list = email_ids.split()
    print("LIST: ", len(id_list))

    emails = []

    if isinstance(fields, str) and fields.strip() != "":
        fields = {k.strip(): k.strip() for k in fields.split(",")}
    else:
        fields = None

    include_raw_body = include_raw_body.lower().strip() == "true"
    include_attachment_data = (
        include_attachment_data.lower().strip() == "true")

    ep = eml_parser.EmlParser(
        include_attachment_data=include_attachment_data,
        include_raw_body=include_raw_body,
    )

    # Newest first, never more than the mailbox holds (an oversized amount
    # used to wrap around to negative indices and revisit messages).
    newest_first = id_list[::-1][:max(amount, 0)]

    for msg_id in newest_first:
        # Reset per message so one failure does not taint the following ones.
        error = None

        resp, data = email.fetch(msg_id, "(RFC822)")
        if resp != "OK":
            print("Failed getting %s" % msg_id)
            continue

        if data is None:
            continue

        # Convert email in json
        parsed = None
        try:
            parsed = json.loads(
                json.dumps(ep.decode_email_bytes(data[0][1]), default=default))
        except UnicodeDecodeError:
            print("Failed to decode part of email %s" % msg_id)
            error = "Failed to decode email %s" % msg_id
        except IndexError as err:
            print("Indexerror: %s" % err)
            error = "Something went wrong while parsing. Check logs."

        # Pick only selected fields if specified (only for mails that parsed)
        if error is None and fields:
            try:
                parsed = glom.glom(parsed, fields)
            except glom.core.PathAccessError:
                print("Required fields are not valid")
                error = "Required fields are not valid"

        if error:
            emails.append({
                "id": msg_id.decode("utf-8"),
                "error": error
            })
        else:
            emails.append(parsed)

    return json.dumps(emails)
def process(self, feed_entry):
    """Turn a feed entry holding a raw e-mail into an ``Email`` object.

    The entry's payload must provide ``sha256`` and ``msg`` (the message as
    text). The message is parsed with eml_parser; attachments, URLs, domains,
    sender/receiver entities and selected headers are then collected.

    Returns:
        ``(email, extracted)`` where ``extracted`` is the list of files
        followed by the URLs, or ``(None, None)`` when the message cannot
        be parsed.
    """
    feed_payload = feed_entry.payload
    sha256 = feed_payload['sha256']
    data = feed_payload['msg']

    try:
        # Catches unexpected exceptions of eml_parser.EmlParser.
        # Exception (not BaseException) so Ctrl-C / SystemExit still propagate.
        ep = eml_parser.EmlParser(include_raw_body=True, include_attachment_data=True)
        raw_bytes = data.encode("utf-8")
        eml_dict = ep.decode_email_bytes(raw_bytes)
    except Exception as e:
        logger.error(e)
        logger.error(f"eml_parser unable to parse message - {sha256}")
        return None, None

    attachments, files = self.extract_attachments(eml_dict)
    ts = self.retrieve_datetime_in_utc(eml_dict)

    src_raw = self.find_sender(eml_dict)
    source = NetworkEntityFactory.get_from_ip(src_raw, 25, EntityEnum.smtp_server, timestamp=ts)
    dest_raw = self.find_receiver(eml_dict)
    dest = NetworkEntityFactory.get_from_ip(dest_raw, 25, EntityEnum.honeypot, timestamp=ts)

    urls = []
    domains = []
    message = []
    if eml_dict.get("body"):
        # Extract URLs
        urls = self.extract_urls_dm(eml_dict)
        domains = self.extract_domains(eml_dict)

        # Extract content for preview; slicing already caps the length.
        message = eml_dict["body"][0].get("content", "")[:self.MSG_THRESHOLD]

    header = eml_dict['header']
    raw_headers = header['header']

    cc = header.get("cc", [])

    message_id_list = raw_headers.get("message-id")
    message_id = message_id_list[0] if message_id_list else ""

    subject = header['subject']

    reply_to_raw = raw_headers.get("reply-to", None)
    reply_to = self.sanitize_address(reply_to_raw[0]) if reply_to_raw else ""

    return_path_raw = raw_headers.get("return-path", None)
    return_path = self.sanitize_address(return_path_raw[0]) if return_path_raw else ""

    sender = header.get("from", "")
    # One tolerant lookup feeds both `recipients` and `to`; a missing "to"
    # header yields empty lists instead of a KeyError.
    recipients = header.get("to", [])
    related = header.get('received_ip', [])
    # Size of the message as parsed (reuses the bytes encoded above).
    size = len(raw_bytes)

    m = Email(
        file_id=feed_entry._id,
        attachments=attachments,
        attachment_count=len(attachments),
        cc=[Address(c) for c in cc],
        destination=dest,
        domains=domains,
        message=message,
        message_id=message_id,
        observer=Observer(feed_entry.identifier),
        recipients=[Address(r) for r in recipients],
        related=related,
        reply_to=Address(reply_to),
        return_path=Address(return_path),
        sender=Address(sender),
        sha256=sha256,
        source=source,
        size=size,
        subject=subject,
        to=[Address(t) for t in recipients],
        timestamp=ts,
        urls=[u.url for u in urls],
        is_enriched=True
    )
    logger.debug("Mail successfully processed")
    return m, [*files, *urls]
import datetime
import json
import eml_parser
from helpers import constants
import os

FILENAME = 'Email_object_example.eml'


def json_serial(obj):
    """Fallback serializer for ``json.dumps``.

    Datetimes become ISO-8601 strings; every other object yields ``None``
    (and is therefore emitted as JSON ``null``).
    """
    if not isinstance(obj, datetime.datetime):
        return None
    return obj.isoformat()


if __name__ == '__main__':
    # Parse the example mail stored under constants.PATH and print it as JSON.
    with open(os.path.join(constants.PATH, FILENAME), 'rb') as fhdl:
        raw_email = fhdl.read()

    ep = eml_parser.EmlParser()
    parsed_eml = ep.decode_email_bytes(raw_email)
    print(json.dumps(parsed_eml, default=json_serial))
def get_emails(
    self,
    username,
    password,
    server,
    build,
    account,
    verifyssl,
    foldername,
    category,
    amount,
    unread,
    fields,
    include_raw_body,
    include_attachment_data,
    upload_email_shuffle,
    upload_attachments_shuffle,
):
    """Fetch the newest mails of a folder and return them as a JSON string.

    After authenticating (``self.authenticate``) and resolving the folder
    (``self.parse_folder``), up to ``amount`` mails are read newest first,
    filtered on read state and optionally on ``category``. Each mail's MIME
    content is parsed with eml_parser; when ``fields`` (comma-separated glom
    paths) is given only those paths are kept.

    All boolean-like arguments are ``"true"``/``"false"`` strings.

    Returns:
        JSON string ``{"success": True, "messages": [...]}`` or a JSON
        string describing the failure.
    """
    def path_to_dict(path, value=None):
        # "a.b.c" -> {"a": {"b": {"c": value}}}
        nested = value
        for key in reversed(path.split(".")):
            nested = {key: nested}
        return nested

    def merge(d1, d2):
        # Recursively fold d2 into d1; non-dict values in d2 win.
        for key, incoming in d2.items():
            current = d1.get(key)
            if key in d1 and isinstance(current, dict) and isinstance(incoming, dict):
                merge(current, incoming)
            else:
                d1[key] = incoming

    def as_bool(text):
        return text.lower().strip() == "true"

    # Authenticate
    auth = self.authenticate(username, password, server, build, account,
                             verifyssl)
    if auth["error"]:
        return json.dumps({"success": False, "reason": auth["error"]})
    account = auth["account"]

    # Parse email folder
    folder = self.parse_folder(account, foldername)
    if folder["error"]:
        return json.dumps({"success": False, "reason": folder["error"]})
    folder = folder["folder"]

    if type(amount) is str:
        try:
            amount = int(amount)
        except ValueError:
            return json.dumps({
                "success": False,
                "account": None,
                "error": "Amount needs to be a number, not %s" % amount,
            })

    # Get input from gui
    unread = as_bool(unread)
    category = category.lower().strip()
    include_raw_body = as_bool(include_raw_body)
    include_attachment_data = as_bool(include_attachment_data)
    upload_email_shuffle = as_bool(upload_email_shuffle)
    upload_attachments_shuffle = as_bool(upload_attachments_shuffle)

    # Convert <amount> of mails in json
    emails = []
    ep = eml_parser.EmlParser(
        include_attachment_data=include_attachment_data,
        include_raw_body=include_raw_body,
    )

    try:
        criteria = {"is_read": not unread}
        if category:
            criteria["categories__icontains"] = category
        folder_filter = folder.filter(**criteria).order_by(
            "-datetime_received")[:amount]

        for email in folder_filter:
            parsed_eml = ep.decode_email_bytes(email.mime_content)

            if fields and fields.strip() != "":
                output_dict = {}
                for field in fields.split(","):
                    field = field.strip()
                    picked = glom(parsed_eml, field, default=None)
                    merge(output_dict, path_to_dict(field, picked))
            else:
                output_dict = parsed_eml

            # Add message_id as top returned field
            raw_message_id = parsed_eml["header"]["header"]["message-id"][0]
            output_dict["message_id"] = raw_message_id
            output_dict["message_id"] = raw_message_id.replace("\t", "").strip()

            # Add categories to output dict
            output_dict["categories"] = email.categories

            if upload_email_shuffle:
                uploaded = self.set_files([{
                    "filename": "email.msg",
                    "data": email.mime_content
                }])
                output_dict["email_fileid"] = uploaded[0]

            if upload_attachments_shuffle:
                atts_up = []
                for attachment in email.attachments:
                    if type(attachment) == FileAttachment:
                        atts_up.append({
                            "filename": attachment.name,
                            "data": attachment.content
                        })
                output_dict["attachment_uids"] = self.set_files(atts_up)

            # Copy the second body's content onto the first as "raw_body".
            try:
                if len(output_dict["body"]) > 1:
                    output_dict["body"][0]["raw_body"] = output_dict[
                        "body"][1]["content"]
            except KeyError as e:
                print("OK KeyError (1): %s" % e)
            except IndexError as e:
                print("OK IndexError (1): %s" % e)

            # Collapse the body list to its first entry.
            try:
                if len(output_dict["body"]) > 0:
                    output_dict["body"] = output_dict["body"][0]
            except KeyError as e:
                print("OK KeyError (2): %s" % e)
            except IndexError as e:
                print("OK IndexError (2): %s" % e)

            # Attachments are not returned inline.
            try:
                del output_dict["attachment"]
            except KeyError as e:
                print("Ok Error (3): %s" % e)
            except IndexError as e:
                print("OK IndexError (3): %s" % e)

            print("Appending email")
            emails.append(output_dict)
    except Exception as err:
        return json.dumps({
            "success": False,
            "reason": "Error during email processing: {}".format(err)
        })

    print("FINISHED - RETURNING")
    message = {
        "success": True,
        "messages": emails,
    }
    print(message)
    return json.dumps(message, default=default)
def parseEml(filepath, job_directory, wkhtmltoimage):
    """Parse an EML file into raw mail, headers, bodies, attachments and IOCs.

    Args:
        filepath: Path of the EML file to read.
        job_directory: Base directory; attachments are written to its
            ``output`` sub-directory.
        wkhtmltoimage: Mapping with ``enable``, ``path`` and ``width_size``
            controlling the optional rendering of HTML bodies to an image.

    Returns:
        dict with the keys ``raw_email``, ``headers``, ``body``,
        ``attachments`` and ``iocs``.
    """
    ep = eml_parser.EmlParser(include_raw_body=True,
                              include_attachment_data=True)
    with open(filepath, 'rb') as f:
        raw_email = f.read()

    decoded_email = ep.decode_email_bytes(raw_email)
    parsed_header = decoded_email.get('header')
    header_fields = parsed_header.get('header')

    ##
    ## Results
    ##
    result = dict()
    iocs = {
        'ip': [],
        'domain': [],
        'url': [],
        'email': [],
        'hash': [],
        'files': [],
    }

    ##
    ## Extract raw email
    ##
    # errors='replace' keeps mails that are not valid UTF-8 from aborting
    # the whole analysis.
    result['raw_email'] = raw_email.decode('utf-8', errors='replace')

    ##
    ## Extract SMTP envelope
    ##
    headers = dict()
    for name in ('return-path', 'delivered-to', 'x-delivered-to'):
        headers[name] = header_fields.get(name, '')

    ##
    ## Extract Headers
    ##
    for name in ('from', 'to', 'cc', 'bcc', 'reply-to'):
        headers[name] = header_fields.get(name, [])
    headers['subject'] = header_fields.get('subject', '')
    # A missing/empty Date header gives '' instead of raising IndexError.
    date_values = header_fields.get('date')
    headers['date'] = date_values[0] if date_values else ''
    headers['received'] = parsed_header.get('received')

    # Make dates ready for json
    for h in headers['received'] or []:
        received_date = h.get('date')
        if isinstance(received_date, datetime.datetime):
            h['date'] = received_date.isoformat()

    result['headers'] = headers

    ##
    ## Extract body text/plain and text/html
    ##
    body = dict()
    if 'body' in decoded_email:
        body['text_plain'] = list()
        body['text_html'] = list()
        for b in decoded_email.get('body'):
            content_type = b.get('content_type')
            ## text/plain
            if content_type == "text/plain":
                body['text_plain'].append(b)
                b['beautified_text'] = BeautifulSoup(
                    b.get('content'), 'html.parser').prettify()
                iocs['url'].extend(ep.get_uri_ondata(b.get('content')))

            ## text/html
            elif content_type == "text/html":
                iocs['url'].extend(ep.get_uri_ondata(b.get('content')))

                ## Generate rendering image if option is enabled
                if wkhtmltoimage.get('enable'):
                    img_file = convert_png(b.get('content'), 0,
                                           wkhtmltoimage.get('path'), "/tmp")
                    b['rendered_html'] = "data:{};base64,{}".format(
                        "image/png",
                        base64_image(img_file.get('img_path'),
                                     wkhtmltoimage.get('width_size')))

                b['beautified_html'] = BeautifulSoup(
                    b.get('content'), 'html.parser').prettify()
                body['text_html'].append(b)

    result['body'] = body

    ##
    ## Extract Attachments
    ##
    result['attachments'] = list()
    for a in decoded_email.get('attachment', []):
        raw_data = a.get('raw')
        a['mime'] = magic.from_buffer(binascii.a2b_base64(raw_data))
        if isinstance(raw_data, bytes):
            # The filename comes from the (untrusted) mail: keep only its
            # final component so it cannot escape the output directory.
            safe_name = os.path.basename(
                str(a.get('filename', '')).replace('\\', '/'))
            if safe_name in ('', '.', '..'):
                safe_name = 'attachment'
            attachment_path = os.path.join(job_directory, 'output', safe_name)
            with open(attachment_path, 'wb') as out:
                out.write(base64.b64decode(raw_data))
            a['raw'] = raw_data.decode('ascii')
        result['attachments'].append(a)
        iocs['hash'].append({
            'hash': a.get('hash').get('sha256'),
            'filename': a.get('filename')
        })

    ##
    ## Extract IOCs
    ##
    iocs['ip'].extend(parsed_header.get('received_ip', []))
    iocs['domain'].extend(parsed_header.get('received_domain', []))

    ### Email
    for field in ('cc', 'bcc', 'delivered_to', 'received_foremail'):
        iocs['email'].extend(parsed_header.get(field, []))
    iocs['email'].append(parsed_header.get('from', ''))

    result['iocs'] = iocs

    return result
def get_emails_imap(
    self,
    username,
    password,
    imap_server,
    foldername,
    amount,
    unread,
    fields,
    include_raw_body,
    include_attachment_data,
    upload_email_shuffle,
    upload_attachments_shuffle,
    ssl_verify="True"
):
    """Fetch the newest messages of an IMAP folder as parsed dicts.

    Args:
        username, password: IMAP credentials.
        imap_server: Host to connect to (SSL first; on a refused connection
            plain IMAP, upgraded with STARTTLS unless ``ssl_verify`` is
            ``"false"``/``"False"``).
        foldername: Mailbox to select.
        amount: Maximum number of messages (int or numeric string).
        unread: ``"true"`` to search only ``UNSEEN`` messages.
        fields: Optional comma-separated glom paths to keep from each mail.
        include_raw_body, include_attachment_data, upload_email_shuffle,
        upload_attachments_shuffle: ``"true"``/``"false"`` strings.

    Returns:
        ``{"success": True, "messages": [...]}`` or
        ``{"success": False, "reason": ...}``.
    """
    def path_to_dict(path, value=None):
        # "a.b.c" -> {"a": {"b": {"c": value}}}
        nested = value
        for key in reversed(path.split(".")):
            nested = {key: nested}
        return nested

    def merge(d1, d2):
        # Recursively fold d2 into d1; non-dict values in d2 win.
        for k in d2:
            if k in d1 and isinstance(d1[k], dict) and isinstance(d2[k], dict):
                merge(d1[k], d2[k])
            else:
                d1[k] = d2[k]

    if isinstance(amount, str):
        try:
            amount = int(amount)
        except ValueError:
            return {
                "success": False,
                "reason": "Amount needs to be a number, not %s" % amount,
            }

    try:
        email = imaplib.IMAP4_SSL(imap_server)
    except ConnectionRefusedError:
        try:
            email = imaplib.IMAP4(imap_server)
            if ssl_verify not in ("false", "False"):
                email.starttls()
        except socket.gaierror as error:
            return {
                "success": False,
                "reason": "Can't connect to IMAP server %s: %s" % (imap_server, error),
            }
    except socket.gaierror as error:
        return {
            "success": False,
            "reason": "Can't connect to IMAP server %s: %s" % (imap_server, error),
        }

    try:
        email.login(username, password)
    except imaplib.IMAP4.error as error:
        return {
            "success": False,
            "reason": "Failed to log into %s: %s" % (username, error),
        }

    email.select(foldername)
    unread = unread.lower().strip() == "true"

    try:
        # IMAP search queries, e.g. "seen" or "read"
        # https://www.rebex.net/secure-mail.net/features/imap-search.aspx
        mode = "(UNSEEN)" if unread else "ALL"
        thistype, data = email.search(None, mode)
    except imaplib.IMAP4.error:
        return {
            "success": False,
            "reason": "Couldn't find folder %s." % (foldername),
        }

    email_ids = data[0]
    # Check before splitting: calling .split() on None would raise.
    if email_ids is None:
        return {
            "success": False,
            "reason": f"Couldn't retrieve email. Data: {data}",
        }

    id_list = email_ids.split()
    self.logger.info(f"LIST: {id_list}")

    include_raw_body = include_raw_body.lower().strip() == "true"
    include_attachment_data = include_attachment_data.lower().strip() == "true"
    upload_email_shuffle = upload_email_shuffle.lower().strip() == "true"
    upload_attachments_shuffle = (
        upload_attachments_shuffle.lower().strip() == "true")

    # Convert <amount> of mails in json
    emails = []
    ep = eml_parser.EmlParser(
        # Attachment data is needed for uploading even if not returned inline.
        include_attachment_data=include_attachment_data or upload_attachments_shuffle,
        include_raw_body=include_raw_body,
    )

    if not id_list:
        return {
            "success": True,
            "messages": [],
        }

    try:
        amount = min(amount, len(id_list))
        for i in range(len(id_list) - 1, len(id_list) - amount - 1, -1):
            resp, data = email.fetch(id_list[i], "(RFC822)")

            if resp != "OK":
                self.logger.info("Failed getting %s" % id_list[i])
                continue

            if data is None:
                continue

            raw_message = data[0][1]
            parsed_eml = ep.decode_email_bytes(raw_message)

            # Keep a reference to the attachments now: without a field
            # selection output_dict IS parsed_eml, so blanking
            # output_dict["attachment"] below would otherwise leave nothing
            # to upload.
            attachments = parsed_eml.get("attachment", [])

            if fields and fields.strip() != "":
                output_dict = {}
                for field in fields.split(","):
                    field = field.strip()
                    merge(
                        output_dict,
                        path_to_dict(
                            field,
                            glom(parsed_eml, field, default=None),
                        ),
                    )
            else:
                output_dict = parsed_eml

            output_dict["imap_id"] = id_list[i]
            output_dict["attachment"] = []
            output_dict["attachment_uids"] = []

            # Add message-id as top returned field
            output_dict["message_id"] = parsed_eml["header"]["header"][
                "message-id"
            ][0]

            if upload_email_shuffle:
                email_up = [{"filename": "email.msg", "data": raw_message}]
                email_id = self.set_files(email_up)
                output_dict["email_uid"] = email_id[0]

            if upload_attachments_shuffle:
                try:
                    atts_up = [
                        {
                            "filename": x["filename"],
                            "data": base64.b64decode(x["raw"]),
                        }
                        for x in attachments
                    ]
                    atts_ids = self.set_files(atts_up)
                    output_dict["attachment_uids"] = atts_ids
                    # Legacy (misspelled) key kept for existing consumers.
                    output_dict["attachments_uids"] = atts_ids
                except Exception as e:
                    self.logger.info(f"Major issue with EML attachment - are there attachments: {e}")

            emails.append(output_dict)
    except Exception as err:
        return {
            "success": False,
            "reason": "Error during email processing: {}".format(err),
        }

    try:
        to_return = {
            "success": True,
            "messages": json.loads(json.dumps(emails, default=default)),
        }
        self.logger.info(f"Emails: {to_return}")
        return to_return
    except Exception:
        return {
            "success": True,
            "messages": json.dumps(emails, default=default),
        }
async def get_emails(
    self,
    username,
    password,
    server,
    build,
    account,
    verifyssl,
    foldername,
    amount,
    unread,
    fields,
    include_raw_body,
    include_attachment_data,
):
    """Fetch the newest mails of a folder and return them as a JSON string.

    After authenticating (``self.authenticate``) and resolving the folder
    (``self.parse_folder``), up to ``amount`` mails are read newest first,
    filtered on read state. Each mail's MIME content is parsed with
    eml_parser; when ``fields`` (comma-separated glom paths) is given only
    those paths are kept.

    Returns:
        JSON string with the list of mails on success. On failure the error
        value from authentication/folder lookup, a dict (bad ``amount``) or
        an error string (processing failure) is returned instead.
    """
    def path_to_dict(path, value=None):
        # "a.b.c" -> {"a": {"b": {"c": value}}}
        nested = value
        for key in reversed(path.split(".")):
            nested = {key: nested}
        return nested

    def merge(d1, d2):
        # Recursively fold d2 into d1; non-dict values in d2 win.
        for key, incoming in d2.items():
            current = d1.get(key)
            if key in d1 and isinstance(current, dict) and isinstance(incoming, dict):
                merge(current, incoming)
            else:
                d1[key] = incoming

    def as_bool(text):
        return text.lower().strip() == "true"

    # Authenticate
    auth = await self.authenticate(username, password, server, build,
                                   account, verifyssl)
    if auth["error"]:
        return auth["error"]
    account = auth["account"]

    # Parse email folder
    folder = await self.parse_folder(account, foldername)
    if folder["error"]:
        return folder["error"]
    folder = folder["folder"]

    if type(amount) is str:
        try:
            amount = int(amount)
        except ValueError:
            return {
                "account": None,
                "error": "Amount needs to be a number, not %s" % amount,
            }

    # Get input from gui
    unread = as_bool(unread)
    include_raw_body = as_bool(include_raw_body)
    include_attachment_data = as_bool(include_attachment_data)

    # Convert <amount> of mails in json
    emails = []
    ep = eml_parser.EmlParser(
        include_attachment_data=include_attachment_data,
        include_raw_body=include_raw_body,
    )

    try:
        newest = folder.filter(is_read=not unread).order_by(
            "-datetime_received")[:amount]
        for email in newest:
            parsed_eml = ep.decode_email_bytes(email.mime_content)

            if fields and fields.strip() != "":
                output_dict = {}
                for field in fields.split(","):
                    field = field.strip()
                    picked = glom(parsed_eml, field, default=None)
                    merge(output_dict, path_to_dict(field, picked))
            else:
                output_dict = parsed_eml

            emails.append(output_dict)
    except Exception as err:
        return "Error during email processing: {}".format(err)

    return json.dumps(emails, default=default)
import datetime
import json
import eml_parser


def json_serial(obj):
    """Fallback serializer for ``json.dumps``.

    Datetimes become ISO-8601 strings; every other object yields ``None``
    (and is therefore emitted as JSON ``null``).
    """
    if not isinstance(obj, datetime.datetime):
        return None
    return obj.isoformat()


# Parse one sample mail (raw bodies included) and print it as JSON.
with open('conf_emails/1.eml', 'rb') as fhdl:
    raw_email = fhdl.read()

ep = eml_parser.EmlParser(include_raw_body=True)
parsed_eml = ep.decode_email_bytes(raw_email)

print(json.dumps(parsed_eml, default=json_serial))
def parseMail(mail):
    """Decode raw e-mail bytes with eml_parser and return the parsed result.

    The parser is created with ``include_attachment_data`` and
    ``parse_attachments`` both enabled.
    """
    settings = {
        'include_attachment_data': True,
        'parse_attachments': True,
    }
    return eml_parser.EmlParser(**settings).decode_email_bytes(mail)
return flair_sentiment flair_sentiment = flair_set_up() database = "proofpointDB.db" conn = create_connection(database) with open('directory_path.config', 'rb') as config: directory = config.read().decode("utf-8") for filename in os.listdir(directory): if filename.endswith(".eml"): print(filename, end='') with open(filename, 'rb') as fhdl: raw_email = fhdl.read() ep = eml_parser.EmlParser(include_raw_body=True, include_attachment_data=False, ) parsed_eml = ep.decode_email_bytes(raw_email) #checks if there is an attachment in the email by checking #if there is an attachment key in the parsed eml file if ('attachment' in parsed_eml.keys()): attachment = True else: attachment = False #checks if there is a url specified in the body of the email, does not #include an image or something with a link attached to it try: url_list = parsed_eml['body'][1]['uri'] url = True except KeyError: url = False