def createRow(email_id, mail, attach, msg_body, body_type, categories):
    """Assemble the document dict describing one parsed email message.

    Header values are read from ``mail`` through ``get_all``.  ``email_id``,
    ``attach``, ``msg_body`` and ``categories`` are stored unchanged.
    ``body_as_html`` is True only when ``body_type == 'html'``.

    Returns the dict itself (it is not serialized here).
    """

    def _scrub(raw, newline_sub):
        # The rule list is rebuilt for every value; only the newline
        # replacement differs between the threading headers.
        return clean_string(
            raw,
            [(r'\n', newline_sub), EXPR_OPTS['fix_utf8'], EXPR_OPTS['fix_tab']])

    ip_headers = mail.get_all('x-originating-ip', [])
    hidden_bcc = mail.get_all('x-libpst-forensic-bcc', [])

    # Threading headers: newlines are dropped for ids, but become a space
    # inside "references".
    message_ids = [_scrub(raw, '') for raw in mail.get_all('message-id', [])]
    reply_ids = [_scrub(raw, '') for raw in mail.get_all('in-reply-to', [])]
    reference_ids = [_scrub(raw, ' ') for raw in mail.get_all('references', [])]

    date_headers = mail.get_all('date', None)

    # Only the first Subject header is used; no header means an empty string.
    subject_headers = mail.get_all('subject', [])
    first_subject = head(subject_headers) if subject_headers else ''
    subject_text = convert_encoded(first_subject)

    # Each addrs() call is unpacked into (addresses, header lines).
    from_addrs, from_line = addrs(mail.get_all('from', []))
    to_addrs, to_line = addrs(mail.get_all('to', []))
    delivered_addrs, delivered_line = addrs(mail.get_all('Delivered-To', []))

    # "To" can be something like "Undisclosed-Recipient"; when it produced
    # no addresses, fall back to the delivery address.
    if not to_addrs and delivered_addrs:
        to_addrs = delivered_addrs

    cc_addrs, cc_line = addrs(mail.get_all('cc', []))
    bcc_addrs, bcc_line = addrs(mail.get_all('bcc', []))

    if date_headers:
        sent_at = dateToUTCstr(head(date_headers))
    else:
        sent_at = None

    return {
        "id": email_id,
        "datetime": sent_at,
        "originating_ips": ip_headers,
        "categories": categories,
        "senders": from_addrs,
        "senders_line": from_line,
        "tos": to_addrs,
        "tos_line": to_line,
        "delivered_to": delivered_addrs,
        "delivered_to_line": delivered_line,
        "ccs": cc_addrs,
        "ccs_line": cc_line,
        "bccs": bcc_addrs,
        "bccs_line": bcc_line,
        "forensic-bcc": hidden_bcc,
        "attachments": attach,
        "messageid": message_ids,
        "inreplyto": reply_ids,
        "references": reference_ids,
        "subject": subject_text,
        "body": msg_body,
        "body_as_html": body_type == 'html',
    }
def createRow(email_id, mail, attach, msg_body, body_type, categories):
    """Build the per-message document from a parsed email.

    Reads the headers of ``mail`` with ``get_all`` and returns a dict with
    the ids, addresses, subject and body of the message.  ``attach``,
    ``msg_body`` and ``categories`` are passed through untouched, and
    ``body_as_html`` records whether ``body_type`` equals ``'html'``.
    """

    def _cleaned_header(header_name, newline_sub):
        # Fetch every occurrence of the header and clean each value with
        # the newline / utf8 / tab rules.
        cleaned = []
        for raw in mail.get_all(header_name, []):
            rules = [
                (r'\n', newline_sub),
                EXPR_OPTS['fix_utf8'],
                EXPR_OPTS['fix_tab'],
            ]
            cleaned.append(clean_string(raw, rules))
        return cleaned

    source_ips = mail.get_all('x-originating-ip', [])
    forensic = mail.get_all('x-libpst-forensic-bcc', [])

    # Newlines vanish in message-id / in-reply-to and turn into a space in
    # references.
    msg_ids = _cleaned_header('message-id', '')
    in_reply = _cleaned_header('in-reply-to', '')
    refs = _cleaned_header('references', ' ')

    dates = mail.get_all('date', None)

    subjects = mail.get_all('subject', [])
    # First Subject header only; fall back to '' when there is none.
    subject_value = convert_encoded(head(subjects) if subjects else '')

    # addrs() results are unpacked as (addresses, header lines).
    sender_pair = addrs(mail.get_all('from', []))
    senders, senders_line = sender_pair
    to_pair = addrs(mail.get_all('to', []))
    tos, tos_line = to_pair
    delivered_pair = addrs(mail.get_all('Delivered-To', []))
    delivered_to, delivered_to_line = delivered_pair

    # When "To" produced no addresses (e.g. "Undisclosed-Recipient"), use
    # the Delivered-To addresses instead.
    if not tos and delivered_to:
        tos = delivered_to

    cc_pair = addrs(mail.get_all('cc', []))
    ccs, ccs_line = cc_pair
    bcc_pair = addrs(mail.get_all('bcc', []))
    bccs, bccs_line = bcc_pair

    is_html = body_type == 'html'

    document = {
        "id": email_id,
        "datetime": dateToUTCstr(head(dates)) if dates else None,
        "originating_ips": source_ips,
        "categories": categories,
        "senders": senders,
        "senders_line": senders_line,
        "tos": tos,
        "tos_line": tos_line,
        "delivered_to": delivered_to,
        "delivered_to_line": delivered_to_line,
        "ccs": ccs,
        "ccs_line": ccs_line,
        "bccs": bccs,
        "bccs_line": bccs_line,
        "forensic-bcc": forensic,
        "attachments": attach,
        "messageid": msg_ids,
        "inreplyto": in_reply,
        "references": refs,
        "subject": subject_value,
        "body": msg_body,
        "body_as_html": is_html,
    }
    return document
def createRow(email_id, mail, attach, msg_body, categories):
    """Serialize one parsed email message to a JSON string.

    Headers are read from ``mail`` with ``get_all``.  The first Subject
    header and ``msg_body`` are passed through ``quopri.decodestring`` and
    then cleaned; ``attach`` and ``categories`` are stored unchanged.

    Returns ``json.dumps`` of the assembled document.
    """

    def _extract_addresses(header_lines):
        """Return (extracted addresses, normalized header lines)."""
        normalized = []
        for line in header_lines:
            # Lower-case the header; tabs and newlines become ';'.
            normalized.append(clean_string(
                line.lower(),
                [EXPR_OPTS['fix_utf8'], (r'\t', ';'), (r'\n', ';')]))

        # Prefer the address part; fall back to the display name when only
        # that contains an '@'.  Entries with no '@' at all are dropped.
        found = [
            addr if '@' in addr else name
            for name, addr in getaddresses(normalized)
            if '@' in addr or '@' in name
        ]
        stripped = [clean_string(item.lower(), [(r'\'', '')]) for item in found]
        return (stripped, normalized)

    def _scrub_id(raw, newline_sub):
        return clean_string(
            raw,
            [(r'\n', newline_sub), EXPR_OPTS['fix_utf8'], EXPR_OPTS['fix_tab']])

    def _decode_text(raw):
        # Quoted-printable decode first, then the utf8 / tab / cr clean-up.
        return clean_string(
            quopri.decodestring(raw),
            [EXPR_OPTS['fix_utf8'], EXPR_OPTS['fix_tab'], EXPR_OPTS['fix_cr']])

    ip_headers = mail.get_all('x-originating-ip', [])
    hidden_bcc = mail.get_all('x-libpst-forensic-bcc', [])
    message_ids = [_scrub_id(raw, '') for raw in mail.get_all('message-id', [])]
    reply_ids = [_scrub_id(raw, '') for raw in mail.get_all('in-reply-to', [])]
    reference_ids = [
        _scrub_id(raw, ' ') for raw in mail.get_all('references', [])
    ]
    date_headers = mail.get_all('date', None)
    subject_headers = mail.get_all('subject', [])
    # Open questions kept from the original notes: importance? ip?

    # NOTE: 'mailer-daemon' senders are kept as-is; an earlier substitution
    # for them is disabled.
    from_addrs, from_line = _extract_addresses(mail.get_all('from', []))
    to_addrs, to_line = _extract_addresses(mail.get_all('to', []))
    cc_addrs, cc_line = _extract_addresses(mail.get_all('cc', []))
    bcc_addrs, bcc_line = _extract_addresses(mail.get_all('bcc', []))

    # Only the first Subject header is used; '' when there is none.
    first_subject = head(subject_headers) if subject_headers else ''
    subject_text = _decode_text(first_subject)
    body_text = _decode_text(msg_body)

    if date_headers:
        sent_at = dateToUTCstr(head(date_headers))
    else:
        sent_at = None

    document = {
        "id": email_id,
        "datetime": sent_at,
        "originating_ips": ip_headers,
        "categories": categories,
        "senders": from_addrs,
        "senders_line": from_line,
        "tos": to_addrs,
        "tos_line": to_line,
        "ccs": cc_addrs,
        "ccs_line": cc_line,
        "bccs": bcc_addrs,
        "bccs_line": bcc_line,
        "forensic-bcc": hidden_bcc,
        "attachments": attach,
        "messageid": message_ids,
        "inreplyto": reply_ids,
        "references": reference_ids,
        "subject": subject_text,
        "body": body_text,
    }
    return json.dumps(document)
def createRow(email_id, mail, attach, msg_body, categories):
    """Turn a parsed email into its JSON document string.

    Every header is fetched from ``mail`` via ``get_all``.  The subject
    (first Subject header, or '' if absent) and ``msg_body`` are
    quoted-printable decoded and cleaned.  ``attach`` and ``categories``
    are copied into the document as given.

    Returns the document encoded with ``json.dumps``.
    """

    def _parse_address_headers(raw_headers):
        """Return (extracted addresses, normalized header lines)."""
        # Lower-case each header and turn tabs / newlines into ';'.
        header_lines = [
            clean_string(
                raw.lower(),
                [EXPR_OPTS['fix_utf8'], (r'\t', ';'), (r'\n', ';')])
            for raw in raw_headers
        ]

        picked = []
        for display_name, address in getaddresses(header_lines):
            if '@' in address:
                picked.append(address)
                continue
            # Some entries carry the address in the display-name slot.
            if '@' in display_name:
                picked.append(display_name)

        emails = []
        for entry in picked:
            emails.append(clean_string(entry.lower(), [(r'\'', '')]))
        return (emails, header_lines)

    def _id_values(header_name, newline_sub):
        # Clean every occurrence of a threading header.
        values = []
        for raw in mail.get_all(header_name, []):
            values.append(clean_string(
                raw,
                [(r'\n', newline_sub),
                 EXPR_OPTS['fix_utf8'],
                 EXPR_OPTS['fix_tab']]))
        return values

    source_ips = mail.get_all('x-originating-ip', [])
    forensic = mail.get_all('x-libpst-forensic-bcc', [])
    # Newlines are removed from ids and replaced by a space in references.
    msg_ids = _id_values('message-id', '')
    in_reply = _id_values('in-reply-to', '')
    refs = _id_values('references', ' ')
    dates = mail.get_all('date', None)
    subjects = mail.get_all('subject', [])
    # Still undecided in the original notes: importance? ip?

    # NOTE: a disabled variant replaced 'mailer-daemon' senders; they are
    # kept unchanged here.
    senders, senders_line = _parse_address_headers(mail.get_all('from', []))
    tos, tos_line = _parse_address_headers(mail.get_all('to', []))
    ccs, ccs_line = _parse_address_headers(mail.get_all('cc', []))
    bccs, bccs_line = _parse_address_headers(mail.get_all('bcc', []))

    decoded_subject = quopri.decodestring(head(subjects) if subjects else '')
    subject_value = clean_string(
        decoded_subject,
        [EXPR_OPTS['fix_utf8'], EXPR_OPTS['fix_tab'], EXPR_OPTS['fix_cr']])

    decoded_body = quopri.decodestring(msg_body)
    body_value = clean_string(
        decoded_body,
        [EXPR_OPTS['fix_utf8'], EXPR_OPTS['fix_tab'], EXPR_OPTS['fix_cr']])

    # Keys are inserted in the same order as the serialized document.
    record = {"id": email_id}
    record["datetime"] = dateToUTCstr(head(dates)) if dates else None
    record["originating_ips"] = source_ips
    record["categories"] = categories
    record["senders"] = senders
    record["senders_line"] = senders_line
    record["tos"] = tos
    record["tos_line"] = tos_line
    record["ccs"] = ccs
    record["ccs_line"] = ccs_line
    record["bccs"] = bccs
    record["bccs_line"] = bccs_line
    record["forensic-bcc"] = forensic
    record["attachments"] = attach
    record["messageid"] = msg_ids
    record["inreplyto"] = in_reply
    record["references"] = refs
    record["subject"] = subject_value
    record["body"] = body_value
    return json.dumps(record)