def createRow(email_id, mail, attach, msg_body, body_type, categories):
    """Assemble the document (a plain dict) describing one email message.

    Args:
        email_id: Stored unchanged under the "id" key.
        mail: Message object; only its ``get_all(header, default)`` method is
            used here. Presumably an ``email.message.Message`` -- confirm
            against the caller.
        attach: Stored unchanged under "attachments".
        msg_body: Stored unchanged under "body".
        body_type: Compared with ``'html'`` to compute "body_as_html".
        categories: Stored unchanged under "categories".

    Returns:
        dict: Header-derived fields combined with the values passed in.
    """

    def cleaned_header_values(header_name, newline_replacement):
        # Run every occurrence of the header through clean_string with the
        # newline rule followed by the shared utf8/tab rules.
        cleaned = []
        for raw_value in mail.get_all(header_name, []):
            cleaned.append(
                clean_string(raw_value, [
                    (r'\n', newline_replacement),
                    EXPR_OPTS['fix_utf8'],
                    EXPR_OPTS['fix_tab'],
                ]))
        return cleaned

    originating_ips = mail.get_all('x-originating-ip', [])
    forensic_bcc = mail.get_all('x-libpst-forensic-bcc', [])

    # Newlines are dropped from the id headers but become a space in
    # References.
    msgid = cleaned_header_values('message-id', '')
    inreplyto = cleaned_header_values('in-reply-to', '')
    references = cleaned_header_values('references', ' ')

    date_values = mail.get_all('date', None)

    # An absent Subject header is handed to convert_encoded as ''.
    subject_values = mail.get_all('subject', [])
    first_subject = head(subject_values) if subject_values else ''
    subject = convert_encoded(first_subject)

    # addrs() yields a pair per header: the first item is the list holding
    # only the address portions, the *_line item keeps the full header text.
    senders, senders_line = addrs(mail.get_all('from', []))
    tos, tos_line = addrs(mail.get_all('to', []))
    delivered_to, delivered_to_line = addrs(mail.get_all('Delivered-To', []))

    # "To" can be a placeholder such as "Undisclosed-Recipient" that yields
    # no address; in that case use the delivery address instead. tos_line
    # still carries the original To header.
    if delivered_to and not tos:
        tos = delivered_to

    ccs, ccs_line = addrs(mail.get_all('cc', []))
    bccs, bccs_line = addrs(mail.get_all('bcc', []))

    # No Date header means no timestamp at all.
    sent_at = dateToUTCstr(head(date_values)) if date_values else None

    return {
        "id": email_id,
        "datetime": sent_at,
        "originating_ips": originating_ips,
        "categories": categories,
        "senders": senders,
        "senders_line": senders_line,
        "tos": tos,
        "tos_line": tos_line,
        "delivered_to": delivered_to,
        "delivered_to_line": delivered_to_line,
        "ccs": ccs,
        "ccs_line": ccs_line,
        "bccs": bccs,
        "bccs_line": bccs_line,
        "forensic-bcc": forensic_bcc,
        "attachments": attach,
        "messageid": msgid,
        "inreplyto": inreplyto,
        "references": references,
        "subject": subject,
        "body": msg_body,
        "body_as_html": body_type == 'html',
    }
def createRow(email_id, mail, attach, msg_body, body_type, categories):
    """Build the per-message document dict from parsed headers and inputs.

    Only ``mail.get_all(header, default)`` is called on ``mail`` (presumably
    an ``email.message.Message`` -- confirm with the caller). ``email_id``,
    ``attach``, ``msg_body`` and ``categories`` are copied into the result
    unchanged; ``body_type`` is only compared against ``'html'``.

    Returns:
        dict: The document, with keys inserted in the order listed below.
    """
    originating_ips = mail.get_all('x-originating-ip', [])
    forensic_bcc = mail.get_all('x-libpst-forensic-bcc', [])

    # Message-ID and In-Reply-To lose their newlines entirely ...
    msgid = []
    for value in mail.get_all('message-id', []):
        msgid.append(clean_string(
            value, [(r'\n', ''), EXPR_OPTS['fix_utf8'], EXPR_OPTS['fix_tab']]))

    inreplyto = []
    for value in mail.get_all('in-reply-to', []):
        inreplyto.append(clean_string(
            value, [(r'\n', ''), EXPR_OPTS['fix_utf8'], EXPR_OPTS['fix_tab']]))

    # ... while References has each newline replaced with a space.
    references = []
    for value in mail.get_all('references', []):
        references.append(clean_string(
            value, [(r'\n', ' '), EXPR_OPTS['fix_utf8'], EXPR_OPTS['fix_tab']]))

    mail_date = mail.get_all('date', None)

    # Missing Subject header -> convert_encoded receives the empty string.
    subject_headers = mail.get_all('subject', [])
    if subject_headers:
        raw_subject = head(subject_headers)
    else:
        raw_subject = ''
    subject = convert_encoded(raw_subject)

    # Each addrs() call returns (addresses, header_lines): the bare address
    # portions first, then the full header text.
    senders, senders_line = addrs(mail.get_all('from', []))
    tos, tos_line = addrs(mail.get_all('to', []))
    delivered_to, delivered_to_line = addrs(mail.get_all('Delivered-To', []))
    if not tos and delivered_to:
        # The To header sometimes holds just "Undisclosed-Recipient"; try the
        # delivery address in that case.
        tos = delivered_to
    ccs, ccs_line = addrs(mail.get_all('cc', []))
    bccs, bccs_line = addrs(mail.get_all('bcc', []))

    # The timestamp is None when the message has no Date header.
    if mail_date:
        utc_datetime = dateToUTCstr(head(mail_date))
    else:
        utc_datetime = None

    doc = {}
    doc["id"] = email_id
    doc["datetime"] = utc_datetime
    doc["originating_ips"] = originating_ips
    doc["categories"] = categories
    doc["senders"] = senders
    doc["senders_line"] = senders_line
    doc["tos"] = tos
    doc["tos_line"] = tos_line
    doc["delivered_to"] = delivered_to
    doc["delivered_to_line"] = delivered_to_line
    doc["ccs"] = ccs
    doc["ccs_line"] = ccs_line
    doc["bccs"] = bccs
    doc["bccs_line"] = bccs_line
    doc["forensic-bcc"] = forensic_bcc
    doc["attachments"] = attach
    doc["messageid"] = msgid
    doc["inreplyto"] = inreplyto
    doc["references"] = references
    doc["subject"] = subject
    doc["body"] = msg_body
    doc["body_as_html"] = (body_type == 'html')
    return doc
# Example no. 3
# 0
def createRow(email_id, mail, attach, msg_body, categories):
    """Serialize one email message to a JSON document string.

    Args:
        email_id: Stored unchanged under the "id" key.
        mail: Message object; only ``get_all(header, default)`` is called on
            it. Presumably an ``email.message.Message`` -- confirm against
            the caller.
        attach: Stored unchanged under "attachments".
        msg_body: Body text; passed through ``quopri.decodestring`` and
            ``clean_string`` before being stored under "body".
        categories: Stored unchanged under "categories".

    Returns:
        str: ``json.dumps`` of the assembled document.
    """

    def extract_addresses(header_values):
        """Return the tuple ``(addresses, cleaned_headers)``.

        ``cleaned_headers`` is every input value lower-cased and cleaned
        (tabs and newlines mapped to ';'); ``addresses`` are the
        '@'-bearing parts that ``getaddresses`` finds in those headers.
        """
        cleaned_headers = []
        for raw in header_values:
            cleaned_headers.append(
                clean_string(
                    raw.lower(),
                    [EXPR_OPTS['fix_utf8'], (r'\t', ';'), (r'\n', ';')]))

        found = []
        for display_name, address in getaddresses(cleaned_headers):
            # Prefer the parsed address; fall back to the display name when
            # that is the part containing the '@'.
            if '@' in address:
                found.append(address)
                continue
            if '@' in display_name:
                found.append(display_name)

        # Final pass applies the single-quote rule to each address.
        stripped = [
            clean_string(entry.lower(), [(r'\'', '')]) for entry in found
        ]
        return (stripped, cleaned_headers)

    def id_header_values(header_name, newline_replacement):
        # Clean every occurrence of an id-style header: the newline rule
        # first, then the shared utf8/tab rules.
        values = []
        for raw in mail.get_all(header_name, []):
            values.append(
                clean_string(raw, [
                    (r'\n', newline_replacement),
                    EXPR_OPTS['fix_utf8'],
                    EXPR_OPTS['fix_tab'],
                ]))
        return values

    def decode_and_clean(text):
        # Quoted-printable decode, then apply the utf8/tab/cr rules.
        return clean_string(
            quopri.decodestring(text),
            [EXPR_OPTS['fix_utf8'], EXPR_OPTS['fix_tab'], EXPR_OPTS['fix_cr']])

    originating_ips = mail.get_all('x-originating-ip', [])
    forensic_bcc = mail.get_all('x-libpst-forensic-bcc', [])
    msgid = id_header_values('message-id', '')
    inreplyto = id_header_values('in-reply-to', '')
    references = id_header_values('references', ' ')
    date_values = mail.get_all('date', None)
    subject_values = mail.get_all('subject', [])
    # TODO: importance? ip? (open questions carried over from the original)

    senders, senders_line = extract_addresses(mail.get_all('from', []))
    # NOTE: a disabled variant replaced 'mailer-daemon' senders with a
    # target address; that substitution is not performed here.
    tos, tos_line = extract_addresses(mail.get_all('to', []))
    ccs, ccs_line = extract_addresses(mail.get_all('cc', []))
    bccs, bccs_line = extract_addresses(mail.get_all('bcc', []))

    # A message without a Subject header is treated as having subject ''.
    first_subject = head(subject_values) if subject_values else ''
    subject = decode_and_clean(first_subject)
    body = decode_and_clean(msg_body)

    # No Date header means the document carries no timestamp.
    sent_at = dateToUTCstr(head(date_values)) if date_values else None

    doc = {
        "id": email_id,
        "datetime": sent_at,
        "originating_ips": originating_ips,
        "categories": categories,
        "senders": senders,
        "senders_line": senders_line,
        "tos": tos,
        "tos_line": tos_line,
        "ccs": ccs,
        "ccs_line": ccs_line,
        "bccs": bccs,
        "bccs_line": bccs_line,
        "forensic-bcc": forensic_bcc,
        "attachments": attach,
        "messageid": msgid,
        "inreplyto": inreplyto,
        "references": references,
        "subject": subject,
        "body": body,
    }
    return json.dumps(doc)
def createRow(email_id, mail, attach, msg_body, categories):
    """Turn one email message into a JSON-encoded document.

    ``mail`` is only queried through ``get_all(header, default)`` (presumably
    an ``email.message.Message`` -- confirm with the caller). ``email_id``,
    ``attach`` and ``categories`` are copied into the document unchanged;
    the subject and ``msg_body`` are quoted-printable decoded and cleaned.

    Returns:
        str: The document serialized with ``json.dumps``.
    """

    def parse_addresses(headers):
        # Returns the tuple (extracted addresses, cleaned header strings).
        # Headers are lower-cased and have tabs/newlines replaced with ';'
        # before being handed to getaddresses.
        lowered = [
            clean_string(
                header.lower(),
                [EXPR_OPTS['fix_utf8'], (r'\t', ';'), (r'\n', ';')])
            for header in headers
        ]
        picked = []
        for name_part, addr_part in getaddresses(lowered):
            # Take whichever part carries an '@', address part first.
            if '@' in addr_part:
                picked.append(addr_part)
            elif '@' in name_part:
                picked.append(name_part)
        result = []
        for item in picked:
            result.append(clean_string(item.lower(), [(r'\'', '')]))
        return (result, lowered)

    originating_ips = mail.get_all('x-originating-ip', [])
    forensic_bcc = mail.get_all('x-libpst-forensic-bcc', [])

    # Newlines are removed from Message-ID / In-Reply-To and replaced with a
    # space in References.
    msgid = []
    for value in mail.get_all('message-id', []):
        msgid.append(clean_string(
            value, [(r'\n', ''), EXPR_OPTS['fix_utf8'], EXPR_OPTS['fix_tab']]))
    inreplyto = []
    for value in mail.get_all('in-reply-to', []):
        inreplyto.append(clean_string(
            value, [(r'\n', ''), EXPR_OPTS['fix_utf8'], EXPR_OPTS['fix_tab']]))
    references = []
    for value in mail.get_all('references', []):
        references.append(clean_string(
            value, [(r'\n', ' '), EXPR_OPTS['fix_utf8'], EXPR_OPTS['fix_tab']]))

    mail_date = mail.get_all('date', None)
    subject_headers = mail.get_all('subject', [])
    # TODO: importance? ip? (open questions carried over from the original)

    senders, senders_line = parse_addresses(mail.get_all('from', []))
    # NOTE: mapping 'mailer-daemon' senders to a target address was left
    # disabled in the original and is not done here.
    tos, tos_line = parse_addresses(mail.get_all('to', []))
    ccs, ccs_line = parse_addresses(mail.get_all('cc', []))
    bccs, bccs_line = parse_addresses(mail.get_all('bcc', []))

    # Missing Subject header -> decode and clean the empty string.
    if subject_headers:
        raw_subject = head(subject_headers)
    else:
        raw_subject = ''
    subject = clean_string(
        quopri.decodestring(raw_subject),
        [EXPR_OPTS['fix_utf8'], EXPR_OPTS['fix_tab'], EXPR_OPTS['fix_cr']])

    body = clean_string(
        quopri.decodestring(msg_body),
        [EXPR_OPTS['fix_utf8'], EXPR_OPTS['fix_tab'], EXPR_OPTS['fix_cr']])

    # The timestamp is None when there is no Date header.
    if mail_date:
        utc_datetime = dateToUTCstr(head(mail_date))
    else:
        utc_datetime = None

    doc = {}
    doc["id"] = email_id
    doc["datetime"] = utc_datetime
    doc["originating_ips"] = originating_ips
    doc["categories"] = categories
    doc["senders"] = senders
    doc["senders_line"] = senders_line
    doc["tos"] = tos
    doc["tos_line"] = tos_line
    doc["ccs"] = ccs
    doc["ccs_line"] = ccs_line
    doc["bccs"] = bccs
    doc["bccs_line"] = bccs_line
    doc["forensic-bcc"] = forensic_bcc
    doc["attachments"] = attach
    doc["messageid"] = msgid
    doc["inreplyto"] = inreplyto
    doc["references"] = references
    doc["subject"] = subject
    doc["body"] = body
    return json.dumps(doc)