def email_parser(email_file):
    """Extract a message body from the e-mail file at ``email_file``.

    The file is opened in binary mode and parsed with ``BytesParser`` using
    ``policy.default``.

    Single-part messages:
        * ``text/plain`` -- the part returned by ``get_body`` is returned
          as-is (a message object, not a string).
        * ``text/html``  -- the decoded HTML is passed through ``html_parse``
          and that result is returned.
        * any other text subtype prints a notice; nothing is extracted.

    Multipart messages are walked part by part (with diagnostic prints):
        * for every ``multipart/alternative`` or ``multipart/related``
          container that has an HTML body, the body becomes the
          ``html_parse`` result of that HTML;
        * the first ``text/plain`` part ends the walk and its decoded
          payload (``bytes``) becomes the body.

    Args:
        email_file: Path of the e-mail file to read.

    Returns:
        The extracted body as described above, or ``None`` when no usable
        body was found.
    """
    with open(email_file, 'rb') as fp:
        msg = BytesParser(policy=policy.default).parse(fp)

    # Stays None when nothing below matches, so the final return can never
    # hit an unbound local (e.g. a single-part non-text message).
    body = None

    if not msg.is_multipart():
        if msg.get_content_maintype() != "text":
            return body
        subtype = msg.get_content_subtype()
        if subtype == "plain":
            body = msg.get_body(preferencelist=('plain',))
        elif subtype == "html":
            body = _html_part_to_text(msg.get_body(preferencelist=('html',)))
        else:
            print("Don't know if html or text {}".format(subtype))
        return body

    print("Email is multipart")
    for i, part in enumerate(msg.walk(), start=1):
        print("part " + str(i))
        print(str(part.get('Content-Disposition')))
        content_type = part.get_content_type()
        print(content_type)
        print(part.get_content_subtype())
        if content_type in ('multipart/alternative', 'multipart/related'):
            html_part = part.get_body(preferencelist=('html',))
            print("----Body from get_body()-------")
            print(html_part)
            # A container without an HTML body must not overwrite a body
            # that an earlier part already produced.
            if html_part is not None:
                print("----Parsed text through beautiful soup-------")
                body = _html_part_to_text(html_part)
                print(body)
        if content_type == 'text/plain':
            # Plain text takes precedence: stop at the first such part.
            body = part.get_payload(decode=True)
            print(body)
            break
    return body


def _html_part_to_text(part):
    """Return ``html_parse`` applied to the decoded content of ``part``.

    ``get_content()`` undoes the content-transfer-encoding and charset, so
    the HTML handed to ``html_parse`` is the real markup rather than the
    serialized (possibly base64 / quoted-printable) form of the part.
    Returns ``None`` when ``part`` is ``None``.
    """
    if part is None:
        return None
    return html_parse(part.get_content())
Exemple #2
0
def display_eml(eml_filepath):
    """Summarise the e-mail file at ``eml_filepath`` as a dictionary.

    The file is opened in binary mode and parsed with ``BytesParser`` using
    ``policy.default``.

    Args:
        eml_filepath: Path of the ``.eml`` file to read.

    Returns:
        A dict with these keys:

        * ``"Odesílatel"``  -- the ``From`` header (or ``None``)
        * ``"Příjemce"``    -- the ``To`` header (or ``None``)
        * ``"Datum"``       -- the ``Date`` header (or ``None``)
        * ``"Předmět"``     -- the ``Subject`` header (or ``None``)
        * ``"Text zprávy"`` -- content of the text/plain body, or ``None``
          when the message has no such body
        * ``"Přílohy"``     -- list of ``(params, part)`` tuples, one per
          part whose Content-Disposition is ``attachment``; ``params`` is a
          dict of that header's parameters (e.g. ``filename``)
    """
    with open(eml_filepath, 'rb') as eml_file:
        msg = BytesParser(policy=policy.default).parse(eml_file)

    # get_body() returns None for messages without a text/plain body
    # (e.g. HTML-only mail), so guard before reading its content.
    plain_body = msg.get_body(preferencelist=('plain',))
    message_text = plain_body.get_content() if plain_body is not None else None

    found = []
    for part in msg.walk():
        if part.get_content_disposition() != 'attachment':
            continue
        # Use the header's parsed parameters instead of splitting on ';'
        # and '=' by hand: values containing '=' or ';' and segments
        # without '=' no longer break the parsing.
        params = getattr(part['content-disposition'], 'params', {})
        found.append((dict(params), part))

    return {
        "Odesílatel": msg.get('From'),
        "Příjemce": msg.get('To'),
        "Datum": msg.get('Date'),
        "Předmět": msg.get('Subject'),
        "Text zprávy": message_text,
        "Přílohy": found,
    }
Exemple #3
0
]
# Result record: every key listed in `keys` starts out as an empty string.
template = {key: '' for key in keys}

# Header items are read from the parsed message like dictionary entries.
template['from'] = eml['from']
template['subject'] = eml['subject']

# The From header also exposes its parsed addresses. Only read the display
# name when the header exists and holds at least one address; otherwise the
# template keeps its empty-string default instead of crashing.
from_addresses = getattr(eml['from'], 'addresses', ())
if from_addresses:
    template['from_display_name'] = from_addresses[0].display_name

# Store the text/plain and text/html payloads, looking either at the direct
# sub-parts of a multipart message or at the message itself.
ctype = eml.get_content_maintype()
if ctype == 'multipart':
    for part in eml.get_payload():
        if part.get_content_maintype() != 'text':
            continue
        part_subtype = part.get_content_subtype()
        if part_subtype == 'plain':
            template['content_text'] = part.get_payload()
        elif part_subtype == 'html':
            template['content_html'] = part.get_payload()
elif ctype == 'text':
    if eml.get_content_subtype() == 'plain':
        template['content_text'] = eml.get_payload()
    # get_content_subtype must be *called*: comparing the bound method
    # itself to 'html' is always False and the HTML was never stored.
    elif eml.get_content_subtype() == 'html':
        template['content_html'] = eml.get_payload()
else:
    print('nope...')