コード例 #1
0
def email_parser(email_file):
    """Extract the body of the e-mail message stored in *email_file*.

    The file is parsed with ``BytesParser`` using ``policy.default``.

    Single-part messages:
        * ``text/plain`` -- the message part chosen by ``get_body`` is
          returned (a message object, not a string).
        * ``text/html``  -- the decoded HTML is passed through ``html_parse``
          and that result is returned.
        * any other text subtype is reported on stdout.

    Multipart messages are walked part by part (with progress printed to
    stdout).  The HTML body of every ``multipart/alternative`` or
    ``multipart/related`` container is run through ``html_parse``; the first
    ``text/plain`` part encountered overrides that, is returned as decoded
    bytes, and ends the walk.

    Args:
        email_file: Path of the raw message file to read.

    Returns:
        The extracted body as described above, or ``None`` when the message
        contains no part this function knows how to handle.
    """
    with open(email_file, 'rb') as fp:
        msg = BytesParser(policy=policy.default).parse(fp)

    # Bound up front so that a message without any usable text part yields
    # None instead of raising UnboundLocalError at the final return.
    body = None

    if not msg.is_multipart():
        if msg.get_content_maintype() == "text":
            subtype = msg.get_content_subtype()
            if subtype == "plain":
                body = msg.get_body(preferencelist=('plain',))
            elif subtype == "html":
                html_part = msg.get_body(preferencelist=('html',))
                if html_part is not None:
                    # get_content() yields the decoded HTML only; slicing
                    # str(part) by a fixed number of lines kept headers and
                    # transfer-encoded payloads in the text.
                    body = html_parse(html_part.get_content())
            else:
                print("Don't know if html or text {}".format(subtype))
        return body

    print("Email is multipart")
    for index, part in enumerate(msg.walk(), start=1):
        print("part " + str(index))
        cdispo = str(part.get('Content-Disposition'))
        print(cdispo)
        content_type = part.get_content_type()
        print(content_type)
        print(part.get_content_subtype())
        if content_type in ('multipart/alternative', 'multipart/related'):
            html_part = part.get_body(preferencelist=('html',))
            print("----Body from get_body()-------")
            print(html_part)
            # A container without an HTML alternative leaves body untouched.
            if html_part is not None:
                print("----Parsed text through beautiful soup-------")
                body = html_parse(html_part.get_content())
                print(body)
        if content_type == 'text/plain':
            # Plain text wins over any HTML seen so far and stops the walk.
            body = part.get_payload(decode=True)
            print(body)
            break
    return body
コード例 #2
0
# The parts of an address header can be accessed individually, e.g.
#   eml['to'].addresses[0].username        -> recipient username
#   eml['from'].addresses[0].display_name  -> sender name
template['from_display_name'] = eml['from'].addresses[0].display_name

# Copy the plain-text and HTML payloads of the message into the template.
ctype = eml.get_content_maintype()
if ctype == 'multipart':
    # Only the direct children of the message are inspected here.
    for part in eml.get_payload():
        subctype = part.get_content_maintype()
        if subctype == 'text':
            if part.get_content_subtype() == 'plain':
                template['content_text'] = part.get_payload()
            elif part.get_content_subtype() == 'html':
                template['content_html'] = part.get_payload()
elif ctype == 'text':
    if eml.get_content_subtype() == 'plain':
        template['content_text'] = eml.get_payload()
    # get_content_subtype must be *called*; comparing the bound method
    # itself to a string is always False and dropped single-part HTML mails.
    elif eml.get_content_subtype() == 'html':
        template['content_html'] = eml.get_payload()
else:
    print('nope...')

templates = []
templates.append(template)

# Serialise the list of templates as JSON into the output file.
output = 'output.et'
with open(output, 'w') as out:
    out.write(json.dumps(templates))