def _html_part_to_text(part):
    """Run ``html_parse`` over the decoded content of an HTML message part."""
    # get_body() returns None when no suitable part exists; treat that as an
    # empty HTML document so html_parse() always receives a string.
    html = part.get_content() if part is not None else ""
    return html_parse(html)


def email_parser(email_file):
    """Extract the body of the e-mail stored at *email_file*.

    The file is parsed with ``BytesParser`` using ``policy.default``.

    Single-part messages:
      * ``text/plain`` -> the message part returned by ``get_body()``
        (a message object, not a string).
      * ``text/html``  -> the result of ``html_parse()`` on the decoded HTML.
      * other text subtypes are reported on stdout; nothing is extracted.

    Multipart messages are walked in order.  Every ``multipart/alternative``
    or ``multipart/related`` container has its HTML body run through
    ``html_parse()``; the first ``text/plain`` part found replaces that result
    with its decoded payload (bytes) and stops the walk.  Progress is printed
    to stdout for each part.

    Args:
        email_file: Path of the raw message file; opened in binary mode.

    Returns:
        The extracted body as described above, or ``None`` when no supported
        part was found.
    """
    with open(email_file, 'rb') as fp:
        msg = BytesParser(policy=policy.default).parse(fp)

    # Start from None so an unsupported message yields None instead of
    # raising UnboundLocalError at the final return.
    body = None

    if not msg.is_multipart():
        if msg.get_content_maintype() != "text":
            return body
        subtype = msg.get_content_subtype()
        if subtype == "plain":
            body = msg.get_body(preferencelist=('plain',))
        elif subtype == "html":
            body = _html_part_to_text(msg.get_body(preferencelist=('html',)))
        else:
            print("Don't know if html or text {}".format(subtype))
        return body

    print("Email is multipart")
    for i, part in enumerate(msg.walk(), start=1):
        print("part " + str(i))
        cdispo = str(part.get('Content-Disposition'))
        print(cdispo)
        content_type = part.get_content_type()
        print(content_type)
        print(part.get_content_subtype())

        if content_type in ('multipart/alternative', 'multipart/related'):
            html_part = part.get_body(preferencelist=('html',))
            print("----Body from get_body()-------")
            print(html_part)
            print("----Parsed text through beautiful soup-------")
            # Decode via get_content() rather than slicing header lines off
            # the serialised part: the header count is not fixed and the
            # serialised payload may still be transfer-encoded.
            body = _html_part_to_text(html_part)
            print(body)

        if content_type == 'text/plain':
            # A plain-text part takes precedence over any HTML seen so far.
            body = part.get_payload(decode=True)
            print(body)
            break

    return body
# Fill the template from the parsed message and write it out as JSON.
# NOTE(review): `eml` (the parsed message) and `template` (a dict-like
# object) are created earlier in the file, outside this section -- confirm.
#
# Individual address parts are available on the header objects, e.g.
# eml['to'].addresses[0].username or eml['from'].addresses[0].display_name.
template['from_display_name'] = eml['from'].addresses[0].display_name

ctype = eml.get_content_maintype()
if ctype == 'multipart':
    # Only the direct children are inspected; nested multiparts are skipped.
    for part in eml.get_payload():
        subctype = part.get_content_maintype()
        if subctype == 'text':
            if part.get_content_subtype() == 'plain':
                template['content_text'] = part.get_payload()
            elif part.get_content_subtype() == 'html':
                template['content_html'] = part.get_payload()
elif ctype == 'text':
    if eml.get_content_subtype() == 'plain':
        template['content_text'] = eml.get_payload()
    # Call the method: comparing the bound method itself to 'html' is always
    # False, so single-part HTML messages were silently ignored.
    elif eml.get_content_subtype() == 'html':
        template['content_html'] = eml.get_payload()
else:
    print('nope...')

# The output file holds a JSON list containing this single template.
templates = [template]

output = 'output.et'
with open(output, 'w') as out:
    out.write(json.dumps(templates))