def _html_source(part):
    """Return ``str(part)`` with its first three lines removed.

    NOTE(review): the callers appear to rely on those three lines being the
    headers of the serialized part; that only holds when the part has exactly
    that layout -- confirm before relying on it for arbitrary mail.
    """
    return '\n'.join(str(part).split("\n")[3:])


def email_parser(email_file):
    """Parse an ``.eml`` file and return its body.

    Args:
        email_file: Path of the message file; it is opened in binary mode and
            parsed with ``BytesParser(policy=policy.default)``.

    Returns:
        The body found, whose type depends on the branch taken:

        * single-part ``text/plain``: the message part returned by
          ``get_body``;
        * single-part ``text/html`` or a ``multipart/alternative`` /
          ``multipart/related`` container: whatever ``html_parse`` returns;
        * a ``text/plain`` part inside a multipart message: the decoded
          payload as ``bytes`` (this ends the walk, so it wins over HTML
          found earlier);
        * ``None`` when no branch matched.
    """
    # Stays None when nothing below matches, so the final return can no longer
    # fail with UnboundLocalError.
    body = None

    with open(email_file, 'rb') as fp:
        msg = BytesParser(policy=policy.default).parse(fp)

    if not msg.is_multipart():
        if msg.get_content_maintype() != "text":
            return body
        if msg.get_content_subtype() == "plain":
            # preferencelist is a sequence of subtypes, not a content type.
            body = msg.get_body(preferencelist=('plain',))
        elif msg.get_content_subtype() == "html":
            html_part = msg.get_body(preferencelist=('html',))
            body = html_parse(_html_source(html_part))
        else:
            print("Don't know if html or text {}".format(
                msg.get_content_subtype()))
        return body

    print("Email is multipart")
    for index, part in enumerate(msg.walk(), start=1):
        print("part " + str(index))
        print(str(part.get('Content-Disposition')))
        content_type = part.get_content_type()
        print(content_type)
        print(part.get_content_subtype())

        if content_type in ('multipart/alternative', 'multipart/related'):
            html_part = part.get_body(preferencelist=('html',))
            print("----Body from get_body()-------")
            print(html_part)
            html_source = _html_source(html_part)
            print("----Parsed text through beautiful soup-------")
            body = html_parse(html_source)
            print(body)

        if content_type == 'text/plain':
            # decode=True undoes the Content-Transfer-Encoding and yields bytes.
            body = part.get_payload(decode=True)
            print(body)
            break

    return body
def display_eml(eml_filepath):
    """Summarise an ``.eml`` file as a dictionary of display fields.

    Args:
        eml_filepath: Path of the message file; it is opened in binary mode
            and parsed with ``BytesParser(policy=policy.default)``.

    Returns:
        A dict with these keys (Czech display labels):

        * ``"Odesílatel"`` -- the ``From`` header, or ``None`` if absent;
        * ``"Příjemce"`` -- the ``To`` header, or ``None`` if absent;
        * ``"Datum"`` -- the ``Date`` header, or ``None`` if absent;
        * ``"Předmět"`` -- the ``Subject`` header, or ``None`` if absent;
        * ``"Text zprávy"`` -- content of the text/plain body, or ``None``
          when the message has no such body;
        * ``"Přílohy"`` -- a list of ``(params, part)`` tuples, one per part
          whose Content-Disposition is ``attachment``; ``params`` maps the
          header's parameter names (lower-cased by the header parser) to
          their unquoted values, e.g. ``{'filename': 'report.pdf'}``.
    """
    with open(eml_filepath, 'rb') as eml_file:
        msg = BytesParser(policy=policy.default).parse(eml_file)

    # get_body() returns None for mail without a text/plain body (HTML-only,
    # attachment-only); guard it instead of failing on None.get_content().
    plain_part = msg.get_body(preferencelist=('plain',))
    message_text = plain_part.get_content() if plain_part is not None else None

    found = []
    for part in msg.walk():
        if part.get_content_disposition() != 'attachment':
            continue
        # Use the parameters already parsed by the header object rather than
        # splitting the header text on ';' and '=', which breaks on values
        # that contain either character.
        disposition_params = dict(part['content-disposition'].params)
        found.append((disposition_params, part))

    # A former second walk over the parts only built a dict that was never
    # used or returned (and could fail on an unset local); it was dropped.
    return {
        "Odesílatel": msg.get('From'),
        "Příjemce": msg.get('To'),
        "Datum": msg.get('Date'),
        "Předmět": msg.get('Subject'),
        "Text zprávy": message_text,
        "Přílohy": found,
    }
] template = {key: '' for key in keys} # Now the header items can be accessed as a dictionary: # print('To: {}'.format(eml['to'])) # print('From: {}'.format(eml['from'])) template['from'] = eml['from'] # print('Subject: {}'.format(eml['subject'])) template['subject'] = eml['subject'] # You can also access the parts of the addresses: # print('Recipient username: {}'.format(eml['to'].addresses[0].username)) # print('Sender name: {}'.format(eml['from'].addresses[0].display_name)) template['from_display_name'] = eml['from'].addresses[0].display_name ctype = eml.get_content_maintype() if ctype == 'multipart': for part in eml.get_payload(): subctype = part.get_content_maintype() if subctype == 'text': if part.get_content_subtype() == 'plain': template['content_text'] = part.get_payload() elif part.get_content_subtype() == 'html': template['content_html'] = part.get_payload() elif ctype == 'text': if eml.get_content_subtype() == 'plain': template['content_text'] = eml.get_payload() elif eml.get_content_subtype == 'html': template['content_html'] = eml.get_payload() else: print('nope...')