def test_store_samples_unicode_error(self):
    """Store the attachments of two fixture mails and check the files exist.

    Each stored sample is expected under ``/tmp/<today's UTC date>/``; the
    dated directory is removed again after every check.
    """
    from datetime import datetime
    import shutil
    from src.modules.attachments import store_samples

    # Complete parameters
    conf = {"enabled": True, "base_path": "/tmp"}

    # First fixture: the expected file name carries no original-name suffix.
    parsed = mailparser.parse_from_file(mail_test_9)
    samples = MailAttachments.withhashes(parsed.attachments)
    samples(intelligence=False)
    store_samples(conf, samples)

    now = six.text_type(datetime.utcnow().date())
    day_dir = os.path.join("/tmp", now)
    stored = os.path.join(day_dir, "43573896890da36e092039cf0b3a92f8")
    self.assertTrue(os.path.exists(stored))
    shutil.rmtree(day_dir)

    # Second fixture: the expected file name keeps the attachment's name.
    parsed = mailparser.parse_from_file(mail_test_10)
    samples = MailAttachments.withhashes(parsed.attachments)
    samples(intelligence=False)
    store_samples(conf, samples)

    stored = os.path.join(
        day_dir, "2ea90c996ca28f751d4841e6c67892b8_REQUEST FOR QUOTE.zip")
    self.assertTrue(os.path.exists(stored))
    shutil.rmtree(day_dir)
def test_defects_anomalies(self):
    """Check defect and anomaly reporting on a malformed and a regular mail."""
    # Malformed fixture: exactly one defect, a missing start boundary.
    broken = mailparser.parse_from_file(mail_malformed_1)
    self.assertEqual(True, broken.has_defects)
    self.assertEqual(1, len(broken.defects))
    self.assertEqual(1, len(broken.defects_category))
    self.assertIn("defects", broken.parsed_mail_obj)
    self.assertIn("StartBoundaryNotFoundDefect", broken.defects_category)
    self.assertIsInstance(broken.parsed_mail_json, six.text_type)
    self.assertEqual(1, len(broken.attachments_list))

    # Regular fixture: the expected defects differ between Python 2 and 3.
    regular = mailparser.parse_from_file(mail_test_1)
    if six.PY2:
        self.assertEqual(False, regular.has_defects)
        self.assertNotIn("defects", regular.parsed_mail_obj)
    elif six.PY3:
        self.assertEqual(True, regular.has_defects)
        self.assertEqual(1, len(regular.defects))
        self.assertEqual(1, len(regular.defects_category))
        self.assertIn("defects", regular.parsed_mail_obj)
        self.assertIn(
            "CloseBoundaryNotFoundDefect", regular.defects_category)

    # Anomalies are expected on both interpreter versions.
    self.assertEqual(True, regular.has_anomalies)
    self.assertEqual(2, len(regular.anomalies))
    self.assertIn("anomalies", regular.parsed_mail_obj)
    self.assertIn("has_anomalies", regular.parsed_mail_obj)
def setUp(self):
    """Load the attachments of the two fixture mails onto the test case."""
    self.attachments = mailparser.parse_from_file(mail).attachments
    self.attachments_thug = mailparser.parse_from_file(mail_thug).attachments
def test_issue_received(self):
    """Every parsed ``received`` entry must carry a ``date_utc`` key."""
    # For the first fixture the value must also be set.
    for hop in mailparser.parse_from_file(mail_test_8).received:
        self.assertIn("date_utc", hop)
        self.assertIsNotNone(hop["date_utc"])

    # For the second fixture only the presence of the key is checked.
    for hop in mailparser.parse_from_file(mail_test_10).received:
        self.assertIn("date_utc", hop)
def __init__(self, raw="", num=-1):
    """Parse the mail file at *raw* and cache its main parts on the instance.

    Args:
        raw: Path passed to ``mailparser.parse_from_file``; also stored
            unchanged as ``self.raw``.
        num: Caller-supplied number, stored unchanged as ``self.num``.

    Parsing is best-effort: any exception raised while parsing or while
    detecting the language is printed and swallowed. In that case the
    attributes assigned after the failing step are left unset.
    """
    self.raw = raw
    self.num = num

    try:
        mail = mailparser.parse_from_file(raw)
        self.header = mail.headers
        self.body = mail.body
        # The "full text" is the same value as the body.
        self.text = mail.body
        self.lang = self.find_language()
        self.mail = mail.mail_partial
    except Exception as e:
        # Deliberate best-effort: report the problem and carry on.
        print(e)
def parser_email(filepath, flag=1):
    """Turn one mail file into a ``[addresses, body, label]`` row.

    Args:
        filepath: Path of the mail file to parse.
        flag: When equal to 1 the row is labelled 1, otherwise 0.

    Returns:
        A three-item list: the ``From``, ``To`` and ``Reply-To`` header
        values joined by single spaces (missing headers count as empty
        strings), the mail body, and the integer label.
    """
    parsed = mailparser.parse_from_file(filepath)
    headers = parsed.headers

    def header_or_blank(name):
        # A missing header contributes an empty string to the joined field.
        value = headers.get(name)
        return '' if value is None else value

    sender = header_or_blank('From')
    recipient = header_or_blank('To')
    body = parsed.body
    reply_to = header_or_blank('Reply-To')

    label = 1 if flag == 1 else 0
    return [sender + " " + recipient + " " + reply_to, body, label]
def get_data_from_email(file):
    """Parse the mail at *file* and return its main fields as a dict.

    Returns:
        A dict with the keys ``message_id``, ``name``, ``from``, ``to``,
        ``date``, ``subject`` and ``body``. ``to`` is a list holding the
        second item of every parsed recipient entry; an empty subject is
        replaced by the string ``'None'``.

    Raises:
        KeyError: If the ``Message-ID`` or ``X-From`` header is missing.
        IndexError: If the mail has no parsed sender.
    """
    # Parse the email.
    mail = mailparser.parse_from_file(file)

    # Collect the details of the email.
    return {
        'message_id': mail.headers['Message-ID'],
        'name': mail.headers['X-From'],
        'from': mail.from_[0][1],
        'to': [recipient[1] for recipient in mail.to],
        'date': mail.date,
        'subject': mail.subject or 'None',
        'body': mail.body,
    }
def test_write_payload(self):
    """Round-trip a text payload through ``utils.write_payload``.

    The file written from the encoded payload must have the requested
    extension and the same SHA-1 as the source text. Afterwards every
    attachment of a fixture mail is written out and removed again.
    """
    with open(text_file) as source:
        original_text = source.read()
    sha1_origin = fingerprints(original_text).sha1

    # NOTE(review): the "base64" str codec looks Python 2 only - confirm
    # which interpreters this test is expected to run on.
    written_path = utils.write_payload(original_text.encode("base64"), ".txt")
    self.assertEqual(os.path.splitext(written_path)[-1], ".txt")

    with open(written_path) as clone:
        round_trip = clone.read()
    sha1_clone = fingerprints(round_trip).sha1

    self.assertEqual(sha1_origin, sha1_clone)
    self.assertTrue(os.path.exists(written_path))
    os.remove(written_path)
    self.assertFalse(os.path.exists(written_path))

    # Write each attachment of the fixture mail, then clean it up.
    parsed = mailparser.parse_from_file(mail_test_11)
    attachments = MailAttachments.withhashes(parsed.attachments)
    attachments.run()
    for item in attachments:
        os.remove(utils.write_payload(
            item["payload"],
            item["extension"],
            item["content_transfer_encoding"],
        ))
def create_from_file(file):
    """Build a ``Liham`` from the mail stored at *file*.

    Every attachment is copied into a plain dict (missing keys become
    ``None``) and appended to ``liham.attachments``; the remaining mail
    fields are copied onto the matching ``Liham`` attributes.
    """
    liham = Liham()
    mail = mailparser.parse_from_file(file)

    # (key in the copied dict, key in the parsed attachment)
    attachment_keys = (
        ('filename', 'filename'),
        ('payload', 'payload'),
        ('binary', 'binary'),
        ('mail_content_type', 'mail_content_type'),
        ('content_id', 'content-id'),
        ('charset', 'charset'),
        ('content_transfer_encoding', 'content_transfer_encoding'),
    )
    for attachment in mail.attachments:
        liham.attachments.append(
            {target: attachment.get(source)
             for target, source in attachment_keys})

    # set values
    liham.date = mail.date
    liham.sender = mail._from
    liham.headers = mail.headers
    liham.message_id = mail.message_id
    liham.received = mail.received
    liham.subject = mail.subject
    liham.mail_text = mail.text_plain
    liham.mail_html = mail.text_html
    liham.recipients = mail.to
    liham.timezone = mail.timezone
    return liham
def parseEmailFromFile(emailFile):
    """Return the body of the mail stored at *emailFile*.

    The body is returned as parsed; no clean-up (non-ASCII stripping,
    HTML removal, newline flattening) is applied here.
    """
    return mailparser.parse_from_file(emailFile).body
def get_mail_files():
    """Parse the numbered test mails 1-45 and generate features for each.

    Files that do not exist are skipped. Nothing is returned; the
    ``EmailData`` item built for each mail is discarded after
    ``generate_features()`` has run.
    """
    # Other corpora that have been used with this loop:
    #   '../../Mailboxes/PhishingCorpus_Jose_Nazario/public_phishing/phishing3/{}.eml'
    #   '../../Mailboxes/IndividualTestMails/Ham/{}.eml'
    #   '../../Mailboxes/Jonathan_Mailbox/{}.eml'
    path_template = '../../Mailboxes/IndividualTestMails/Phish/{}.eml'

    for index in range(1, 46):
        try:
            mail = mailparser.parse_from_file(path_template.format(index))

            # Prefer the ARC results header, fall back to the plain one.
            mail_headers = mail.headers
            if 'ARC-Authentication-Results' in mail_headers:
                auth_results = mail_headers['ARC-Authentication-Results']
            elif 'Authentication-Results' in mail_headers:
                auth_results = mail_headers['Authentication-Results']
            else:
                auth_results = None

            item = EmailData(
                mail.subject,
                mail.from_,
                mail.attachments,
                mail.body,
                auth_results,
            )
            item.generate_features()
        except FileNotFoundError:
            pass
def __iter__(self):
    """Yield one dict per unique, usable mail found in ``self.fns``.

    ``self.fns`` is presumably the list of .eml paths collected under
    ``self.base_dir`` - confirm against the code that fills it.

    Mails whose message id was already seen are skipped, as are mails
    without a sender address or without any recipient.
    """
    # Eml exports often include duplicate emails, so remember the
    # message ids that were already released.
    seen_ids = set()

    for path in self.fns:
        msg = mailparser.parse_from_file(path)
        msg_id = msg.message_id
        if msg_id in seen_ids:
            continue
        seen_ids.add(msg_id)

        # Do our best to clean the msg body
        cleaned = self._clean_body(msg.body)

        record = {
            "message_id": msg.message_id,
            # Keep only the address, not the parsed display name.
            "from": msg.from_[0][1],
            # "to" and "cc" are merged: direct and group recipients are
            # not distinguished.
            "to": [entry[1] for entry in msg.to]
                  + [entry[1] for entry in msg.cc],
            "date": str(msg.date),
            "subject": msg.subject,
            "body": cleaned,
            "attachments": [item['filename'] for item in msg.attachments],
        }

        if record['from'] and record['to']:
            yield record
def __init__(self, mypath, nfile):
    """Load the mail ``nfile`` from directory ``mypath`` and analyse it.

    Args:
        mypath: Directory containing the mail file.
        nfile: File name; must end in ``.msg`` or ``.eml`` (any letter
            case).

    The parsed mail is stored in ``self.onemail`` and the result of
    ``self.analyse_email()`` in ``self.retval``. A missing file, an
    unsupported extension, or a mail without headers prints a message
    and terminates the process with exit status 1.
    """
    self.mypath = mypath
    self.nfile = nfile
    self.onemail = None
    self.retval = None

    filepath = os.path.join(mypath, nfile)
    if not os.path.isfile(filepath):
        print("File not found!")
        exit(1)

    # Compare the extension case-insensitively and require the literal
    # dot, so "X.EML" is accepted and "notanemlfile" style names (no dot
    # before the suffix) are rejected.
    lowered = nfile.lower()
    if lowered.endswith(".msg"):
        self.onemail = mailparser.parse_from_file_msg(filepath)
    elif lowered.endswith(".eml"):
        self.onemail = mailparser.parse_from_file(filepath)
    else:
        print("Unsupported file type!")
        exit(1)

    # A parsed mail without any header is treated as a broken file.
    if not self.onemail.headers:
        print("Broken file!")
        exit(1)

    self.retval = self.analyse_email()
def get_all_mails_body_list():
    """Parse every entry of ``EMAIL_FOLDER_PATH`` and return the mail bodies.

    Returns:
        A list with the body of each parsed mail, in ``os.listdir`` order.
    """
    mails_body_list = []
    for name in os.listdir(EMAIL_FOLDER_PATH):
        # os.path.join gives the right path whether or not the folder
        # constant ends with a path separator.
        cur_email = mailparser.parse_from_file(
            os.path.join(EMAIL_FOLDER_PATH, name))
        mails_body_list.append(cur_email.body)
    print("parsed all mails body!")
    return mails_body_list
def test_mail_partial(self):
    """``mail_partial`` must differ from ``mail`` and omit some headers."""
    parsed = mailparser.parse_from_file(mail_test_10)

    self.assertNotEqual(parsed.mail, parsed.mail_partial)
    self.assertIn("message-id", parsed.mail_partial)

    # This header appears in the full view only.
    self.assertIn("x-ibm-av-version", parsed.mail)
    self.assertNotIn("x-ibm-av-version", parsed.mail_partial)

    self.assertIsInstance(parsed.mail_partial_json, six.text_type)
def test_get_mail_keys(self):
    """``get_mail_keys`` returns different key sets for its two modes."""
    message = mailparser.parse_from_file(mail_test_11).message

    every_key = get_mail_keys(message)
    main_keys = get_mail_keys(message, False)

    self.assertNotEqual(every_key, main_keys)
    self.assertIn("message-id", main_keys)
    # This header is only reported by the default call.
    self.assertIn("x-filterd-recvd-size", every_key)
    self.assertNotIn("x-filterd-recvd-size", main_keys)
def body_extract(file):
    """Clear the terminal and print the body of the mail stored at *file*."""
    mail = mailparser.parse_from_file(file)
    os.system("clear")

    # Banner lines printed above the body.
    for line in ("\n [*] BODY [*] ", " ----------", "\n"):
        print(line)
    print(mail.body)
    print("\n")
def setUp(self):
    """Prepare parsed mails, extracted URLs and keyword sets for the tests.

    Reads ``self.faup``, which is not assigned here and must already be
    available on the test case.
    """
    thug = mailparser.parse_from_file(mail_thug)
    self.email = thug.mail
    self.attachments = thug.attachments

    self.email_form = mailparser.parse_from_file(mail_form).mail
    self.urls = utils.urls_extractor(self.email_form.get("body"), self.faup)

    self.targets = utils.load_keywords_dict({
        "generic": "conf/keywords/targets.example.yml",
        "custom": "conf/keywords/targets_english.example.yml",
    })
    self.subjects = utils.load_keywords_list({
        "generic": "conf/keywords/subjects.example.yml",
        "custom": "conf/keywords/subjects_english.example.yml",
    })
def parse(self):
    """Parse ``self.input_file_path`` and return the adjusted result.

    The mail is first turned into a dict via its JSON form, then passed
    through ``__modify_parsed_res`` to meet the usage requirements.
    """
    mail = mailparser.parse_from_file(self.input_file_path)
    initial = json.loads(mail.mail_json)
    return self.__modify_parsed_res(initial)
def test_write_uuencode_attachment(self):
    """Write the fixture's attachments to disk and check one file's MD5."""
    mail = mailparser.parse_from_file(mail_test_15)
    temp_dir = tempfile.mkdtemp()
    md5 = hashlib.md5()
    try:
        mail.write_attachments(temp_dir)
        with open(os.path.join(temp_dir, 'REQUEST FOR QUOTE.zip'), 'rb') as f:
            md5.update(f.read())
    finally:
        # Remove the scratch directory even when writing or reading fails.
        shutil.rmtree(temp_dir)
    self.assertEqual(md5.hexdigest(), '4f2cf891e7cfb349fca812091f184ecc')
def main():
    """Command-line entry point: parse one mail and print the requested parts.

    The mail comes from a file (optionally an Outlook msg), a string or
    stdin, depending on the parsed arguments.

    Raises:
        MailParserOutlookError: If stdin is combined with the Outlook option.
    """
    args = get_args().parse_args()

    if args.file:
        if args.outlook:
            load = mailparser.parse_from_file_msg
        else:
            load = mailparser.parse_from_file
        parser = load(args.file)
    elif args.string:
        parser = mailparser.parse_from_string(args.string)
    elif args.stdin:
        if args.outlook:
            raise MailParserOutlookError(
                "You can't use stdin with msg Outlook")
        parser = mailparser.parse_from_file_obj(sys.stdin)

    # (argument name, parser attribute) pairs printed as-is, in this order.
    # The attribute is only read when its option was given.
    plain_outputs = (
        ("json", "mail_json"),
        ("body", "body"),
        ("headers", "headers_json"),
        ("to", "to_json"),
        ("delivered_to", "delivered_to_json"),
        ("from_", "from_json"),
        ("subject", "subject"),
        ("receiveds", "received_json"),
    )
    for option, attribute in plain_outputs:
        if getattr(args, option):
            safe_print(getattr(parser, attribute))

    if args.defects:
        for category in parser.defects_categories:
            safe_print(category)

    if args.senderip:
        address = parser.get_server_ipaddress(args.senderip)
        safe_print(address if address else "Not Found")

    if args.attachments or args.attachments_hash:
        print_attachments(parser.attachments, args.attachments_hash)

    if args.mail_hash:
        print_mail_fingerprints(parser.body.encode("utf-8"))
def test_receiveds_parsing(self):
    """``receiveds_parsing`` yields a list of dicts with hop and delay."""
    for path in self.all_mails:
        raw_receiveds = mailparser.parse_from_file(path).received_raw
        hops = receiveds_parsing(raw_receiveds)
        self.assertIsInstance(hops, list)

        for hop in hops:
            self.assertIsInstance(hop, dict)
            self.assertIn("hop", hop)
            self.assertIn("delay", hop)
def test_fingerprints_body(self):
    """The digests of the fixture's UTF-8 body match the known values."""
    body = mailparser.parse_from_file(mail_test_1).body
    md5, sha1, sha256, sha512 = fingerprints(body.encode("utf-8"))

    # (computed digest, expected digest), checked in this order.
    checks = (
        (md5, "1bbdb7dcf511113bbc0c1b214aeac392"),
        (sha1, "ce9e62b50fa4e2168278880b14460b905b24eb4b"),
        (sha256, ("1e9b96e3f1bc74702f9703391e8ba0715b849"
                  "7127a7ff857013ab33385898574")),
        (sha512, ("ad858f7b5ec5549e55650fd13df7683e403489"
                  "77522995851fb6b625ac54744cf3a4bf652784"
                  "dba971ef99afeec4e6caf2fdd10be72eabb730"
                  "c312ffbe1c4de3")),
    )
    for computed, expected in checks:
        self.assertEqual(computed, expected)
def main():
    """Command-line entry point: parse one mail and print the requested parts.

    The mail comes from a file (optionally an Outlook msg) or a string,
    depending on the parsed arguments.
    """
    args = get_args().parse_args()

    if args.file:
        if args.outlook:
            load = mailparser.parse_from_file_msg
        else:
            load = mailparser.parse_from_file
        parser = load(args.file)
    elif args.string:
        parser = mailparser.parse_from_string(args.string)

    if args.json:
        # Re-serialise so the output is indented and keeps non-ASCII text.
        document = json.loads(parser.parsed_mail_json)
        safe_print(json.dumps(document, ensure_ascii=False, indent=4))

    # (argument name, parser attribute) pairs printed as-is, in this order.
    # The attribute is only read when its option was given.
    plain_outputs = (
        ("body", "body"),
        ("headers", "headers"),
        ("to", "to_"),
        ("from_", "from_"),
        ("subject", "subject"),
        ("receiveds", "receiveds"),
    )
    for option, attribute in plain_outputs:
        if getattr(args, option):
            safe_print(getattr(parser, attribute))

    if args.defects:
        for category in parser.defects_category:
            safe_print(category)

    if args.anomalies:
        for anomaly in parser.anomalies:
            safe_print(anomaly)

    if args.senderip:
        address = parser.get_server_ipaddress(args.senderip)
        safe_print(address if address else "Not Found")

    if args.attachments or args.attachments_hash:
        print_attachments(parser.attachments_list, args.attachments_hash)

    if args.mail_hash:
        print_mail_fingerprints(parser.body.encode("utf-8"))
def test_text2urls_whitelisted_nonetype_error(self):
    """URL extraction and whitelisting must cope with this fixture's body."""
    body = mailparser.parse_from_file(mail_test_7).body

    # No extracted key may be None.
    for key in utils.urls_extractor(body, self.faup):
        self.assertIsNotNone(key)

    whitelist = utils.load_whitelist(
        {"generic": {"path": "conf/whitelists/generic.example.yml"}})
    # Only has to run without raising; the result is not inspected.
    utils.text2urls_whitelisted(body, whitelist, self.faup)
def test_defects_bug(self):
    """The second malformed fixture reports one defect and no attachment."""
    broken = mailparser.parse_from_file(mail_malformed_2)

    self.assertEqual(True, broken.has_defects)
    self.assertEqual(1, len(broken.defects))
    self.assertEqual(1, len(broken.defects_categories))
    self.assertIn("defects", broken.mail)
    self.assertIn("StartBoundaryNotFoundDefect", broken.defects_categories)
    self.assertIsInstance(broken.parsed_mail_json, six.text_type)

    self.assertEqual(0, len(broken.attachments))
def test_types(self):
    """Check the Python types of the main parsed-mail attributes."""
    mail = mailparser.parse_from_file(mail_test_2)
    trust = "smtp.customers.net"

    self.assertEqual(False, mail.has_defects)

    # A mail without defects reports the flag but no "defects" entry.
    result = mail.mail
    self.assertIsInstance(result, dict)
    self.assertNotIn("defects", result)
    self.assertIn("has_defects", result)

    self.assertIsInstance(mail.get_server_ipaddress(trust), six.text_type)
    self.assertIsInstance(mail.mail_json, six.text_type)
    self.assertIsInstance(mail.headers_json, six.text_type)
    self.assertIsInstance(mail.headers, dict)
    self.assertIsInstance(mail.body, six.text_type)
    self.assertIsInstance(mail.date, datetime.datetime)
    self.assertIsInstance(mail.from_, list)

    # "to" is a list of two 2-tuples for this fixture.
    # assertEqual replaces the deprecated assertEquals alias, which no
    # longer exists on Python 3.12+.
    result = mail.to
    self.assertIsInstance(result, list)
    self.assertEqual(len(result), 2)
    self.assertIsInstance(result[0], tuple)
    self.assertEqual(len(result[0]), 2)

    self.assertIsInstance(mail.subject, six.text_type)
    self.assertIsInstance(mail.message_id, six.text_type)
    self.assertIsInstance(mail.attachments, list)
    self.assertIsInstance(mail.defects, list)
def test_get_to_domains(self):
    """``get_to_domains`` and the ``to_domains`` property must agree."""
    m = mailparser.parse_from_file(mail_test_6)

    domains_1 = get_to_domains(m.to, m.reply_to)
    self.assertIsInstance(domains_1, list)
    self.assertIn("test.it", domains_1)

    domains_2 = m.to_domains
    self.assertIsInstance(domains_2, list)
    self.assertIn("test.it", domains_2)

    # assertEqual replaces the deprecated assertEquals alias, which no
    # longer exists on Python 3.12+.
    self.assertEqual(domains_1, domains_2)
    self.assertIsInstance(m.to_domains_json, six.text_type)
def test_msgconvert(self):
    """Convert an Outlook fixture with ``msgconvert`` and parse the result.

    Tested mail from VirusTotal: md5 b89bf096c9e3717f2d218b3307c69bd0

    The mails used for the unit tests were found at random on VirusTotal
    and were therefore already publicly available, so using them cannot
    be considered a privacy violation.
    """
    converted, _ = msgconvert(mail_outlook_1)
    self.assertTrue(os.path.exists(converted))

    sender = mailparser.parse_from_file(converted).from_[0][1]
    self.assertEqual(sender, "*****@*****.**")
def test_receiveds(self):
    """Check the count and types of the parsed and raw ``received`` data."""
    parsed = mailparser.parse_from_file(mail_test_1)

    # Parsed view: four entries, each a dict.
    self.assertEqual(len(parsed.received), 4)
    self.assertIsInstance(parsed.received, list)
    for hop in parsed.received:
        self.assertIsInstance(hop, dict)

    # Raw view: a list of text values.
    self.assertIsInstance(parsed.received_raw, list)
    for raw_hop in parsed.received_raw:
        self.assertIsInstance(raw_hop, six.text_type)

    self.assertIsInstance(parsed.received_json, six.text_type)