def test_parser_methods(self):
    """Check that file parsing and string re-parsing agree on attachments.

    A freshly built parser must expose no message. After parsing
    ``mail_test_3`` from disk exactly one attachment is expected, and
    feeding the serialized message through ``parse_from_string`` on a
    second parser must yield exactly one attachment as well.
    """
    file_parser = mailparser.MailParser()
    self.assertIsNone(file_parser.message)

    file_parser.parse_from_file(mail_test_3)
    file_result = file_parser.parsed_mail_obj
    self.assertEqual(len(file_result["attachments"]), 1)

    string_parser = mailparser.MailParser()
    string_parser.parse_from_string(file_parser.message_as_string)
    # Inspect the second parser's own output: re-checking the first
    # parser's result would leave parse_from_string unverified.
    string_result = string_parser.parsed_mail_obj
    self.assertEqual(len(string_result["attachments"]), 1)
def test_valid_date_mail(self):
    """Check that parsing ``mail_test_1`` reports ``mail_without_date``.

    The anomaly name must be present in ``parser.anomalies`` once the
    sample has been parsed.
    """
    parser = mailparser.MailParser()
    # Plain statement: no trailing comma, so the call is not wrapped in a
    # throwaway tuple together with the assertion.
    parser.parse_from_file(mail_test_1)
    self.assertIn("mail_without_date", parser.anomalies)
def test_defects_bug(self):
    """Check defect reporting for the sample ``mail_malformed_2``.

    The parser must flag defects, list exactly one defect in exactly one
    category (``StartBoundaryNotFoundDefect``), include a ``defects`` key
    in the parsed object, still serialize to a text JSON value, and
    report no attachments.
    """
    malformed = mailparser.MailParser()
    malformed.parse_from_file(mail_malformed_2)

    self.assertEqual(True, malformed.has_defects)

    defect_count = len(malformed.defects)
    self.assertEqual(1, defect_count)

    category_count = len(malformed.defects_category)
    self.assertEqual(1, category_count)

    self.assertIn("defects", malformed.parsed_mail_obj)
    self.assertIn(
        "StartBoundaryNotFoundDefect",
        malformed.defects_category,
    )

    as_json = malformed.parsed_mail_json
    self.assertIsInstance(as_json, unicode)

    attachment_count = len(malformed.attachments_list)
    self.assertEqual(0, attachment_count)
def test_types(self):
    """Check the Python types exposed by a parser loaded with ``mail_test_2``.

    The sample must parse without defects, the parsed object must be a
    dict carrying neither a ``defects`` nor an ``anomalies`` key, and each
    public accessor must return the type asserted below.
    """
    parser = mailparser.MailParser()
    parser.parse_from_file(mail_test_2)
    self.assertEqual(False, parser.has_defects)

    parsed = parser.parsed_mail_obj
    self.assertIsInstance(parsed, dict)
    self.assertNotIn("defects", parsed)
    self.assertNotIn("anomalies", parsed)

    trust = "smtp.customers.net"
    self.assertIsInstance(parser.get_server_ipaddress(trust), unicode)

    # Text-valued accessors.
    self.assertIsInstance(parser.parsed_mail_json, unicode)
    self.assertIsInstance(parser.headers, unicode)
    self.assertIsInstance(parser.body, unicode)
    self.assertIsInstance(parser.from_, unicode)
    self.assertIsInstance(parser.to_, unicode)
    self.assertIsInstance(parser.subject, unicode)
    self.assertIsInstance(parser.message_id, unicode)

    # date_mail is checked once; a second identical check adds nothing.
    self.assertIsInstance(parser.date_mail, datetime.datetime)

    # List-valued accessors.
    self.assertIsInstance(parser.attachments_list, list)
    self.assertIsInstance(parser.defects, list)
    self.assertIsInstance(parser.anomalies, list)
def test_parse_from_file_msg(self):
    """Parse an Outlook sample via the module helper and the parser method.

    Sample mail from VirusTotal, md5 b89bf096c9e3717f2d218b3307c69bd0.
    The emails used for these unit tests were found at random on
    VirusTotal and were therefore already publicly available, so using
    them cannot be considered a privacy violation.
    """
    # Module-level helper first.
    from_helper = mailparser.parse_from_file_msg(mail_outlook_1)
    mail = from_helper.parsed_mail_obj

    self.assertIn("attachments", mail)
    attachment_count = len(mail["attachments"])
    self.assertEqual(attachment_count, 5)

    self.assertIn("from", mail)
    self.assertEqual(mail["from"], "<*****@*****.**>")
    self.assertIn("subject", mail)

    # Then the instance method, whose return value must expose the same body.
    from_method = mailparser.MailParser().parse_from_file_msg(mail_outlook_1)
    self.assertEqual(mail["body"], from_method.body)
def test_parsing_know_values(self):
    """Check that ``mail_test_2`` parses to its known values.

    Covers the sender IP resolved for the trusted server string, the
    message id, recipient, sender, subject, attachment count and the mail
    date, and requires that the sample parses without defects.
    """
    parser = mailparser.MailParser()
    parser.parse_from_file(mail_test_2)
    self.assertEqual(False, parser.has_defects)

    trust = "smtp.customers.net"
    self.assertEqual("217.76.210.112", parser.get_server_ipaddress(trust))

    self.assertEqual("<*****@*****.**>", parser.message_id)
    self.assertEqual("mporcile@server_mail.it", parser.to_)
    self.assertEqual("<*****@*****.**>", parser.from_)
    self.assertEqual(
        "Bollettino Meteorologico del 29/11/2015",
        parser.subject,
    )
    self.assertEqual(3, len(parser.attachments_list))

    # Raw date recorded for the sample: "Sun, 29 Nov 2015 09:45:18 +0100".
    # date_mail is compared against that instant expressed in UTC.
    expected_utc = datetime.datetime(2015, 11, 29, 8, 45, 18, 0).isoformat()
    self.assertEqual(expected_utc, parser.date_mail.isoformat())
def __init__(self, filepath: Union[Path, str, None] = None, pseudofile: Union[BytesIO, None] = None, attach_original_email: bool = True, **kwargs):
    """Build the email object from a file on disk or an in-memory buffer.

    Args:
        filepath: Path of the email to load. A path ending in ``.msg`` is
            handed to the Outlook converter; any other path is read as
            raw bytes. Takes precedence over ``pseudofile``.
        pseudofile: ``BytesIO`` holding the email, used when ``filepath``
            is not given. When python-magic is available and the buffer
            is detected as ``application/CDFV2``, it is converted from
            the Outlook format first.
        attach_original_email: When true, the (possibly converted) email
            is added as an ``eml`` attribute.
        **kwargs: Forwarded to the parent constructor.

    Raises:
        MISPMailObjectOutlookException: If ``mailparser`` is unavailable.
        InvalidMISPObject: If neither a path nor a ``BytesIO`` is given.
    """
    super().__init__("email", **kwargs)
    if not mailparser:
        raise MISPMailObjectOutlookException('mail-parser is required to use this module, you can install it by running pip3 install pymisp[email]')

    converted = False
    if filepath:
        if str(filepath).endswith(".msg"):
            pseudofile = self.__convert_outlook_msg_format(str(filepath))
            converted = True
        else:
            with open(filepath, "rb") as f:
                pseudofile = BytesIO(f.read())
    elif pseudofile and isinstance(pseudofile, BytesIO):
        if magic:
            # python-magic is installed, so the MS Outlook format can be
            # autodetected. Rewind first so the sniff always sees the
            # leading bytes, then rewind again for the later consumers.
            pseudofile.seek(0)
            mime = magic.from_buffer(pseudofile.read(2048), mime=True)
            pseudofile.seek(0)
            if mime == "application/CDFV2":
                # The converter is called with a path, so spill the
                # buffer to a temporary file for the conversion.
                temph, temp = tempfile.mkstemp(prefix="outlook_")
                try:
                    with os.fdopen(temph, "wb") as fdfile:
                        fdfile.write(pseudofile.getvalue())
                    pseudofile = self.__convert_outlook_msg_format(temp)
                finally:
                    # Remove the temporary file even if conversion fails.
                    os.unlink(temp)
                converted = True
    else:
        raise InvalidMISPObject("File buffer (BytesIO) or a path is required.")

    if attach_original_email:
        self.add_attribute("eml", value="Full email.eml", data=pseudofile, comment="Converted from MSG format" if converted else None)

    message = self.attempt_decoding(pseudofile)
    self.__parser = mailparser.MailParser(message)
    self.__generate_attributes()
def test_add_content_type(self):
    """Check the attachment metadata produced for ``mail_test_3``.

    The sample must parse without defects and yield a single attachment
    whose ``mail_content_type`` and ``payload`` are text values and whose
    ``content_transfer_encoding`` is ``quoted-printable``.
    """
    parser = mailparser.MailParser()
    parser.parse_from_file(mail_test_3)
    self.assertEqual(False, parser.has_defects)

    attachments = parser.parsed_mail_obj["attachments"]
    self.assertEqual(len(attachments), 1)

    # Only index into the list after its length has been asserted.
    first = attachments[0]
    self.assertIsInstance(first["mail_content_type"], unicode)
    self.assertIsInstance(first["payload"], unicode)
    self.assertEqual(first["content_transfer_encoding"], "quoted-printable")
def test_valid_mail(self):
    """Check that parsing a non-mail string raises ``InvalidMail``."""
    parser = mailparser.MailParser()
    # Only the parse call is expected to raise. Building the parser outside
    # the context keeps a constructor failure from satisfying the assertion.
    with self.assertRaises(mailparser.InvalidMail):
        parser.parse_from_string("fake mail")