Ejemplo n.º 1
0
    def test_store_samples_unicode_error(self):
        """Store the attachments of two fixture mails and check the result.

        Each mail is parsed, its attachments are hashed and handed to
        ``store_samples``.  The test then expects one specific file under
        ``/tmp/<current UTC date>`` and removes that dated directory again.
        """
        from datetime import datetime
        import shutil
        from src.modules.attachments import store_samples

        # Complete parameters
        conf = {"enabled": True, "base_path": "/tmp"}

        # First fixture mail
        parsed = mailparser.parse_from_file(mail_test_9)
        samples = MailAttachments.withhashes(parsed.attachments)
        samples(intelligence=False)
        store_samples(conf, samples)

        now = six.text_type(datetime.utcnow().date())
        day_dir = os.path.join("/tmp", now)
        stored = os.path.join(day_dir, "43573896890da36e092039cf0b3a92f8")
        self.assertTrue(os.path.exists(stored))
        shutil.rmtree(day_dir)

        # Second fixture mail, same dated directory
        parsed = mailparser.parse_from_file(mail_test_10)
        samples = MailAttachments.withhashes(parsed.attachments)
        samples(intelligence=False)
        store_samples(conf, samples)
        stored = os.path.join(
            day_dir,
            "2ea90c996ca28f751d4841e6c67892b8_REQUEST FOR QUOTE.zip")
        self.assertTrue(os.path.exists(stored))
        shutil.rmtree(day_dir)
Ejemplo n.º 2
0
    def test_defects_anomalies(self):
        """Check defect and anomaly reporting for two fixture mails.

        The malformed mail must report exactly one defect of category
        ``StartBoundaryNotFoundDefect`` and one attachment.  For the second
        mail the expected defects depend on the Python major version; two
        anomalies are expected afterwards.
        """
        broken = mailparser.parse_from_file(mail_malformed_1)

        self.assertEqual(True, broken.has_defects)
        self.assertEqual(1, len(broken.defects))
        self.assertEqual(1, len(broken.defects_category))
        self.assertIn("defects", broken.parsed_mail_obj)
        self.assertIn("StartBoundaryNotFoundDefect", broken.defects_category)
        self.assertIsInstance(broken.parsed_mail_json, six.text_type)

        self.assertEqual(1, len(broken.attachments_list))

        regular = mailparser.parse_from_file(mail_test_1)
        if six.PY2:
            # No defects are expected under Python 2
            self.assertEqual(False, regular.has_defects)
            self.assertNotIn("defects", regular.parsed_mail_obj)
        elif six.PY3:
            # One missing-close-boundary defect is expected under Python 3
            self.assertEqual(True, regular.has_defects)
            self.assertEqual(1, len(regular.defects))
            self.assertEqual(1, len(regular.defects_category))
            self.assertIn("defects", regular.parsed_mail_obj)
            self.assertIn(
                "CloseBoundaryNotFoundDefect", regular.defects_category)

        self.assertEqual(True, regular.has_anomalies)
        self.assertEqual(2, len(regular.anomalies))
        self.assertIn("anomalies", regular.parsed_mail_obj)
        self.assertIn("has_anomalies", regular.parsed_mail_obj)
Ejemplo n.º 3
0
    def setUp(self):
        """Parse both fixture mails and keep their attachments on the case."""
        # Init
        self.attachments = mailparser.parse_from_file(mail).attachments
        self.attachments_thug = mailparser.parse_from_file(
            mail_thug).attachments
Ejemplo n.º 4
0
    def test_issue_received(self):
        """Every ``received`` entry must carry a ``date_utc`` key.

        For the first fixture mail the value must additionally not be
        ``None``; for the second only the presence of the key is checked.
        """
        first = mailparser.parse_from_file(mail_test_8)
        for entry in first.received:
            self.assertIn("date_utc", entry)
            self.assertIsNotNone(entry["date_utc"])

        second = mailparser.parse_from_file(mail_test_10)
        for entry in second.received:
            self.assertIn("date_utc", entry)
Ejemplo n.º 5
0
    def __init__(self, raw="", num=-1):
        """Parse the mail file at ``raw`` and cache its main parts.

        Args:
            raw: Path handed to ``mailparser.parse_from_file``.
            num: Caller-supplied number, stored unchanged on the instance.

        On success ``header``, ``body``, ``text`` (same value as ``body``),
        ``lang`` (result of ``self.find_language()``) and ``mail`` (the
        parser's ``mail_partial``) are set.  Parsing stays best effort: any
        exception is printed rather than raised, and every attribute that
        could not be set is then defaulted to ``None`` so that later access
        does not fail with ``AttributeError``.
        """
        self.raw = raw
        self.num = num
        try:
            mail = mailparser.parse_from_file(raw)
            body = mail.body  # read once; used for both body and text
            self.header = mail.headers
            self.body = body
            self.text = body
            # Runs after body/text are assigned, as before
            self.lang = self.find_language()
            self.mail = mail.mail_partial
        except Exception as e:
            # Deliberate best effort: report the problem and carry on.
            print(e)
            # Do not leave the instance half-initialised.  Attributes that
            # already exist (set above or provided by the class) are kept.
            for name in ("header", "body", "text", "lang", "mail"):
                if not hasattr(self, name):
                    setattr(self, name, None)
def parser_email(filepath, flag=1):
    """Build a three-element CSV row from the mail stored at ``filepath``.

    The row is ``[addresses, body, label]``.  ``addresses`` joins the
    ``From``, ``To`` and ``Reply-To`` header values with single spaces (a
    missing header contributes an empty string); ``label`` is ``1`` when
    ``flag == 1`` and ``0`` otherwise.
    """
    parsed = mailparser.parse_from_file(filepath)
    headers = parsed.headers

    sender = headers.get('From')
    sender = '' if sender is None else sender

    recipient = headers.get('To')
    recipient = '' if recipient is None else recipient

    body = parsed.body

    # This slot carries the Reply-To header value
    reply_to = headers.get('Reply-To')
    reply_to = '' if reply_to is None else reply_to

    label = 1 if flag == 1 else 0
    return [sender + " " + recipient + " " + reply_to, body, label]
Ejemplo n.º 7
0
def get_data_from_email(file):
    """Parse the mail at ``file`` and return its main fields as a dict.

    The returned dict has the keys ``message_id`` and ``name`` (taken from
    the ``Message-ID`` and ``X-From`` headers), ``from`` (second element of
    the first ``from_`` entry), ``to`` (second element of every ``to``
    entry), ``date``, ``subject`` (the string ``'None'`` when the subject is
    empty or missing) and ``body``.
    """
    # parse the email
    parsed = mailparser.parse_from_file(file)

    # collect the details of the email into the object to return
    return {
        'message_id': parsed.headers['Message-ID'],
        'name': parsed.headers['X-From'],
        'from': parsed.from_[0][1],
        'to': [entry[1] for entry in parsed.to],
        'date': parsed.date,
        'subject': parsed.subject if parsed.subject else 'None',
        'body': parsed.body,
    }
Ejemplo n.º 8
0
    def test_write_payload(self):
        """Round-trip a payload through ``utils.write_payload``.

        The text fixture is written out base64-encoded with a ``.txt``
        extension; the written file must have that extension and the same
        SHA-1 as the source, and must be gone after removal.  Afterwards
        every attachment of a fixture mail is written and removed again.
        """
        with open(text_file) as source:
            original_text = source.read()
        sha1_origin = fingerprints(original_text).sha1

        written = utils.write_payload(original_text.encode("base64"), ".txt")
        self.assertEqual(os.path.splitext(written)[-1], ".txt")

        with open(written) as clone:
            cloned_text = clone.read()
        sha1_clone = fingerprints(cloned_text).sha1

        self.assertEqual(sha1_origin, sha1_clone)
        self.assertTrue(os.path.exists(written))

        os.remove(written)
        self.assertFalse(os.path.exists(written))

        parsed = mailparser.parse_from_file(mail_test_11)
        attachments = MailAttachments.withhashes(parsed.attachments)
        attachments.run()

        for item in attachments:
            os.remove(utils.write_payload(
                item["payload"],
                item["extension"],
                item["content_transfer_encoding"],
            ))
Ejemplo n.º 9
0
    def create_from_file(file):
        """Build a ``Liham`` from the mail stored at ``file``.

        Every parsed attachment is copied into a plain dict (missing fields
        become ``None``) and appended to ``liham.attachments``; the remaining
        mail fields are copied onto the new object, which is returned.
        """
        liham = Liham()
        parsed = mailparser.parse_from_file(file)

        for item in parsed.attachments:
            liham.attachments.append({
                'filename': item.get('filename'),
                'payload': item.get('payload'),
                'binary': item.get('binary'),
                'mail_content_type': item.get('mail_content_type'),
                # the source key uses a dash, the copy an underscore
                'content_id': item.get('content-id'),
                'charset': item.get('charset'),
                'content_transfer_encoding': item.get(
                    'content_transfer_encoding'),
            })

        # set values
        liham.date = parsed.date
        liham.sender = parsed._from
        liham.headers = parsed.headers
        liham.message_id = parsed.message_id
        liham.received = parsed.received
        liham.subject = parsed.subject
        liham.mail_text = parsed.text_plain
        liham.mail_html = parsed.text_html
        liham.recipients = parsed.to
        liham.timezone = parsed.timezone
        return liham
Ejemplo n.º 10
0
def parseEmailFromFile(emailFile):
    """Return the body of the mail stored at ``emailFile``.

    The body is returned exactly as the parser exposes it; no clean-up
    (non-ASCII stripping, HTML removal, newline flattening) is applied here.
    """
    return mailparser.parse_from_file(emailFile).body
Ejemplo n.º 11
0
def get_mail_files():
    """Parse the numbered test mails and generate features for each one.

    Files ``1.eml`` to ``45.eml`` are read from a hard-coded folder.  The
    ``ARC-Authentication-Results`` header is preferred, with
    ``Authentication-Results`` as fallback and ``None`` when neither is
    present.  A ``FileNotFoundError`` raised while handling a mail skips
    that mail silently.
    """
    # Other folders this loop has been pointed at:
    #   ../../Mailboxes/PhishingCorpus_Jose_Nazario/public_phishing/phishing3/
    #   ../../Mailboxes/IndividualTestMails/Ham/
    #   ../../Mailboxes/Jonathan_Mailbox/
    for number in range(1, 46):
        try:
            mail = mailparser.parse_from_file(
                '../../Mailboxes/IndividualTestMails/Phish/{}.eml'.format(number))

            found = mail.headers
            if 'ARC-Authentication-Results' in found:
                auth_results = found['ARC-Authentication-Results']
            elif 'Authentication-Results' in found:
                auth_results = found['Authentication-Results']
            else:
                auth_results = None

            item = EmailData(
                mail.subject,
                mail.from_,
                mail.attachments,
                mail.body,
                auth_results,
            )
            item.generate_features()
        except FileNotFoundError:
            continue
Ejemplo n.º 12
0
    def __iter__(self):
        """
        Yield one dict per unique message parsed from ``self.fns``.

        A message is skipped when its message id was already seen, when its
        sender address is empty, or when the combined to/cc address list is
        empty.  The body is passed through ``self._clean_body`` first.
        """

        # Eml exports often include duplicate emails, so remember the ids
        # already released and try to limit the duplicates.
        seen_ids = set()
        for path in self.fns:
            parsed = mailparser.parse_from_file(path)
            message_id = parsed.message_id
            if message_id in seen_ids:
                continue
            seen_ids.add(message_id)

            # Do our best to clean the msg body
            cleaned = self._clean_body(parsed.body)

            # Keep only email addrs, not attempted parsed names
            sender = parsed.from_[0][1]
            # Combine to and cc fields (i.e., no distinction made
            #   between direct messages and group messages)
            recipients = [addr[1] for addr in parsed.to]
            recipients = recipients + [addr[1] for addr in parsed.cc]

            record = {
                "message_id": message_id,
                "from": sender,
                "to": recipients,
                "date": str(parsed.date),
                "subject": parsed.subject,
                "body": cleaned,
                "attachments": [
                    item['filename'] for item in parsed.attachments],
            }
            if sender and recipients:
                yield record
Ejemplo n.º 13
0
    def __init__(self, mypath, nfile):
        """Load the mail file ``nfile`` from directory ``mypath`` and analyse it.

        Args:
            mypath: Directory that contains the file.
            nfile: File name; must end in ``.msg`` or ``.eml`` (any case).

        The parsed mail is kept in ``self.onemail`` and the result of
        ``self.analyse_email()`` in ``self.retval``.

        Raises:
            SystemExit: With status 1, after printing a short message, when
                the file does not exist, has an unsupported extension, or
                the parsed mail has no headers.
        """
        self.mypath = mypath
        self.nfile = nfile
        self.onemail = None
        self.retval = None

        filepath = os.path.join(mypath, nfile)
        if not os.path.isfile(filepath):
            print("File not found!")
            raise SystemExit(1)

        # The dot is escaped so that only a real ".msg"/".eml" suffix is
        # accepted (the unescaped form also matched names like "foo_eml").
        matchObj = re.match(r'.*\.(msg|eml)$', nfile, re.M | re.I)
        if not matchObj:
            print("Unsupported file type!")
            raise SystemExit(1)

        # re.I lets "MSG"/"EML" through, so normalise before comparing;
        # otherwise upper-case extensions were rejected as unsupported.
        if matchObj.group(1).lower() == "msg":
            self.onemail = mailparser.parse_from_file_msg(filepath)
        else:
            self.onemail = mailparser.parse_from_file(filepath)

        if not self.onemail.headers:
            print("Broken file!")
            raise SystemExit(1)
        self.retval = self.analyse_email()
Ejemplo n.º 14
0
def get_all_mails_body_list():
    """Return the body of every entry listed in ``EMAIL_FOLDER_PATH``.

    Each name returned by ``os.listdir`` is parsed as a mail file and its
    body is collected, in listing order.  Paths are built with
    ``os.path.join`` so the folder constant works with or without a
    trailing separator.

    Returns:
        list: One body per listed entry.
    """
    mails_body_list = [
        mailparser.parse_from_file(
            os.path.join(EMAIL_FOLDER_PATH, name)).body
        for name in os.listdir(EMAIL_FOLDER_PATH)
    ]
    print("parsed all mails body!")
    return mails_body_list
 def test_mail_partial(self):
     """``mail_partial`` must differ from ``mail`` and omit extra headers.

     ``message-id`` has to be present in the partial view, while
     ``x-ibm-av-version`` may only appear in the full one.  The JSON form
     of the partial view must be a text string.
     """
     parsed = mailparser.parse_from_file(mail_test_10)
     full, partial = parsed.mail, parsed.mail_partial
     self.assertNotEqual(full, partial)
     self.assertIn("message-id", partial)
     self.assertIn("x-ibm-av-version", full)
     self.assertNotIn("x-ibm-av-version", partial)
     self.assertIsInstance(parsed.mail_partial_json, six.text_type)
 def test_get_mail_keys(self):
     """``get_mail_keys`` must return different sets for its two modes.

     Called with ``False`` as second argument it must still contain
     ``message-id`` but no longer ``x-filterd-recvd-size``, which the
     default call does return.
     """
     message = mailparser.parse_from_file(mail_test_11).message
     every_key = get_mail_keys(message)
     main_keys = get_mail_keys(message, False)
     self.assertNotEqual(every_key, main_keys)
     self.assertIn("message-id", main_keys)
     self.assertIn("x-filterd-recvd-size", every_key)
     self.assertNotIn("x-filterd-recvd-size", main_keys)
def body_extract(file):
    """Clear the terminal and print the body of the mail stored at ``file``.

    The screen is cleared by running the ``clear`` shell command; the body
    is printed below a small banner.
    """
    parsed = mailparser.parse_from_file(file)
    os.system("clear")
    for line in ("\n [*] BODY [*] ", "  ----------", "\n"):
        print(line)
    print(parsed.body)
    print("\n")
Ejemplo n.º 18
0
    def setUp(self):
        """Prepare parsed mails, extracted URLs and keyword sets.

        ``self.faup`` is read here but not assigned; it has to be provided
        elsewhere on the test case.
        """
        thug = mailparser.parse_from_file(mail_thug)
        self.email = thug.mail
        self.attachments = thug.attachments

        self.email_form = mailparser.parse_from_file(mail_form).mail
        self.urls = utils.urls_extractor(
            self.email_form.get("body"), self.faup)

        self.targets = utils.load_keywords_dict({
            "generic": "conf/keywords/targets.example.yml",
            "custom": "conf/keywords/targets_english.example.yml",
        })
        self.subjects = utils.load_keywords_list({
            "generic": "conf/keywords/subjects.example.yml",
            "custom": "conf/keywords/subjects_english.example.yml",
        })
Ejemplo n.º 19
0
    def parse(self):
        """Parse ``self.input_file_path`` and return the adjusted result.

        The parser's JSON output is loaded back into Python objects and then
        handed to ``self.__modify_parsed_res``, whose return value is the
        result of this method.
        """
        # Initial parse of the input file, decoded from its JSON form
        raw_json = mailparser.parse_from_file(self.input_file_path).mail_json
        initial = json.loads(raw_json)

        # Modify the initial parse result to meet usage requirements
        return self.__modify_parsed_res(initial)
Ejemplo n.º 20
0
 def test_write_uuencode_attachment(self):
     """Write a mail's attachments to disk and verify one file's MD5.

     The attachments go into a fresh temporary directory, which is removed
     again before the digest of ``REQUEST FOR QUOTE.zip`` is compared.
     """
     parsed = mailparser.parse_from_file(mail_test_15)
     out_dir = tempfile.mkdtemp()
     parsed.write_attachments(out_dir)
     written = os.path.join(out_dir, 'REQUEST FOR QUOTE.zip')
     with open(written, 'rb') as handle:
         digest = hashlib.md5(handle.read()).hexdigest()
     shutil.rmtree(out_dir)
     self.assertEqual(digest, '4f2cf891e7cfb349fca812091f184ecc')
Ejemplo n.º 21
0
def main():
    """Command-line entry point: parse one mail and print what was asked.

    The mail comes from ``args.file`` (Outlook format when ``args.outlook``
    is set), ``args.string`` or ``args.stdin``; standard input cannot be
    combined with the Outlook format.  Each output flag that is set prints
    the matching part of the parsed mail, in a fixed order.

    Raises:
        MailParserOutlookError: When both ``args.stdin`` and
            ``args.outlook`` are set.
    """
    args = get_args().parse_args()

    if args.file:
        load = (mailparser.parse_from_file_msg if args.outlook
                else mailparser.parse_from_file)
        parser = load(args.file)
    elif args.string:
        parser = mailparser.parse_from_string(args.string)
    elif args.stdin:
        if args.outlook:
            raise MailParserOutlookError(
                "You can't use stdin with msg Outlook")
        parser = mailparser.parse_from_file_obj(sys.stdin)

    # (argument name, parser attribute) pairs, printed in this order
    simple_outputs = (
        ("json", "mail_json"),
        ("body", "body"),
        ("headers", "headers_json"),
        ("to", "to_json"),
        ("delivered_to", "delivered_to_json"),
        ("from_", "from_json"),
        ("subject", "subject"),
        ("receiveds", "received_json"),
    )
    for flag, attribute in simple_outputs:
        if getattr(args, flag):
            safe_print(getattr(parser, attribute))

    if args.defects:
        for category in parser.defects_categories:
            safe_print(category)

    if args.senderip:
        address = parser.get_server_ipaddress(args.senderip)
        safe_print(address if address else "Not Found")

    if args.attachments or args.attachments_hash:
        print_attachments(parser.attachments, args.attachments_hash)

    if args.mail_hash:
        print_mail_fingerprints(parser.body.encode("utf-8"))
Ejemplo n.º 22
0
 def test_receiveds_parsing(self):
     """``receiveds_parsing`` must return a list of dicts for every mail.

     Each dict has to contain the keys ``hop`` and ``delay``.
     """
     for path in self.all_mails:
         raw_receiveds = mailparser.parse_from_file(path).received_raw
         parsed = receiveds_parsing(raw_receiveds)
         self.assertIsInstance(parsed, list)
         for entry in parsed:
             self.assertIsInstance(entry, dict)
             self.assertIn("hop", entry)
             self.assertIn("delay", entry)
Ejemplo n.º 23
0
 def test_fingerprints_body(self):
     """Compare the four digests of a mail body with known values."""
     parsed = mailparser.parse_from_file(mail_test_1)
     body_bytes = parsed.body.encode("utf-8")
     md5, sha1, sha256, sha512 = fingerprints(body_bytes)

     # (computed digest, expected digest), checked in this order
     checks = (
         (md5, "1bbdb7dcf511113bbc0c1b214aeac392"),
         (sha1, "ce9e62b50fa4e2168278880b14460b905b24eb4b"),
         (sha256, ("1e9b96e3f1bc74702f9703391e8ba0715b849"
                   "7127a7ff857013ab33385898574")),
         (sha512, ("ad858f7b5ec5549e55650fd13df7683e403489"
                   "77522995851fb6b625ac54744cf3a4bf652784"
                   "dba971ef99afeec4e6caf2fdd10be72eabb730"
                   "c312ffbe1c4de3")),
     )
     for computed, wanted in checks:
         self.assertEqual(computed, wanted)
Ejemplo n.º 24
0
def main():
    """Command-line entry point: parse one mail and print what was asked.

    The mail comes from ``args.file`` (Outlook format when ``args.outlook``
    is set) or from ``args.string``.  Each output flag that is set prints
    the matching part of the parsed mail, in a fixed order.
    """
    args = get_args().parse_args()

    if args.file:
        load = (mailparser.parse_from_file_msg if args.outlook
                else mailparser.parse_from_file)
        parser = load(args.file)
    elif args.string:
        parser = mailparser.parse_from_string(args.string)

    if args.json:
        # Re-serialise so the output is indented and keeps non-ASCII text
        decoded = json.loads(parser.parsed_mail_json)
        safe_print(json.dumps(decoded, ensure_ascii=False, indent=4))

    # (argument name, parser attribute) pairs, printed in this order
    simple_outputs = (
        ("body", "body"),
        ("headers", "headers"),
        ("to", "to_"),
        ("from_", "from_"),
        ("subject", "subject"),
        ("receiveds", "receiveds"),
    )
    for flag, attribute in simple_outputs:
        if getattr(args, flag):
            safe_print(getattr(parser, attribute))

    if args.defects:
        for category in parser.defects_category:
            safe_print(category)

    if args.anomalies:
        for anomaly in parser.anomalies:
            safe_print(anomaly)

    if args.senderip:
        address = parser.get_server_ipaddress(args.senderip)
        safe_print(address if address else "Not Found")

    if args.attachments or args.attachments_hash:
        print_attachments(parser.attachments_list, args.attachments_hash)

    if args.mail_hash:
        print_mail_fingerprints(parser.body.encode("utf-8"))
Ejemplo n.º 25
0
    def test_text2urls_whitelisted_nonetype_error(self):
        """URL extraction and whitelisting must cope with this mail's body.

        No item yielded by the extracted URLs may be ``None``, and the
        whitelisted extraction has to run without raising.
        """
        body = mailparser.parse_from_file(mail_test_7).body

        for item in utils.urls_extractor(body, self.faup):
            self.assertIsNotNone(item)

        whitelist = utils.load_whitelist(
            {"generic": {"path": "conf/whitelists/generic.example.yml"}})

        utils.text2urls_whitelisted(body, whitelist, self.faup)
Ejemplo n.º 26
0
    def test_defects_bug(self):
        """A malformed mail must report one defect and no attachments.

        The single defect has to be of category
        ``StartBoundaryNotFoundDefect`` and show up in the parsed mail dict.
        """
        broken = mailparser.parse_from_file(mail_malformed_2)

        self.assertEqual(True, broken.has_defects)
        self.assertEqual(1, len(broken.defects))
        self.assertEqual(1, len(broken.defects_categories))
        self.assertIn("defects", broken.mail)
        self.assertIn(
            "StartBoundaryNotFoundDefect", broken.defects_categories)
        self.assertIsInstance(broken.parsed_mail_json, six.text_type)

        self.assertEqual(0, len(broken.attachments))
Ejemplo n.º 27
0
    def test_types(self):
        """Check the Python types of the parsed mail's public attributes.

        The fixture mail is expected to have no defects, two ``to`` entries
        (each a two-element tuple) and text, list, dict or datetime values
        for the remaining attributes as asserted below.
        """
        mail = mailparser.parse_from_file(mail_test_2)
        trust = "smtp.customers.net"

        self.assertEqual(False, mail.has_defects)

        result = mail.mail
        self.assertIsInstance(result, dict)
        self.assertNotIn("defects", result)
        self.assertIn("has_defects", result)

        result = mail.get_server_ipaddress(trust)
        self.assertIsInstance(result, six.text_type)

        result = mail.mail_json
        self.assertIsInstance(result, six.text_type)

        result = mail.headers_json
        self.assertIsInstance(result, six.text_type)

        result = mail.headers
        self.assertIsInstance(result, dict)

        result = mail.body
        self.assertIsInstance(result, six.text_type)

        result = mail.date
        self.assertIsInstance(result, datetime.datetime)

        result = mail.from_
        self.assertIsInstance(result, list)

        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12+.
        result = mail.to
        self.assertIsInstance(result, list)
        self.assertEqual(len(result), 2)
        self.assertIsInstance(result[0], tuple)
        self.assertEqual(len(result[0]), 2)

        result = mail.subject
        self.assertIsInstance(result, six.text_type)

        result = mail.message_id
        self.assertIsInstance(result, six.text_type)

        result = mail.attachments
        self.assertIsInstance(result, list)

        result = mail.date
        self.assertIsInstance(result, datetime.datetime)

        result = mail.defects
        self.assertIsInstance(result, list)
Ejemplo n.º 28
0
    def test_get_to_domains(self):
        """``get_to_domains`` and the ``to_domains`` attribute must agree.

        Both have to return a list containing ``test.it``, and the JSON
        form of the attribute must be a text string.
        """
        m = mailparser.parse_from_file(mail_test_6)

        domains_1 = get_to_domains(m.to, m.reply_to)
        self.assertIsInstance(domains_1, list)
        self.assertIn("test.it", domains_1)

        domains_2 = m.to_domains
        self.assertIsInstance(domains_2, list)
        self.assertIn("test.it", domains_2)
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12+.
        self.assertEqual(domains_1, domains_2)

        self.assertIsInstance(m.to_domains_json, six.text_type)
Ejemplo n.º 29
0
    def test_msgconvert(self):
        """
        Convert an Outlook message and parse the converted file.

        Sample mail from VirusTotal: md5 b89bf096c9e3717f2d218b3307c69bd0

        The mails used by the unit tests were found at random on VirusTotal
        and were already publicly available, so using them cannot be
        considered a privacy violation.
        """

        converted, _ = msgconvert(mail_outlook_1)
        self.assertTrue(os.path.exists(converted))
        sender = mailparser.parse_from_file(converted).from_[0][1]
        self.assertEqual(sender, "*****@*****.**")
Ejemplo n.º 30
0
    def test_receiveds(self):
        """Check the ``Received`` data exposed for the ``mail_test_1`` fixture.

        ``received`` must be a list of four dicts, ``received_raw`` a list
        of text strings and ``received_json`` a text string.
        """
        mail = mailparser.parse_from_file(mail_test_1)
        self.assertEqual(len(mail.received), 4)

        self.assertIsInstance(mail.received, list)
        for i in mail.received:
            self.assertIsInstance(i, dict)

        self.assertIsInstance(mail.received_raw, list)
        for i in mail.received_raw:
            self.assertIsInstance(i, six.text_type)

        self.assertIsInstance(mail.received_json, six.text_type)