Ejemplos de ExtractMsg en Python

Ejemplo n.º 1

0

Mostrar archivo

def parse_msg(filename):
    """Parse an Outlook ``.msg`` file into a flat dictionary of e-mail fields.

    :param filename: path of the ``.msg`` file handed to ``ExtractMsg.Message``.
    :returns: dict with the keys ``id`` (a fresh ``uuid1`` string),
        ``senders``/``senders_line``, ``tos``/``tos_line``,
        ``ccs``/``ccs_line``, ``bccs``/``bccs_line`` (always empty here),
        ``subject``, ``datetime``, ``attachments`` and ``body``.
    """
    msg = ExtractMsg.Message(filename)

    attachments = get_attachments_json(msg)

    guid = str(uuid.uuid1())
    # Address headers may hold several entries separated by ',' or ';'.
    senders, senders_line = addrs(
        re.split('[,;]', str_to_unicode(msg.sender, default=u"NoSender")))
    tos, tos_line = addrs(
        re.split('[,;]', str_to_unicode(msg.to, default=u"NoRcvr")))
    ccs, ccs_line = addrs(re.split('[,;]', str_to_unicode(msg.cc)))
    try:
        date = dateToUTCstr(msg.date)
    except Exception:
        # Best effort: an unparsable date must not abort the whole message,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        print("FAILED to parse msg date.  Setting date to default value for filename {}".format(
            str(filename)))
        date = "2010-01-01T00:00:00"

    return {
        'id': guid,
        'senders_line': senders_line,
        'senders': senders,
        'tos_line': tos_line,
        'tos': tos,
        'ccs_line': ccs_line,
        'ccs': ccs,
        'bccs_line': '',
        'bccs': [],
        'subject': str_to_unicode(msg.subject),
        'datetime': date,
        'attachments': attachments,
        'body': str_to_unicode(msg.body)
        # TODO make sure this is not needed
        # 'body': imap_utf7.decode(msg.body)
    }

Ejemplo n.º 2

0

Mostrar archivo

Archivo: tf_idf_final.py Proyecto: manas2mail/dfg

def doc_list():
    """Return one ``tb(...)`` blob per ``.msg`` file found under the data folder.

    The folder is ``module\\db\\New`` relative to the parent directory of this
    file; every matching message body is wrapped with ``tb``.
    """
    here = path.dirname(__file__)
    root = path.abspath(here + "/../")
    pattern = path.join(root, r'module\db\New\*.msg')
    return [
        tb(ExtractMsg.Message(msg_path).body)
        for msg_path in glob.glob(pattern)
    ]

Ejemplo n.º 3

0

Mostrar archivo

Archivo: msg.py Proyecto: sk4la/plast

    def run(self):
        """
        .. py:function:: run(self)

        Print a short summary of every message found in ``self.feed``.

        For each entry the attachment short filenames are printed first,
        followed by the sender, date, subject and body of the message.

        :param self: current class instance
        :type self: class
        """

        for source in self.feed:
            message = ExtractMsg.Message(source)

            for item in message.attachments:
                print(item.shortFilename)

            sender_line = "Sender: {}".format(message.sender)
            print(sender_line)
            date_line = "Sent On: {}".format(message.date)
            print(date_line)
            subject_line = "Subject: {}".format(message.subject)
            print(subject_line)
            body_line = "Body: {}".format(message.body)
            print(body_line)

Ejemplo n.º 4

0

Mostrar archivo

def removerAcentosECaracteresEspeciais(palavra):
    """Strip accents from ``palavra`` and blank out every non-letter.

    The text is NFKD-normalized so accented characters split into a base
    character plus combining marks; the combining marks are dropped. Every
    remaining character outside ``a-z``/``A-Z`` (digits, punctuation and
    whitespace included) is then replaced by a single space.
    """
    decomposed = unicodedata.normalize('NFKD', palavra)
    sem_acento = u"".join(
        ch for ch in decomposed if not unicodedata.combining(ch))
    return re.sub('[^a-zA-Z]', ' ', sem_acento)


def _read_bodies(pattern):
    """Return the raw body stream of every ``.msg`` file matching ``pattern``."""
    # '__substg1.0_1000' is the stream the original script read as the body.
    return [
        ExtractMsg.Message(message)._getStringStream('__substg1.0_1000')
        for message in glob.glob(pattern)
    ]


# Phishing samples, labelled 1. The column is named at construction time so
# an empty folder yields an empty two-column frame instead of a
# length-mismatch error when the column names are assigned.
array = _read_bodies('phishing/português/*.msg')
msgarray = pd.DataFrame({"Message": array})
msgarray['Phishing'] = 1

# Legitimate ("ham") samples.
array2 = _read_bodies('ham/*.msg')

Ejemplo n.º 5

0

Mostrar archivo

Archivo: looper.py Proyecto: gabll/msg-extractor

import glob
import ExtractMsg

# Save every .msg file in the current directory; one bad file must not stop
# the batch, but Ctrl-C (KeyboardInterrupt) is no longer swallowed.
for filename in glob.iglob('*.msg'):
    try:
        ExtractMsg.Message(filename).simple_save()
    except Exception:
        # Double spaces reproduce the output of the original comma-separated
        # print statement.
        print('Read of  {}  failed!'.format(filename))

print('All done!')

Ejemplo n.º 6

0

Mostrar archivo

Archivo: outlook.py Proyecto: yzx-fish/Inboxscanner

def scan_inbox():
    '''
    Poll an Outlook folder forever and scan every message not seen before.

    The folder is located by walking ``CONFIG["foldertree"]`` below the MAPI
    root. Message bodies, attachments and attachments nested inside ``.msg``
    attachments are matched against the YARA rules returned by
    ``load_yara("rules")`` and handed to ``external_scans``. An HTML report
    named ``Scan_results_matchfound.htm`` or ``Scan_results_nomatches.htm``
    is then attached to the message.

    Messages are identified by the SHA-256 of their body, kept in memory for
    the lifetime of the call. The function never returns: after each pass it
    sleeps ``CONFIG["scan_interval"]`` seconds, then calls ``loadconfig()``
    and ``loadioc()``.
    '''
    global CONFIG
    foldertree = CONFIG["foldertree"]
    outlook = Dispatch("Outlook.Application")
    mapi = outlook.GetNamespace("MAPI")
    cwd = os.getcwd()
    workdir = cwd + "\\workdir"
    # Body hashes of messages already handled; a set gives O(1) membership.
    processed = set()

    class Oli():
        '''Thin wrapper to iterate a 1-based Outlook collection.'''

        def __init__(self, outlook_object):
            self._obj = outlook_object

        def items(self):
            array_size = self._obj.Count
            for item_index in xrange(1, array_size + 1):
                yield (item_index, self._obj[item_index])

        def prop(self):
            return sorted(self._obj._prop_map_get_.keys())

    def loadrules():
        '''Load the YARA rules, report how many were loaded, return them.'''
        loaded = load_yara("rules")
        rulecount = sum(1 for _ in loaded)
        print("Loaded " + str(rulecount) + " YARA rules.")
        return loaded

    # The rules must be bound in this scope: assigning inside loadrules()
    # would only create a local there and leave ``rules`` unset here.
    rules = loadrules()
    loadioc()

    def yara_match(*args, **kwargs):
        '''Run ``rules.match``; on failure log the error and return None.'''
        try:
            return rules.match(*args, **kwargs)
        except Exception:
            # Best effort: a failing scan must not abort the message, and it
            # must not leave a stale result from a previous scan in place.
            traceback.print_exc()
            return None

    #this needs fixing :/
    outlookfolder = mapi.Folders
    for _, folder in Oli(outlookfolder).items():
        if folder.Name == foldertree[0]:
            outlookfolder = folder
            print(">" + folder.Name + ":")
            break
    for currentfolder in foldertree[1:]:
        # No break here (as in the original): the last sibling with a
        # matching name wins.
        for _, folder in Oli(outlookfolder.Folders).items():
            if folder.Name == currentfolder:
                print("\t>> " + folder.Name)
                outlookfolder = folder

    try:
        os.mkdir(workdir)
    except OSError:
        # Most likely the directory already exists.
        pass
    # https://docs.microsoft.com/en-us/dotnet/api/microsoft.office.interop.outlook._mailitem?view = outlook-pia
    # Remove reports attached by a previous run so messages are rescanned.
    for msg in outlookfolder.Items:
        try:
            for attachment in msg.Attachments:
                if attachment.FileName.startswith(
                        "Scan_results") and hashlib.sha256(
                            msg.Body).hexdigest() not in processed:
                    print("Removed Scan_results")
                    msg.Attachments.Remove(attachment.Index)
                    msg.Save()
        except Exception as e:
            print(e)
    #raw_input("Press any key to start scanning, Outlook will ask you for grant permission to access the Inbox...")
    while True:
        print(
            "[" + datetime.datetime.fromtimestamp(
                time.time()).strftime('%Y-%m-%d %H:%M:%S') +
            "] Inbox Scan started for " + "/".join(foldertree).strip("/"))
        print("Scanning folder: " + "/".join(foldertree).strip("/"))
        for msg in outlookfolder.Items:
            try:

                msgsha256 = hashlib.sha256(msg.Body).hexdigest()
                if msgsha256 in processed:
                    continue
                processed.add(msgsha256)

                # Per-message state; matchfound must not leak between messages.
                matchfound = False
                yarascan = ''
                external_scan_result = ''
                msgmeta = ''
                yaramatches = None
                # Never populated in this function, so the SPF section of the
                # report below is currently always skipped.
                senderdomain = senderspf = ''
                try:
                    msgmeta += "SenderEmailAddress:\t" + \
                        unidecode.unidecode(msg.SenderEmailAddress)+"\n"
                    msgmeta += "To:\t" + unidecode.unidecode(msg.To) + "\n"
                    msgmeta += "Subject:\t" + \
                        unidecode.unidecode(msg.Subject)+"\n"
                    msgmeta += "CC:\t" + unidecode.unidecode(msg.CC) + "\n"
                    msgmeta += "Categories:\t" + \
                        unidecode.unidecode(msg.Categories)+"\n"
                except AttributeError:
                    # Presumably some item types lack these fields; keep
                    # whatever metadata was collected so far.
                    pass

                print("-" * 80)
                print(msgmeta)

                if msg.Attachments.Count > 0:
                    for attachment in msg.Attachments:
                        if not attachment.FileName:
                            continue
                        # NOTE(review): file names come from the e-mail and
                        # are used in paths unsanitized - confirm they cannot
                        # contain path separators.
                        saved_path = workdir + "\\" + attachment.FileName
                        attachment.SaveAsFile(saved_path)
                        yaramatches = yara_match(saved_path)
                        if yaramatches:
                            matchfound = True
                            yarascan += print_yara(
                                yaramatches,
                                context="Attachment match:" +
                                str(attachment.FileName)[:40],
                                msg=msgmeta)
                            with open(saved_path, "rb") as f:
                                # NOTE(review): assumes the second value is a
                                # truthy "match found" flag - confirm.
                                scan_result, ext_match = external_scans(
                                    f.read(), binary=True)
                            matchfound = matchfound or ext_match
                            external_scan_result += scan_result

                        if not attachment.FileName.lower().endswith(".msg"):
                            continue

                        msgdata = ExtractMsg.process_msg(saved_path)
                        if msgdata is None:
                            # Checked before first use; the original tested
                            # this only after indexing msgdata.
                            print("MSGdata is none")
                            continue

                        m = "MSG found:" + saved_path + \
                            "\n\tSubject:" + str(msgdata["subject"]) + \
                            "\n\tTo:" + str(msgdata["to"]) + \
                            "\n\tFrom:" + str(msgdata["from"]) + \
                            "\n\tDate:" + str(msgdata["date"])
                        msgmeta += m + "\n"

                        print("MSG attachment:")
                        print(m)
                        yaramatches = yara_match(data=msgdata["body"])
                        if yaramatches:
                            yarascan += print_yara(
                                yaramatches,
                                context="MSG attachment body match",
                                msg=msgmeta)
                            matchfound = True
                        scan_result, ext_match = external_scans(
                            msgdata["body"], vt=False)
                        matchfound = matchfound or ext_match
                        external_scan_result += scan_result

                        for msgattachment in msgdata['attachments']:
                            nested_name = attachment.FileName + "__" + \
                                msgattachment["filename"]
                            extracted_path = workdir + "\\__" + nested_name
                            with open(extracted_path, "wb+") as f:
                                f.write(
                                    base64.b64decode(msgattachment['data']))
                            yaramatches = yara_match(extracted_path)
                            if yaramatches:
                                matchfound = True
                                yarascan += print_yara(
                                    yaramatches,
                                    msg=msgmeta,
                                    context=
                                    "Attachment extracted from MSG attachment matched: "
                                    + str(nested_name)[:64])
                                with open(extracted_path, "rb") as f:
                                    scan_result, ext_match = external_scans(
                                        f.read(), binary=True)
                                matchfound = matchfound or ext_match
                                external_scan_result += scan_result

                hbody = unidecode.unidecode(msg.HTMLBody)
                body = unidecode.unidecode(msg.Body)
                for line in hbody.splitlines():
                    if "x-originating-ip" in line.lower():
                        print(line)

                # Use the HTML body result when it has matches, otherwise fall
                # back to the plain body (the original discarded the former).
                yaramatches = yara_match(data=hbody) or yara_match(data=body)

                if yaramatches:
                    print('-' * 80)
                    matchfound = True
                    yarascan += print_yara(yaramatches,
                                           context="HTMLBody matched",
                                           msg=msgmeta,
                                           showstrings=True)
                    print('-' * 80)

                scan_result, ext_match = external_scans(hbody, vt=False)
                matchfound = matchfound or ext_match
                external_scan_result += scan_result
                header = '''
                <html>
                <body>
                '''
                footer = '''
                </body>
                </html>
                '''
                if senderspf:
                    header += "<h1>SPF records</h1></b>"
                    header += "<h3>Domain</h3>:" + senderdomain
                    header += "</br>SPF records:" + senderspf
                if yarascan.strip():
                    yarascan = "<h1>Yara matches</h1></br><pre>" + yarascan + "</pre><hr>"
                # The footer closes the tags opened by the header.
                scanres = header + yarascan + "</br>\n" + \
                    external_scan_result + footer
                if matchfound:
                    resfile = "Scan_results_matchfound.htm"
                else:
                    resfile = "Scan_results_nomatches.htm"

                if scanres:
                    respath = cwd + "\\" + resfile
                    with open(respath, "w+") as f:
                        f.write(scanres)
                    msg.Attachments.Add(respath, 1, 1, resfile)
                    msg.Save()
            except Exception as e:
                # One failing message must not stop the scan loop.
                print(e)
                traceback.print_exc()
        print(
            "[" + datetime.datetime.fromtimestamp(
                time.time()).strftime('%Y-%m-%d %H:%M:%S') +
            "] Done scanning,sleeping for " + str(CONFIG["scan_interval"]) +
            " seconds...")
        time.sleep(int(CONFIG["scan_interval"]))
        loadconfig()
        loadioc()

Ejemplo n.º 7

0

Mostrar archivo

Archivo: Extract_email_data_from_msg_files.py Proyecto: KelumPerera/ImportantPythonCodeSnippets

f = glob.glob(path + "/*" + input_file_identifier)

# Otherwise use
# f = glob.glob(r'C:\Users\kelum\Documents\EmailTest\*.msg')

# Column layout of the resulting dataframe.
emailcolumns = [
    'Msg_Sender', 'Msg_Date', 'Msg_To', 'Msg_CC', 'Msg_Attachments',
    'Msg_Subj', 'Msg_Body'
]

# Extract one row per e-mail found in the defined path. Rows are collected in
# a plain list and the dataframe is built once: DataFrame.append copied the
# whole frame on every iteration and no longer exists in pandas >= 2.0.
rows = []
for filename in f:
    msg = ExtractMsg.Message(filename)
    rows.append([
        msg.sender, msg.date, msg.to, msg.cc, msg.attachments, msg.subject,
        msg.body
    ])
email_data = pd.DataFrame(rows, columns=emailcolumns)

# Write the dataframe into a excel worksheet.