def parse_msg(filename):
    """Parse an Outlook ``.msg`` file into a flat dictionary of email fields.

    The sender, to and cc header strings are split on ``,`` and ``;`` and
    handed to ``addrs``, which yields a ``(addresses, line)`` pair for each
    header. A missing sender or recipient is replaced by the placeholder
    ``u"NoSender"`` / ``u"NoRcvr"``. If the message date cannot be converted
    by ``dateToUTCstr`` a warning is printed and the fixed fallback
    ``"2010-01-01T00:00:00"`` is used instead.

    :param filename: path of the ``.msg`` file to read
    :return: dict with the keys ``id``, ``senders_line``, ``senders``,
        ``tos_line``, ``tos``, ``ccs_line``, ``ccs``, ``bccs_line``,
        ``bccs``, ``subject``, ``datetime``, ``attachments`` and ``body``;
        ``bccs_line`` and ``bccs`` are always empty
    """
    msg = ExtractMsg.Message(filename)
    attachments = get_attachments_json(msg)
    # A fresh identifier is generated per call; it is not derived from the file.
    guid = str(uuid.uuid1())

    senders, senders_line = addrs(
        re.split('[,;]', str_to_unicode(msg.sender, default=u"NoSender")))
    tos, tos_line = addrs(
        re.split('[,;]', str_to_unicode(msg.to, default=u"NoRcvr")))
    ccs, ccs_line = addrs(re.split('[,;]', str_to_unicode(msg.cc)))

    try:
        date = dateToUTCstr(msg.date)
    except Exception:
        # Best effort: an unparsable date must not abort the whole message.
        # ``Exception`` (not a bare except) keeps Ctrl-C and SystemExit working.
        print(
            "FAILED to parse msg date. Setting date to default value for filename {}".format(
                str(filename)))
        date = "2010-01-01T00:00:00"

    return {
        'id': guid,
        'senders_line': senders_line,
        'senders': senders,
        'tos_line': tos_line,
        'tos': tos,
        'ccs_line': ccs_line,
        'ccs': ccs,
        'bccs_line': '',
        'bccs': [],
        'subject': str_to_unicode(msg.subject),
        'datetime': date,
        'attachments': attachments,
        'body': str_to_unicode(msg.body)
        # TODO make sure this is not needed
        # 'body': imap_utf7.decode(msg.body)
    }
def doc_list():
    """Return one ``tb`` blob per ``.msg`` file found under the parent directory.

    The search pattern is ``module\\db\\New\\*.msg`` joined onto the parent
    of the directory containing this file. For every match, the message
    body is read through ``ExtractMsg.Message`` and wrapped with ``tb``.

    :return: list of ``tb(...)`` results, in the order ``glob.glob`` yields
        the files; empty when nothing matches
    """
    here = path.dirname(__file__)
    base_dir = path.abspath(here + "/../")
    pattern = path.join(base_dir, r'module\db\New\*.msg')
    return [
        tb(ExtractMsg.Message(msg_path).body)
        for msg_path in glob.glob(pattern)
    ]
def run(self):
    """
        .. py:function:: run(self)

        Main entry point for the module: for every item in ``self.feed``,
        open it as an Outlook message and print its attachment short
        filenames followed by the sender, date, subject and body.

        :param self: current class instance
        :type self: class
    """
    # (template, attribute) pairs, printed in this order for each message.
    header_fields = (
        ("Sender: {}", "sender"),
        ("Sent On: {}", "date"),
        ("Subject: {}", "subject"),
        ("Body: {}", "body"),
    )

    for source in self.feed:
        message = ExtractMsg.Message(source)

        for item in message.attachments:
            print(item.shortFilename)

        for template, attribute in header_fields:
            print(template.format(getattr(message, attribute)))
def removerAcentosECaracteresEspeciais(palavra):
    """Strip accents from *palavra* and blank out every non-letter character.

    The text is NFKD-normalized so that accented characters decompose into a
    base character plus combining marks; the combining marks are dropped.
    Every remaining character that is not an ASCII letter (``a-z``/``A-Z``),
    digits and punctuation included, is then replaced by a single space.

    :param palavra: unicode text to clean
    :return: the cleaned text, same length as the accent-stripped input
    """
    decomposed = unicodedata.normalize('NFKD', palavra)
    sem_acento = u"".join(
        ch for ch in decomposed if not unicodedata.combining(ch))
    return re.sub('[^a-zA-Z]', ' ', sem_acento)


# Collect the '__substg1.0_1000' stream (bound to `body`) of every phishing
# sample. NOTE(review): presumably the MAPI body property; '__substg1.0_0C1F'
# looks like the sender address - confirm. `sender` is read but not stored.
array = []
for message in glob.glob('phishing/português/*.msg'):
    msg = ExtractMsg.Message(message)
    body = msg._getStringStream('__substg1.0_1000')
    sender = msg._getStringStream('__substg1.0_0C1F')
    array.append(body)

# One row per phishing message, labelled with Phishing = 1.
msgarray = pd.DataFrame(array)
msgarray['Phishing'] = 1
msgarray.columns = ["Message", "Phishing"]

# Same extraction for the legitimate ("ham") samples.
array2 = []
for message in glob.glob('ham/*.msg'):
    msg = ExtractMsg.Message(message)
    body = msg._getStringStream('__substg1.0_1000')
    sender = msg._getStringStream('__substg1.0_0C1F')
    array2.append(body)
import glob

import ExtractMsg

# Call simple_save() on every .msg file in the current working directory.
# A failure on one file is reported and the batch carries on with the next.
for filename in glob.iglob('*.msg'):
    try:
        ExtractMsg.Message(filename).simple_save()
    except Exception:
        # ``Exception`` rather than a bare except, so Ctrl-C / SystemExit
        # still stop the batch instead of being reported as a failed read.
        print('Read of  {}  failed!'.format(filename))

print('All done!')
def scan_inbox():
    """Scan an Outlook folder forever, attaching an HTML report to each message.

    Steps:

    1. Resolve the folder named by ``CONFIG["foldertree"]`` (first entry is
       the top-level store, the remaining entries are nested folder names).
    2. Remove ``Scan_results*`` attachments left on messages by earlier runs.
    3. Loop forever: every message whose body hash has not been seen in this
       process is scanned - saved attachments, ``.msg`` attachments and the
       files embedded in them, then the HTML and plain bodies - with the
       YARA rules and with ``external_scans``. The combined report is
       written to ``Scan_results_matchfound.htm`` or
       ``Scan_results_nomatches.htm`` in the current directory and attached
       to the message. After each pass the function sleeps
       ``CONFIG["scan_interval"]`` seconds and reloads config and IOCs.

    This function never returns. An error while handling one message is
    printed with its traceback and the message is skipped for that pass.
    """
    global CONFIG
    foldertree = CONFIG["foldertree"]
    outlook = Dispatch("Outlook.Application")
    mapi = outlook.GetNamespace("MAPI")
    cwd = os.getcwd()
    workdir = cwd + "\\workdir"
    # Body hashes of messages already scanned; a set keeps lookups O(1).
    processed = set()

    class Oli():
        """Thin wrapper giving 1-based COM collections a Python-style API."""

        def __init__(self, outlook_object):
            self._obj = outlook_object

        def items(self):
            """Yield ``(index, item)`` pairs for indexes 1..Count."""
            array_size = self._obj.Count
            for item_index in range(1, array_size + 1):
                yield (item_index, self._obj[item_index])

        def prop(self):
            """Return the sorted property names of the wrapped object."""
            return sorted(self._obj._prop_map_get_.keys())

    def loadrules():
        """Load the YARA rules, report how many there are, and return them."""
        loaded = load_yara("rules")
        rulecount = sum(1 for _ in loaded)
        print("Loaded " + str(rulecount) + " YARA rules.")
        return loaded

    def timestamp():
        """Return the current local time as ``YYYY-MM-DD HH:MM:SS``."""
        return datetime.datetime.fromtimestamp(
            time.time()).strftime('%Y-%m-%d %H:%M:%S')

    def body_digest(body):
        """Return the SHA-256 hex digest of a message body.

        Text is encoded as UTF-8 first: hashing a text string directly fails
        on non-ASCII content, which made such messages unscannable.
        """
        if not isinstance(body, bytes):
            body = body.encode("utf-8")
        return hashlib.sha256(body).hexdigest()

    # The rules must be bound in THIS scope. Assigning them only inside
    # loadrules() left the outer name at None, so every match call raised
    # and was swallowed - YARA scanning never actually ran.
    rules = loadrules()
    loadioc()

    def yara_match(*args, **kwargs):
        """Run ``rules.match`` and return its result, or None on failure.

        Returning None on error (instead of leaving a previous result in
        place) prevents stale matches from being reported for the wrong item.
        """
        try:
            return rules.match(*args, **kwargs)
        except Exception as exc:
            print("YARA scan failed: " + str(exc))
            return None

    # this needs fixing :/
    outlookfolder = mapi.Folders
    for inx, folder in Oli(outlookfolder).items():
        if folder.Name == foldertree[0]:
            outlookfolder = folder
            print(">" + folder.Name + ":")
            break

    # Descend one level per remaining path element. There is deliberately no
    # break: if several siblings share a name, the last one wins (as before).
    for currentfolder in foldertree[1:]:
        for inx, folder in Oli(outlookfolder.Folders).items():
            if folder.Name == currentfolder:
                print("\t>> " + folder.Name)
                outlookfolder = folder

    try:
        os.mkdir(workdir)
    except OSError:
        # Typically: the directory already exists from an earlier run.
        pass

    # https://docs.microsoft.com/en-us/dotnet/api/microsoft.office.interop.outlook._mailitem?view = outlook-pia
    # Strip reports attached by previous runs so messages get rescanned cleanly.
    # NOTE(review): attachments are removed while the collection is iterated;
    # confirm Outlook tolerates this when several reports are attached.
    for msg in outlookfolder.Items:
        try:
            for attachment in msg.Attachments:
                if attachment.FileName.startswith(
                        "Scan_results") and body_digest(
                            msg.Body) not in processed:
                    print("Removed Scan_results")
                    msg.Attachments.Remove(attachment.Index)
                    msg.Save()
        except Exception as e:
            print(e)

    while True:
        print("[" + timestamp() + "] Inbox Scan started for " +
              "/".join(foldertree).strip("/"))
        print("Scanning folder: " + "/".join(foldertree).strip("/"))

        for msg in outlookfolder.Items:
            try:
                msgsha256 = body_digest(msg.Body)
                if msgsha256 in processed:
                    continue
                processed.add(msgsha256)

                # Per-message state. matchfound is reset here and only ever
                # OR-ed afterwards, so one hit anywhere marks the report.
                matchfound = False
                yarascan = ''
                external_scan_result = ''
                msgmeta = ''
                senderdomain = senderspf = ''

                try:
                    msgmeta += "SenderEmailAddress:\t" + \
                        unidecode.unidecode(msg.SenderEmailAddress) + "\n"
                    msgmeta += "To:\t" + unidecode.unidecode(msg.To) + "\n"
                    msgmeta += "Subject:\t" + \
                        unidecode.unidecode(msg.Subject) + "\n"
                    msgmeta += "CC:\t" + unidecode.unidecode(msg.CC) + "\n"
                    msgmeta += "Categories:\t" + \
                        unidecode.unidecode(msg.Categories) + "\n"
                except AttributeError:
                    # Item lacks one of the fields; keep whatever was gathered.
                    pass

                print("-" * 80)
                print(msgmeta)

                if msg.Attachments.Count > 0:
                    for attachment in msg.Attachments:
                        if not attachment.FileName:
                            continue

                        # Attachment names come from untrusted mail: drop any
                        # directory part so files cannot escape the workdir.
                        safe_name = os.path.basename(attachment.FileName)
                        saved_path = workdir + "\\" + safe_name
                        attachment.SaveAsFile(saved_path)

                        yaramatches = yara_match(saved_path)
                        if yaramatches:
                            matchfound = True
                            yarascan += print_yara(
                                yaramatches,
                                context="Attachment match:" +
                                str(attachment.FileName)[:40],
                                msg=msgmeta)

                        with open(saved_path, "rb") as f:
                            scan_result, external_hit = external_scans(
                                f.read(), binary=True)
                        matchfound = matchfound or external_hit
                        external_scan_result += scan_result

                        if not attachment.FileName.lower().endswith(".msg"):
                            continue

                        # Attached e-mail: scan its body and embedded files.
                        msgdata = ExtractMsg.process_msg(saved_path)
                        if msgdata is None:
                            # Checked before use; the original subscripted
                            # msgdata first and tested for None afterwards.
                            print("MSGdata is none")
                            continue

                        m = "MSG found:" + saved_path + \
                            "\n\tSubject:" + str(msgdata["subject"]) + \
                            "\n\tTo:" + str(msgdata["to"]) + \
                            "\n\tFrom:" + str(msgdata["from"]) + \
                            "\n\tDate:" + str(msgdata["date"])
                        msgmeta += m + "\n"
                        print("MSG attachment:")
                        print(m)

                        yaramatches = yara_match(data=msgdata["body"])
                        if yaramatches:
                            yarascan += print_yara(
                                yaramatches,
                                context="MSG attachment body match",
                                msg=msgmeta)
                            matchfound = True

                        scan_result, external_hit = external_scans(
                            msgdata["body"], vt=False)
                        matchfound = matchfound or external_hit
                        external_scan_result += scan_result

                        for msgattachment in msgdata['attachments']:
                            nested_path = workdir + "\\__" + safe_name + \
                                "__" + os.path.basename(
                                    msgattachment["filename"])
                            with open(nested_path, "wb+") as f:
                                f.write(
                                    base64.b64decode(msgattachment['data']))

                            yaramatches = yara_match(nested_path)
                            if yaramatches:
                                matchfound = True
                                yarascan += print_yara(
                                    yaramatches,
                                    msg=msgmeta,
                                    context="Attachment extracted from MSG attachment matched: "
                                    + str(attachment.FileName + "__" +
                                          msgattachment["filename"])[:64])

                            with open(nested_path, "rb") as f:
                                scan_result, external_hit = external_scans(
                                    f.read(), binary=True)
                            matchfound = matchfound or external_hit
                            external_scan_result += scan_result

                hbody = unidecode.unidecode(msg.HTMLBody)
                body = unidecode.unidecode(msg.Body)
                for line in hbody.splitlines():
                    if "x-originating-ip" in line.lower():
                        print(line)

                # Scan the HTML body first and fall back to the plain body.
                # Previously the plain-body result overwrote the HTML one.
                body_context = "HTMLBody matched"
                yaramatches = yara_match(data=hbody)
                if not yaramatches:
                    body_context = "Plain body matched"
                    yaramatches = yara_match(data=body)
                if yaramatches:
                    print('-' * 80)
                    matchfound = True
                    yarascan += print_yara(yaramatches,
                                           context=body_context,
                                           msg=msgmeta,
                                           showstrings=True)
                    print('-' * 80)

                scan_result, external_hit = external_scans(hbody, vt=False)
                matchfound = matchfound or external_hit
                external_scan_result += scan_result

                header = ''' <html> <body> '''
                footer = ''' </body> </html> '''
                # NOTE(review): senderspf is never populated in this function,
                # so this SPF section is currently dead code.
                if senderspf:
                    header += "<h1>SPF records</h1></b>"
                    header += "<h3>Domain</h3>:" + senderdomain
                    header += "</br>SPF records:" + senderspf
                if yarascan.strip():
                    yarascan = "<h1>Yara matches</h1></br><pre>" + yarascan + "</pre><hr>"
                # The footer is now emitted so the report is closed HTML.
                scanres = header + yarascan + "</br>\n" + \
                    external_scan_result + footer

                if matchfound:
                    resfile = "Scan_results_matchfound.htm"
                else:
                    resfile = "Scan_results_nomatches.htm"

                with open(cwd + "\\" + resfile, "w+") as f:
                    f.write(scanres)
                msg.Attachments.Add(cwd + "\\" + resfile, 1, 1, resfile)
                msg.Save()
            except Exception as e:
                # One bad message must not stop the scanner.
                print(e)
                traceback.print_exc()

        print("[" + timestamp() + "] Done scanning,sleeping for " +
              str(CONFIG["scan_interval"]) + " seconds...")
        time.sleep(int(CONFIG["scan_interval"]))
        loadconfig()
        loadioc()
f = glob.glob(path + "/*" + input_file_identifier)
# Otherwise use
# f = glob.glob(r'C:\Users\kelum\Documents\EmailTest\*.msg')

# Column layout of the resulting dataframe; one row per email.
emailcolumns = [
    'Msg_Sender', 'Msg_Date', 'Msg_To', 'Msg_CC', 'Msg_Attachments',
    'Msg_Subj', 'Msg_Body'
]

# Extract the fields of every email found in the defined path. Rows are
# collected in a plain list and the dataframe is built once at the end:
# appending to a dataframe inside the loop copies it on every iteration, and
# DataFrame.append no longer exists in pandas 2.x.
email_rows = []
for filename in f:
    msg = ExtractMsg.Message(filename)
    email_rows.append([
        msg.sender, msg.date, msg.to, msg.cc, msg.attachments, msg.subject,
        msg.body
    ])

email_data = pd.DataFrame(email_rows, columns=emailcolumns)
# Write the dataframe into an Excel worksheet.