def test_decode(tnefspec):
    """Parse one TNEF file from ``datadir`` and compare it with its spec.

    ``tnefspec`` unpacks to ``(filename, key, attachment_names, object_codes)``.
    """
    fn, key, attchs, objs = tnefspec

    source_path = datadir + os.sep + fn
    with open(source_path, "rb") as tfile:
        raw = tfile.read()
    t = TNEF(raw)

    assert t.key == key, "wrong key: 0x%2.2x" % t.key
    assert objcodes(t) == objs, "wrong objs: %s" % ["0x%2.2x" % o.name for o in t.objects]

    attachment_names = [attachment.name for attachment in t.attachments]
    assert attachment_names == attchs
def parseFile():
    """Decode the request body as TNEF and return its HTML body as JSON.

    ``request.data`` is expected to hold the TNEF file encoded as URL-safe
    base64. The response is ``{"html": <htmlbody>}`` with a permissive
    ``Access-Control-Allow-Origin`` header.

    Returns:
        The response object produced by ``jsonify``.
    """
    raw = base64.urlsafe_b64decode(request.data)

    # Parse straight from memory. The earlier version wrote the upload to a
    # fixed "data.dat" in the working directory and read it back, so two
    # concurrent requests could overwrite each other's data between the write
    # and the read.
    tnef = TNEF(raw)

    response = jsonify({"html": tnef.htmlbody})
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response
def test_decode(tnefspec):
    """Parse one TNEF file from ``datadir`` and compare it with its spec.

    ``tnefspec`` unpacks to ``(filename, key, attachment_names, object_codes)``.
    Attachment names are checked twice: via the decoded ``name`` attribute and
    via ``long_filename()``.
    """
    fn, key, attchs, objs = tnefspec

    with open(datadir + os.sep + fn, "rb") as tfile:
        raw = tfile.read()
    t = TNEF(raw)

    assert t.key == key, "wrong key: 0x%2.2x" % t.key
    assert objcodes(t) == objs, "wrong objs: %s" % ["0x%2.2x" % o.name for o in t.objects]

    # TODO: which encoding should we decode from below?
    short_names = [attachment.name.decode() for attachment in t.attachments]
    assert short_names == attchs

    long_names = [attachment.long_filename() for attachment in t.attachments]
    assert long_names == attchs
def test_decode(tnefspec):
    """Parse one TNEF file from ``datadir`` and check it end to end.

    ``tnefspec`` unpacks to ``(filename, key, attachment_names, body_attr,
    object_codes)``. ``body_attr`` names the body attribute expected to be
    populated (falsy when the file has no body); ``object_codes`` is only
    compared when it is truthy.
    """
    fn, key, attchs, body, objs = tnefspec

    def check_attr(attr):
        # Every attribute must render to a non-empty string and carry data.
        assert attr.__str__()
        assert attr.data is not None

    with open(datadir + os.sep + fn, "rb") as tfile:
        t = TNEF(tfile.read())

    assert t.key == key, "wrong key: 0x%2.2x" % t.key

    for prop in t.mapiprops:
        check_attr(prop)

    for index, attachment in enumerate(t.attachments):
        assert attachment.long_filename() == attchs[index]
        for attr in attachment.mapi_attrs:
            check_attr(attr)

    for prop in t.msgprops:
        check_attr(prop)
        if prop.name == TNEF.ATTRECIPTABLE:
            # The first entry of the recipient table is a list of attributes.
            for recipient_attr in prop.data[0]:
                assert isinstance(recipient_attr, TNEFMAPI_Attribute)

    if t.htmlbody:
        assert 'html' in t.htmlbody

    if body:
        assert getattr(t, body)
        assert t.has_body()
    else:
        assert not t.has_body()

    if t.rtfbody:
        assert t.rtfbody[:5] == b'{\\rtf'

    if objs:
        assert objcodes(t) == objs, "wrong objs: %s" % ["0x%2.2x" % o.name for o in t.objects]

    # Both dump modes must produce something truthy.
    assert t.dump(True)
    assert t.dump(False)
def _read_tnef(self):
    """Parse the first ``application/ms-tnef`` part of ``self.message``.

    When such a part is found, ``self.tnef_payload`` is set to the part and
    ``self.tnef_message`` to the ``TNEF`` object built from its
    base64-decoded payload. Nothing is set when the message payload is not a
    list or when no part has that content type.
    """
    # TODO: does this work in non-multipart?
    parts = self.message.get_payload()
    if isinstance(parts, list):
        for part in parts:
            if part.get_content_type() != "application/ms-tnef":
                continue
            # TODO: skip renamed winmail.dat
            raw = base64.b64decode(part.get_payload())
            self.tnef_payload = part
            self.tnef_message = TNEF(raw)
            return
async def scan(self, payload: Payload, request: Request) -> WorkerResponse:
    """Extract every attachment contained in a TNEF payload.

    Args:
        payload: Payload whose ``content`` is parsed as TNEF.
        request: Not used by this method.

    Returns:
        A ``WorkerResponse`` whose ``extracted`` list holds one
        ``ExtractedPayload`` per TNEF attachment, each tagged with a
        ``filename`` entry in its metadata.
    """
    extracted: List[ExtractedPayload] = []
    tnef_results = TNEF(payload.content)

    # ``or ()`` keeps the original behaviour of doing nothing when the
    # attachment collection is empty or None.
    for tnef_attachment in tnef_results.attachments or ():
        try:
            filename = UnicodeDammit(tnef_attachment.name).unicode_markup
        except Exception:
            # Best effort: an undecodable name must not lose the attachment.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit / task cancellation propagate.
            filename = "None"
        tnef_meta = PayloadMeta(extra_data={'filename': filename})
        extracted.append(ExtractedPayload(tnef_attachment.data, tnef_meta))

    return WorkerResponse(extracted=extracted)
def parseFile(request):
    """Decode a TNEF upload and return its body as a JSON response.

    ``request.data`` is expected to hold the TNEF file encoded as URL-safe
    base64. The response payload is chosen as follows:

    * ``{"html": htmlbody}`` when the message has an HTML body;
    * ``{"rtf": ...}`` when it only has an RTF body: the RTF text is posted
      to the ``rtfConvert2`` cloud function and everything after the first
      line of its reply is returned;
    * ``{"html": body}`` otherwise.

    Every response carries ``Access-Control-Allow-Origin: *``.

    Args:
        request: Incoming request object exposing ``data``.

    Returns:
        The response object produced by ``jsonify``.

    Raises:
        IndexError: If the conversion service reply contains no newline.
    """
    # Parse straight from memory instead of round-tripping through the fixed,
    # predictable path /tmp/data.dat, which concurrent invocations could
    # overwrite between the write and the read.
    tnef = TNEF(base64.urlsafe_b64decode(request.data))

    if tnef.htmlbody is not None:
        payload = {"html": tnef.htmlbody}
    elif tnef.rtfbody is not None:
        # Trailing carriage returns are stripped first, then trailing
        # newlines, exactly as before.
        rtf_text = tnef.rtfbody.decode().rstrip('\r').rstrip('\n')
        # NOTE(review): this prints the full message body to stdout; kept for
        # parity with the previous behaviour, consider removing.
        print(rtf_text)
        url1 = "https://us-central1-igneous-sweep-257100.cloudfunctions.net/rtfConvert2"
        r = requests.post(url=url1, json={"body": rtf_text})
        # The first line of the reply is discarded.
        payload = {"rtf": r.text.split("\n", 1)[1]}
    else:
        payload = {"html": tnef.body}

    response = jsonify(payload)
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response
def demux_tnef(filename, options):
    """Extract interesting attachments from a TNEF file into temp directories.

    Only files whose extension is ``.dat``, ``.bin`` or empty are considered.
    Each attachment whose lower-cased extension appears in
    ``demux_extensions_list`` is written into its own freshly created
    directory under ``<tmppath>/cuckoo-tnef-tmp``.

    Args:
        filename: Path of the candidate TNEF file.
        options: Accepted for interface compatibility; not consulted (the
            previous implementation overwrote it with ``Config()`` as well).

    Returns:
        List of paths of the files written. Empty when TNEF support is
        unavailable, the extension does not match, or parsing fails.
    """
    import logging

    retlist = []
    if not HAS_TNEFFILE:
        return retlist

    try:
        ext = os.path.splitext(filename)[1]
        if ext not in (".dat", "", ".bin"):
            return retlist

        config = Config()
        tmp_path = config.cuckoo.get("tmppath", "/tmp")
        target_path = os.path.join(tmp_path, "cuckoo-tnef-tmp")
        if not os.path.exists(target_path):
            os.mkdir(target_path)

        with open(filename, "rb") as tfile:
            t = TNEF(tfile.read())

        for a in t.attachments:
            # The attachment name comes from the (untrusted) sample. Normalise
            # Windows separators and keep only the final component so a name
            # such as "..\\..\\x.exe" or "/etc/x" cannot escape tmp_dir.
            safe_name = os.path.basename(a.name.replace("\\", "/"))
            if not safe_name:
                continue

            ext = os.path.splitext(safe_name)[1].lower()
            # Skip extension-less dotfiles (".foo").
            if ext == "" and safe_name[0] == ".":
                continue
            if ext not in demux_extensions_list:
                continue

            tmp_dir = tempfile.mkdtemp(prefix='cuckootnef_', dir=target_path)
            fullpath = os.path.join(tmp_dir, safe_name)
            with open(fullpath, 'wb') as fp:
                fp.write(a.data)
            # Return exactly the path that was written; previously the
            # returned path had backslashes replaced while the written one
            # did not, so the two could differ.
            retlist.append(fullpath)
    except Exception:
        # Demuxing stays best effort: a malformed sample must not abort the
        # caller, but leave a trace instead of swallowing the error silently.
        logging.getLogger(__name__).debug(
            "TNEF demux of %r failed", filename, exc_info=True
        )
    return retlist
def test_decode(tnefspec):
    """Decode one TNEF file from ``datadir`` and validate the parsed result.

    ``tnefspec`` unpacks to ``(filename, key, attachment_names, body_attr,
    object_codes)``; a falsy ``body_attr`` means the file must have no body,
    and a falsy ``object_codes`` skips the object-code comparison.
    """
    fn, key, attchs, body, objs = tnefspec

    path = datadir + os.sep + fn
    with open(path, "rb") as tfile:
        content = tfile.read()
    t = TNEF(content)

    assert t.key == key, "wrong key: 0x%2.2x" % t.key

    # Message-level MAPI properties.
    for mapi_prop in t.mapiprops:
        assert mapi_prop.__str__()
        assert mapi_prop.data is not None

    # Attachments and their MAPI attributes.
    for position, attachment in enumerate(t.attachments):
        assert attachment.long_filename() == attchs[position]
        for mapi_attr in attachment.mapi_attrs:
            assert mapi_attr.__str__()
            assert mapi_attr.data is not None

    # Message properties; the recipient table nests further attributes.
    for msg_prop in t.msgprops:
        assert msg_prop.__str__()
        assert msg_prop.data is not None
        if msg_prop.name != TNEF.ATTRECIPTABLE:
            continue
        for nested in msg_prop.data[0]:
            assert isinstance(nested, TNEFMAPI_Attribute)

    if t.htmlbody:
        assert 'html' in t.htmlbody

    if not body:
        assert not t.has_body()
    else:
        assert getattr(t, body)
        assert t.has_body()

    if t.rtfbody:
        assert t.rtfbody[:5] == b'{\\rtf'

    if objs:
        actual_codes = objcodes(t)
        assert actual_codes == objs, "wrong objs: %s" % ["0x%2.2x" % o.name for o in t.objects]

    assert t.dump(True)
    assert t.dump(False)
def test_decode(tnefspec):
    """Decode one TNEF file from ``DATADIR`` and validate the parsed result.

    ``tnefspec`` unpacks to ``(filename, key, attachment_names, body_attr,
    object_codes)``; a falsy ``body_attr`` means the file must have no body,
    and a falsy ``object_codes`` skips the object-code comparison.
    """
    fn, key, attchs, body, objs = tnefspec
    t = TNEF((DATADIR / fn).read_bytes())

    # ``02x`` is the format-spec equivalent of the old ``%2.2x``. The previous
    # ``2.2x`` spec is rejected for integers ("Precision not allowed in
    # integer format specifier"), so a failing assert raised ValueError
    # instead of showing this message.
    assert t.key == key, f"wrong key: 0x{t.key:02x}"

    for m in t.mapiprops:
        assert m.__str__()
        assert m.data is not None

    for i, a in enumerate(t.attachments):
        assert a.long_filename() == attchs[i]
        assert type(a.data) is bytes
        for m in a.mapi_attrs:
            assert m.__str__()
            assert m.data is not None

    for m in t.msgprops:
        assert m.__str__()
        assert m.data is not None
        if m.name == TNEF.ATTRECIPTABLE:
            # The first entry of the recipient table is a list of attributes.
            for n_m in m.data[0]:
                assert isinstance(n_m, TNEFMAPI_Attribute)

    if t.htmlbody:
        assert 'html' in t.htmlbody

    if body:
        assert getattr(t, body)
        assert t.has_body()
    else:
        assert not t.has_body()

    if t.rtfbody:
        assert t.rtfbody[0:5] == b'{\\rtf'

    if objs:
        assert objcodes(t) == objs, "wrong objs: " + str(
            [f"0x{o.name:02x}" for o in t.objects]
        )

    assert t.dump(True)
    assert t.dump(False)
def printkek():
    """Return the HTML body of ``winmail.dat`` in the working directory."""
    with open("winmail.dat", "rb") as tneffile:
        raw = tneffile.read()
    return TNEF(raw).htmlbody
def eml2str(msg):
    """Extract readable text fragments from an e-mail message.

    Text parts (not marked as attachments) are decoded, entity-decoded and,
    when they are or look like HTML, converted with ``html2text``. While less
    than 200 characters of text have been seen, PDF, RTF, DOCX and TNEF
    parts are parsed as well (subject to the ``pdf_support`` /
    ``rtf_support`` / ``tnef_support`` flags) and kept when they yield more
    than 50 characters.

    Args:
        msg: Raw message as ``bytes`` or ``str``, or an
            ``email.message.Message``. Any other type is reported through
            ``eprint`` and then used as-is.

    Returns:
        List of extracted text fragments, in part order.
    """
    if isinstance(msg, bytes):
        msg = email.message_from_bytes(msg)
    elif isinstance(msg, str):
        msg = email.message_from_string(msg)
    elif not isinstance(msg, email.message.Message):
        eprint(type(msg))

    # Charset labels seen in the wild that Python's codec registry does not
    # accept, mapped to the codec name it does.
    charset_aliases = {
        "cp-850": "cp850",
        "_iso-2022-jp$esc": "iso-2022-jp",
        "iso-8859-8-i": "iso-8859-8",
        "windows-874": "cp874",
        "x-mac-ce": "maccentraleurope",
    }
    html_markers = ("<body", "<img", "<style", "<center", "<a href")

    text = []
    textlen = 0  # length of the longest text part seen so far

    for part in msg.walk():
        charset = part.get_content_charset("utf-8")
        if not charset:
            charset = "iso8859-2"
        elif charset in charset_aliases:
            charset = charset_aliases[charset]
        elif charset[0:4] == "utf8":
            charset = "utf-8"

        ctyp = part.get_content_type().lower()
        # str() turns a missing filename into "None", as before.
        fname = hdrdecode(str(part.get_filename())).lower()
        disp = part.get_content_disposition()

        if ctyp.split('/')[0] == "text" and disp != "attachment":
            try:
                data = part.get_payload(decode=True)
                try:
                    data = data.decode(charset, 'mixed')
                except Exception:
                    # Unknown or wrong charset label: fall back to UTF-8.
                    data = data.decode("utf-8", 'mixed')
                data = xmldecode(data)  # plain text needs this too
                ldata = data.lower()

                looks_like_html = "<" in data and any(
                    marker in ldata for marker in html_markers
                )
                if ctyp in ("text/html", "text/xml") or looks_like_html:
                    # Drop everything before <body> when present (not at 0).
                    body_pos = ldata.find("<body")
                    if body_pos > 0:
                        data = data[body_pos:]
                    data = html2text(data)
                    text.append(data)
                elif ctyp == "text/plain":
                    text.append(data)

                textlen = max(textlen, len(data))
            except Exception:
                eprint(traceback.format_exc())

        elif textlen < 200:
            # Little or no body text so far: try to read attachments.
            s = ""
            t0 = time.time()
            try:
                if (ctyp == "application/pdf" or fname.endswith(".pdf")) and pdf_support:
                    eprint("PDF: parsing file: " + fname)
                    if pdf_support == "pdfminer":
                        s = pdfminer.high_level.extract_text(
                            io.BytesIO(part.get_payload(decode=True)), maxpages=3
                        )
                    elif pdf_support == "pdftotext":
                        pdf = pdftotext.PDF(io.BytesIO(part.get_payload(decode=True)))
                        # Keep the first page longer than 200 chars, else the
                        # last page.
                        for page in pdf:
                            s = str(page)
                            if len(s) > 200:
                                break

                elif (ctyp == "application/rtf" or fname.endswith(".rtf")) and rtf_support:
                    eprint("RTF: parsing file: " + fname)
                    s = rtf_to_text(
                        part.get_payload(decode=True).decode("utf-8", "ignore")
                    )

                elif (
                    ctyp == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
                    or fname.endswith(".docx")
                ):
                    eprint("DOCX: parsing file: " + fname)
                    zipf = zipfile.ZipFile(io.BytesIO(part.get_payload(decode=True)))
                    doc_xml = zipf.read('word/document.xml').decode("utf-8")
                    pieces = []
                    for chunk in doc_xml.split("<"):
                        try:
                            tag, txt = chunk.split(">", 1)
                            tag1 = tag.split()[0]
                        except (ValueError, IndexError):
                            # No ">" in the chunk, or an empty tag.
                            continue
                        if tag1 == "w:t":
                            pieces.append(txt)
                        elif tag1 in ["w:tab", "w:br", "w:cr", "w:p"]:
                            pieces.append("\t")
                    s = "".join(pieces)

                elif (ctyp == "application/ms-tnef" or fname == "winmail.dat") and tnef_support:
                    eprint("TNEF: parsing file: " + fname)
                    tnefobj = TNEF(part.get_payload(decode=True))
                    tnefcp = tnefobj.codepage or "cp1250"
                    if tnefobj.htmlbody:
                        try:
                            s = html2text(tnefobj.htmlbody.decode("utf-8", "strict"))
                        except Exception:
                            # Not valid UTF-8: use the TNEF code page.
                            s = html2text(tnefobj.htmlbody.decode(tnefcp, "ignore"))
                    elif tnef_support > 1 and tnefobj.rtfbody:
                        s = rtf_to_text(tnefobj.rtfbody.decode(tnefcp, "ignore"))

                t0 = time.time() - t0
                print(s)
                if len(s) > 50:
                    eprint("parsed: %d chars, %d ms" % (len(s), t0 * 1000))
                    text.append(s)
            except Exception:
                eprint(traceback.format_exc())

    return text
def scan(self, payload, **kwargs):
    """Carve e-mail sessions out of ``payload`` and yield one dict per message.

    The body contains ``yield``, so calling this returns a generator; the
    early ``return False`` / ``return True`` statements simply end iteration
    without yielding anything.

    Keyword arguments read here:
        filename: Required; passed to ``self.vortex_metadata`` and stored
            under ``'vortex_filename'`` when vortex metadata is available.
        uuid: Optional; defaults to ``[self.stoq.get_uuid]`` and is forwarded
            to ``self.handle_attachments`` for regular attachments.

    Each yielded dict contains the lower-cased message headers (repeated
    headers are joined with a newline), ``'body'`` and ``'body_html'``
    (removed again when ``self.omit_body`` is set), ``'att'`` (a list of
    attachment results), optionally ``'ips'`` / ``'urls'`` when
    ``self.extract_iocs`` is set, and ``'<field>_isnew'`` flags when
    ``self.use_bloom`` is set.
    """
    if not payload:
        self.log.warn(
            "SMTP session is empty. Do you have permission to the source?")
        return False

    extracted_urls = None
    extracted_ips = None

    # Grab the uuid so we can pass it on to the attachment handler
    uuid = kwargs.get('uuid', [self.stoq.get_uuid])

    # Get the appropriate metadata from the vortex filename
    vortex_meta = self.vortex_metadata(kwargs['filename'])

    # If vortex_meta returns False, it means the payload being analyzed is
    # the client session, which contains useless information. Let's just
    # skip it.
    if vortex_meta is False:
        self.log.debug("Vortex client sessions provided, skipping...")
        return True

    # Iterate over each e-mail session
    for email_session in self.carve_email(payload):
        email_session = self.stoq.force_unicode(email_session)
        message_json = {}
        message = pyzmail.message_from_string(email_session)

        if vortex_meta:
            # Setup our primary message json blob
            message_json = vortex_meta.copy()
            message_json['vortex_filename'] = kwargs['filename']

        # Create a dict of the headers in the session
        for k, v in list(message.items()):
            curr_header = k.lower()
            if curr_header in message_json:
                # If the header key already exists, let's join them
                message_json[curr_header] += "\n{}".format(
                    message.get_decoded_header(k))
            else:
                message_json[curr_header] = message.get_decoded_header(k)

        # Extract the e-mail body, to include HTML if available
        if message.text_part is not None:
            message_json['body'] = self.stoq.force_unicode(
                message.text_part.get_payload())
        else:
            message_json['body'] = ""

        if message.html_part is not None:
            message_json['body_html'] = self.stoq.force_unicode(
                message.html_part.get_payload())
        else:
            message_json['body_html'] = ""

        # Make this easy, merge both text and html body within e-mail
        # for the purpose of extracting any URIs
        email_body = "{}{}".format(message_json['body'],
                                   message_json['body_html'])

        # Extract and normalize any IP addresses in headers
        if self.extract_iocs:
            # str of concatenated ip_headers
            concat_ips = ""
            # Define which headers we want to extract IP addresses from
            ip_headers = [
                'src_ip', 'dest_ip', 'received', 'x-orig-ip',
                'x-originating-ip', 'x-remote-ip', 'x-sender-ip'
            ]

            # concat all of our headers into one string for easy searching
            for ip_header in ip_headers:
                if ip_header in message_json:
                    concat_ips += message_json[ip_header]

            extracted_ips = self.readers['iocregex'].read(
                concat_ips, datatype_flag='ipv4')

            # Let's get a unique list of IP addresses from extracted data
            if 'ipv4' in extracted_ips:
                message_json['ips'] = extracted_ips['ipv4']

            # extract and normalize any URLs found
            extracted_urls = self.readers['iocregex'].read(
                email_body, datatype_flag='url')

            # Extract any URLs that may be in the merged body
            if 'url' in extracted_urls:
                message_json['urls'] = extracted_urls['url']

        # Handle attachments
        message_json['att'] = []
        for mailpart in message.mailparts:
            try:
                filename = mailpart.filename
            except TypeError:
                filename = "None"

            # This is a check for winmail.dat files. If successful,
            # skip_attachment will be True and we will use the
            # results from that instead of winmail.dat file itself.
            skip_attachment = False

            if mailpart.type == "text/plain":
                # Additional plain-text parts are appended to the body rather
                # than being treated as attachments.
                try:
                    message_json['body'] += self.stoq.force_unicode(
                        mailpart.get_payload())
                except:
                    pass
                skip_attachment = True
            else:
                if filename == "winmail.dat":
                    tnef_results = TNEF(mailpart.get_payload())
                    # we have data, let's handle it.
                    if tnef_results.attachments:
                        # We have a valid file within winmail.dat,
                        # let's make sure we only handle it here.
                        skip_attachment = True
                        for tnef_attachment in tnef_results.attachments:
                            try:
                                filename = self.stoq.force_unicode(
                                    tnef_attachment.name)
                            except:
                                filename = "None"
                            # NOTE(review): this passes message_json['uuid']
                            # while the regular attachment path below passes
                            # the local `uuid`. If message_json has no 'uuid'
                            # key, the KeyError is swallowed by the bare
                            # except and the TNEF attachment is silently
                            # dropped -- confirm which is intended.
                            try:
                                attachment_json = self.handle_attachments(
                                    payload=tnef_attachment.data,
                                    filename=filename,
                                    uuid=message_json['uuid'])
                                if attachment_json:
                                    message_json['att'].append(
                                        attachment_json)
                            except:
                                pass

            # Let's handle the attachment normally
            if not skip_attachment:
                attachment_json = self.handle_attachments(
                    payload=mailpart.get_payload(),
                    filename=filename,
                    uuid=uuid)
                if attachment_json:
                    attachment_json['desc'] = mailpart.part.get(
                        'Content-Description')
                    attachment_json['type'] = mailpart.type
                    message_json['att'].append(attachment_json)

        if self.use_bloom:
            # Check bloom filters
            for field_name, field_bloom in self.bloomfilters.items():
                # If the configured field name exists in parsed data...
                if field_name in message_json:
                    # extract the field value and check if it has been seen
                    # before...
                    field_value = message_json[field_name]
                    seen_before = field_bloom.query_filter(
                        field_value, add_missing=True)

                    # Generate JSON entry key for flagging new field values
                    field_flag = "{}_isnew".format(field_name)

                    # if the value has not been seen before...
                    if not seen_before:
                        # flag it as new within JSON
                        message_json[field_flag] = True
                    else:
                        message_json[field_flag] = False

        # Make sure we delete the body and body_html keys if they are to
        # be omitted
        if self.omit_body:
            message_json.pop('body', None)
            message_json.pop('body_html', None)

        yield message_json
def extractAttachment(msg, dirname, uuid):
    """Filter the attachments of every multipart part of ``msg``.

    Python 2 code (``print`` statements). For each multipart part the payload
    list is rebuilt: parts without a filename are kept; named attachments are
    written out with ``writeFile`` and checked with
    ``Decrypt.Check_Encryption`` and ``clamcheck_buf``, then either kept or
    have their name collected in ``q_att``. Collected names are reported via
    ``elastic.UpdateAtts``.

    Args:
        msg: Message object providing ``walk()`` and ``as_string()``.
        dirname: Passed through to ``writeFile``.
        uuid: Used in the inserted link and passed to the check/update calls.

    Returns:
        The modified message serialised with ``msg.as_string()``.
    """
    lip = socket.gethostbyname(socket.gethostname())
    an = 0  # unused
    base_url = '''http://%s:4000/''' % lip
    FlagLink = False  # set once an attachment name has been put in q_att
    kuku = False  # unused
    #print msg.get_payload()
    for msgrep in msg.walk():
        i = 0  # incremented per attachment but never read
        if msgrep.is_multipart():
            payload = msgrep.get_payload()
            link = """<a href=3D"%s/%s/show">Somae file are here </a></head>""" % (
                base_url, str(uuid))
            newpayload = []
            q_att = []
            for attachment in payload:
                print attachment.get_content_type()
                if "html" in attachment.get_content_type():
                    if FlagLink:
                        # Insert the link just before the closing </head>.
                        # NOTE(review): a[1] raises IndexError when the HTML
                        # has no "</head>".
                        hhh = attachment.get_payload()
                        a = hhh.split("</head>")
                        newhhh = a[0] + link + a[1]
                        attachment.set_payload(newhhh)
                att_name = attachment.get_filename(None)
                if att_name is not None:
                    if att_name.lower() in ["winmail.dat", "win.dat"]:
                        file_buf = attachment.get_payload(decode=True)
                        winmail = TNEF(file_buf)
                        for att in winmail.attachments:
                            print att.name
                            f = writeFile(att.name, att.data, dirname)
                            # NOTE(review): the other branch unpacks this
                            # call's result into (pf, a); here the whole
                            # result is bound to `a` and `pf` is left
                            # unset/stale -- confirm intent. With no TNEF
                            # attachments `a` keeps whatever it held before.
                            a = Decrypt.Check_Encryption(f, uuid)
                    else:
                        f = writeFile(att_name,
                                      attachment.get_payload(decode=True),
                                      dirname)
                        pf, a = Decrypt.Check_Encryption(f, uuid)
                    if not a:
                        file_buf = attachment.get_payload(decode=True)
                        res = clamcheck_buf(file_buf)
                        print str(res).lower()
                        # Keep the attachment when pf is truthy or the scan
                        # result does not mention "encrypted".
                        if pf or "encrypted" not in str(res).lower():
                            newpayload.append(attachment)
                        else:
                            q_att.append(att_name)
                    else:
                        q_att.append(att_name)
                        FlagLink = True
                else:
                    newpayload.append(attachment)
                i += 1
            msgrep.set_payload(newpayload)
            if not q_att == []:
                elastic.UpdateAtts(q_att, uuid)
    #if FlagLink:
    #    msg = ModHtml(uuid,msg)
    return msg.as_string()