def _process(self, file):
    """Scan the 'Pictures' OLE stream and record the (offset, length) of
    each embedded image into self._files.

    :param file: path or file-like object accepted by OleFile.OleFileIO.
    """
    olefile = OleFile.OleFileIO(file)
    # As with pptx, a missing Pictures stream is not an error.
    if not olefile.exists("Pictures"):
        return
        # raise IOError("Pictures stream not found")
    self.__stream = olefile.openstream("Pictures")
    stream = self.__stream
    offset = 0
    while True:
        header = stream.read(self.headerlen)
        offset += self.headerlen
        if not header:
            break
        # Record header: instance, type, payload length (little endian).
        rec_instance, rec_type, rec_len = struct.unpack_from(
            "<HHL", header)
        # Move to the next header.
        stream.seek(rec_len, 1)
        if DEBUG:
            print("%X %X %sb" % (rec_type, rec_instance, rec_len))
        # `formats` maps (rec_type, rec_instance) -> (extra bytes, ext).
        # NOTE(review): an unknown record type makes .get() return None and
        # the unpacking below raise TypeError — presumably every type that
        # occurs in practice is covered; confirm.
        extrabytes, ext = formats.get((rec_type, rec_instance))
        # Strip the format-specific extra header bytes from the payload.
        rec_len -= extrabytes
        offset += extrabytes
        self._files.append((offset, rec_len))
        offset += rec_len
async def scan(self, payload: Payload, request: Request) -> WorkerResponse:
    """Extract every stream of an OLE document as a separate payload.

    Ole10Native streams are unwrapped so the embedded file's own bytes (and,
    when present, its original filename) are extracted instead of the raw
    wrapper stream.

    :param payload: the OLE document to dissect.
    :param request: scan request context (unused here, part of the API).
    :returns: WorkerResponse with one ExtractedPayload per stream and any
        per-stream errors collected (a bad stream does not abort the scan).
    """
    extracted: List[ExtractedPayload] = []
    errors: List[Error] = []
    ole_object = olefile.OleFileIO(payload.content)
    streams = ole_object.listdir(streams=True)
    # FIX: enumerate() yields a correct O(1) index; the previous
    # streams.index(stream) was O(n) per stream and returned the index of
    # the FIRST occurrence when two streams share the same path.
    for index, stream in enumerate(streams):
        try:
            stream_buffer = ole_object.openstream(stream).read()
            # Build a filesystem-safe name from the stream path.
            name = ''.join(
                filter(lambda x: x in string.printable, '_'.join(stream)))
            if stream_buffer.endswith(b'\x01Ole10Native'):
                ole_native = oleobj.OleNativeStream(stream_buffer)
                # Prefer the embedded original filename when available.
                if ole_native.filename:
                    name = f'{name}_{str(ole_native.filename)}'
                else:
                    name = f'{name}_olenative'
                content = ole_native.data
            else:
                content = stream_buffer
            # Single construction point — previously duplicated verbatim in
            # both branches.
            meta = PayloadMeta(
                should_archive=False,
                extra_data={
                    'index': index,
                    'name': name
                },
            )
            extracted.append(ExtractedPayload(content, meta))
        except Exception as err:
            errors.append(
                Error(
                    error=str(err),
                    plugin_name=self.plugin_name,
                    payload_id=payload.payload_id,
                ))
    return WorkerResponse(extracted=extracted, errors=errors)
def load(self):
    """Inspect self.source_file and set is_ole_file, is_encrypted and
    encryption_type; parses the EncryptionInfo stream when encrypted.

    Idempotent: subsequent calls return immediately.
    NOTE(review): non-OLE files get `return False` while other paths
    `return None` — callers should not rely on the return value; confirm.
    """
    # have we already loaded?
    if self.loaded:
        return
    self.loaded = True
    if not olefile.isOleFile(self.source_file):
        return False
    self.is_ole_file = True
    ole = olefile.OleFileIO(self.source_file)
    try:
        # is this document encrypted?
        if not ole.exists('encryptioninfo') or not ole.exists(
                'encryptedpackage'):
            self.is_encrypted = False
            return
        self.is_encrypted = True
        info_stream = ole.openstream('EncryptionInfo')
        # is this standard, extensible or agile encryption?
        # agile will have an xml tag after the first 8 bytes
        info_stream.seek(8)
        xml_header = info_stream.read(5)
        info_stream.seek(0)
        if xml_header == b'<?xml':
            self.encryption_type = ENCRYPTION_TYPE_AGILE
            self.parse_agile_encryption_info(info_stream)
            return
        # initially we assume it's standard
        # the code to parse standard will figure out if it's extensible
        self.encryption_type = ENCRYPTION_TYPE_STANDARD
        self.parse_standard_encryption_info(info_stream)
    finally:
        ole.close()
def process_file(filepath, field_filter_mode=None):
    """ decides which of the process_* functions to call """
    # Legacy OLE container: xls, ppt or doc.
    if olefile.isOleFile(filepath):
        logger.debug('Is OLE. Checking streams to see whether this is xls')
        if xls_parser.is_xls(filepath):
            logger.debug('Process file as excel 2003 (xls)')
            return process_xls(filepath)
        if is_ppt(filepath):
            logger.debug('is ppt - cannot have DDE')
            return u''
        logger.debug('Process file as word 2003 (doc)')
        with olefile.OleFileIO(filepath, path_encoding=None) as ole:
            return process_doc(ole)

    # RTF is detected by its magic bytes; the open handle is handed on.
    with open(filepath, 'rb') as file_handle:
        if file_handle.read(4) == RTF_START:
            logger.debug('Process file as rtf')
            return process_rtf(file_handle, field_filter_mode)

    try:
        doctype = ooxml.get_type(filepath)
        logger.debug('Detected file type: {0}'.format(doctype))
    except Exception as exc:
        logger.debug('Exception trying to xml-parse file: {0}'.format(exc))
        doctype = None

    # Dispatch table for the xml-based types.
    handlers = (
        ((ooxml.DOCTYPE_EXCEL,),
         'Process file as excel 2007+ (xlsx)', process_xlsx),
        ((ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_EXCEL_XML2003),
         'Process file as xml from excel 2003/2007+', process_excel_xml),
        ((ooxml.DOCTYPE_WORD_XML, ooxml.DOCTYPE_WORD_XML2003),
         'Process file as xml from word 2003/2007+', process_docx),
    )
    for doctypes, message, handler in handlers:
        if doctype in doctypes:
            logger.debug(message)
            return handler(filepath)

    if doctype is None:
        logger.debug('Process file as csv')
        return process_csv(filepath)

    # could be docx; if not: this is the old default code path
    logger.debug('Process file as word 2007+ (docx)')
    return process_docx(filepath, field_filter_mode)
def oleDump(inFile):
    """Collect OLE summary metadata from *inFile* as [key, value] pairs.

    :param inFile: path to the file to inspect.
    :returns: list of [name, value] string pairs; empty when *inFile* is a
        directory or not an OLE file.
    """
    def _text(value):
        # String fields come back as bytes; absent fields are None (the old
        # code crashed with AttributeError on None.decode()).
        if isinstance(value, bytes):
            return value.decode("utf-8", errors="replace")
        return "" if value is None else str(value)

    buff = []
    try:
        if olefile.isOleFile(inFile):
            with olefile.OleFileIO(inFile) as ole:
                # Parse the metadata once instead of re-parsing per field.
                meta = ole.get_metadata()
                buff.append(["office_author", _text(meta.author)])
                buff.append(["office_created", str(meta.create_time)])
                buff.append(["office_last-saved-author", _text(meta.last_saved_by)])
                buff.append(["office_last-saved-time", str(meta.last_saved_time)])
                try:
                    buff.append(["office_title", _text(meta.title)])
                    buff.append(["office_subject", _text(meta.subject)])
                except Exception:
                    pass
                # security == 1 flags a password-protected document.
                if meta.security == 1:
                    buff.append(["office_password", "True"])
                else:
                    buff.append(["office_password", "False"])
    except IsADirectoryError:
        pass
    return buff
def __init__(self, file):
    """Open a Word 97 (.doc) OLE container and locate its table stream.

    :param file: open binary file object; kept open for the lifetime of
        this object (closing self.ole would close it too).
    """
    self.file = file
    ole = olefile.OleFileIO(file)  # do not close this, would close file
    self.ole = ole
    self.format = "doc97"
    self.keyTypes = ["password"]
    self.key = None
    self.salt = None
    # FIB (File Information Block) layout:
    # https://msdn.microsoft.com/en-us/library/dd944620(v=office.12).aspx
    with ole.openstream("wordDocument") as stream:
        fib = _parseFib(stream)
    # fWhichTblStm selects which table stream holds the document tables:
    # https://msdn.microsoft.com/en-us/library/dd923367(v=office.12).aspx
    tablename = "1Table" if fib.base.fWhichTblStm == 1 else "0Table"
    Info = namedtuple("Info", ["fib", "tablename"])
    self.info = Info(
        fib=fib,
        tablename=tablename,
    )
def get_compound_file_binary(file):
    """Return the raw data of a Compound File Binary Format file.

    For .xls files the 'Workbook' stream content is returned; otherwise the
    whole file's bytes. Returns None (after a hint message) when the
    optional olefile dependency is missing.

    Compound File Binary Format Files
    https://stackoverflow.com/questions/12705527/reading-excel-files-with-xlrd
    """
    try:
        import olefile
        with open(file, 'rb') as f:
            if str(file).endswith('.xls'):
                ole = olefile.OleFileIO(f)
                # print(ole.listdir())
                if ole.exists('Workbook'):
                    d = ole.openstream('Workbook')
                    return d.read()
            # FIX: OleFileIO consumes the handle's position, so rewind
            # before falling back to returning the raw file contents.
            f.seek(0)
            return f.read()
    except ImportError:
        # Hint (Chinese): "olefile not found, please install it".
        print(Fore.RED + "注:找不到 olefile,请安装它: pip install olefile", Style.RESET_ALL)
        pass
def __init__(self, file):
    """Open a Word 97 (.doc) OLE container and locate its table stream.

    :param file: open binary file object; kept open for the lifetime of
        this object (closing self.ole would close it too).
    """
    self.file = file
    ole = olefile.OleFileIO(file)  # do not close this, would close file
    self.ole = ole
    self.format = "doc97"
    self.keyTypes = ['password']
    self.key = None
    self.salt = None
    # FIB (File Information Block) layout:
    # https://msdn.microsoft.com/en-us/library/dd944620(v=office.12).aspx
    with ole.openstream('wordDocument') as stream:
        fib = _parseFib(stream)
    # fWhichTblStm selects which table stream holds the document tables:
    # https://msdn.microsoft.com/en-us/library/dd923367(v=office.12).aspx
    tablename = '1Table' if fib.base.fWhichTblStm == 1 else '0Table'
    Info = namedtuple('Info', ['fib', 'tablename'])
    self.info = Info(
        fib=fib,
        tablename=tablename,
    )
def insert_olefile( self, filepath, icon, caption ):
    """Embed *filepath* in the document as an OLE package object (Python 2).

    Copies a template oleObject1.bin, overwrites its Ole10Native stream with
    the file's bytes, renders an EMF icon with *caption* into the ObjInfo
    stream, stores the result under word/embeddings/ and registers a
    relationship for it.
    """
    # Insert the file as OLE
    oletmpl = 'resource/oleObject1.bin'
    tmpolefile = tempfile.NamedTemporaryFile().name
    shutil.copy( oletmpl, tmpolefile )
    ole = olefile.OleFileIO(tmpolefile,write_mode=True)
    streams = ole.listdir()
    for s in streams:
        print s, ole.get_size(s)
    streamname = '\x01Ole10Native'
    with open(filepath,'rb') as f:
        size = ole.get_size(streamname)
        print 'Size: ' + str( size )
        # write_stream requires the exact existing stream size: pad with NULs.
        data = f.read().ljust(size,'\x00')
        print 'Data size: ' + str( len( data ) )
        ole.write_stream(streamname, data)
    # Insert file icon / name
    tmpemffile = tempfile.NamedTemporaryFile().name
    emf = pyemf.EMF(100,70,300)
    icotmpl = 'resource/' + icon + '.emf'
    emf.load(icotmpl)
    emf.TextOut( 10, 80, caption )
    emf.save(tmpemffile)
    streamname = '\x03ObjInfo'
    with open( tmpemffile, 'rb' ) as f:
        size = ole.get_size(streamname)
        print 'Size: ' + str( size )
        data = f.read().ljust(size,'\x00')
        print 'Data size: ' + str( len( data ) )
        ole.write_stream(streamname, data)
    ole.close()
    # Store the finished OLE object inside the docx package contents.
    intpath = 'word/embeddings/oleObject1.bin'
    with open( tmpolefile, 'rb' ) as f:
        self.contents[intpath] = f.read()
    # Get a rid
    rid = self.add_rel( 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject', intpath )
def _get_ole_metadata(fp):
    """Return a dict of decoded OLE summary metadata for *fp*.

    :param fp: path (or file object) accepted by olefile.OleFileIO.
    :returns: dict keyed by olefile SUMMARY_ATTRIBS names plus 'codepage',
        or None when the file does not exist.
    """
    try:
        # FIX: the OleFileIO handle was previously leaked.
        ole = olefile.OleFileIO(fp)
        try:
            raw = ole.get_metadata()
        finally:
            ole.close()
        tidied = {}
        # The value we get here is a signed 16-bit quantity, even though
        # the file format specifies values up to 65001 — map negatives
        # back into the unsigned range.
        tidied["codepage"] = raw.codepage
        if tidied["codepage"] < 0:
            tidied["codepage"] += 65536
        codec = _codepage_to_codec(tidied["codepage"])
        if codec:
            for name in olefile.OleMetadata.SUMMARY_ATTRIBS:
                if name in tidied:
                    continue
                value = getattr(raw, name)
                # String properties arrive as bytes in the file's codepage.
                if isinstance(value, bytes):
                    value, _ = codec.decode(value)
                tidied[name] = value
        return tidied
    except FileNotFoundError:
        return None
def ms_doc(ms_file_list):
    """Extract user names and creating software from legacy Office files.

    :param ms_file_list: iterable of paths to OLE documents.
    :returns: tuple (user_names, software_list) of cleaned strings.
    """
    software_list = []
    user_names = []

    def _clean(value):
        # Metadata fields may be None or bytes; the old code let the
        # resulting TypeError silently skip the entire file.
        if value is None:
            return ''
        if isinstance(value, bytes):
            value = value.decode('utf-8', errors='replace')
        return re.sub('[^0-9a-zA-Z]+', ' ', value)

    def _unscatter(text):
        # Collapse strings like " J o h n " produced by odd encodings.
        oddity = re.match('(\s\w\s+(\w\s+)+\w)', text)
        if oddity:
            return str(oddity.group(1)).replace(' ', '')
        return text

    info('Extracting MSDOCS MetaData')
    for filename in ms_file_list:
        try:
            data = olefile.OleFileIO(filename)
            try:
                meta = data.get_metadata()
            finally:
                data.close()  # FIX: handle was never closed
            author = _clean(meta.author)
            software = _clean(meta.creating_application)
            save_by = _clean(meta.last_saved_by)
            if author:
                user_names.append(str(_unscatter(author)).title())
            if software:
                software_list.append(_unscatter(software))
            if save_by:
                user_names.append(str(_unscatter(save_by)).title())
        except Exception:
            pass
    info('Finished Extracting MSDOC MetaData')
    return (user_names, software_list)
def decrypt_agile_aes_cbc(self, encryption_key):
    """Decrypt the EncryptedPackage stream (agile encryption, AES-CBC) into
    self.output_file.

    :param encryption_key: derived package encryption key (bytes).
    :returns: True on success.
    """
    SEGMENT_LENGTH = 4096  # the package is encrypted in 4096-byte segments
    ole = olefile.OleFileIO(self.source_file)
    ep = ole.openstream('EncryptedPackage')
    try:
        obuf = b''
        # First 8 bytes of the stream hold the plaintext size (low 4 read).
        totalSize = unpack('<I', ep.read(4))[0]
        #sys.stderr.write("totalSize: {}\n".format(totalSize))
        ep.seek(8)
        with open(self.output_file, 'wb') as fp:
            for i, ibuf in enumerate(iter(functools.partial(ep.read, SEGMENT_LENGTH), b'')):
                # Per-segment IV = hash(keyDataSalt || segment index),
                # truncated to the 16-byte AES block size.
                saltWithBlockKey = self.encryption_info.key_data_salt + pack('<I', i)
                iv = hashCalc(saltWithBlockKey, self.encryption_info.key_data_hash_algorithm).digest()
                iv = iv[:16]
                aes = AES.new(encryption_key, AES.MODE_CBC, iv)
                dec = aes.decrypt(ibuf)
                # NOTE(review): output is not truncated to totalSize, so the
                # final segment may include padding bytes — confirm upstream.
                fp.write(dec)
        return True
    finally:
        ole.close()
def get_notes(sticky_notes_file_path):
    """Read every note from a Windows Sticky Notes (.snt) OLE file.

    :param sticky_notes_file_path: path to the .snt file.
    :returns: list of {'text': <markdown>, 'color': None} dicts.
    """
    notes = []
    snt_file = olefile.OleFileIO(sticky_notes_file_path)
    try:
        for storage in snt_file.listdir(storages=True, streams=False):
            note_id = storage[0]  # UUID-like string representing the note ID
            note_text_rtf_file = '0'  # RTF content of the note
            with snt_file.openstream([note_id, note_text_rtf_file]) as note_content:
                rawdata = note_content.read()
            # The RTF body is decoded as ASCII (RTF control text is 7-bit);
            # a dead chardet.detect() call was removed here.
            note_text_rtf = rawdata.decode('ascii')
            notes.append({'text': getMarkdown(note_text_rtf), 'color': None})
    finally:
        # FIX: the file handle was previously leaked when decoding raised.
        snt_file.close()
    return notes
def _winoffice(self):
    """Processes a winoffice file using olefile/oletools, flagging macros,
    encryption, parsing issues and embedded flash as dangerous."""
    self.cur_file.add_log_details('processing_type', 'WinOffice')
    # Try as if it is a valid document
    oid = oletools.oleid.OleID(self.cur_file.src_path)
    if not olefile.isOleFile(self.cur_file.src_path):
        # Manual processing, may already count as suspicious
        try:
            ole = olefile.OleFileIO(self.cur_file.src_path,
                                    raise_defects=olefile.DEFECT_INCORRECT)
        except Exception:
            # FIX: bail out here — the old code fell through and raised
            # NameError on the unbound `ole`; the file is still copied.
            self.cur_file.add_log_details('not_parsable', True)
            self.cur_file.make_dangerous()
            self._safe_copy()
            return
        if ole.parsing_issues:
            self.cur_file.add_log_details('parsing_issues', True)
            self.cur_file.make_dangerous()
        else:
            if ole.exists('macros/vba') or ole.exists('Macros') \
                    or ole.exists('_VBA_PROJECT_CUR') or ole.exists('VBA'):
                self.cur_file.add_log_details('macro', True)
                self.cur_file.make_dangerous()
    else:
        indicators = oid.check()
        # Encrypted can be set by multiple checks on the script
        if oid.encrypted.value:
            self.cur_file.add_log_details('encrypted', True)
            self.cur_file.make_dangerous()
        if oid.macros.value or oid.ole.exists('macros/vba') or oid.ole.exists('Macros') \
                or oid.ole.exists('_VBA_PROJECT_CUR') or oid.ole.exists('VBA'):
            self.cur_file.add_log_details('macro', True)
            self.cur_file.make_dangerous()
        for i in indicators:
            if i.id == 'ObjectPool' and i.value:
                # FIXME: Is it suspicious?
                self.cur_file.add_log_details('objpool', True)
            elif i.id == 'flash' and i.value:
                self.cur_file.add_log_details('flash', True)
                self.cur_file.make_dangerous()
    self._safe_copy()
def __init__(self, path: str):
    """Open an MSI-style OLE file for editing and load its tables.

    :param path: path to the OLE file; opened in write mode.
    """
    self._olefile = olefile.OleFileIO(path, write_mode=True)
    # ExitStack owns the temp resources so they are released together.
    self._stack = contextlib.ExitStack()
    self._tmp_dir = self._stack.enter_context(utils.TempDirectory())

    def ReadStream(name):
        # Read a whole named stream into memory.
        with self._olefile.openstream(name) as stream:
            return stream.read(self._olefile.get_size(name))

    string_pool_raw = ReadStream(STRING_POOL_STREAM_NAME)
    string_data_raw = ReadStream(STRING_DATA_STREAM_NAME)
    self._string_pool = StringPool(string_pool_raw, string_data_raw)
    feature_raw = ReadStream(FEATURE_STREAM_NAME)
    self._feature_table = FeatureTable(feature_raw, self._string_pool)
    # Extract the embedded CAB archive into the temp dir for repacking.
    cab_path = os.path.join(self._tmp_dir, "input.cab")
    cab_tmp_path = os.path.join(self._tmp_dir, "cab_tmp_dir")
    with open(cab_path, "wb") as f:
        f.write(ReadStream(GRR_CAB_STREAM_NAME))
    self._cab = cab_utils.Cab(cab_path, cab_tmp_path)
    self._cab.ExtractFiles()
    self._cab.WriteFile("PaddingFile", b"")
def seed_analyzer(self):
    """Map every storage/stream of the HWP seed file to its size (Python 2).

    Fills self.hwp_field (listdir() paths) and self.hwp_field_size, a dict
    of 'storage/stream' (or bare stream name) -> size in bytes.
    """
    self.ole = olefile.OleFileIO(self.seed, write_mode=True)
    self.hwp_field = self.ole.listdir()
    print '\n'
    print '[*] HWP Seed info\n'
    # Store each hwp field and its size in the dictionary.
    for i in range(len(self.hwp_field)):
        field = self.hwp_field[i]
        field_size = self.ole.get_size(self.hwp_field[i])
        if len(field) > 1:
            # Nested entry: key is 'storage/stream'.
            storage = field[0]
            stream = field[1]
            field = str(storage + '/' + stream)
            self.hwp_field_size[field] = field_size
        else:
            # Top-level stream: key is its bare name.
            field = str(field[0])
            self.hwp_field_size[field] = field_size
def get_platform(self):
    """
    Gets the platform that the project is targeting (ME or SE)
    returns str
    """
    # The project file is an OLE container; the .med/.sed entry name tells
    # us which product created it (the last matching entry wins).
    with olefile.OleFileIO(self.file) as ole:
        entries = ole.listdir()
    candidate = ''
    for entry in entries:
        top = entry[0]
        if top.endswith(('.med', '.sed')):
            candidate = top
    suffix = candidate[-3:].lower()
    if suffix == "med":
        return "FactoryTalk View Studio ME"
    if suffix == "sed":
        return "FactoryTalk View Studio SE"
    return "Unknown platform"
def _read_doc_vars(fname):
    """
    Use a heuristic to try to read in document variable names and values from
    the 1Table OLE stream. Note that this heuristic is kind of hacky and is not
    close to being a general solution for reading in document variables, but it
    serves the need for ViperMonkey emulation.

    TODO: Replace this when actual support for reading doc vars is added to
    olefile.
    """
    try:
        # Pull every wide-character string out of the 1Table OLE data.
        ole = olefile.OleFileIO(fname, write_mode=False)
        table_data = ole.openstream("1Table").read()
        matches = re.findall("(([^\x00-\x1F\x7F-\xFF]\x00){4,})", table_data)
        names = [m[0].replace("\x00", "").strip() for m in matches]
        # Pair each string with its successor: treat it as a variable whose
        # value is the next string. This invents "variables" that do not
        # really exist, but valid VBA never reads them, so emulation is safe.
        # TODO: Figure out if this is 1 or 2 positions ahead.
        return list(zip(names, names[1:]))
    except Exception as e:
        log.error("Cannot read document variables. " + str(e))
        return []
def metadata(self, args, file, opts):
    """Return all OLE summary/document metadata of *file* as a dict of
    strings (bytes fields are decoded as UTF-8)."""
    try:
        meta = olefile.OleFileIO(file.file_path).get_metadata()
    except Exception:
        raise error.CommandWarning('file ' + str(file.file_path) + ' is not a valid ole file')
    attribs = ['codepage', 'title', 'subject', 'author', 'keywords',
               'comments', 'template', 'last_saved_by', 'revision_number',
               'total_edit_time', 'last_printed', 'create_time',
               'last_saved_time', 'num_pages', 'num_words', 'num_chars',
               'thumbnail', 'creating_application', 'security',
               'codepage_doc', 'category', 'presentation_target', 'bytes',
               'lines', 'paragraphs', 'slides', 'notes', 'hidden_slides',
               'mm_clips', 'scale_crop', 'heading_pairs', 'titles_of_parts',
               'manager', 'company', 'links_dirty', 'chars_with_spaces',
               'unused', 'shared_doc', 'link_base', 'hlinks',
               'hlinks_changed', 'version', 'dig_sig', 'content_type',
               'content_status', 'language', 'doc_version']
    output = {}
    for attrib in attribs:
        # Single attribute lookup per field; decode bytes, stringify rest.
        value = getattr(meta, attrib)
        if isinstance(value, bytes):
            value = value.decode("utf-8")
        output[attrib] = str(value)
    return output
def main(pathname):
    """Print OLE summary metadata for *pathname*, the newest of its
    create/last-saved timestamps, and whether it carries a digital
    signature."""
    # Check file exists:
    if not os.path.exists(pathname):
        raise FileNotFoundError(pathname)

    ole = olefile.OleFileIO(pathname)
    meta = ole.get_metadata()
    for prop in meta.SUMMARY_ATTRIBS:
        print((prop, getattr(meta, prop)))

    # Newest of creation time vs. last-saved time.
    create_time = getattr(meta, "create_time")
    last_saved_time = getattr(meta, "last_saved_time")
    max_time = max((create_time, last_saved_time))
    max_time_yyyymmdd = max_time.strftime("%Y-%m-%d")
    print(f"Max Mod Time: {max_time_yyyymmdd}")

    if not ole.exists("\x05DigitalSignature"):
        print("WARNING: File not signed!")
    else:
        with ole.openstream("\x05DigitalSignature") as fh:
            sig_data = fh.read()
        print(f"Digital Signature: {len(sig_data)} bytes")
    ole.close()
def process_file(filename, extract):
    """Dispatch on file type (OLE / extracted packager stream / RTF) and
    print any embedded Packager object found (Python 2).

    :param filename: path of the file to inspect.
    :param extract: bool, write the embedded payload out named by its MD5.
    """
    pkgobj = None
    if olefile.isOleFile(filename):
        print ' [*] File is an OLE file...'
        ole = olefile.OleFileIO(filename)
        filelist = ole.listdir()
        print ' [*] Processing Streams...'
        for fname in filelist:
            if '\x01Ole10Native' in fname:
                print ' [*] Found Ole10Native Stream...checking for packager data'
                sdata = ole.openstream(fname).read()
                # Bytes 4..6 == 0x0200 marks Packager-formatted data.
                if sdata[4:6].encode('hex') == '0200':
                    print ' [*] Stream contains Packager Formatted data...'
                    pkgobj = PackagerStream(sdata[4:].encode('hex'))
                    print
                    print pkgobj
    elif isstream(filename):
        with open(filename, 'rb') as f:
            sdata = f.read()
        print ' [*] File is an extracted Packager Stream'
        print ' [*] Stream contains Packager Formatted data...'
        pkgobj = PackagerStream(sdata[4:].encode('hex'))
        print
        print pkgobj
    else:
        # Treat the file as an rtf doc
        rd = RTFDoc(filename)
        print ' [*] Scanning file for embedded objects'
        rd.scan()
    if extract:
        try:
            # NOTE(review): pkgobj can still be None here (e.g. RTF path);
            # the except below swallows the resulting AttributeError.
            print ' Extracting embedded data as %s' % pkgobj.gethash('md5')
            with open(pkgobj.gethash('md5'), 'wb') as out:
                out.write(pkgobj.Data)
        except Exception as e:
            print ' [!] An error occurred while writing the file :: %s' % e
def extract_ole_metadata(doc_path) -> str:
    """Build a human-readable metadata summary for an OLE document.

    :param doc_path: path to the document.
    :returns: newline-terminated lines for author, title, last-saved-by,
        creation time and modification time; empty/missing fields show NA.
    """
    ole = olefile.OleFileIO(doc_path)
    try:
        olemeta = ole.get_metadata()
    finally:
        ole.close()  # FIX: handle was never closed

    def _line(label, value):
        # Normalise falsy values to the NA placeholder.
        return label + (value if value else NA) + "\n"

    def _text(attr):
        # String fields are stored as bytes; absent fields are None.
        value = getattr(olemeta, attr)
        return value.decode(ENCODING) if value is not None else None

    def _time(attr):
        # FIX: .strftime() previously raised AttributeError for a missing
        # (None) timestamp.
        value = getattr(olemeta, attr)
        return value.strftime(TIME_FORMAT) if value is not None else None

    metadata = ""
    metadata += _line(AUTHOR, _text("author"))
    metadata += _line(TITLE, _text("title"))
    metadata += _line(LAST_SAVED_BY, _text("last_saved_by"))
    metadata += _line(CREATE_TIME, _time("create_time"))
    metadata += _line(MODIFIED_TIME, _time("last_saved_time"))
    return metadata
def load_data(self):
    """Read the `field` stream from every OLE file in self.im_file and
    return the per-file byte values as a numpy array of arrays.

    Non-OLE entries are dropped from self.im_file as a side effect.
    """
    # FIX: the original removed items from self.im_file while iterating
    # over it, which skips the element following each removal. Filter
    # into a new list instead.
    self.im_file = [f for f in self.im_file if olefile.isOleFile(f)]

    hex_content = []
    for im_file in self.im_file:
        ole = olefile.OleFileIO(im_file, write_mode=True)
        # NOTE(review): `field` is not defined in this method — presumably
        # a module/class-level stream name; confirm.
        stream = ole.openstream(field)
        data = stream.read()
        stream.seek(0)
        hex_content.append(data)

    total_list = []
    for raw in hex_content:
        # One int (0-255) per byte of the stream.
        string_hex = binascii.hexlify(raw)
        hex_list = [
            int(string_hex[i:i + 2], 16)
            for i in range(0, len(string_hex), 2)
        ]
        total_list.append(np.asarray(hex_list))
    # Always return an ndarray (the old code returned a plain [] when no
    # valid files were found).
    return np.asarray(total_list)
def read(doc, fileName):
    """Read a 3D Studio Max OLE file into the FreeCAD document *doc*,
    parsing each known storage section in order."""
    if (olefile.isOleFile(fileName)):
        setEndianess(LITTLE_ENDIAN)
        ole = olefile.OleFileIO(fileName)
        # NOTE(review): the DocumentSummaryInformation result is immediately
        # overwritten by the SummaryInformation read below — the first call
        # appears to be dead; confirm whether it is needed.
        p = ole.getproperties('\x05DocumentSummaryInformation', convert_time=True, no_conversion=[10])
        p = ole.getproperties('\x05SummaryInformation', convert_time=True, no_conversion=[10])
        if (DEBUG): FreeCAD.Console.PrintMessage("==== ClassData ===\n")
        readClassData(ole, fileName)
        if (DEBUG): FreeCAD.Console.PrintMessage("==== Config ===\n")
        readConfig(ole, fileName)
        if (DEBUG): FreeCAD.Console.PrintMessage("==== DllDirectory ===\n")
        readDllDirectory(ole, fileName)
        if (DEBUG): FreeCAD.Console.PrintMessage("==== ClassDirectory3 ===\n")
        readClassDirectory3(ole, fileName)
        if (DEBUG): FreeCAD.Console.PrintMessage("==== VideoPostQueue ===\n")
        readVideoPostQueue(ole, fileName)
        if (DEBUG): FreeCAD.Console.PrintMessage("==== Scene ===\n")
        readScene(doc, ole, fileName)
    else:
        FreeCAD.Console.PrintError("File seems to be no 3D Studio Max file!")
def run(self) :
    """Extract a text preview from the file at self.path.

    For OLE-based formats (extension in self.OLE_EXT_LIST, e.g. HWP) the
    'PrvText' preview stream is decoded as UTF-16 and written out via
    self.fileWrite(); anything else falls back to self.fileToText().
    """
    print ('Tika Start')
    print ( '확장자명 : %s' % self.ext )
    if self.ext in self.OLE_EXT_LIST :
        print (self.ext)
        ole = olefile.OleFileIO(self.path)
        hwpTree = ole.listdir()
        # The preview stream holds UTF-16 encoded plain text.
        contents = ole.openstream('PrvText').read()
        self.fileWrite(contents.decode('utf-16'))
    else :
        self.fileToText()
def obtain_meta():
    """Walk the directory tree given in sys.argv[1] and, for each file not
    seen before, print its path, SHA-256, MD5 and OLE metadata dump.

    Files that fail to parse as OLE (or to open) are silently skipped,
    matching the original best-effort behaviour.
    """
    for root, dirs, files in os.walk(sys.argv[1]):
        for name in files:
            try:
                if name in file_l:
                    continue
                file_l.append(name)
                # FIX: os.walk yields bare filenames — join with `root` so
                # this works regardless of the current working directory.
                path = os.path.join(root, name)
                ole_win = olefile.OleFileIO(path, raise_defects=olefile.DEFECT_INCORRECT)
                file_hash_SHA256 = hashlib.sha256()
                file_hash_MD5 = hashlib.md5()
                with open(path, 'rb') as f:
                    chunk = f.read(BUFFER_SIZE)
                    while chunk:
                        file_hash_SHA256.update(chunk)
                        file_hash_MD5.update(chunk)
                        chunk = f.read(BUFFER_SIZE)
                print(name)
                print(os.path.abspath(path))
                print("SHA-256:", file_hash_SHA256.hexdigest())
                print('MD5:', file_hash_MD5.hexdigest())
                meta = ole_win.get_metadata()
                print(meta.dump())
                print('')
            except Exception:
                # Best-effort: skip unreadable / non-OLE files.
                pass
def recognize(cls, ftg):
    """Return True when ftg.data is an OLE file; on success attach the
    parsed OleFileIO and root-storage CLSID details to *ftg*.
    """
    # Here there's an issue with non-OLE files smaller than 1536 bytes
    # see https://github.com/decalage2/olefile/issues/142
    # Workaround: pad data when it's smaller than 1536 bytes
    # TODO: use the new data parameter of isOleFile when it's implemented
    if len(ftg.data) < 1536:
        data = ftg.data + (b'\x00' * 1536)
    else:
        data = ftg.data
    # The padded copy is only used for detection; parsing below uses the
    # original ftg.data.
    if olefile.isOleFile(data):
        # open the OLE file
        try:
            # Open and parse the OLE file:
            ftg.olefile = olefile.OleFileIO(ftg.data)
            # Extract the CLSID of the root storage
            ftg.root_clsid = ftg.olefile.root.clsid
            ftg.root_clsid_name = clsid.KNOWN_CLSIDS.get(
                ftg.root_clsid, None)
        except:
            # TODO: log the error
            # Parsing failed despite the OLE signature matching.
            return False
        return True
    else:
        return False
def decode():
    """Parse command-line options, unpack the given zip archive next to
    itself, and dump every stream of each embedded .bin OLE object to disk.

    Exits with status 2 on bad options.
    """
    version()
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hf:", ["help", "file="])
    except getopt.GetoptError:
        help()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            help()
            sys.exit()
        elif opt in ("-f", "--file"):
            with zipfile.ZipFile(arg, "r") as zip_ref:
                output = arg + "_unpacked"
                zip_ref.extractall(output)
            for path, subdirs, files in os.walk(output):
                for name in files:
                    sys.stdout.write("%s\n" % name)
                    if name.endswith(".bin"):
                        oleFile = os.path.join(path, name)
                        ole = olefile.OleFileIO(oleFile)
                        for entry in ole.listdir():
                            sys.stdout.write("%s\n" % entry)
                            data = ole.openstream(entry).read()
                            parcelID = os.path.join(path, entry[0])
                            # FIX: stream content is binary — write in 'wb'
                            # mode (text 'w' raised TypeError on Python 3)
                            # and close the handle deterministically.
                            with open(parcelID, 'wb') as outFile:
                                outFile.write(data)
                        ole.close()
def process_file_scanexpr (container, filename, data):
    """
    Process a single file

    :param container: str, path and filename of container if the file is within
    a zip archive, None otherwise.
    :param filename: str, path and filename of file on disk, or within the container.
    :param data: bytes, content of the file if it is in a container, None if it is a file on disk.
    """
    #TODO: replace print by writing to a provided output file (sys.stdout by default)
    if container:
        display_filename = '%s in %s' % (filename, container)
    else:
        display_filename = filename
    print '='*79
    print 'FILE:', display_filename
    all_code = ''
    try:
        #TODO: handle olefile errors, when an OLE file is malformed
        vba = VBA_Parser(filename, data, relaxed=True)
        print 'Type:', vba.type
        if vba.detect_vba_macros():
            # Read in document metadata.
            ole = olefile.OleFileIO(filename)
            vba_library.meta = ole.get_metadata()
            #print 'Contains VBA Macros:'
            for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros():
                # hide attribute lines:
                #TODO: option to disable attribute filtering
                vba_code_filtered = filter_vba(vba_code)
                print '-'*79
                print 'VBA MACRO %s ' % vba_filename
                print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path))
                print '- '*39
                # detect empty macros:
                if vba_code_filtered.strip() == '':
                    print '(empty macro)'
                else:
                    # TODO: option to display code
                    print vba_code_filtered
                    # Accumulate de-obfuscated code for expression scanning.
                    vba_code = vba_collapse_long_lines(vba_code)
                    all_code += '\n' + vba_code
            print '-'*79
            print 'EVALUATED VBA EXPRESSIONS:'
            t = prettytable.PrettyTable(('Obfuscated expression', 'Evaluated value'))
            t.align = 'l'
            t.max_width['Obfuscated expression'] = 36
            t.max_width['Evaluated value'] = 36
            for expression, expr_eval in scan_expressions(all_code):
                t.add_row((repr(expression), repr(expr_eval)))
            print t
        else:
            print 'No VBA macros found.'
    except: #TypeError:
        #raise
        #TODO: print more info if debug mode
        #print sys.exc_value
        # display the exception with full stack trace for debugging, but do not stop:
        traceback.print_exc()
    print ''
def process_file (container, filename, data, altparser=False, strip_useless=False): """ Process a single file :param container: str, path and filename of container if the file is within a zip archive, None otherwise. :param filename: str, path and filename of file on disk, or within the container. :param data: bytes, content of the file if it is in a container, None if it is a file on disk. """ #TODO: replace print by writing to a provided output file (sys.stdout by default) if container: display_filename = '%s in %s' % (filename, container) else: display_filename = filename print '='*79 print 'FILE:', display_filename vm = ViperMonkey() try: #TODO: handle olefile errors, when an OLE file is malformed vba = VBA_Parser(filename, data, relaxed=True) print 'Type:', vba.type if vba.detect_vba_macros(): # Read in document metadata. try: ole = olefile.OleFileIO(filename) vba_library.meta = ole.get_metadata() except: vba_library.meta = {} # Parse the VBA streams. comp_modules = parse_streams(vba, strip_useless) for m in comp_modules: vm.add_compiled_module(m) # Pull out form variables. for (subfilename, stream_path, form_variables) in vba.extract_form_strings_extended(): if form_variables is not None: var_name = form_variables['name'] macro_name = stream_path if ("/" in macro_name): start = macro_name.rindex("/") + 1 macro_name = macro_name[start:] global_var_name = (macro_name + "." + var_name).encode('ascii', 'ignore') val = form_variables['value'] vm.globals[global_var_name.lower()] = val log.debug("Added VBA form variable %r = %r to globals." % (global_var_name, val)) print '-'*79 print 'TRACING VBA CODE (entrypoint = Auto*):' vm.trace() # print table of all recorded actions print('Recorded Actions:') print(vm.dump_actions()) else: print 'No VBA macros found.' except: #TypeError: #raise #TODO: print more info if debug mode #print sys.exc_value # display the exception with full stack trace for debugging, but do not stop: traceback.print_exc() print ''