def unarc(self, arc_engine_id, arc_name, fname_in_arc):
    """Extract a single member from an OLE container or its VBA macros.

    Args:
        arc_engine_id: 'arc_ole' to read a raw OLE stream, 'arc_vba' to
            read the source code of a VBA macro.
        arc_name: path of the container file to open.
        fname_in_arc: name of the stream or macro to extract.

    Returns:
        The stream contents or the macro source code. None when the
        requested macro is not found or the engine ID is not handled here.
    """
    if arc_engine_id == 'arc_ole':
        # Imported lazily so unrelated engine IDs do not require oletools.
        import oletools.thirdparty.olefile.olefile as olefile

        o = olefile.OleFileIO(arc_name)
        try:
            # Open the stream stored inside the OLE file and read it fully.
            return o.openstream(fname_in_arc).read()
        finally:
            o.close()  # released even when the stream cannot be read

    if arc_engine_id == 'arc_vba':
        import oletools.olevba as vba

        data = None
        v = vba.VBA_Parser(arc_name)
        try:
            if v.detect_vba_macros():  # the document contains macros
                # extract_all_macros() returns an iterable of macro records;
                # it must be iterated directly, not called.
                for macro in v.extract_all_macros():
                    if v.type == 'OLE':
                        name = macro[1]
                    elif v.type == 'OpenXML':
                        name = macro[0] + '/' + macro[1]
                    else:
                        name = 'UNKNOWN'

                    if name == fname_in_arc:
                        data = macro[3]  # VBA code
                        break
        finally:
            v.close()
        return data

    return None
def arclist(self, filename, fileformat):
    """List the scannable members of an OLE or OOXML document.

    Args:
        filename: path of the document to inspect.
        fileformat: container of detected format keys; 'ff_ole' and
            'ff_zip' are the ones handled here ('ff_ole' takes precedence).

    Returns:
        A list of [engine_id, member_name] pairs. OLE streams are reported
        with 'arc_ole', VBA macros with 'arc_vba'. Empty when neither
        supported format key is present.
    """
    file_scan_list = []  # [engine ID, member name] pairs to be scanned

    is_ole = 'ff_ole' in fileformat
    if not is_ole and 'ff_zip' not in fileformat:
        # Nothing to do; return before importing oletools at all.
        return file_scan_list

    if is_ole:
        import oletools.thirdparty.olefile.olefile as olefile

        # Collect the OLE streams.
        o = olefile.OleFileIO(filename)
        try:
            for path in o.listdir():
                name = '/'.join(path)
                if o.get_type(name) == olefile.STGTY_STREAM:  # a stream
                    file_scan_list.append(['arc_ole', name])
        finally:
            o.close()

    import oletools.olevba as vba

    # Collect the VBA macros (plain OLE and OOXML share this logic).
    v = vba.VBA_Parser(filename)
    try:
        if v.detect_vba_macros():  # the document contains macros
            # extract_all_macros() returns an iterable of macro records;
            # it must be iterated directly, not called.
            for macro in v.extract_all_macros():
                if is_ole:
                    name = macro[1]
                else:
                    # OOXML: prefix with the first record field.
                    name = macro[0] + '/' + macro[1]
                # Diagnostic output kept from the original implementation;
                # the single-argument form prints the same on Python 2 and 3.
                print(name + '\n' + macro[3] + '\n')
                file_scan_list.append(
                    ['arc_vba', name.encode('ascii', 'ignore')])
    finally:
        v.close()

    return file_scan_list
def close_destination(self, destination):
    """Handle the end of an RTF destination; decode it if it is 'objdata'.

    For an ``objdata`` destination the hex payload is cleaned and decoded,
    a new RtfObject is appended to ``self.objects``, and an attempt is made
    to parse the decoded bytes as an OLE 1.0 object (including OLE Package
    and embedded OLE file details). Other destinations are ignored.

    Args:
        destination: the destination being closed; its ``cword``, ``start``,
            ``end`` and ``data`` attributes are read.
    """
    if destination.cword != b'objdata':
        return

    log.debug('*** Close object data at index %Xh' % self.index)
    rtfobj = RtfObject()
    self.objects.append(rtfobj)
    rtfobj.start = destination.start
    rtfobj.end = destination.end

    # Filter out all whitespaces first (just ignored):
    hexdata1 = destination.data.translate(None, b' \t\r\n\f\v')
    # Then filter out any other non-hex character:
    hexdata = re.sub(b'[^a-fA-F0-9]', b'', hexdata1)
    if len(hexdata) < len(hexdata1):
        # this is only for debugging:
        nonhex = re.sub(b'[a-fA-F0-9]', b'', hexdata1)
        log.debug('Found non-hex chars in hexdata: %r' % nonhex)
    # MS Word accepts an extra hex digit, so we need to trim it if present:
    if len(hexdata) & 1:
        log.debug('Odd length, trimmed last byte.')
        hexdata = hexdata[:-1]
    rtfobj.hexdata = hexdata
    object_data = binascii.unhexlify(hexdata)
    rtfobj.rawdata = object_data

    # TODO: check if all hex data is extracted properly
    obj = oleobj.OleObject()
    try:
        obj.parse(object_data)
        rtfobj.format_id = obj.format_id
        rtfobj.class_name = obj.class_name
        rtfobj.oledata_size = obj.data_size
        rtfobj.oledata = obj.data
        rtfobj.is_ole = True
        if obj.class_name.lower() == b'package':
            opkg = oleobj.OleNativeStream(bindata=obj.data, package=True)
            rtfobj.filename = opkg.filename
            rtfobj.src_path = opkg.src_path
            rtfobj.temp_path = opkg.temp_path
            rtfobj.olepkgdata = opkg.data
            rtfobj.is_package = True
        elif olefile.isOleFile(obj.data):
            ole = olefile.OleFileIO(obj.data)
            try:
                rtfobj.clsid = ole.root.clsid
            finally:
                # Only the CLSID is needed, so release the parser right away.
                ole.close()
            rtfobj.clsid_desc = clsid.KNOWN_CLSIDS.get(
                rtfobj.clsid,
                'unknown CLSID (please report at https://github.com/decalage2/oletools/issues)'
            )
    except Exception:
        # Best effort: data that does not parse is kept as raw/hex only.
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        log.debug('*** Not an OLE 1.0 Object')
def process_file(container, filename, data, output_dir=None):
    """Extract files embedded in '\\x01Ole10Native' streams of an OLE file.

    Each matching stream is parsed as an OLE native stream and its payload
    is written to disk. Progress is reported with print().

    Args:
        container: not used by this function.
        filename: name of the file being processed; used to build output
            file names and, when ``data`` is None, to read the content.
        data: content of the file, or None to read it from ``filename``.
        output_dir: directory for extracted files (created if missing).
            When empty or None, files are written next to ``filename``.
    """
    if output_dir:
        if not os.path.isdir(output_dir):
            log.info('creating output directory %s' % output_dir)
            os.mkdir(output_dir)
        target_dir = output_dir
    else:
        target_dir = os.path.dirname(filename)
    fname_prefix = os.path.join(target_dir, sanitize_filename(filename))

    # TODO: option to extract objects to files (false by default)
    if data is None:
        with open(filename, 'rb') as f:
            data = f.read()
    print('-' * 79)
    print('File: %r - %d bytes' % (filename, len(data)))

    ole = olefile.OleFileIO(data)
    try:
        index = 1  # counter used to name objects that have no filename
        for stream in ole.listdir():
            if stream[-1] != '\x01Ole10Native':
                continue
            objdata = ole.openstream(stream).read()
            stream_path = '/'.join(stream)
            log.debug('Checking stream %r' % stream_path)
            try:
                print('extract file embedded in OLE object from stream %r:'
                      % stream_path)
                print('Parsing OLE Package')
                opkg = OleNativeStream(bindata=objdata)
                print('Filename = %r' % opkg.filename)
                print('Source path = %r' % opkg.src_path)
                print('Temp path = %r' % opkg.temp_path)
                if opkg.filename:
                    fname = '%s_%s' % (fname_prefix,
                                       sanitize_filename(opkg.filename))
                else:
                    fname = '%s_object_%03d.noname' % (fname_prefix, index)
                print('saving to file %s' % fname)
                with open(fname, 'wb') as out:
                    out.write(opkg.data)
                index += 1
            except Exception:
                # Best effort: skip streams that cannot be parsed or saved.
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate.
                log.debug('*** Not an OLE 1.0 Object')
    finally:
        ole.close()
def main():
    """Command-line entry point: display OLE structures of the given files.

    Parses the command-line options, then for every file found by
    xglob.iter_files() opens it as an OLE file and shows the header, FAT
    and/or MiniFAT as requested. With no display option, only the header is
    shown and a usage tip is printed at the end. With no file argument, the
    banner, module docstring and help are printed and the program exits.
    """
    usage = 'usage: olemap [options] <filename> [filename2 ...]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-r", action="store_true", dest="recursive",
                      help='find files recursively in subdirectories.')
    parser.add_option(
        "-z", "--zip", dest='zip_password', type='str', default=None,
        help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)'
    )
    parser.add_option(
        "-f", "--zipfname", dest='zip_fname', type='str', default='*',
        help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)'
    )
    # parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
    #                         help="logging level debug/info/warning/error/critical (default=%default)")
    parser.add_option("--header", action="store_true", dest="header",
                      help='Display the OLE header (default: yes)')
    parser.add_option("--fat", action="store_true", dest="fat",
                      help='Display the FAT (default: no)')
    parser.add_option("--minifat", action="store_true", dest="minifat",
                      help='Display the MiniFAT (default: no)')
    parser.add_option('-x', "--exdata", action="store_true", dest="extra_data",
                      help='Display a hex dump of extra data at end of file')

    # TODO: add logfile option

    (options, args) = parser.parse_args()

    # Print help if no arguments are passed
    if not args:
        print(BANNER)
        print(__doc__)
        parser.print_help()
        sys.exit()

    # if no display option is provided, set defaults:
    default_options = False
    if not (options.header or options.fat or options.minifat):
        options.header = True
        # options.fat = True
        # options.minifat = True
        default_options = True

    # print banner with version
    print(BANNER)

    for container, filename, data in xglob.iter_files(
            args, recursive=options.recursive,
            zip_password=options.zip_password, zip_fname=options.zip_fname):
        # TODO: handle xglob errors
        # ignore directory names stored in zip files:
        if container and filename.endswith('/'):
            continue
        full_name = '%s in %s' % (filename, container) if container else filename
        print("-" * 79)
        print('FILE: %s\n' % full_name)
        if data is not None:
            # data extracted from zip file
            ole = olefile.OleFileIO(data)
        else:
            # normal filename
            ole = olefile.OleFileIO(filename)

        try:
            if options.header:
                show_header(ole, extra_data=options.extra_data)
            if options.fat:
                show_fat(ole)
            if options.minifat:
                show_minifat(ole)
        finally:
            # Close the file even if one of the display functions fails.
            ole.close()

    # if no display option is provided, print a tip:
    if default_options:
        print(
            'To display the FAT or MiniFAT structures, use options --fat or --minifat, and -h for help.'
        )
def extract_streams(self, file_name, file_contents):
    """Process embedded OLE streams and extract embedded RTF objects.

    If ``file_name`` is a zip archive, every member is written to a
    temporary file in ``self.working_directory`` and opened as OLE when it
    is one. ``file_name`` itself is also opened when it is an OLE file.
    Each OLE file is passed to ``self.process_ole_stream``. RTF objects
    found in ``file_contents`` are written to the working directory and
    registered with ``self.request.add_extracted``. Any error is logged at
    debug level rather than raised.

    Args:
        file_name: path of the file being analysed.
        file_contents: content of the file, scanned for RTF objects.
    """
    oles = {}  # member/file name -> open OLE file object
    try:
        streams_res = ResultSection(score=SCORE.INFO,
                                    title_text="Embedded document stream(s)")
        is_zip = False
        is_ole = False

        # Get the OLEs
        if zipfile.is_zipfile(file_name):
            is_zip = True
            z = zipfile.ZipFile(file_name)
            try:
                for f in z.namelist():
                    if f in oles:
                        continue
                    bin_data = z.read(f)
                    bin_fname = os.path.join(
                        self.working_directory,
                        "{}.tmp".format(hashlib.sha256(bin_data).hexdigest()))
                    # Binary mode: member data is raw bytes and must not go
                    # through newline translation or text encoding.
                    with open(bin_fname, 'wb') as bin_fh:
                        bin_fh.write(bin_data)
                    if olefile.isOleFile(bin_fname):
                        oles[f] = olefile.OleFileIO(bin_fname)
                    elif olefile2.isOleFile(bin_fname):
                        oles[f] = olefile2.OleFileIO(bin_fname)
            finally:
                z.close()

        if olefile.isOleFile(file_name):
            is_ole = True
            oles[file_name] = olefile.OleFileIO(file_name)
        elif olefile2.isOleFile(file_name):
            is_ole = True
            oles[file_name] = olefile2.OleFileIO(file_name)

        if is_zip and is_ole:
            streams_res.report_heuristics(Oletools.AL_Oletools_002)

        decompressed_macros = False
        for ole_file in oles.values():
            try:
                decompressed_macros |= self.process_ole_stream(ole_file,
                                                               streams_res)
            except Exception:
                # Best effort: a failure on one OLE file must not prevent
                # the others from being processed.
                continue

        if decompressed_macros:
            streams_res.score = SCORE.HIGH

        for _, offset, rtfobject in rtf_iter_objects(file_contents):
            rtfobject_name = hex(offset) + '.rtfobj'
            extracted_obj = os.path.join(self.working_directory,
                                         rtfobject_name)
            with open(extracted_obj, 'wb') as fh:
                fh.write(rtfobject)
            self.request.add_extracted(
                extracted_obj,
                'Embedded RTF Object at offset %s' % hex(offset),
                rtfobject_name)

        if len(streams_res.body) > 0:
            self.ole_result.add_section(streams_res)

    except Exception:
        self.log.debug("Error extracting streams: {}".format(
            traceback.format_exc(limit=2)))

    finally:
        # Always release the OLE files opened above.
        for fd in oles.values():
            try:
                fd.close()
            except Exception:
                pass