def timestamp():
    """Parse command-line options and print timestamps for each OLE file.

    Iterates over all input files (recursively and/or inside zip archives,
    via xglob) and runs process_ole() on each OLE file found.

    BUGFIX: `usage` was referenced without ever being defined (NameError at
    runtime); `full_name` was computed but never displayed; the OLE file was
    not closed if process_ole() raised.
    """
    usage = 'usage: %prog [options] <filename> [filename2 ...]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-r", action="store_true", dest="recursive",
                      help='find files recursively in subdirectories.')
    parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
                      help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
    parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
                      help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
    (options, args) = parser.parse_args()
    # Print help if no arguments are passed
    if len(args) == 0:
        print(__doc__)
        parser.print_help()
        sys.exit()
    for container, filename, data in xglob.iter_files(
            args, recursive=options.recursive,
            zip_password=options.zip_password, zip_fname=options.zip_fname):
        # ignore directory names stored in zip files:
        if container and filename.endswith('/'):
            continue
        full_name = '%s in %s' % (filename, container) if container else filename
        print('')
        # show which file the following timestamps belong to:
        print('FILE: %s' % full_name)
        if data is not None:
            # data extracted from zip file
            ole = olefile.OleFileIO(data)
        else:
            # normal filename
            ole = olefile.OleFileIO(filename)
        try:
            print ('[*] Timestamp')
            process_ole(ole)
        finally:
            # always release the OLE file handle, even on error
            ole.close()
def process_doc(filepath):
    """ find dde links in word ole (.doc/.dot) file

    like process_xml, returns a concatenated unicode string of dde links or
    empty if none were found. dde-links will still begin with the dde[auto]
    key word (possibly after some whitespace)

    :param filepath: path of the file to parse
    :return: unicode string, one dde-link per line ('' if none found)
    """
    log.debug('process_doc')
    ole = olefile.OleFileIO(filepath, path_encoding=None)
    try:
        links = []
        # walk ALL directory entries, including orphans not linked in the
        # red-black tree (they may still contain interesting streams):
        for sid, direntry in enumerate(ole.direntries):
            is_orphan = direntry is None
            if is_orphan:
                # this direntry is not part of the tree --> unused or orphan
                direntry = ole._load_direntry(sid)
            is_stream = direntry.entry_type == olefile.STGTY_STREAM
            log.debug('direntry {:2d} {}: {}'.format(
                sid, '[orphan]' if is_orphan else direntry.name,
                'is stream of size {}'.format(direntry.size) if is_stream
                else 'no stream ({})'.format(direntry.entry_type)))
            if is_stream:
                new_parts = process_doc_stream(
                    ole._open(direntry.isectStart, direntry.size))
                links.extend(new_parts)
    finally:
        # BUGFIX: the OleFileIO (and its underlying file handle) was never
        # closed; release it even if stream parsing raises.
        ole.close()
    # mimic behaviour of process_docx: combine links to single text string
    return u'\n'.join(links)
def main():
    """ Main function: let the user pick a stream of an OLE file to browse.

    The file name is taken from the command line, or asked via a GUI dialog.
    Any error while parsing/browsing is shown in a GUI exception dialog.
    """
    try:
        filename = sys.argv[1]
    except IndexError:
        # BUGFIX: narrowed from a bare except; no argument given --> ask user
        filename = easygui.fileopenbox()
    try:
        ole = olefile.OleFileIO(filename)
        try:
            # build the list of choices: one entry per stream, plus the
            # special ABOUT and QUIT entries
            streams = ['/'.join(direntry) for direntry in ole.listdir()]
            streams.append(ABOUT)
            streams.append(QUIT)
            stream = True
            while stream is not None:
                msg = "Select a stream, or press Esc to exit"
                title = "olebrowse"
                stream = easygui.choicebox(msg, title, streams)
                if stream is None or stream == QUIT:
                    break
                if stream == ABOUT:
                    about()
                else:
                    browse_stream(ole, stream)
        finally:
            # BUGFIX: the OLE file was never closed
            ole.close()
    except Exception:
        # BUGFIX: narrowed from a bare except (which also caught SystemExit
        # and KeyboardInterrupt); show the error in a GUI dialog
        easygui.exceptionbox()
def main():
    """Analyze the sample at /sample and print the findings as JSON.

    Always extracts RTF objects and VBA macros; if the sample is an OLE file,
    also extracts directory entries and metadata. Prints the full result as a
    JSON object when at least one field is non-empty, otherwise prints "{}".
    """
    rtf_objects, tags = get_rtf_objects()
    macros, new_tags = get_macros()
    tags += new_tags
    try:
        ole = olefile.OleFileIO('/sample')
        try:
            metadata, new_tags = get_metadata(ole)
            tags += new_tags
            result = {
                "directory_entries": get_directory_entries(ole),
                "metadata": metadata,
                "rtf_objects": rtf_objects,
                "macros": macros,
                "tags": tags,
            }
        finally:
            # BUGFIX: close the OLE file once we are done with it
            ole.close()
    except IOError:
        # Not a OLE file
        result = {"rtf_objects": rtf_objects, "macros": macros, "tags": tags}
    # print the full result if anything was found, an empty object otherwise
    # (idiom fix: replaces a manual `for key in result: if result[key]` loop)
    if any(result.values()):
        print(json.dumps(result))
    else:
        print("{}")
def main():
    """Command-line entry point for oletimes.

    Prints a banner, parses options, then displays the timestamps of every
    OLE file given on the command line (optionally recursing into
    directories and zip archives via xglob).
    """
    # banner with version
    print('oletimes %s - http://decalage.info/python/oletools' % __version__)
    print('THIS IS WORK IN PROGRESS - Check updates regularly!')
    print('Please report any issue at https://github.com/decalage2/oletools/issues')

    usage = 'usage: oletimes [options] <filename> [filename2 ...]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-r", action="store_true", dest="recursive",
                      help='find files recursively in subdirectories.')
    parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
                      help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
    parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
                      help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
    # TODO: add logfile option
    # parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
    #                   help="logging level debug/info/warning/error/critical (default=%default)")
    options, args = parser.parse_args()

    # no arguments at all --> show module help and option help, then quit
    if not args:
        print(__doc__)
        parser.print_help()
        sys.exit()

    file_iterator = xglob.iter_files(args,
                                     recursive=options.recursive,
                                     zip_password=options.zip_password,
                                     zip_fname=options.zip_fname)
    # TODO: handle xglob errors
    for container, filename, data in file_iterator:
        # zip archives may contain directory entries: skip those
        if container and filename.endswith('/'):
            continue
        if container:
            full_name = '%s in %s' % (filename, container)
        else:
            full_name = filename
        print("=" * 79)
        print('FILE: %s\n' % full_name)
        # open either from the data extracted from a zip, or by file name
        if data is not None:
            ole = olefile.OleFileIO(data)
        else:
            ole = olefile.OleFileIO(filename)
        process_ole(ole)
        ole.close()
def _print_property_table(title, attribs, meta, outfile):
    """Print one 'Property / Value' table for the given list of metadata
    attribute names, skipping attributes that are not set (None)."""
    print(title)
    t = tablestream.TableStream([21, 30], header_row=['Property', 'Value'],
                                outfile=outfile)
    for prop in attribs:
        value = getattr(meta, prop)
        if value is not None:
            # TODO: pretty printing for strings, dates, numbers
            # TODO: better unicode handling
            t.write_row([prop, value], colors=[None, 'yellow'])
    t.close()


def main():
    """Print the SummaryInformation and DocumentSummaryInformation
    properties of the OLE file given as first command-line argument."""
    try:
        ole = olefile.OleFileIO(sys.argv[1])
    except IndexError:
        # no filename on the command line: show usage and exit
        sys.exit(__doc__)
    # parse and display metadata:
    meta = ole.get_metadata()
    # console output with UTF8 encoding:
    # It looks like we do not need the UTF8 codec anymore, both for Python 2 and 3
    console_utf8 = sys.stdout  # codecs.getwriter('utf8')(sys.stdout)
    # the two streams were displayed by two copy-pasted loops; the duplicated
    # table-printing code is now factored into _print_property_table():
    _print_property_table('Properties from the SummaryInformation stream:',
                          meta.SUMMARY_ATTRIBS, meta, console_utf8)
    print('')
    _print_property_table('Properties from the DocumentSummaryInformation stream:',
                          meta.DOCSUM_ATTRIBS, meta, console_utf8)
    ole.close()
def process_ole(filepath):
    """ find dde links in ole file

    like process_xml, returns a concatenated unicode string of dde links or
    empty if none were found. dde-links will still begin with the dde[auto]
    key word (possibly after some whitespace)

    :param filepath: path of the file to parse
    :return: unicode string, one dde-link per line ('' if none found)
    """
    log.debug('process_ole')
    ole = olefile.OleFileIO(filepath, path_encoding=None)
    try:
        text_parts = process_ole_storage(ole)
    finally:
        # BUGFIX: the OleFileIO was never closed --> release the file handle
        ole.close()
    # mimic behaviour of process_openxml: combine links to single text string
    return u'\n'.join(text_parts)
def check(self):
    """Run all checks on the file and return the list of indicators.

    First verifies that the file is actually in OLE format: if not, only the
    'ole_format' indicator (set to False) is returned. Otherwise the file is
    parsed and each specific check method is run in turn; every check
    appends its own Indicator objects to self.indicators.

    :return: list of Indicator objects (self.indicators)
    """
    # check if it is actually an OLE file:
    oleformat = Indicator('ole_format', True, name='OLE format')
    self.indicators.append(oleformat)
    if not olefile.isOleFile(self.filename):
        oleformat.value = False
        return self.indicators
    # parse file:
    self.ole = olefile.OleFileIO(self.filename)
    try:
        # checks:
        self.check_properties()
        self.check_encrypted()
        self.check_word()
        self.check_excel()
        self.check_powerpoint()
        self.check_visio()
        self.check_ObjectPool()
        self.check_flash()
    finally:
        # BUGFIX: make sure the OLE file is closed even if a check raises
        self.ole.close()
    return self.indicators
def _tidy_name(value):
    """Collapse a 'spaced out' metadata value (e.g. "J o h n") into a single
    word; return the value unchanged when the pattern does not match."""
    # raw string fixes the invalid-escape-sequence warning of '\s' / '\w'
    oddity = re.match(r'(\s\w\s+(\w\s+)+\w)', value)
    if oddity:
        return str(oddity.group(1)).replace(' ', '')
    return value


def ms_doc(ms_file_list):
    """Extract user names and creating-software names from the OLE metadata
    of the given MS Office documents.

    Files whose metadata cannot be parsed are skipped (best effort).

    :param ms_file_list: iterable of file paths to parse
    :return: tuple (user_names, software_list) of lists of strings
    """
    software_list = []
    user_names = []
    info('Extracting MSDOCS MetaData')
    for filename in ms_file_list:
        try:
            data = olefile.OleFileIO(filename)
            try:
                meta = data.get_metadata()
            finally:
                # BUGFIX: each OleFileIO was left open (handle leak)
                data.close()
            # keep only alphanumeric characters in each field
            # (raw strings + de-duplicated "oddity" logic via _tidy_name;
            # the unused `company` field was dropped: it could be None and
            # then aborted the whole file via the except below)
            author = re.sub(r'[^0-9a-zA-Z]+', ' ', meta.author)
            software = re.sub(r'[^0-9a-zA-Z]+', ' ', meta.creating_application)
            save_by = re.sub(r'[^0-9a-zA-Z]+', ' ', meta.last_saved_by)
            if author:
                user_names.append(str(_tidy_name(author)).title())
            if software:
                software_list.append(_tidy_name(software))
            if save_by:
                user_names.append(str(_tidy_name(save_by)).title())
        except Exception:
            # deliberate best-effort: skip unparseable files silently
            pass
    info('Finished Extracting MSDOC MetaData')
    return (user_names, software_list)
def main():
    """Print the modification and creation times of all streams and storages
    of the OLE file given as first command-line argument."""
    # banner with version
    print('oletimes %s - http://decalage.info/python/oletools' % __version__)
    try:
        ole = olefile.OleFileIO(sys.argv[1])
    except IndexError:
        # no filename given: show usage and exit
        sys.exit(__doc__)

    def fmt_time(dt):
        """ Convert a datetime object to a string for display, without microseconds

        :param dt: datetime.datetime object, or None
        :return: str, or None
        """
        return None if dt is None else str(dt.replace(microsecond=0))

    table = prettytable.PrettyTable(
        ['Stream/Storage name', 'Modification Time', 'Creation Time'])
    table.align = 'l'
    table.max_width = 26
    # the root storage first, then every stream/storage in the file:
    table.add_row(('Root', fmt_time(ole.root.getmtime()),
                   fmt_time(ole.root.getctime())))
    for entry in ole.listdir(streams=True, storages=True):
        table.add_row((repr('/'.join(entry)),
                       fmt_time(ole.getmtime(entry)),
                       fmt_time(ole.getctime(entry))))
    print(table)
    ole.close()
def analyze(self, afile):
    '''Analyze OLE files and extract metadata about the file into the
    FileAnalysis object.

    Args:
        afile (FileAnalysis): The file to be analyzed.

    Returns:
        None
    '''
    # NOTE(review): indentation below was reconstructed from a collapsed
    # source line; confirm block nesting against the original file.
    if afile.mime_type in self.analyzed_mimes:
        # Parse the metadata for the ole file and add all ole metadata
        # attributes to the FileAnalysis object. This should add a ton
        # of contextual information to the file.
        try:
            ole = olefile.OleFileIO(afile.path)
            process_metadata = True
        except IOError:
            # not something olefile can open --> record the error and
            # report 'None' as this plugin's output
            afile.errors = afile.errors + [
                'doc plugin: unsupported filetype'
            ]
            output = 'None'
            afile.plugin_output[self.__NAME__] = output
            process_metadata = False
        # There are OLE files out there with LOTS of embedded objects.
        # This should prevent plugin crashes for those cases.
        except RuntimeError:
            afile.errors = afile.errors + [
                'doc plugin: max recursion reached'
            ]
            output = 'None'
            process_metadata = False
            afile.suspicious = True
            # NOTE(review): this second assignment is redundant (already set
            # above); also, unlike the IOError branch, plugin_output is not
            # written here — verify whether that is intentional.
            process_metadata = False
        if process_metadata:
            meta = ole.get_metadata()
            # These loops iterate through the meta for attributes and then
            # set attributes with the same name in the FileAnalysis object
            for prop in meta.SUMMARY_ATTRIBS:
                value = getattr(meta, prop)
                setattr(afile, prop, value)
            for prop in meta.DOCSUM_ATTRIBS:
                value = getattr(meta, prop)
                setattr(afile, prop, value)
            # Thumbnails are binary streams and muck up the output so they
            # are removed. This is a temporary work-around... the doc
            # analyzer will be rewritten to accomidate things like this
            if hasattr(afile, 'thumbnail'):
                afile.has_thumbnail = True
                del afile.thumbnail
            # Explicitly call close to ensure that the ole object gets closed
            ole.close()
        # Parse the file again, this time looking for VBA scripts.
        try:
            parser = olevba.VBA_Parser(afile.path)
        except TypeError:
            # olevba cannot handle this file type: record and stop here
            afile.errors = afile.errors + [
                'doc plugin: unsupported filetype'
            ]
            output = 'None'
            afile.plugin_output[self.__NAME__] = output
            return
        results = parser.analyze_macros()
        contains_macro = parser.detect_vba_macros()
        # raise alert/suspicious flags according to the plugin configuration
        if contains_macro and self.alert_on_macro:
            afile.alert = True
        if contains_macro and self.suspicious_on_macro:
            afile.suspicious = True
        # build a single output string from all macro analysis results
        output = ''
        if results is not None:
            for result in results:
                output = output + '[%s] keyword: %s description: %s' % result
        else:
            output = 'None'
        if contains_macro:
            # store the full (deobfuscated) VBA source on the file object
            afile.vba = parser.reveal()
        afile.plugin_output[self.__NAME__] = output
        # The parser requires an explicit close
        parser.close()
def main():
    """Command-line entry point for oledir: display the OLE directory
    entries of each input file as a color-coded table (id, status, type,
    name, tree links, first sector, size)."""
    usage = 'usage: oledir [options] <filename> [filename2 ...]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-r", action="store_true", dest="recursive",
                      help='find files recursively in subdirectories.')
    parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
                      help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
    parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
                      help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
    # TODO: add logfile option
    # TODO: add loglevel option

    (options, args) = parser.parse_args()

    # Print help if no arguments are passed
    if len(args) == 0:
        print(BANNER)
        print(__doc__)
        parser.print_help()
        sys.exit()

    # print banner with version
    print(BANNER)

    # enable ANSI colors on Windows consoles
    if os.name == 'nt':
        colorclass.Windows.enable(auto_colors=True, reset_atexit=True)

    for container, filename, data in xglob.iter_files(
            args, recursive=options.recursive,
            zip_password=options.zip_password, zip_fname=options.zip_fname):
        # ignore directory names stored in zip files:
        if container and filename.endswith('/'):
            continue
        full_name = '%s in %s' % (filename, container) if container else filename
        print('OLE directory entries in file %s:' % full_name)
        if data is not None:
            # data extracted from zip file
            ole = olefile.OleFileIO(data)
        else:
            # normal filename
            ole = olefile.OleFileIO(filename)
        table = tablestream.TableStream(column_width=[4, 6, 7, 22, 5, 5, 5, 8, 6],
                                        header_row=('id', 'Status', 'Type', 'Name',
                                                    'Left', 'Right', 'Child', '1st Sect', 'Size'),
                                        style=tablestream.TableStyleSlim)
        # TODO: read ALL the actual directory entries from the directory stream, because olefile does not!
        # TODO: OR fix olefile!
        # TODO: olefile should store or give access to the raw direntry data on demand
        # TODO: oledir option to hexdump the raw direntries
        # TODO: olefile should be less picky about incorrect directory structures
        # (BUGFIX: loop variable was named `id`, shadowing the builtin;
        # dead commented-out prettytable setup removed)
        for entry_id in range(len(ole.direntries)):
            d = ole.direntries[entry_id]
            if d is None:
                # this direntry is not part of the tree: either unused or an orphan
                d = ole._load_direntry(entry_id)
                if d.entry_type == olefile.STGTY_EMPTY:
                    status = 'unused'
                else:
                    status = 'ORPHAN'
            else:
                status = '<Used>'
            if d.name.startswith('\x00'):
                # this may happen with unused entries, the name may be filled with zeroes
                name = ''
            else:
                # handle non-printable chars using repr(), remove quotes:
                name = repr(d.name)[1:-1]
            left = sid_display(d.sid_left)
            right = sid_display(d.sid_right)
            child = sid_display(d.sid_child)
            entry_type = STORAGE_NAMES.get(d.entry_type, 'Unknown')
            etype_color = STORAGE_COLORS.get(d.entry_type, 'red')
            status_color = STATUS_COLORS.get(status, 'red')
            table.write_row((entry_id, status, entry_type, name,
                             left, right, child,
                             '%X' % d.isectStart, d.size),
                            colors=(None, status_color, etype_color,
                                    None, None, None, None, None, None))
        ole.close()
def find_ole(filename, data):
    """ try to open somehow as zip/ole/rtf/... ; yield None if fail

    If data is given, filename is (mostly) ignored.

    yields embedded ole streams in form of OleFileIO.

    This is a generator: each yielded OleFileIO may be closed by this
    function after the consumer advances the generator, so callers must
    finish using one yielded object before requesting the next. None is
    yielded to signal a failure (open error, encrypted zip entry, ...).
    """
    if data is not None:
        # isOleFile and is_ppt can work on data directly but zip need file
        # --> wrap data in a file-like object without copying data
        log.debug('working on data, file is not touched below')
        arg_for_ole = data
        arg_for_zip = FakeFile(data)
    else:
        # we only have a file name
        log.debug('working on file by name')
        arg_for_ole = filename
        arg_for_zip = filename
    ole = None
    try:
        if olefile.isOleFile(arg_for_ole):
            if is_ppt(arg_for_ole):
                log.info('is ppt file: ' + filename)
                for ole in find_ole_in_ppt(arg_for_ole):
                    yield ole
                    ole = None  # is closed in find_ole_in_ppt
            # in any case: check for embedded stuff in non-sectored streams
            log.info('is ole file: ' + filename)
            ole = olefile.OleFileIO(arg_for_ole)
            yield ole
        elif is_zipfile(arg_for_zip):
            log.info('is zip file: ' + filename)
            zipper = ZipFile(arg_for_zip, 'r')
            for subfile in zipper.namelist():
                head = b''
                try:
                    # peek at the first bytes to detect embedded OLE files
                    with zipper.open(subfile) as file_handle:
                        head = file_handle.read(len(olefile.MAGIC))
                except RuntimeError:
                    log.error('zip is encrypted: ' + filename)
                    yield None
                    continue
                if head == olefile.MAGIC:
                    log.info(' unzipping ole: ' + subfile)
                    with ZipSubFile(zipper, subfile) as file_handle:
                        try:
                            ole = olefile.OleFileIO(file_handle)
                            yield ole
                        except IOError:
                            log.warning('Error reading data from {0}/{1} or '
                                        'interpreting it as OLE object'.format(
                                            filename, subfile))
                            log.debug('', exc_info=True)
                        finally:
                            # close each embedded ole once the consumer is done
                            if ole is not None:
                                ole.close()
                                ole = None
                else:
                    log.debug('unzip skip: ' + subfile)
        else:
            log.warning(
                'open failed: {0} (or its data) is neither zip nor OLE'.format(
                    filename))
            yield None
    except Exception:
        # catch-all so one bad file yields None instead of killing the caller
        log.error('Caught exception opening {0}'.format(filename),
                  exc_info=True)
        yield None
    finally:
        # safety net: close the last ole if it was not closed above
        if ole is not None:
            ole.close()
# NOTE(review): the following block (down to ole.close()) is the tail of an
# enclosing loop whose header is not visible in this chunk — the indentation
# below is an assumption to be confirmed against the original file.
        if data is not None:
            # data extracted from zip file
            ole = olefile.OleFileIO(data)
        else:
            # normal filename
            ole = olefile.OleFileIO(filename)
        print ('[*] Timestamp')
        process_ole(ole)
        ole.close()


if __name__ == "__main__":
    # script entry point: parse the HWP file given as first argument,
    # dump its header/summary, scan BinData streams with yara, then
    # print timestamps; any error is printed instead of raised.
    try:
        stream = []
        var1 = sys.argv[1]
        ole = olefile.OleFileIO(var1)
        fileheader(ole)
        hwpsummary(ole)
        bin_list = stream_list(ole)
        # only scan when at least one stream name contains "BinData"
        if [s for s in bin_list if "BinData" in s]:
            decom = bin_data(ole, bin_list)
            try:
                detect_yara(decom)
            except Exception:
                print ()
                print ('Error, BinData not exist')
        ole.close()
        timestamp()
    except Exception as e:
        # best-effort top-level handler: report the error and exit normally
        print (e)