Example #1
0
def timestamp():
    """Command-line helper: print timestamp information for each input file.

    Parses options (recursive globbing, zip password / zip member filters),
    iterates over matching files via xglob, opens each one as an OLE file
    and passes it to process_ole(). Prints help and exits if no argument.
    """
    # BUG FIX: 'usage' was referenced below but never defined (NameError).
    usage = 'usage: %prog [options] <filename> [filename2 ...]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-r", action="store_true", dest="recursive",
                      help='find files recursively in subdirectories.')
    parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
                      help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
    parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
                      help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')

    (options, args) = parser.parse_args()

    # Print help if no arguments are passed
    if len(args) == 0:
        print(__doc__)
        parser.print_help()
        sys.exit()

    for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
                                                      zip_password=options.zip_password, zip_fname=options.zip_fname):
        # ignore directory names stored in zip files:
        if container and filename.endswith('/'):
            continue
        print('')
        if data is not None:
            # data extracted from zip file
            ole = olefile.OleFileIO(data)
        else:
            # normal filename
            ole = olefile.OleFileIO(filename)
        try:
            print ('[*] Timestamp')
            process_ole(ole)
        finally:
            # BUG FIX: release the file handle even if process_ole raises
            ole.close()
Example #2
0
def process_doc(filepath):
    """
    find dde links in word ole (.doc/.dot) file

    like process_xml, returns a concatenated unicode string of dde links or
    empty if none were found. dde-links will still begin with the dde[auto] key
    word (possibly after some whitespace)

    :param filepath: path of the OLE file to scan
    :return: unicode string, newline-joined dde links (empty if none found)
    """
    log.debug('process_doc')
    ole = olefile.OleFileIO(filepath, path_encoding=None)

    links = []
    try:
        # walk ALL directory entries, including orphans that are not part of
        # the tree (malware sometimes hides payloads in orphaned streams)
        for sid, direntry in enumerate(ole.direntries):
            is_orphan = direntry is None
            if is_orphan:
                # this direntry is not part of the tree --> unused or orphan
                direntry = ole._load_direntry(sid)
            is_stream = direntry.entry_type == olefile.STGTY_STREAM
            log.debug('direntry {:2d} {}: {}'.format(
                sid, '[orphan]' if is_orphan else direntry.name,
                'is stream of size {}'.format(direntry.size)
                if is_stream else 'no stream ({})'.format(direntry.entry_type)))
            if is_stream:
                new_parts = process_doc_stream(
                    ole._open(direntry.isectStart, direntry.size))
                links.extend(new_parts)
    finally:
        # BUG FIX: the OLE file handle was never closed (resource leak)
        ole.close()

    # mimic behaviour of process_docx: combine links to single text string
    return u'\n'.join(links)
Example #3
0
def main():
    """
    Main function: pick an OLE file (argv or file dialog) and let the user
    browse its streams interactively until Esc/Quit is chosen.
    """
    try:
        filename = sys.argv[1]
    except IndexError:
        # BUG FIX: was a bare 'except:'; only a missing argv[1] should
        # trigger the file-open dialog
        filename = easygui.fileopenbox()
    try:
        ole = olefile.OleFileIO(filename)
        try:
            streams = []
            for direntry in ole.listdir():
                streams.append('/'.join(direntry))
            streams.append(ABOUT)
            streams.append(QUIT)
            stream = True
            while stream is not None:
                msg = "Select a stream, or press Esc to exit"
                title = "olebrowse"
                stream = easygui.choicebox(msg, title, streams)
                if stream is None or stream == QUIT:
                    break
                if stream == ABOUT:
                    about()
                else:
                    browse_stream(ole, stream)
        finally:
            # BUG FIX: the OLE file handle was never closed
            ole.close()
    except Exception:
        # top-level UI boundary: show the exception in a dialog
        # (was a bare 'except:', which also swallowed SystemExit)
        easygui.exceptionbox()
Example #4
0
def main():
    """Gather RTF objects, macros and OLE metadata, then print a JSON report.

    Prints the JSON document only when at least one section has content;
    otherwise prints an empty JSON object.
    """
    rtf_objects, tags = get_rtf_objects()
    macros, new_tags = get_macros()
    tags += new_tags

    try:
        ole = olefile.OleFileIO('/sample')
        metadata, new_tags = get_metadata(ole)
        tags += new_tags

        result = {
            "directory_entries": get_directory_entries(ole),
            "metadata": metadata,
            "rtf_objects": rtf_objects,
            "macros": macros,
            "tags": tags,
        }

    except IOError:
        # Not a OLE file: report only the non-OLE findings
        result = {"rtf_objects": rtf_objects, "macros": macros, "tags": tags}

    # Emit the full report if any section is non-empty, else an empty object.
    if any(result.values()):
        print(json.dumps(result))
    else:
        print("{}")
Example #5
0
def main():
    """Command-line entry point for oletimes.

    Prints a banner, parses options (recursive globbing, zip handling),
    then prints timestamp information for each matching OLE file via
    process_ole(). Prints help and exits when called without arguments.
    """
    # print banner with version
    print('oletimes %s - http://decalage.info/python/oletools' % __version__)
    print('THIS IS WORK IN PROGRESS - Check updates regularly!')
    print('Please report any issue at https://github.com/decalage2/oletools/issues')

    usage = 'usage: oletimes [options] <filename> [filename2 ...]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-r", action="store_true", dest="recursive",
                      help='find files recursively in subdirectories.')
    parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
                      help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
    parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
                      help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')

    # TODO: add logfile option

    (options, args) = parser.parse_args()

    # Print help if no arguments are passed
    if len(args) == 0:
        print(__doc__)
        parser.print_help()
        sys.exit()

    for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
                                                      zip_password=options.zip_password, zip_fname=options.zip_fname):
        # TODO: handle xglob errors
        # ignore directory names stored in zip files:
        if container and filename.endswith('/'):
            continue
        full_name = '%s in %s' % (filename, container) if container else filename
        print("=" * 79)
        print('FILE: %s\n' % full_name)
        if data is not None:
            # data extracted from zip file
            ole = olefile.OleFileIO(data)
        else:
            # normal filename
            ole = olefile.OleFileIO(filename)
        try:
            process_ole(ole)
        finally:
            # BUG FIX: close the handle even if process_ole raises, so one
            # bad file does not leak a handle for the rest of the loop
            ole.close()
Example #6
0
def _print_property_table(meta, attribs, outfile):
    """Print a Property/Value table for the given metadata attribute names.

    :param meta: OleMetadata object returned by OleFileIO.get_metadata()
    :param attribs: iterable of attribute names to read from meta
    :param outfile: file-like object the table is written to
    """
    t = tablestream.TableStream([21, 30],
                                header_row=['Property', 'Value'],
                                outfile=outfile)
    for prop in attribs:
        value = getattr(meta, prop)
        if value is not None:
            # TODO: pretty printing for strings, dates, numbers
            # TODO: better unicode handling
            t.write_row([prop, value], colors=[None, 'yellow'])
    t.close()


def main():
    """Print the SummaryInformation and DocumentSummaryInformation
    properties of the OLE file given on the command line."""
    try:
        ole = olefile.OleFileIO(sys.argv[1])
    except IndexError:
        sys.exit(__doc__)

    # parse and display metadata:
    meta = ole.get_metadata()

    # console output with UTF8 encoding:
    # It looks like we do not need the UTF8 codec anymore, both for Python 2 and 3
    console_utf8 = sys.stdout  #codecs.getwriter('utf8')(sys.stdout)

    # REFACTOR: the two property tables were copy-pasted; factored out into
    # _print_property_table()
    print('Properties from the SummaryInformation stream:')
    _print_property_table(meta, meta.SUMMARY_ATTRIBS, console_utf8)
    print('')

    print('Properties from the DocumentSummaryInformation stream:')
    _print_property_table(meta, meta.DOCSUM_ATTRIBS, console_utf8)

    ole.close()
Example #7
0
def process_ole(filepath):
    """
    find dde links in ole file

    like process_xml, returns a concatenated unicode string of dde links or
    empty if none were found. dde-links will still begin with the dde[auto] key
    word (possibly after some whitespace)

    :param filepath: path of the OLE file to scan
    :return: unicode string, newline-joined dde links (empty if none found)
    """
    log.debug('process_ole')
    ole = olefile.OleFileIO(filepath, path_encoding=None)
    try:
        text_parts = process_ole_storage(ole)
    finally:
        # BUG FIX: the OLE file handle was never closed (resource leak)
        ole.close()

    # mimic behaviour of process_openxml: combine links to single text string
    return u'\n'.join(text_parts)
Example #8
0
 def check(self):
     """Run every OLE indicator check on self.filename.

     Appends an 'ole_format' indicator first; if the file is not an OLE
     file, stops there. Otherwise opens it, runs all specific checks in
     order, closes the file and returns the accumulated indicator list.
     """
     # check if it is actually an OLE file:
     ole_format = Indicator('ole_format', True, name='OLE format')
     self.indicators.append(ole_format)
     if not olefile.isOleFile(self.filename):
         ole_format.value = False
         return self.indicators
     # parse file:
     self.ole = olefile.OleFileIO(self.filename)
     # run each specific check, in the original order:
     for run_check in (self.check_properties, self.check_encrypted,
                       self.check_word, self.check_excel,
                       self.check_powerpoint, self.check_visio,
                       self.check_ObjectPool, self.check_flash):
         run_check()
     self.ole.close()
     return self.indicators
Example #9
0
# Matches names extracted with stray spaces between every character,
# e.g. ' J o h n  D o e'. Raw string so regex escapes are not treated as
# string escapes (the old non-raw literals trigger invalid-escape warnings
# on modern Python). Compiled once instead of per file.
_SPACED_NAME_RE = re.compile(r'(\s\w\s+(\w\s+)+\w)')
# Collapses any run of non-alphanumeric characters to a single space.
_NON_ALNUM_RE = re.compile('[^0-9a-zA-Z]+')


def _collapse_spaced(value):
    """Return value with 'o d d' per-character spacing removed, or None
    if the value does not look like a spaced-out name."""
    oddity = _SPACED_NAME_RE.match(value)
    if oddity:
        return str(oddity.group(1)).replace(' ', '')
    return None


def ms_doc(ms_file_list):
    """Extract user names and creating software from MS Office OLE metadata.

    :param ms_file_list: iterable of paths to MS Office (OLE) documents
    :return: tuple (user_names, software_list) of cleaned-up strings
    """
    software_list = []
    user_names = []
    info('Extracting MSDOCS MetaData')
    for filename in ms_file_list:
        try:
            data = olefile.OleFileIO(filename)
            try:
                meta = data.get_metadata()
            finally:
                # BUG FIX: the OLE file handle was never closed before
                data.close()
            author = _NON_ALNUM_RE.sub(' ', meta.author)
            # NOTE: the old code also computed 'company' but never used it;
            # worse, a None company aborted the whole file via the broad
            # except below, so the dead line is removed.
            software = _NON_ALNUM_RE.sub(' ', meta.creating_application)
            save_by = _NON_ALNUM_RE.sub(' ', meta.last_saved_by)
            # REFACTOR: the spaced-name cleanup was triplicated; now shared
            # via _collapse_spaced()
            if author:
                collapsed = _collapse_spaced(author)
                user_names.append(str(collapsed if collapsed else author).title())
            if software:
                collapsed = _collapse_spaced(software)
                software_list.append(collapsed if collapsed else software)
            if save_by:
                collapsed = _collapse_spaced(save_by)
                user_names.append(str(collapsed if collapsed else save_by).title())

        except Exception:
            # deliberate best-effort: skip files whose metadata is missing or
            # malformed (e.g. attributes that are None)
            pass
    info('Finished Extracting MSDOC MetaData')
    return (user_names, software_list)
Example #10
0
def main():
    """Print a table of modification/creation times for every stream and
    storage of the OLE file given on the command line."""
    # print banner with version
    print('oletimes %s - http://decalage.info/python/oletools' % __version__)

    try:
        ole = olefile.OleFileIO(sys.argv[1])
    except IndexError:
        sys.exit(__doc__)

    def dt2str(dt):
        """
        Convert a datetime object to a string for display, without microseconds

        :param dt: datetime.datetime object, or None
        :return: str, or None
        """
        if dt is None:
            return None
        return str(dt.replace(microsecond=0))

    table = prettytable.PrettyTable(
        ['Stream/Storage name', 'Modification Time', 'Creation Time'])
    table.align = 'l'
    table.max_width = 26

    # root entry first, then every stream/storage in the file
    root = ole.root
    table.add_row(('Root', dt2str(root.getmtime()), dt2str(root.getctime())))

    for entry in ole.listdir(streams=True, storages=True):
        path = repr('/'.join(entry))
        table.add_row((path,
                       dt2str(ole.getmtime(entry)),
                       dt2str(ole.getctime(entry))))

    print(table)

    ole.close()
Example #11
0
    def analyze(self, afile):
        '''Analyze OLE files and extract metadata about the file into the
        FileAnalysis object.

        Args:
            afile (FileAnalysis): The file to be analyzed.

        Returns:
            None
        '''
        # Guard clause: only handle mime types this plugin knows about.
        if afile.mime_type not in self.analyzed_mimes:
            return

        # Parse the metadata for the ole file and add all ole metadata
        # attributes to the FileAnalysis object. This should add a ton
        # of contextual information to the file.
        try:
            ole = olefile.OleFileIO(afile.path)
            process_metadata = True
        except IOError:
            afile.errors = afile.errors + [
                'doc plugin: unsupported filetype'
            ]
            afile.plugin_output[self.__NAME__] = 'None'
            process_metadata = False
        # There are OLE files out there with LOTS of embedded objects.
        # This should prevent plugin crashes for those cases.
        except RuntimeError:
            afile.errors = afile.errors + [
                'doc plugin: max recursion reached'
            ]
            afile.suspicious = True
            # BUG FIX: 'process_metadata = False' was assigned twice here and
            # a dead 'output' local was set but never stored; both removed.
            process_metadata = False

        if process_metadata:
            meta = ole.get_metadata()
            # These loops iterate through the meta for attributes and then
            # set attributes with the same name in the FileAnalysis object
            for prop in meta.SUMMARY_ATTRIBS:
                setattr(afile, prop, getattr(meta, prop))

            for prop in meta.DOCSUM_ATTRIBS:
                setattr(afile, prop, getattr(meta, prop))

            # Thumbnails are binary streams and muck up the output so they
            # are removed. This is a temporary work-around... the doc
            # analyzer will be rewritten to accommodate things like this
            if hasattr(afile, 'thumbnail'):
                afile.has_thumbnail = True
                del afile.thumbnail

            # Explicitly call close to ensure that the ole object gets closed
            ole.close()

        # Parse the file again, this time looking for VBA scripts.
        try:
            parser = olevba.VBA_Parser(afile.path)
        except TypeError:
            afile.errors = afile.errors + [
                'doc plugin: unsupported filetype'
            ]
            afile.plugin_output[self.__NAME__] = 'None'
            return

        results = parser.analyze_macros()

        contains_macro = parser.detect_vba_macros()
        if contains_macro and self.alert_on_macro:
            afile.alert = True
        if contains_macro and self.suspicious_on_macro:
            afile.suspicious = True

        output = ''

        if results is not None:
            for result in results:
                output = output + '[%s] keyword: %s description: %s' % result
        else:
            output = 'None'

        if contains_macro:
            afile.vba = parser.reveal()

        afile.plugin_output[self.__NAME__] = output

        # The parser requires an explicit close
        parser.close()
Example #12
0
def main():
    """Command-line entry point for oledir: list the directory entries of
    each input OLE file in a colored table, including unused and orphan
    entries that are not part of the directory tree."""
    usage = 'usage: oledir [options] <filename> [filename2 ...]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-r", action="store_true", dest="recursive",
                      help='find files recursively in subdirectories.')
    parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
                      help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
    parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
                      help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')

    # TODO: add logfile option

    (options, args) = parser.parse_args()

    # Print help if no arguments are passed
    if len(args) == 0:
        print(BANNER)
        print(__doc__)
        parser.print_help()
        sys.exit()

    # print banner with version
    print(BANNER)

    if os.name == 'nt':
        # enable ANSI colors on Windows consoles
        colorclass.Windows.enable(auto_colors=True, reset_atexit=True)

    for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
                                                      zip_password=options.zip_password, zip_fname=options.zip_fname):
        # ignore directory names stored in zip files:
        if container and filename.endswith('/'):
            continue
        full_name = '%s in %s' % (filename, container) if container else filename
        print('OLE directory entries in file %s:' % full_name)
        if data is not None:
            # data extracted from zip file
            ole = olefile.OleFileIO(data)
        else:
            # normal filename
            ole = olefile.OleFileIO(filename)

        table = tablestream.TableStream(column_width=[4, 6, 7, 22, 5, 5, 5, 8, 6],
            header_row=('id', 'Status', 'Type', 'Name', 'Left', 'Right', 'Child', '1st Sect', 'Size'),
            style=tablestream.TableStyleSlim)

        # TODO: read ALL the actual directory entries from the directory stream, because olefile does not!
        # TODO: OR fix olefile!
        # TODO: olefile should store or give access to the raw direntry data on demand
        # TODO: oledir option to hexdump the raw direntries
        # TODO: olefile should be less picky about incorrect directory structures

        # IDIOM FIX: 'sid' instead of the builtin-shadowing 'id', and
        # enumerate() instead of range(len(...)).
        for sid, d in enumerate(ole.direntries):
            if d is None:
                # this direntry is not part of the tree: either unused or an orphan
                d = ole._load_direntry(sid)
                if d.entry_type == olefile.STGTY_EMPTY:
                    status = 'unused'
                else:
                    status = 'ORPHAN'
            else:
                status = '<Used>'
            if d.name.startswith('\x00'):
                # this may happen with unused entries, the name may be filled with zeroes
                name = ''
            else:
                # handle non-printable chars using repr(), remove quotes:
                name = repr(d.name)[1:-1]
            left = sid_display(d.sid_left)
            right = sid_display(d.sid_right)
            child = sid_display(d.sid_child)
            entry_type = STORAGE_NAMES.get(d.entry_type, 'Unknown')
            etype_color = STORAGE_COLORS.get(d.entry_type, 'red')
            status_color = STATUS_COLORS.get(status, 'red')

            table.write_row((sid, status, entry_type, name, left, right, child, '%X' % d.isectStart, d.size),
                colors=(None, status_color, etype_color, None, None, None, None, None, None))
        ole.close()
Example #13
0
def find_ole(filename, data):
    """ try to open somehow as zip/ole/rtf/... ; yield None if fail

    If data is given, filename is (mostly) ignored.

    yields embedded ole streams in form of OleFileIO.

    :param filename: str, path to the file (used for logging even when data
                     is given)
    :param data: bytes, the file contents, or None to open by filename
    :return: generator of olefile.OleFileIO objects (or None on failure);
             each yielded OleFileIO is closed by this generator after the
             consumer advances past it, except where noted below.
    """

    if data is not None:
        # isOleFile and is_ppt can work on data directly but zip need file
        # --> wrap data in a file-like object without copying data
        log.debug('working on data, file is not touched below')
        arg_for_ole = data
        arg_for_zip = FakeFile(data)
    else:
        # we only have a file name
        log.debug('working on file by name')
        arg_for_ole = filename
        arg_for_zip = filename

    # 'ole' tracks the currently-open OleFileIO so the finally clause below
    # can close it if the consumer abandons the generator mid-iteration;
    # it is reset to None whenever ownership is handed off or released.
    ole = None
    try:
        if olefile.isOleFile(arg_for_ole):
            if is_ppt(arg_for_ole):
                log.info('is ppt file: ' + filename)
                for ole in find_ole_in_ppt(arg_for_ole):
                    yield ole
                    ole = None  # is closed in find_ole_in_ppt
            # in any case: check for embedded stuff in non-sectored streams
            log.info('is ole file: ' + filename)
            ole = olefile.OleFileIO(arg_for_ole)
            yield ole
        elif is_zipfile(arg_for_zip):
            log.info('is zip file: ' + filename)
            zipper = ZipFile(arg_for_zip, 'r')
            for subfile in zipper.namelist():
                # peek at the first bytes of each zip member to detect OLE
                head = b''
                try:
                    with zipper.open(subfile) as file_handle:
                        head = file_handle.read(len(olefile.MAGIC))
                except RuntimeError:
                    # ZipFile raises RuntimeError for encrypted members
                    log.error('zip is encrypted: ' + filename)
                    yield None
                    continue

                if head == olefile.MAGIC:
                    log.info('  unzipping ole: ' + subfile)
                    with ZipSubFile(zipper, subfile) as file_handle:
                        try:
                            ole = olefile.OleFileIO(file_handle)
                            yield ole
                        except IOError:
                            log.warning('Error reading data from {0}/{1} or '
                                        'interpreting it as OLE object'.format(
                                            filename, subfile))
                            log.debug('', exc_info=True)
                        finally:
                            # close this member's OLE before moving on to the
                            # next zip member
                            if ole is not None:
                                ole.close()
                                ole = None
                else:
                    log.debug('unzip skip: ' + subfile)
        else:
            log.warning(
                'open failed: {0} (or its data) is neither zip nor OLE'.format(
                    filename))
            yield None
    except Exception:
        # catch-all boundary: log with traceback and signal failure to the
        # consumer instead of propagating
        log.error('Caught exception opening {0}'.format(filename),
                  exc_info=True)
        yield None
    finally:
        # runs on normal exhaustion AND on generator close/GC: make sure the
        # last opened OLE does not leak
        if ole is not None:
            ole.close()
Example #14
0
        if data is not None:
            # data extracted from zip file
            ole = olefile.OleFileIO(data)
        else:
            # normal filename
            ole = olefile.OleFileIO(filename)
        print ('[*] Timestamp')
        process_ole(ole)
        ole.close()


if __name__ == "__main__":
    # Script entry point: open the HWP/OLE file given on the command line,
    # dump its header, summary and streams, scan any BinData streams with
    # yara, then print timestamp information.
    try:
        var1 = sys.argv[1]
        ole = olefile.OleFileIO(var1)
        fileheader(ole)
        hwpsummary(ole)
        bin_list = stream_list(ole)
        # BUG FIX: 'decom' was only assigned when a BinData stream existed;
        # the old code relied on the resulting NameError being caught below
        # to print the error message. Make the missing case explicit.
        decom = None
        if [s for s in bin_list if "BinData" in s]:
            decom = bin_data(ole, bin_list)
        if decom is not None:
            try:
                detect_yara(decom)
            except Exception:
                print()
                print('Error, BinData not exist')
        else:
            print()
            print('Error, BinData not exist')
        ole.close()
        timestamp()

    except Exception as e:
        # top-level boundary: report any failure instead of a traceback
        print(e)