Exemple #1
0
def _print_meta_table(title, meta, attribs, outfile):
    """Print a Property/Value table for the given metadata attribute names.

    :param title: str, heading printed before the table
    :param meta: metadata object returned by OleFileIO.get_metadata()
    :param attribs: iterable of attribute names to read from meta
    :param outfile: file-like object the table is written to
    """
    print(title)
    t = tablestream.TableStream([21, 30],
                                header_row=['Property', 'Value'],
                                outfile=outfile)
    for prop in attribs:
        value = getattr(meta, prop)
        # only display properties that are actually set:
        if value is not None:
            # TODO: pretty printing for strings, dates, numbers
            # TODO: better unicode handling
            t.write_row([prop, value], colors=[None, 'yellow'])
    t.close()


def main():
    """Command-line entry point: open the OLE file given as the first
    argument and display its metadata properties as two tables
    (SummaryInformation and DocumentSummaryInformation streams)."""
    try:
        ole = olefile.OleFileIO(sys.argv[1])
    except IndexError:
        # no filename on the command line: show the module docstring as usage
        sys.exit(__doc__)

    # parse and display metadata:
    meta = ole.get_metadata()

    # console output with UTF8 encoding:
    # It looks like we do not need the UTF8 codec anymore, both for Python 2 and 3
    console_utf8 = sys.stdout  #codecs.getwriter('utf8')(sys.stdout)

    # duplicated table code moved to _print_meta_table (was a TODO):
    _print_meta_table('Properties from the SummaryInformation stream:',
                      meta, meta.SUMMARY_ATTRIBS, console_utf8)
    print('')
    _print_meta_table('Properties from the DocumentSummaryInformation stream:',
                      meta, meta.DOCSUM_ATTRIBS, console_utf8)

    ole.close()
Exemple #2
0
def _show_props_table(title, meta, attribs):
    """Print a Property/Value table for the given metadata attribute names.

    :param title: str, heading printed before the table
    :param meta: metadata object returned by OleFileIO.get_metadata()
    :param attribs: iterable of attribute names to read from meta
    """
    print(title)
    t = tablestream.TableStream([21, 30], header_row=['Property', 'Value'])
    for prop in attribs:
        value = getattr(meta, prop)
        # only display properties that are actually set:
        if value is not None:
            # TODO: pretty printing for strings, dates, numbers
            # TODO: better unicode handling
            t.write_row([prop, value], colors=[None, 'yellow'])
    t.close()


def process_ole(ole):
    """Parse and display the metadata of an opened OLE file as two tables
    (SummaryInformation and DocumentSummaryInformation streams).

    :param ole: an opened olefile.OleFileIO object
    """
    # parse and display metadata:
    meta = ole.get_metadata()

    # console output with UTF8 encoding:
    ensure_stdout_handles_unicode()

    # duplicated table code moved to _show_props_table (was a TODO):
    _show_props_table('Properties from the SummaryInformation stream:',
                      meta, meta.SUMMARY_ATTRIBS)
    print('')
    _show_props_table('Properties from the DocumentSummaryInformation stream:',
                      meta, meta.DOCSUM_ATTRIBS)
Exemple #3
0
def show_fat(ole):
    """Display the FAT of an opened OLE file as a table: for each sector,
    its type (or '<Data>'), its offset in the file, and the next sector
    in its chain.

    :param ole: an opened olefile.OleFileIO object
    """
    print('FAT:')
    t = tablestream.TableStream([8, 12, 8, 8], header_row=['Sector #', 'Type', 'Offset', 'Next #'])
    # enumerate instead of range(len(...)) for idiomatic iteration:
    for i, fat_value in enumerate(ole.fat):
        fat_type = FAT_TYPES.get(fat_value, '<Data>')
        color_type = FAT_COLORS.get(fat_value, FAT_COLORS['default'])
        # compute offset based on sector size:
        # sector 0 starts right after the header, hence the +1
        offset = ole.sectorsize * (i + 1)
        t.write_row(['%8X' % i, fat_type, '%08X' % offset, '%8X' % fat_value],
                    colors=[None, color_type, None, None])
    t.close()
    print('')
Exemple #4
0
def show_minifat(ole):
    """Display the MiniFAT of an opened OLE file as a table: for each
    mini-sector, its type (or 'Data') and the next sector in its chain.
    Offsets are not computed yet (TODO).

    :param ole: an opened olefile.OleFileIO object
    """
    print('MiniFAT:')
    # load MiniFAT if it wasn't already done:
    ole.loadminifat()
    t = tablestream.TableStream([8, 12, 8, 8], header_row=['Sector #', 'Type', 'Offset', 'Next #'])
    # enumerate instead of range(len(...)) for idiomatic iteration:
    for i, fat_value in enumerate(ole.minifat):
        fat_type = FAT_TYPES.get(fat_value, 'Data')
        color_type = FAT_COLORS.get(fat_value, FAT_COLORS['default'])
        # TODO: compute offset
        t.write_row(['%8X' % i, fat_type, 'N/A', '%8X' % fat_value],
                    colors=[None, color_type, None, None])
    t.close()
    print('')
Exemple #5
0
def main():
    """Called when running this file as script. Shows all info on input file."""
    # banner: version and project links
    print('oleid %s - http://decalage.info/oletools' % __version__)
    print('THIS IS WORK IN PROGRESS - Check updates regularly!')
    print('Please report any issue at '
          'https://github.com/decalage2/oletools/issues')
    print('')

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('input', type=str, nargs='*', metavar='FILE',
                        help='Name of files to process')

    args = parser.parse_args()

    # without input files, just display usage and stop
    if not args.input:
        parser.print_help()
        return

    log_helper.enable_logging()

    for filename in args.input:
        print('Filename:', filename)
        indicators = OleID(filename).check()

        #TODO: add description
        #TODO: highlight suspicious indicators
        table = tablestream.TableStream(
            [20, 20, 10, 26],
            header_row=['Indicator', 'Value', 'Risk', 'Description'],
            style=tablestream.TableStyleSlimSep)
        for indicator in indicators:
            # skip false indicators flagged as hidden-when-false
            if indicator.value or not indicator.hide_if_false:
                color = risk_color.get(indicator.risk, None)
                table.write_row((indicator.name, indicator.value,
                                 indicator.risk, indicator.description),
                                colors=(color, color, color, None))
        table.close()
Exemple #6
0
def main():
    """
    Main function, called when olevba is run from the command line.

    Parses command-line options, scans each input file (optionally found
    recursively or inside zip archives) for VBA macros, runs MacroRaptor
    on the extracted macro code, prints one table row per file, and exits
    with a code reflecting the most severe result encountered.
    """
    global log
    DEFAULT_LOG_LEVEL = "warning"  # Default log level
    # map command-line level names to logging module constants:
    LOG_LEVELS = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
        'critical': logging.CRITICAL
    }

    usage = 'usage: %prog [options] <filename> [filename2 ...]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-r",
                      action="store_true",
                      dest="recursive",
                      help='find files recursively in subdirectories.')
    parser.add_option(
        "-z",
        "--zip",
        dest='zip_password',
        type='str',
        default=None,
        help=
        'if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)'
    )
    parser.add_option(
        "-f",
        "--zipfname",
        dest='zip_fname',
        type='str',
        default='*',
        help=
        'if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)'
    )
    parser.add_option(
        '-l',
        '--loglevel',
        dest="loglevel",
        action="store",
        default=DEFAULT_LOG_LEVEL,
        help=
        "logging level debug/info/warning/error/critical (default=%default)")
    parser.add_option("-m",
                      '--matches',
                      action="store_true",
                      dest="show_matches",
                      help='Show matched strings.')

    # TODO: add logfile option

    (options, args) = parser.parse_args()

    # Print help if no arguments are passed
    if len(args) == 0:
        print('MacroRaptor %s - http://decalage.info/python/oletools' %
              __version__)
        print('This is work in progress, please report issues at %s' %
              URL_ISSUES)
        print(__doc__)
        parser.print_help()
        # document the exit codes so they can be used in scripts:
        print('\nAn exit code is returned based on the analysis result:')
        for result in (Result_NoMacro, Result_NotMSOffice, Result_MacroOK,
                       Result_Error, Result_Suspicious):
            print(' - %d: %s' % (result.exit_code, result.name))
        sys.exit()

    # print banner with version
    print('MacroRaptor %s - http://decalage.info/python/oletools' %
          __version__)
    print('This is work in progress, please report issues at %s' % URL_ISSUES)

    logging.basicConfig(level=LOG_LEVELS[options.loglevel],
                        format='%(levelname)-8s %(message)s')
    # enable logging in the modules:
    log.setLevel(logging.NOTSET)

    # one table row per analyzed file:
    t = tablestream.TableStream(style=tablestream.TableStyleSlim,
                                header_row=['Result', 'Flags', 'Type', 'File'],
                                column_width=[10, 5, 4, 56])

    # track the most severe result across all files (higher exit_code wins):
    exitcode = -1
    global_result = None
    # TODO: handle errors in xglob, to continue processing the next files
    for container, filename, data in xglob.iter_files(
            args,
            recursive=options.recursive,
            zip_password=options.zip_password,
            zip_fname=options.zip_fname):
        # ignore directory names stored in zip files:
        if container and filename.endswith('/'):
            continue
        full_name = '%s in %s' % (filename,
                                  container) if container else filename
        # presumably xglob yields an Exception instance as 'data' when a
        # file could not be read — TODO confirm against xglob.iter_files
        if isinstance(data, Exception):
            result = Result_Error
            t.write_row([result.name, '', '', full_name],
                        colors=[result.color, None, None, None])
            # second row carries the error message itself:
            t.write_row(['', '', '', str(data)],
                        colors=[None, None, None, result.color])
        else:
            filetype = '???'
            try:
                vba_parser = olevba.VBA_Parser(filename=filename,
                                               data=data,
                                               container=container)
                filetype = TYPE2TAG[vba_parser.type]
            except Exception as e:
                # TODO: distinguish actual errors from non-MSOffice files
                result = Result_Error
                t.write_row([result.name, '', filetype, full_name],
                            colors=[result.color, None, None, None])
                t.write_row(['', '', '', str(e)],
                            colors=[None, None, None, result.color])
                continue
            if vba_parser.detect_vba_macros():
                # concatenate all VBA modules into one string for scanning:
                vba_code_all_modules = ''
                try:
                    for (subfilename, stream_path, vba_filename,
                         vba_code) in vba_parser.extract_all_macros():
                        vba_code_all_modules += vba_code.decode(
                            'utf-8', 'replace') + '\n'
                except Exception as e:
                    result = Result_Error
                    t.write_row([
                        result.name, '', TYPE2TAG[vba_parser.type], full_name
                    ],
                                colors=[result.color, None, None, None])
                    t.write_row(['', '', '', str(e)],
                                colors=[None, None, None, result.color])
                    continue
                mraptor = MacroRaptor(vba_code_all_modules)
                mraptor.scan()
                if mraptor.suspicious:
                    result = Result_Suspicious
                else:
                    result = Result_MacroOK
                t.write_row(
                    [result.name,
                     mraptor.get_flags(), filetype, full_name],
                    colors=[result.color, None, None, None])
                if mraptor.matches and options.show_matches:
                    t.write_row(['', '', '', 'Matches: %r' % mraptor.matches])
            else:
                result = Result_NoMacro
                t.write_row([result.name, '', filetype, full_name],
                            colors=[result.color, None, None, None])
        # keep the most severe result seen so far:
        if result.exit_code > exitcode:
            global_result = result
            exitcode = result.exit_code

    print('')
    print('Flags: A=AutoExec, W=Write, X=Execute')
    # NOTE(review): if iter_files yields nothing (e.g. only zip directory
    # entries), global_result stays None and the next line raises — confirm
    # whether that can happen in practice
    print('Exit code: %d - %s' % (exitcode, global_result.name))
    sys.exit(exitcode)
Exemple #7
0
def show_header(ole, extra_data=False):
    """Display the OLE header attributes, then attributes derived from
    them (sector size, FAT coverage, extra data beyond the FAT), as tables.

    :param ole: an opened olefile.OleFileIO object
    :param extra_data: if True, also hex-dump any data found past the area
        covered by the FAT (possible appended/hidden data)
    """
    print("OLE HEADER:")
    t = tablestream.TableStream(
        [24, 16, 79 - (4 + 24 + 16)],
        header_row=['Attribute', 'Value', 'Description'])
    t.write_row([
        'OLE Signature (hex)',
        binascii.b2a_hex(ole.header_signature).upper(),
        'Should be D0CF11E0A1B11AE1'
    ])
    t.write_row([
        'Header CLSID (hex)',
        binascii.b2a_hex(ole.header_clsid).upper(), 'Should be 0'
    ])
    t.write_row(
        ['Minor Version',
         '%04X' % ole.minor_version, 'Should be 003E'])
    t.write_row(
        ['Major Version',
         '%04X' % ole.dll_version, 'Should be 3 or 4'])
    t.write_row([
        'Byte Order',
        '%04X' % ole.byte_order, 'Should be FFFE (little endian)'
    ])
    t.write_row(
        ['Sector Shift',
         '%04X' % ole.sector_shift, 'Should be 0009 or 000C'])
    t.write_row([
        '# of Dir Sectors', ole.num_dir_sectors,
        'Should be 0 if major version is 3'
    ])
    t.write_row(['# of FAT Sectors', ole.num_fat_sectors, ''])
    t.write_row(['First Dir Sector', '%08X' % ole.first_dir_sector, '(hex)'])
    t.write_row([
        'Transaction Sig Number', ole.transaction_signature_number,
        'Should be 0'
    ])
    t.write_row([
        'MiniStream cutoff', ole.mini_stream_cutoff_size,
        'Should be 4096 bytes'
    ])
    t.write_row(
        ['First MiniFAT Sector',
         '%08X' % ole.first_mini_fat_sector, '(hex)'])
    t.write_row(['# of MiniFAT Sectors', ole.num_mini_fat_sectors, ''])
    t.write_row(
        ['First DIFAT Sector',
         '%08X' % ole.first_difat_sector, '(hex)'])
    t.write_row(['# of DIFAT Sectors', ole.num_difat_sectors, ''])
    t.close()
    print('')
    print("CALCULATED ATTRIBUTES:")
    t = tablestream.TableStream(
        [24, 16, 79 - (4 + 24 + 16)],
        header_row=['Attribute', 'Value', 'Description'])
    t.write_row([
        'Sector Size (bytes)', ole.sector_size, 'Should be 512 or 4096 bytes'
    ])
    t.write_row(
        ['Actual File Size (bytes)', ole._filesize, 'Real file size on disk'])
    # each FAT sector holds sector_size/4 entries of 4 bytes each.
    # Integer division (//) is required: on Python 3, '/' returned a float
    # that propagated into the size computations and their display:
    num_sectors_per_fat_sector = ole.sector_size // 4
    num_sectors_in_fat = num_sectors_per_fat_sector * ole.num_fat_sectors
    # Need to add one sector for the header:
    max_filesize_fat = (num_sectors_in_fat + 1) * ole.sector_size
    t.write_row([
        'Max File Size in FAT', max_filesize_fat,
        'Max file size covered by FAT'
    ])
    if ole._filesize > max_filesize_fat:
        # suspicious: data beyond what the FAT covers — highlight in red
        extra_size_beyond_fat = ole._filesize - max_filesize_fat
        color = 'red'
    else:
        extra_size_beyond_fat = 0
        color = None
    t.write_row([
        'Extra data beyond FAT', extra_size_beyond_fat,
        'Only if file is larger than FAT coverage'
    ],
                colors=[color, color, color])
    # Find the last used sector:
    # By default, it's the last sector in the FAT
    last_used_sector = len(ole.fat) - 1
    for i in range(len(ole.fat) - 1, 0, -1):
        last_used_sector = i
        if ole.fat[i] != olefile.FREESECT:
            break
    # Extra data would start at the next sector
    offset_extra_data = ole.sectorsize * (last_used_sector + 2)
    t.write_row([
        'Extra data offset in FAT',
        '%08X' % offset_extra_data,
        'Offset of the 1st free sector at end of FAT'
    ])
    extra_data_size = ole._filesize - offset_extra_data
    color = 'red' if extra_data_size > 0 else None
    t.write_row([
        'Extra data size', extra_data_size,
        'Size of data starting at the 1st free sector at end of FAT'
    ],
                colors=[color, color, color])
    t.close()
    print('')

    if extra_data:
        # hex dump of extra data
        print('HEX DUMP OF EXTRA DATA:\n')
        if extra_data_size <= 0:
            print('No extra data found at end of file.')
        else:
            ole.fp.seek(offset_extra_data)
            # read until end of file:
            exdata = ole.fp.read()
            assert len(exdata) == extra_data_size
            print(hexdump3(exdata, length=16, startindex=offset_extra_data))
        print('')
Exemple #8
0
def process_file(container,
                 filename,
                 data,
                 output_dir=None,
                 save_object=False):
    """Parse an RTF file for embedded OLE objects, display them as a table,
    and optionally save selected objects to files.

    :param container: container path (e.g. zip) or None — only used to
        build display names upstream; not read here
    :param filename: path/name of the RTF file
    :param data: file content as bytes, or None to read it from filename
    :param output_dir: directory where extracted objects are saved
        (default: same directory as the input file)
    :param save_object: False (save nothing), an object index as str/int,
        or 'all' to save every object
    """
    if output_dir:
        if not os.path.isdir(output_dir):
            log.info('creating output directory %s' % output_dir)
            os.mkdir(output_dir)

        fname_prefix = os.path.join(output_dir, sanitize_filename(filename))
    else:
        base_dir = os.path.dirname(filename)
        sane_fname = sanitize_filename(filename)
        fname_prefix = os.path.join(base_dir, sane_fname)

    # TODO: option to extract objects to files (false by default)
    if data is None:
        # context manager closes the handle promptly (was left to the GC):
        with open(filename, 'rb') as f:
            data = f.read()
    print('=' * 79)
    print('File: %r - size: %d bytes' % (filename, len(data)))
    tstream = tablestream.TableStream(column_width=(3, 10, 31, 31),
                                      header_row=('id', 'index', 'OLE Object',
                                                  'OLE Package'),
                                      style=tablestream.TableStyleSlim)
    rtfp = RtfObjParser(data)
    rtfp.parse()
    # enumerate avoids the O(n^2) rtfp.objects.index(rtfobj) per-row lookup:
    for obj_index, rtfobj in enumerate(rtfp.objects):
        ole_color = None
        pkg_color = None
        if rtfobj.is_ole:
            ole_column = 'format_id: %d ' % rtfobj.format_id
            if rtfobj.format_id == oleobj.OleObject.TYPE_EMBEDDED:
                ole_column += '(Embedded)\n'
            elif rtfobj.format_id == oleobj.OleObject.TYPE_LINKED:
                ole_column += '(Linked)\n'
            else:
                ole_column += '(Unknown)\n'
            ole_column += 'class name: %r\n' % rtfobj.class_name
            # if the object is linked and not embedded, data_size=None:
            if rtfobj.oledata_size is None:
                ole_column += 'data size: N/A'
            else:
                ole_column += 'data size: %d' % rtfobj.oledata_size
            if rtfobj.is_package:
                pkg_column = 'Filename: %r\n' % rtfobj.filename
                pkg_column += 'Source path: %r\n' % rtfobj.src_path
                pkg_column += 'Temp path = %r' % rtfobj.temp_path
                pkg_color = 'yellow'
                # check if the file extension is executable:
                _, ext = os.path.splitext(rtfobj.filename)
                log.debug('File extension: %r' % ext)
                if re_executable_extensions.match(ext):
                    pkg_color = 'red'
                    pkg_column += '\nEXECUTABLE FILE'
            else:
                pkg_column = 'Not an OLE Package'
            # Detect OLE2Link exploit
            # http://www.kb.cert.org/vuls/id/921560
            if rtfobj.class_name == 'OLE2Link':
                ole_color = 'red'
                ole_column += '\nPossibly an exploit for the OLE2Link vulnerability (VU#921560, CVE-2017-0199)'
        else:
            pkg_column = ''
            ole_column = 'Not a well-formed OLE object'
        tstream.write_row(
            (
                obj_index,
                '%08Xh' % rtfobj.start,
                ole_column,
                pkg_column),
            colors=(None, None, ole_color, pkg_color))
        tstream.write_sep()
    if save_object:
        if save_object == 'all':
            objects = rtfp.objects
        else:
            try:
                i = int(save_object)
                objects = [rtfp.objects[i]]
            # narrowed from bare except: int() raises ValueError,
            # a bad index raises IndexError
            except (ValueError, IndexError):
                log.error(
                    'The -s option must be followed by an object index or all, such as "-s 2" or "-s all"'
                )
                return
        for i, rtfobj in enumerate(objects):
            if rtfobj.is_package:
                print('Saving file from OLE Package in object #%d:' % i)
                print('  Filename = %r' % rtfobj.filename)
                print('  Source path = %r' % rtfobj.src_path)
                print('  Temp path = %r' % rtfobj.temp_path)
                if rtfobj.filename:
                    fname = '%s_%s' % (fname_prefix,
                                       sanitize_filename(rtfobj.filename))
                else:
                    fname = '%s_object_%08X.noname' % (fname_prefix,
                                                       rtfobj.start)
                print('  saving to file %s' % fname)
                with open(fname, 'wb') as f:
                    f.write(rtfobj.olepkgdata)
            # When format_id=TYPE_LINKED, oledata_size=None
            elif rtfobj.is_ole and rtfobj.oledata_size is not None:
                print('Saving file embedded in OLE object #%d:' % i)
                print('  format_id  = %d' % rtfobj.format_id)
                print('  class name = %r' % rtfobj.class_name)
                print('  data size  = %d' % rtfobj.oledata_size)
                # set a file extension according to the class name:
                class_name = rtfobj.class_name.lower()
                if class_name.startswith(b'word'):
                    ext = 'doc'
                elif class_name.startswith(b'package'):
                    ext = 'package'
                else:
                    ext = 'bin'
                fname = '%s_object_%08X.%s' % (fname_prefix, rtfobj.start, ext)
                print('  saving to file %s' % fname)
                with open(fname, 'wb') as f:
                    f.write(rtfobj.oledata)
            else:
                print('Saving raw data in object #%d:' % i)
                fname = '%s_object_%08X.raw' % (fname_prefix, rtfobj.start)
                print('  saving object to file %s' % fname)
                with open(fname, 'wb') as f:
                    f.write(rtfobj.rawdata)
Exemple #9
0
def main():
    """Command-line entry point of oledir: parse options, then display the
    OLE directory entries of each input file as a table, including unused
    and orphan entries."""
    usage = 'usage: oledir [options] <filename> [filename2 ...]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-r", action="store_true", dest="recursive",
                      help='find files recursively in subdirectories.')
    parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
                      help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
    parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
                      help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')

    # TODO: add logfile option

    (options, args) = parser.parse_args()

    # Print help if no arguments are passed
    if len(args) == 0:
        print(BANNER)
        print(__doc__)
        parser.print_help()
        sys.exit()

    # print banner with version
    print(BANNER)

    # enable ANSI colors in the Windows console:
    if os.name == 'nt':
        colorclass.Windows.enable(auto_colors=True, reset_atexit=True)

    for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
                                                      zip_password=options.zip_password, zip_fname=options.zip_fname):
        # ignore directory names stored in zip files:
        if container and filename.endswith('/'):
            continue
        full_name = '%s in %s' % (filename, container) if container else filename
        print('OLE directory entries in file %s:' % full_name)
        if data is not None:
            # data extracted from zip file
            ole = olefile.OleFileIO(data)
        else:
            # normal filename
            ole = olefile.OleFileIO(filename)

        table = tablestream.TableStream(column_width=[4, 6, 7, 22, 5, 5, 5, 8, 6],
            header_row=('id', 'Status', 'Type', 'Name', 'Left', 'Right', 'Child', '1st Sect', 'Size'),
            style=tablestream.TableStyleSlim)

        # TODO: read ALL the actual directory entries from the directory stream, because olefile does not!
        # TODO: OR fix olefile!
        # TODO: olefile should store or give access to the raw direntry data on demand
        # TODO: oledir option to hexdump the raw direntries
        # TODO: olefile should be less picky about incorrect directory structures

        # 'entry_id' instead of 'id' to avoid shadowing the builtin;
        # enumerate replaces range(len(...)) indexing:
        for entry_id, d in enumerate(ole.direntries):
            if d is None:
                # this direntry is not part of the tree: either unused or an orphan
                d = ole._load_direntry(entry_id)
                if d.entry_type == olefile.STGTY_EMPTY:
                    status = 'unused'
                else:
                    status = 'ORPHAN'
            else:
                status = '<Used>'
            if d.name.startswith('\x00'):
                # this may happen with unused entries, the name may be filled with zeroes
                name = ''
            else:
                # handle non-printable chars using repr(), remove quotes:
                name = repr(d.name)[1:-1]
            left  = sid_display(d.sid_left)
            right = sid_display(d.sid_right)
            child = sid_display(d.sid_child)
            entry_type = STORAGE_NAMES.get(d.entry_type, 'Unknown')
            etype_color = STORAGE_COLORS.get(d.entry_type, 'red')
            status_color = STATUS_COLORS.get(status, 'red')

            table.write_row((entry_id, status, entry_type, name, left, right, child, '%X' % d.isectStart, d.size),
                colors=(None, status_color, etype_color, None, None, None, None, None, None))
        ole.close()
Exemple #10
0
def _print_props_table(title, meta, attribs):
    """Print a Property/Value table for the given metadata attribute names.

    Properties whose value is None are skipped.
    """
    print(title)
    t = tablestream.TableStream([21, 30], header_row=['Property', 'Value'])
    for prop in attribs:
        value = getattr(meta, prop)
        if value is not None:
            # TODO: pretty printing for strings, dates, numbers
            # TODO: better unicode handling
            t.write_row([prop, value], colors=[None, 'yellow'])
    t.close()


def _collect_props(meta, attribs):
    """Return a dict of metadata attributes with truthy values, passed
    through clean_output(). Falsy values are skipped silently.
    """
    props = {}
    for prop in attribs:
        value = getattr(meta, prop)
        if value:
            props[prop] = clean_output(value)
        else:
            # pass for now, when logging is enabled log as warning
            # logger.warning("Unable to log {}: {}".format(prop, value))
            pass
    return props


def process_output(meta, output):
    """Display OLE metadata as tables (output == 'table'), or return it
    as a dict for other output modes.

    :param meta: metadata object returned by OleFileIO.get_metadata()
    :param output: 'table' to print, anything else to get a dict back
    :returns: dict with 'SUMMARY_ATTRIBS' and 'DOCSUM_ATTRIBS' keys when
        output != 'table', otherwise None (tables are printed)

    NOTE: the table branch keeps properties that are falsy but not None
    (e.g. 0), while the dict branch drops all falsy values — behavior
    preserved from the original duplicated loops.
    """
    # console output with UTF8 encoding:
    ensure_stdout_handles_unicode()

    # duplicated loop code moved to helpers (was a TODO):
    if output == 'table':
        _print_props_table('Properties from the SummaryInformation stream:',
                           meta, meta.SUMMARY_ATTRIBS)
        print('')
        _print_props_table(
            'Properties from the DocumentSummaryInformation stream:',
            meta, meta.DOCSUM_ATTRIBS)
    else:
        # dictionary with one sub-dict per attribute group:
        return {
            "SUMMARY_ATTRIBS": _collect_props(meta, meta.SUMMARY_ATTRIBS),
            "DOCSUM_ATTRIBS": _collect_props(meta, meta.DOCSUM_ATTRIBS),
        }
Exemple #11
0
def _get_output_prefix(filename, output_dir=None):
    """Return the path prefix used for files extracted from *filename*.

    If *output_dir* is given, it is created when missing and the prefix is
    placed inside it; otherwise the prefix sits next to the input file.
    """
    if output_dir:
        if not os.path.isdir(output_dir):
            log.info('creating output directory %s' % output_dir)
            os.mkdir(output_dir)
        return os.path.join(output_dir, sanitize_filename(filename))
    base_dir = os.path.dirname(filename)
    return os.path.join(base_dir, sanitize_filename(filename))


def _extract_ole2link_urls(data):
    """Return sorted unique URLs hidden as hex-encoded UTF-16LE text in *data*.

    Used to surface the target of OLE2Link exploits (CVE-2017-0199).
    Based on https://bitbucket.org/snippets/Alexander_Hanel/7Adpp

    :param data: raw RTF file content as bytes
    """
    import binascii
    # runs of printable ASCII encoded as UTF-16LE (char followed by NUL):
    word_pat = re.compile(rb'(?:[\x20-\x7E]\x00){3,}')
    urls = set()
    # long hex runs, possibly wrapped with CRLF line breaks:
    for item in re.findall(rb'[a-fA-F0-9\x0D\x0A]{128,}', data):
        try:
            decoded = binascii.unhexlify(item.replace(b'\x0D\x0A', b''))
        except (binascii.Error, ValueError):
            # odd length or stray characters: not a real hex blob
            continue
        for word in word_pat.findall(decoded):
            text = word.decode('utf-16le')
            if 'http' in text:
                urls.add(text)
    return sorted(urls)


def _describe_ole_object(rtfobj, data):
    """Build the table description and highlight color for one RTF object.

    :param rtfobj: an object parsed by RtfObjParser
    :param data: raw RTF file content (bytes), scanned for OLE2Link URLs
    :returns: (description string, color) where color is None, 'yellow'
        or 'red' depending on how suspicious the object looks
    """
    if not rtfobj.is_ole:
        return 'Not a well-formed OLE object', None
    color = None
    desc = 'format_id: %d ' % rtfobj.format_id
    if rtfobj.format_id == oleobj.OleObject.TYPE_EMBEDDED:
        desc += '(Embedded)\n'
    elif rtfobj.format_id == oleobj.OleObject.TYPE_LINKED:
        desc += '(Linked)\n'
    else:
        desc += '(Unknown)\n'
    desc += 'class name: %r\n' % rtfobj.class_name
    # if the object is linked and not embedded, oledata_size is None:
    if rtfobj.oledata_size is None:
        desc += 'data size: N/A'
    else:
        desc += 'data size: %d' % rtfobj.oledata_size
    if rtfobj.is_package:
        desc += '\nOLE Package object:'
        desc += '\nFilename: %r' % rtfobj.filename
        desc += '\nSource path: %r' % rtfobj.src_path
        desc += '\nTemp path = %r' % rtfobj.temp_path
        desc += '\nMD5 = %r' % rtfobj.olepkgdata_md5
        color = 'yellow'
        # check if the file extension is executable, or if the declared
        # filename and the temp path extensions disagree (both suspicious):
        _, temp_ext = os.path.splitext(rtfobj.temp_path)
        log.debug('Temp path extension: %r' % temp_ext)
        _, file_ext = os.path.splitext(rtfobj.filename)
        log.debug('File extension: %r' % file_ext)
        if temp_ext != file_ext:
            desc += "\nMODIFIED FILE EXTENSION"
        if re_executable_extensions.match(temp_ext) or re_executable_extensions.match(file_ext):
            color = 'red'
            desc += '\nEXECUTABLE FILE'
    else:
        desc += '\nMD5 = %r' % rtfobj.oledata_md5
    if rtfobj.clsid is not None:
        desc += '\nCLSID: %s' % rtfobj.clsid
        desc += '\n%s' % rtfobj.clsid_desc
        if 'CVE' in rtfobj.clsid_desc:
            color = 'red'
    # Detect OLE2Link exploit
    # http://www.kb.cert.org/vuls/id/921560
    if rtfobj.class_name == b'OLE2Link':
        color = 'red'
        desc += '\nPossibly an exploit for the OLE2Link vulnerability (VU#921560, CVE-2017-0199)\n'
        urls = _extract_ole2link_urls(data)
        if urls:
            desc += 'URL extracted: ' + ', '.join(urls)
    # Detect Equation Editor exploit
    # https://www.kb.cert.org/vuls/id/421280/
    elif rtfobj.class_name.lower() == b'equation.3':
        color = 'red'
        desc += '\nPossibly an exploit for the Equation Editor vulnerability (VU#421280, CVE-2017-11882)'
    return desc, color


def _save_object_data(rtfobj, index, fname_prefix):
    """Write the payload of one parsed RTF object to a file on disk.

    :param rtfobj: an object parsed by RtfObjParser
    :param index: display index of the object (for console messages)
    :param fname_prefix: path prefix for the output file name
    """
    if rtfobj.is_package:
        print('Saving file from OLE Package in object #%d:' % index)
        print('  Filename = %r' % rtfobj.filename)
        print('  Source path = %r' % rtfobj.src_path)
        print('  Temp path = %r' % rtfobj.temp_path)
        if rtfobj.filename:
            fname = '%s_%s' % (fname_prefix, sanitize_filename(rtfobj.filename))
        else:
            fname = '%s_object_%08X.noname' % (fname_prefix, rtfobj.start)
        print('  saving to file %s' % fname)
        print('  md5 %s' % rtfobj.olepkgdata_md5)
        payload = rtfobj.olepkgdata
    # When format_id=TYPE_LINKED, oledata_size=None
    elif rtfobj.is_ole and rtfobj.oledata_size is not None:
        print('Saving file embedded in OLE object #%d:' % index)
        print('  format_id  = %d' % rtfobj.format_id)
        print('  class name = %r' % rtfobj.class_name)
        print('  data size  = %d' % rtfobj.oledata_size)
        # set a file extension according to the class name:
        class_name = rtfobj.class_name.lower()
        if class_name.startswith(b'word'):
            ext = 'doc'
        elif class_name.startswith(b'package'):
            ext = 'package'
        else:
            ext = 'bin'
        fname = '%s_object_%08X.%s' % (fname_prefix, rtfobj.start, ext)
        print('  saving to file %s' % fname)
        print('  md5 %s' % rtfobj.oledata_md5)
        payload = rtfobj.oledata
    else:
        print('Saving raw data in object #%d:' % index)
        fname = '%s_object_%08X.raw' % (fname_prefix, rtfobj.start)
        print('  saving object to file %s' % fname)
        print('  md5 %s' % rtfobj.rawdata_md5)
        payload = rtfobj.rawdata
    # use a context manager so the handle is closed even on write errors:
    with open(fname, 'wb') as out:
        out.write(payload)


def process_file(container, filename, data, output_dir=None, save_object=False):
    """Parse one RTF file, display its embedded OLE objects, optionally save them.

    :param container: name of the container the file came from (kept for
        interface compatibility with callers; not used here)
    :param filename: path of the RTF file on disk
    :param data: file content as bytes, or None to read it from *filename*
    :param output_dir: directory where extracted objects are saved (created
        if missing); defaults to the input file's directory
    :param save_object: 'all' to save every object, an object index (int or
        digit string) to save one, or False to save nothing
    """
    fname_prefix = _get_output_prefix(filename, output_dir)
    # TODO: option to extract objects to files (false by default)
    if data is None:
        # read in binary mode; context manager avoids leaking the handle
        with open(filename, 'rb') as fhandle:
            data = fhandle.read()
    print('=' * 79)
    print('File: %r - size: %d bytes' % (filename, len(data)))
    tstream = tablestream.TableStream(
        column_width=(3, 10, 63),
        header_row=('id', 'index', 'OLE Object'),
        style=tablestream.TableStyleSlim
    )
    rtfp = RtfObjParser(data)
    rtfp.parse()
    # enumerate avoids the O(n^2) rtfp.objects.index() lookup per row:
    for obj_id, rtfobj in enumerate(rtfp.objects):
        ole_column, ole_color = _describe_ole_object(rtfobj, data)
        tstream.write_row((
            obj_id,
            '%08Xh' % rtfobj.start,
            ole_column
            ), colors=(None, None, ole_color)
        )
        tstream.write_sep()
    if save_object:
        if save_object == 'all':
            objects = rtfp.objects
        else:
            try:
                objects = [rtfp.objects[int(save_object)]]
            except (ValueError, TypeError, IndexError):
                log.error('The -s option must be followed by an object index or all, such as "-s 2" or "-s all"')
                return
        for index, rtfobj in enumerate(objects):
            _save_object_data(rtfobj, index, fname_prefix)