Esempio n. 1
0
def _print_property_table(title, meta, attribs, outfile):
    """Print one two-column (Property, Value) table of OLE metadata.

    Prints `title`, then for each attribute name in `attribs` whose value
    on `meta` (an olefile metadata object) is not None, writes a row to
    `outfile` via tablestream. Factored out of main() because the same
    code was duplicated for the SummaryInformation and
    DocumentSummaryInformation streams (see the original TODO).
    """
    print(title)
    t = tablestream.TableStream([21, 30],
                                header_row=['Property', 'Value'],
                                outfile=outfile)
    for prop in attribs:
        value = getattr(meta, prop)
        if value is not None:
            # TODO: pretty printing for strings, dates, numbers
            # TODO: better unicode handling
            t.write_row([prop, value], colors=[None, 'yellow'])
    t.close()


def main():
    """Display the metadata properties of the OLE file given on the
    command line (SummaryInformation and DocumentSummaryInformation
    streams). Exits with the module docstring as usage if no filename
    argument is provided."""
    try:
        ole = olefile.OleFileIO(sys.argv[1])
    except IndexError:
        # no filename on the command line: show usage (module docstring)
        sys.exit(__doc__)

    try:
        # parse and display metadata:
        meta = ole.get_metadata()

        # console output with UTF8 encoding:
        # It looks like we do not need the UTF8 codec anymore, both for Python 2 and 3
        console_utf8 = sys.stdout  #codecs.getwriter('utf8')(sys.stdout)

        _print_property_table(
            'Properties from the SummaryInformation stream:',
            meta, meta.SUMMARY_ATTRIBS, console_utf8)
        print('')
        _print_property_table(
            'Properties from the DocumentSummaryInformation stream:',
            meta, meta.DOCSUM_ATTRIBS, console_utf8)
    finally:
        # make sure the OLE file handle is released even if an error occurs
        # (the original leaked it on any exception after opening)
        ole.close()
Esempio n. 2
0
def main():
    """Print the FAT and MiniFAT of the OLE file given on the command
    line (olemap): one row per sector showing its type, offset and the
    next sector in the chain."""
    # print banner with version
    print('olemap %s - http://decalage.info/python/oletools' % __version__)

    fname = sys.argv[1]
    ole = olefile.OleFileIO(fname)

    print('FAT:')
    t = tablestream.TableStream([8, 12, 8, 8], header_row=['Sector #', 'Type', 'Offset', 'Next #'])
    for i in range(ole.nb_sect):
        fat_value = ole.fat[i]
        # special FAT values (free, end of chain, ...) get a label from
        # FAT_TYPES, anything else is a regular data sector
        fat_type = FAT_TYPES.get(fat_value, '<Data>')
        color_type = FAT_COLORS.get(fat_value, FAT_COLORS['default'])
        # compute offset based on sector size:
        # (presumably the header occupies the first sector-sized block,
        # hence i+1 -- TODO confirm against the OLE spec)
        offset = ole.sectorsize * (i+1)
        t.write_row(['%8X' % i, fat_type, '%08X' % offset, '%8X' % fat_value],
            colors=[None, color_type, None, None])
    # close the table so its bottom border is rendered
    t.close()
    print('')

    print('MiniFAT:')
    # load MiniFAT if it wasn't already done:
    ole.loadminifat()
    for i, fat_value in enumerate(ole.minifat):
        fat_type = FAT_TYPES.get(fat_value, 'Data')
        # TODO: the offset is printed as 0 (placeholder) -- computing real
        # offsets would require mapping mini-sectors to the mini stream
        print('%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value))

    ole.close()
Esempio n. 3
0
def main():
    """
    Main function, called when olevba is run from the command line.

    Parses command-line options, scans each input file for VBA macros
    with MacroRaptor, prints one table row per file, and exits with a
    code reflecting the most severe result seen.
    """
    global log
    DEFAULT_LOG_LEVEL = "warning"  # Default log level
    LOG_LEVELS = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
        'critical': logging.CRITICAL
    }

    usage = 'usage: %prog [options] <filename> [filename2 ...]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-r",
                      action="store_true",
                      dest="recursive",
                      help='find files recursively in subdirectories.')
    parser.add_option(
        "-z",
        "--zip",
        dest='zip_password',
        type='str',
        default=None,
        help=
        'if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)'
    )
    parser.add_option(
        "-f",
        "--zipfname",
        dest='zip_fname',
        type='str',
        default='*',
        help=
        'if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)'
    )
    parser.add_option(
        '-l',
        '--loglevel',
        dest="loglevel",
        action="store",
        default=DEFAULT_LOG_LEVEL,
        help=
        "logging level debug/info/warning/error/critical (default=%default)")
    parser.add_option("-m",
                      '--matches',
                      action="store_true",
                      dest="show_matches",
                      help='Show matched strings.')

    # TODO: add logfile option

    (options, args) = parser.parse_args()

    # Print help if no arguments are passed
    # NOTE: single-argument print() calls are valid on Python 2 and 3,
    # unlike the original print statements which were Python-2-only.
    if len(args) == 0:
        print(__doc__)
        parser.print_help()
        print('\nAn exit code is returned based on the analysis result:')
        for result in (Result_NoMacro, Result_NotMSOffice, Result_MacroOK,
                       Result_Error, Result_Suspicious):
            print(' - %d: %s' % (result.exit_code, result.name))
        sys.exit()

    # print banner with version
    print('MacroRaptor %s - http://decalage.info/python/oletools' % __version__)
    print('This is work in progress, please report issues at %s' % URL_ISSUES)

    logging.basicConfig(level=LOG_LEVELS[options.loglevel],
                        format='%(levelname)-8s %(message)s')
    # enable logging in the modules:
    log.setLevel(logging.NOTSET)

    t = tablestream.TableStream(style=tablestream.TableStyleSlim,
                                header_row=['Result', 'Flags', 'Type', 'File'],
                                column_width=[10, 5, 4, 56])

    exitcode = -1
    global_result = None
    # TODO: handle errors in xglob, to continue processing the next files
    for container, filename, data in xglob.iter_files(
            args,
            recursive=options.recursive,
            zip_password=options.zip_password,
            zip_fname=options.zip_fname):
        # ignore directory names stored in zip files:
        if container and filename.endswith('/'):
            continue
        full_name = '%s in %s' % (filename,
                                  container) if container else filename
        # result is set exactly once per file by the branches below, then
        # folded into the global exit code at the bottom of the loop.
        result = None
        if isinstance(data, Exception):
            # xglob reported an error while reading this file
            result = Result_Error
            t.write_row([result.name, '', '', full_name],
                        colors=[result.color, None, None, None])
            t.write_row(['', '', '', str(data)],
                        colors=[None, None, None, result.color])
        else:
            filetype = '???'
            try:
                vba_parser = olevba.VBA_Parser(filename=filename,
                                               data=data,
                                               container=container)
                filetype = TYPE2TAG[vba_parser.type]
            except Exception as e:
                # TODO: distinguish actual errors from non-MSOffice files
                result = Result_Error
                t.write_row([result.name, '', filetype, full_name],
                            colors=[result.color, None, None, None])
                t.write_row(['', '', '', str(e)],
                            colors=[None, None, None, result.color])
            if result is None and vba_parser.detect_vba_macros():
                vba_code_all_modules = ''
                try:
                    for (subfilename, stream_path, vba_filename,
                         vba_code) in vba_parser.extract_all_macros():
                        vba_code_all_modules += vba_code + '\n'
                except Exception as e:
                    result = Result_Error
                    t.write_row([
                        result.name, '', TYPE2TAG[vba_parser.type], full_name
                    ],
                                colors=[result.color, None, None, None])
                    t.write_row(['', '', '', str(e)],
                                colors=[None, None, None, result.color])
                if result is None:
                    mraptor = MacroRaptor(vba_code_all_modules)
                    mraptor.scan()
                    result = Result_Suspicious if mraptor.suspicious else Result_MacroOK
                    t.write_row(
                        [result.name,
                         mraptor.get_flags(), filetype, full_name],
                        colors=[result.color, None, None, None])
                    if mraptor.matches and options.show_matches:
                        t.write_row(['', '', '', 'Matches: %r' % mraptor.matches])
            elif result is None:
                result = Result_NoMacro
                t.write_row([result.name, '', filetype, full_name],
                            colors=[result.color, None, None, None])
        # keep the most severe result seen so far.
        # BUGFIX: the error branches above previously used 'continue' and
        # therefore never updated the global result / exit code, while the
        # xglob-error branch did -- now all outcomes are counted.
        if result.exit_code > exitcode:
            global_result = result
            exitcode = result.exit_code

    print('')
    print('Flags: A=AutoExec, W=Write, X=Execute')
    if global_result is not None:
        print('Exit code: %d - %s' % (exitcode, global_result.name))
    else:
        # BUGFIX: no file was analyzed at all; the original crashed here
        # on global_result.name (AttributeError on None)
        print('Exit code: %d - no file analyzed' % exitcode)
    sys.exit(exitcode)
Esempio n. 4
0
def process_file(container,
                 filename,
                 data,
                 output_dir=None,
                 save_object=False):
    """Parse an RTF file and report its embedded OLE objects.

    Prints a table with one row per object found by RtfObjParser
    (format id, class name, size, and OLE Package details, colored red
    when the packaged filename has an executable extension). If
    `save_object` is an object index or the string 'all', the selected
    object(s) are also written to disk next to the input file (or under
    `output_dir` if given).

    :param container: container path when the file comes from an archive
    :param filename: path of the RTF file (used for naming output files)
    :param data: file content as bytes, or None to read it from filename
    :param output_dir: optional directory for extracted objects
    :param save_object: False, 'all', or an object index to extract
    """
    if output_dir:
        if not os.path.isdir(output_dir):
            log.info('creating output directory %s' % output_dir)
            os.mkdir(output_dir)

        fname_prefix = os.path.join(output_dir, sanitize_filename(filename))
    else:
        base_dir = os.path.dirname(filename)
        sane_fname = sanitize_filename(filename)
        fname_prefix = os.path.join(base_dir, sane_fname)

    # TODO: option to extract objects to files (false by default)
    if data is None:
        # read the whole file, closing the handle promptly
        # (the original open(...).read() leaked the file handle)
        with open(filename, 'rb') as f:
            data = f.read()
    print('=' * 79)
    print('File: %r - size: %d bytes' % (filename, len(data)))
    tstream = tablestream.TableStream(column_width=(3, 10, 31, 31),
                                      header_row=('id', 'index', 'OLE Object',
                                                  'OLE Package'),
                                      style=tablestream.TableStyleSlim)
    rtfp = RtfObjParser(data)
    rtfp.parse()
    # enumerate instead of rtfp.objects.index(rtfobj): the .index() call
    # was O(n) per row (O(n^2) overall) and wrong for duplicate objects
    for obj_index, rtfobj in enumerate(rtfp.objects):
        pkg_color = None
        if rtfobj.is_ole:
            ole_column = 'format_id: %d\n' % rtfobj.format_id
            ole_column += 'class name: %r\n' % rtfobj.class_name
            ole_column += 'data size: %d' % rtfobj.oledata_size
            if rtfobj.is_package:
                pkg_column = 'Filename: %r\n' % rtfobj.filename
                pkg_column += 'Source path: %r\n' % rtfobj.src_path
                pkg_column += 'Temp path = %r' % rtfobj.temp_path
                pkg_color = 'yellow'
                # check if the file extension is executable:
                _, ext = os.path.splitext(rtfobj.filename)
                log.debug('File extension: %r' % ext)
                if re_executable_extensions.match(ext):
                    pkg_color = 'red'
                    pkg_column += '\nEXECUTABLE FILE'
            else:
                pkg_column = 'Not an OLE Package'
        else:
            pkg_column = ''
            ole_column = 'Not a well-formed OLE object'
        tstream.write_row(
            (
                obj_index,
                '%08Xh' % rtfobj.start,
                ole_column,
                pkg_column),
            colors=(None, None, None, pkg_color))
        tstream.write_sep()
    if save_object:
        if save_object == 'all':
            objects = rtfp.objects
        else:
            # narrowed from a bare except: int() raises ValueError,
            # indexing raises IndexError
            try:
                i = int(save_object)
                objects = [rtfp.objects[i]]
            except (ValueError, IndexError):
                log.error(
                    'The -s option must be followed by an object index or all, such as "-s 2" or "-s all"'
                )
                return
        for i, rtfobj in enumerate(objects):
            _save_rtf_object(rtfobj, i, fname_prefix)


def _save_rtf_object(rtfobj, i, fname_prefix):
    """Write one parsed RTF object to disk.

    OLE Packages are saved with their embedded filename (sanitized),
    other OLE objects with an extension guessed from the class name,
    and malformed objects as raw data. `i` is only used in messages.
    """
    if rtfobj.is_package:
        print('Saving file from OLE Package in object #%d:' % i)
        print('  Filename = %r' % rtfobj.filename)
        print('  Source path = %r' % rtfobj.src_path)
        print('  Temp path = %r' % rtfobj.temp_path)
        if rtfobj.filename:
            fname = '%s_%s' % (fname_prefix,
                               sanitize_filename(rtfobj.filename))
        else:
            fname = '%s_object_%08X.noname' % (fname_prefix,
                                               rtfobj.start)
        print('  saving to file %s' % fname)
        with open(fname, 'wb') as f:
            f.write(rtfobj.olepkgdata)
    elif rtfobj.is_ole:
        print('Saving file embedded in OLE object #%d:' % i)
        print('  format_id  = %d' % rtfobj.format_id)
        print('  class name = %r' % rtfobj.class_name)
        print('  data size  = %d' % rtfobj.oledata_size)
        # set a file extension according to the class name:
        class_name = rtfobj.class_name.lower()
        if class_name.startswith(b'word'):
            ext = 'doc'
        elif class_name.startswith(b'package'):
            ext = 'package'
        else:
            ext = 'bin'
        fname = '%s_object_%08X.%s' % (fname_prefix, rtfobj.start, ext)
        print('  saving to file %s' % fname)
        with open(fname, 'wb') as f:
            f.write(rtfobj.oledata)
    else:
        print('Saving raw data in object #%d:' % i)
        fname = '%s_object_%08X.raw' % (fname_prefix, rtfobj.start)
        print('  saving object to file %s' % fname)
        with open(fname, 'wb') as f:
            f.write(rtfobj.rawdata)
Esempio n. 5
0
    olefile.FATSECT: "cyan",
    olefile.DIFSECT: "blue",
    'default': None,
}

# === MAIN ===================================================================

# NOTE(review): Python-2-only code (print statements, xrange); this example
# also appears truncated right after the "load MiniFAT" comment below.
if __name__ == '__main__':
    # print banner with version
    print 'olemap %s - http://decalage.info/python/oletools' % __version__

    # first command-line argument: path of the OLE file to map
    fname = sys.argv[1]
    ole = olefile.OleFileIO(fname)

    # display the FAT: one row per sector with its type, file offset and
    # the next sector in the chain
    print 'FAT:'
    t = tablestream.TableStream(
        [8, 12, 8, 8], header_row=['Sector #', 'Type', 'Offset', 'Next #'])
    for i in xrange(ole.nb_sect):
        fat_value = ole.fat[i]
        # special FAT values (free, end of chain, ...) map to a label,
        # anything else is a regular data sector
        fat_type = FAT_TYPES.get(fat_value, '<Data>')
        color_type = FAT_COLORS.get(fat_value, FAT_COLORS['default'])
        # compute offset based on sector size:
        # (presumably the header occupies the first sector-sized block,
        # hence i+1 -- TODO confirm against the OLE spec)
        offset = ole.sectorsize * (i + 1)
        # print '%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value)
        t.write_row(['%8X' % i, fat_type,
                     '%08X' % offset,
                     '%8X' % fat_value],
                    colors=[None, color_type, None, None])
    print ''

    print 'MiniFAT:'
    # load MiniFAT if it wasn't already done:
Esempio n. 6
0
# NOTE(review): Python-2-only version of the olemeta script (uses `unicode`,
# a print statement, and an explicit UTF-8 codec writer for the console).
try:
    # first command-line argument: path of the OLE file to analyze
    ole = olefile.OleFileIO(sys.argv[1])
except IndexError:
    # no filename argument: print the module docstring as usage and exit
    sys.exit(__doc__)

# parse and display metadata:
meta = ole.get_metadata()

# console output with UTF8 encoding:
console_utf8 = codecs.getwriter('utf8')(sys.stdout)

# TODO: move similar code to a function

print('Properties from the SummaryInformation stream:')
t = tablestream.TableStream([21, 30],
                            header_row=['Property', 'Value'],
                            outfile=console_utf8)
for prop in meta.SUMMARY_ATTRIBS:
    value = getattr(meta, prop)
    if value is not None:
        # TODO: pretty printing for strings, dates, numbers
        # TODO: better unicode handling
        # print('- %s: %s' % (prop, value))
        if isinstance(value, unicode):
            # encode to UTF8, avoiding errors
            value = value.encode('utf-8', errors='replace')
        else:
            # non-text values (ints, datetimes, ...) are shown via str()
            value = str(value)
        t.write_row([prop, value], colors=[None, 'yellow'])
t.close()
print ''
Esempio n. 7
0
def extract_hunt_info(filter_str, limit, print_only=False, date_from=None):
    """Fetch VirusTotal hunting notifications and summarize them.

    Queries vt3 for up to `limit` hunting-notification files matching
    `filter_str`. When `print_only` is True, prints a table (rows colored
    yellow/red for low detection counts) and returns None; otherwise
    returns a dict {"count": <total fetched>, "info": [<per-file dict>]}.
    Files first seen before `date_from` ('%Y-%m-%d') are skipped.
    """
    logging.debug("Filter String: {} - Limit: {}".format(filter_str, limit))

    hunts = vt3.get_hunting_notification_files(filter=filter_str, limit=limit)

    hunt_infos = {"count": len(hunts), "info": []}
    if print_only:
        tstream = tablestream.TableStream(column_width=(33, 4, 20, 100, 10),
                                          header_row=('md5', 'hits', 'rule',
                                                      'Details', 'first_seen'),
                                          style=tablestream.TableStyleSlim)

    # parse the cutoff date once, outside the loop (it is loop-invariant;
    # the original re-parsed it for every hunt)
    fs_date_from = None
    if date_from:
        fs_date_from = datetime.datetime.strptime(date_from, '%Y-%m-%d')

    for hunt in hunts:
        # fetch the two attribute dicts once instead of repeating
        # hunt.get('attributes', {}) for every field
        attrs = hunt.get('attributes', {})
        ctx = hunt.get('context_attributes', {})
        sha256 = attrs.get('sha256')
        md5 = attrs.get('md5')
        first_seen_ts = attrs.get('first_submission_date')
        first_seen = datetime.datetime.fromtimestamp(first_seen_ts)
        if fs_date_from and first_seen < fs_date_from:
            logging.debug("Skipping this sha256: {} first_seen: {}".format(
                sha256, first_seen))
            continue
        rule_name = ctx.get('rule_name')
        ruleset_name = ctx.get('ruleset_name')
        positives = attrs.get('last_analysis_stats', {}).get('malicious')
        meaningful_name = attrs.get('meaningful_name')
        names = attrs.get('names')
        tags = attrs.get('tags')
        file_type = attrs.get('type_description')
        match_in_subfile = ctx.get('match_in_subfile')
        times_submitted = attrs.get('times_submitted')
        unique_sources = attrs.get('unique_sources')
        malware_name_info = vt3.getClassification(
            {"data": {
                "attributes": attrs
            }})
        malware_name = "{}.{}".format(malware_name_info.get('category'),
                                      malware_name_info.get('family'))

        info = {
            "sha256": sha256,
            "md5": md5,
            "rule_name": rule_name,
            "ruleset_name": ruleset_name,
            "first_seen": str(first_seen),
            "positives": positives,
            "file_type": file_type,
            "meaningful_name": meaningful_name,
            "names": names,
            "tags": tags,
            "times_submitted": times_submitted,
            "unique_sources": unique_sources,
            "match_in_subfile": match_in_subfile,
            "malware_name": malware_name
        }

        if print_only:
            color_scheme = None
            # guard against a missing detection count: comparing None < int
            # raises TypeError on Python 3
            if positives is not None:
                if positives < 10:
                    color_scheme = 'yellow'
                if positives < 5:
                    color_scheme = 'red'
            # `or []` guards: join() raises TypeError when tags/names is None
            details = "MALWARE_NAME: {}\nNAME: {}\nFILE_TYPE: {}\nTAGS: {}\nNAMES: {}".format(
                malware_name, meaningful_name, file_type, ','.join(tags or []),
                ','.join(names or []))
            tstream.write_row(
                (md5, positives, rule_name, details, first_seen),
                colors=(color_scheme, color_scheme, None, color_scheme, None))
            tstream.write_sep()
        else:
            hunt_infos["info"].append(info)

    if print_only:
        print("Count of files: {}".format(len(hunts)))
        return
    else:
        return hunt_infos
Esempio n. 8
0
    # t = prettytable.PrettyTable(('id', 'Status', 'Type', 'Name', 'Left', 'Right', 'Child', '1st Sect', 'Size'))
    # t.align = 'l'
    # t.max_width['id'] = 4
    # t.max_width['Status'] = 6
    # t.max_width['Type'] = 10
    # t.max_width['Name'] = 10
    # t.max_width['Left'] = 5
    # t.max_width['Right'] = 5
    # t.max_width['Child'] = 5
    # t.max_width['1st Sect'] = 8
    # t.max_width['Size'] = 6

    table = tablestream.TableStream(column_width=[4, 6, 7, 22, 5, 5, 5, 8, 6],
                                    header_row=('id', 'Status', 'Type', 'Name',
                                                'Left', 'Right', 'Child',
                                                '1st Sect', 'Size'),
                                    style=tablestream.TableStyleSlim)

    # TODO: read ALL the actual directory entries from the directory stream, because olefile does not!
    # TODO: OR fix olefile!
    # TODO: olefile should store or give access to the raw direntry data on demand
    # TODO: oledir option to hexdump the raw direntries
    # TODO: olefile should be less picky about incorrect directory structures

    for id in xrange(len(ole.direntries)):
        d = ole.direntries[id]
        if d is None:
            # this direntry is not part of the tree: either unused or an orphan
            d = ole._load_direntry(id)  #ole.direntries[id]
            # print('%03d: %s *** ORPHAN ***' % (id, d.name))