コード例 #1
0
ファイル: calculate_statistics.py プロジェクト: xou/mcmatch
def main():
  """Compute and store missing feature data for functions in the database.

  Command-line options:
    -x/--force            delete the stored data of the selected features
                          before looking for missing ones
    -X/--recreate-tables  recreate the tables of the selected features,
                          save, and exit
    -m/--feature          names of the features to process; when omitted
                          (or given without values) every entry of
                          ``all_features`` is selected
  """
  logging.basicConfig(level=logging.INFO)

  parser = argparse.ArgumentParser(description='perform diff actions between functions in the database')
  parser.add_argument('-x', '--force', dest='force', help='clear all statistics', action="store_true")
  parser.add_argument('-X', '--recreate-tables', help='recreate all feature tables and exit',
      action='store_true', dest='recreate_tables')
  parser.add_argument('-m', '--feature', dest='feature', choices=all_features.keys(), nargs='*')
  args = parser.parse_args()

  fundb = DB()

  # args.feature is None without -m and [] for a bare -m; both select everything.
  if args.feature:
    match_features = [name for name in all_features if name in args.feature]
  else:
    match_features = list(all_features)

  if args.recreate_tables:
    for name in match_features:
      logging.info("recreating table for feature %s", name)
      fundb.recreate_features_table(all_features[name])
    fundb.save()
    return

  if args.force:
    for name in match_features:
      logging.info("clearing data for feature %s", name)
      fundb.delete_feature_data(all_features[name])

  logging.info("looking for missing features")
  selected = [all_features[name] for name in match_features]
  function_texts = fundb.get_function_texts(with_missing_features=selected)

  if len(function_texts) == 0:
    logging.warning("seems like everything is already up-to-date.")
    return
  logging.info("done, starting calculations")

  prog = NProgressPrinter(len(function_texts))
  for text_id, signature, text in function_texts:
    prog.bump()

    block = Codeblock()
    block.disassembly_from_text(text)

    logging.debug("updating features for %d/%s...", text_id, signature)
    # Only the selected features are calculated and stored. The previous code
    # walked all_features here, which ignored the -m selection.
    for feature in selected:
      feature.calculate(block)
      fundb.store_features(text_id, feature)
    # NOTE(review): saving per function presumably keeps progress if the run
    # is interrupted -- confirm before batching these calls.
    fundb.save()

  fundb.save()
コード例 #2
0
ファイル: dbutil.py プロジェクト: xou/mcmatch
def main():
    """Delete objects from the database by path.

    Each ``-o/--object`` value (at least one is required) is passed to
    ``DB.delete_objects_by_filename``; the database is saved once after all
    deletions have been issued.
    """
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(
        description='perform delete actions between functions in the database')
    parser.add_argument('-o',
                        '--object',
                        dest='objects',
                        action='append',
                        default=[],
                        help='delete objects by full path',
                        required=True)
    args = parser.parse_args()

    fundb = DB()
    for path in args.objects:
        logging.info("deleting %s", path)
        fundb.delete_objects_by_filename(path)
    fundb.save()
コード例 #3
0
ファイル: extraction.py プロジェクト: xou/mcmatch
def main():
  """Scan files and directories for functions and record them in the database.

  Every command-line argument is handled by kind: directories go through
  ``process_dir``, regular files through ``process_file``; anything else is
  reported as an error and skipped. Without arguments the current directory
  is scanned. The number of newly found functions is logged and the database
  is saved at the end.
  """
  logging.basicConfig(level=logging.INFO)
  fdb = DB()
  new_functions = []
  targets = sys.argv[1:]
  if targets:
    for arg in targets:
      if os.path.isdir(arg):
        new_functions += process_dir(fdb, arg)[0]
      elif os.path.isfile(arg):
        file_functions = process_file(fdb, arg, False, True)
        if file_functions is not None:
          new_functions += file_functions
      else:
        # The format placeholder previously had no argument, so the
        # offending value was never shown.
        logging.error("i don't know what to do with argument %s", arg)
  else:
    # Take element [0] as in the directory branch above, so the count logged
    # below is a number of functions rather than the length of the result
    # tuple. NOTE(review): the meaning of the third argument (400) is not
    # visible here.
    new_functions += process_dir(fdb, ".", 400)[0]

  logging.info("scanning finished. found %d new functions. saving", len(new_functions))

  fdb.save()
コード例 #4
0
ファイル: set_compiler_options.py プロジェクト: xou/mcmatch
def main():
    """Record compiler metadata for the object files given on the command line.

    The optional flags (-O, -c, -v, -r) fill a ``CompilerOptions`` instance,
    which is then applied to each positional object file through
    ``DB.set_compiler_options_by_path`` together with the file's absolute
    path and modification time. Files for which that call returns a falsy
    value are reported with a warning. The database is saved once at the end
    and the number of successful updates is logged.
    """
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)

    parser = argparse.ArgumentParser(
        description='set compile options for the given objects')
    # (short flag, long flag, destination attribute, help text)
    string_options = (
        ('-O', '--opt-level', 'optlevel', 'optimization level'),
        ('-c', '--compiler', 'compiler', 'compiler name'),
        ('-v', '--compiler-version', 'compiler_version', 'compiler version'),
        ('-r', '--repository', 'repository', 'set repository (ex.: git-1.1)'),
    )
    for short_flag, long_flag, dest, help_text in string_options:
        parser.add_argument(short_flag,
                            long_flag,
                            type=str,
                            help=help_text,
                            dest=dest,
                            default=None)
    parser.add_argument('objfiles',
                        metavar="f.o",
                        type=str,
                        nargs='+',
                        help='object files to update')
    args = parser.parse_args()

    compopts = CompilerOptions()
    compopts.compiler = args.compiler
    compopts.compiler_version = args.compiler_version
    compopts.opt = args.optlevel
    compopts.repository = args.repository
    summary = compopts.get_shortinfo()
    logging.info("setting %s on %d objects" % (summary, len(args.objfiles)))

    db = DB()

    updated = 0
    for raw_path in args.objfiles:
        path = os.path.abspath(raw_path)
        modified = os.stat(path).st_mtime
        if not db.set_compiler_options_by_path(path, modified, compopts):
            logging.warning("no update performed on object %s." % path)
            continue
        updated += 1

    db.save()

    logging.info("Updated compiler info for %d file(s)" % updated)
コード例 #5
0
ファイル: libc_bench.py プロジェクト: xou/mcmatch
def main():
    """List repositories/functions or benchmark one repository set against another.

    Modes, checked in this order:
      -l/--list            print every repository name and a total, then exit
      -f/--list-functions  print the functions of the given repository and a
                           total, then exit
      -a ... -b ...        run ``do_knn`` and ``do_dist`` with the -a
                           repositories as training sets and the -b
                           repository as test set

    Further feature-related options are registered by ``FeatureArg.apply``.
    An error is logged (and nothing is run) when neither a list mode nor both
    -a and -b are given, or when either side contains no functions.
    """
    logging.basicConfig(level=logging.DEBUG)
    parser = argparse.ArgumentParser(
        description='perform diff actions between functions in the database')
    # The help texts for -a/-b used to be copied from an unrelated tool and
    # described name filters; they now describe what the options do here.
    parser.add_argument(
        '-a',
        '--repository-a',
        dest='training_sets',
        default=[],
        action="append",
        help='repository to use as training set. Can be specified multiple times.'
    )
    parser.add_argument(
        '-b',
        '--repository-b',
        dest='test_set',
        default=None,
        help='repository to use as test set.'
    )
    parser.add_argument('-l',
                        '--list',
                        dest='list',
                        action='store_true',
                        help='list repositories',
                        default=None)

    parser.add_argument('-f',
                        '--list-functions',
                        dest='list_functions_in',
                        help='list functions in given repository',
                        default=None)
    FeatureArg.apply(parser)
    args = parser.parse_args()

    fdb = DB()
    if args.list:
        count = 0
        for count, repo in enumerate(fdb.get_repository_names(), 1):
            print(repo)
        print("%d repositories." % count)
        return

    if args.list_functions_in:
        count = 0
        functions = fdb.get_functions_by_repository(args.list_functions_in)
        for count, fun in enumerate(functions, 1):
            print(fun.get_shortinfo(db=fdb))
        print("%d functions in %s." % (count, args.list_functions_in))
        return

    if not args.training_sets or args.test_set is None:
        logging.error("ERROR: Either -l, -f or both -a and -b are required.")
        return

    # Loaded only to report sizes and to refuse empty inputs; do_knn/do_dist
    # receive the repository names, not these lists.
    functions_a = list(fdb.get_function_texts_by_repository(
        args.training_sets))
    functions_b = list(fdb.get_function_texts_by_repository(args.test_set))

    logging.info("repository %s: %d functions",
                 args.training_sets, len(functions_a))
    logging.info("repository %s: %d functions",
                 args.test_set, len(functions_b))

    if not functions_a:
        logging.error("repository %s has no functions", args.training_sets)
        return

    if not functions_b:
        logging.error("repository %s has no functions", args.test_set)
        return

    aggr = FeatureArg.get_aggregator(args)
    scale_features = FeatureArg.scale_features(args)

    do_knn(fdb, aggr, scale_features, args.training_sets, args.test_set)
    do_dist(fdb, aggr, scale_features, args.training_sets, args.test_set)
コード例 #6
0
ファイル: dump_functions.py プロジェクト: xou/mcmatch
def main():
    """Print the object-file/function structure stored in the database.

    Functions can be restricted with substring filters on the function name
    (-f) and on the containing object (-o). By default a function is kept
    when either kind of filter matches; with -b/--both it must match both.
    The selected functions are grouped by object id and printed as the
    object's path followed by one ``>>`` line per function.
    """
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(
        description='dump objectfile/function structure from the database')
    parser.add_argument(
        '-o',
        '--objects',
        dest='object_filter',
        action='append',
        default=[],
        help=
        'only process functions in objects whose name contains this parameter. Can be specified multiple times to match name against any of the list.'
    )
    parser.add_argument(
        '-f',
        '--functions',
        dest='function_filter',
        action='append',
        default=[],
        help=
        'only process functions with names containing this parameter. Can be specified multiple times (matches any of the parameters)'
    )
    parser.add_argument(
        '-b',
        '--both',
        help=
        """only include functions matching both object and function filter (instead of either/or).
      If there is not at least one filter for each, this option will do nothing.""",
        action='store_true',
        dest='require_both')
    parser.add_argument(
        '-m',
        '--min-length',
        help='ignore functions with less instructions than this',
        default=5,
        type=int,
        action='store',
        dest='min_length')
    args = parser.parse_args()

    # --both is meaningless unless at least one filter of each kind exists.
    if not args.function_filter or not args.object_filter:
        args.require_both = False

    fundb = DB()
    candidates = list(fundb.all_functions())
    filters_active = bool(args.function_filter or args.object_filter)

    if args.object_filter:
        fundb.precache_containing_objects(None)

    if filters_active:
        selected = []
        for fn in candidates:
            if len(fn.disassembly) < args.min_length:
                continue
            fname_matches = any(
                filt in fn.name for filt in args.function_filter)
            objnm_matches = any(
                filt in fn.in_object for filt in args.object_filter)

            if args.require_both:
                keep = fname_matches and objnm_matches
            else:
                keep = fname_matches or objnm_matches
            if keep:
                selected.append(fn)
    else:
        # NOTE(review): --min-length is only applied when a name filter is
        # active; without filters every function is kept. Confirm whether
        # that is intended.
        selected = candidates
    del candidates

    if not selected:
        logging.error("no functions to print")
        return

    # Group the functions by the id of the object they belong to.
    by_object = {}
    for fun in selected:
        by_object.setdefault(fun.object_id, []).append(fun)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    for object_id, funs in by_object.items():
        obj = fundb.get_object(object_id)
        print(obj.get_path())
        for fun in funs:
            print(">> %s" % (fun.get_shortinfo(obj.get_compileopts()),))
コード例 #7
0
def main():
    """Compare functions from the database and print the sorted results.

    Functions can be restricted with substring filters on the function name
    (-f) and on the path of the containing object (-o); -b/--both requires
    both kinds to match. With one or more -a/--function-a values those
    functions are compared against the selection (``m_to_n_compare``),
    otherwise the selection is compared with itself (``n_to_n_compare``).
    ``--mode`` picks the comparison mode and -s/--scale enables feature
    scaling computed over all functions in the database.

    Raises:
        Exception: if ``--mode`` carries a value this function has no
            mapping for.
    """
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(
        description='perform diff actions between functions in the database')
    parser.add_argument(
        '-a',
        '--function-a',
        dest='function_a',
        action='append',
        default=[],
        help=
        'compare given function to all others (filters apply). Can be specified multiple times.'
    )
    parser.add_argument(
        '-o',
        '--objects',
        dest='object_filter',
        action='append',
        default=[],
        help=
        'only process functions in objects whose name contains this parameter. Can be specified multiple times to match name against any of the list.'
    )
    parser.add_argument(
        '-f',
        '--functions',
        dest='function_filter',
        action='append',
        default=[],
        help=
        'only process functions with names containing this parameter. Can be specified multiple times (matches any of the parameters)'
    )
    parser.add_argument(
        '-b',
        '--both',
        help=
        """only include functions matching both object and function filter (instead of either/or).
      If there is not at least one filter for each, this option will do nothing.""",
        action='store_true',
        dest='require_both')
    parser.add_argument(
        '-m',
        '--min-length',
        help='ignore functions with less instructions than this',
        default=5,
        type=int,
        action='store',
        dest='min_length')
    parser.add_argument(
        '--mode',
        help='diff mode to use',
        choices=['diff-ratio', 'feature-default', 'feature-mncount'],
        action='store',
        dest='mode')
    parser.add_argument('-s',
                        '--scale',
                        help='use scaling for feature-* modes',
                        action='store_true',
                        dest='scale')
    args = parser.parse_args()

    # --both is meaningless unless at least one filter of each kind exists.
    if not args.function_filter or not args.object_filter:
        args.require_both = False

    fundb = DB()
    logging.info("Loading functions")
    # Materialize once: the functions are walked by make_scaling (with
    # --scale) and again by the filter below, which a one-shot iterator
    # would not survive.
    x_all_fns = list(fundb.all_functions())
    all_fns = []
    allfn_namefilter_active = bool(args.function_filter or args.object_filter)

    if args.object_filter:
        fundb.precache_containing_objects(None)

    # 'diff-ratio' is an accepted --mode choice and maps to the default mode;
    # it previously fell through to the error branch.
    mode_by_name = {
        None: MODE_FNDIFF,
        'diff-ratio': MODE_FNDIFF,
        'feature-default': MODE_METRIC_EUCLID,
        'feature-mncount': MODE_HIST_EUCLID,
    }
    if args.mode not in mode_by_name:
        raise Exception("something went wrong, got %s as --mode" % args.mode)
    mode = mode_by_name[args.mode]

    scaling = None
    if args.scale:
        logging.info("collecting feature scaling information...")
        scaling = make_scaling(x_all_fns)

    if allfn_namefilter_active:
        for fn in x_all_fns:
            if fn.disassembly and (len(fn.disassembly) < args.min_length):
                continue
            fname_matches = any(
                filt in fn.name for filt in args.function_filter)
            objnm_matches = False
            if args.object_filter:
                # Look the object path up once per function rather than
                # once per filter.
                obj_path = fundb.get_object(
                    fn.get_container_object_id()).get_path()
                objnm_matches = any(
                    filt in obj_path for filt in args.object_filter)

            if args.require_both:
                keep = fname_matches and objnm_matches
            else:
                keep = fname_matches or objnm_matches
            if keep:
                all_fns.append(fn)
    else:
        all_fns = x_all_fns
    del x_all_fns

    logging.info("Loaded functions, initializing analysis")

    if not all_fns:
        logging.warning("no functions to analyze")
        return

    if args.function_a:
        fun_dict = fundb.get_functions_by_shortname(args.function_a)
        # verify that there are actually functions to analyze
        if not any(fun_dict[name] is not None for name in fun_dict):
            logging.error("could not find any 'function a'")
            return
        results = m_to_n_compare(fundb, fun_dict, all_fns, mode, scaling)
    else:
        results = n_to_n_compare(fundb, all_fns, mode, scaling)

    print("done.")
    results.sort(reverse=(mode == MODE_FNDIFF))

    # One formatted string keeps the output identical on Python 2 and avoids
    # Python 3 silently dropping everything after the score.
    for entry in results:
        print("%.3f %s" % (entry[0], entry[1:]))