コード例 #1
0
ファイル: dump_functions.py プロジェクト: xou/mcmatch
def main():
  """Print stored functions grouped by the object file that contains them.

  Command-line options:
    -o/--objects     keep functions whose object name contains the value
                     (repeatable; any match counts).
    -f/--functions   keep functions whose name contains the value
                     (repeatable; any match counts).
    -b/--both        require an object match AND a function match instead
                     of either one; ignored unless both filter kinds are given.
    -m/--min-length  skip functions whose disassembly is shorter than this.

  Logs an error and returns without printing when no function is selected.
  """
  logging.basicConfig(level=logging.INFO)

  parser = argparse.ArgumentParser(description='dump objectfile/function structure from the database')
  parser.add_argument('-o', '--objects', dest='object_filter', action='append', default=[],
      help='only process functions in objects whose name contains this parameter. Can be specified multiple times to match name against any of the list.')
  parser.add_argument('-f', '--functions', dest='function_filter',
      action='append', default=[],
      help='only process functions with names containing this parameter. Can be specified multiple times (matches any of the parameters)')
  parser.add_argument('-b', '--both', help="""only include functions matching both object and function filter (instead of either/or).
      If there is not at least one filter for each, this option will do nothing.""",
      action='store_true', dest='require_both')
  parser.add_argument('-m', '--min-length', help='ignore functions with less instructions than this', default=5, type=int,
      action='store', dest='min_length')
  args = parser.parse_args()

  # --both only makes sense when each kind of filter has at least one entry.
  if not args.function_filter or not args.object_filter:
    args.require_both = False

  fundb = DB()
  # Materialise the functions before any further database call is made.
  candidates = list(fundb.all_functions())
  name_filter_active = bool(args.function_filter or args.object_filter)

  if args.object_filter:
    # NOTE(review): presumably this populates fn.in_object for the object
    # name check below - confirm against the DB implementation.
    fundb.precache_containing_objects(None)

  all_fns = []
  for fn in candidates:
    # The length limit applies whether or not a name filter is active;
    # previously it was silently skipped when no -o/-f filter was given.
    if len(fn.disassembly) < args.min_length:
      continue
    if name_filter_active:
      fname_matches = any(filt in fn.name for filt in args.function_filter)
      objnm_matches = any(filt in fn.in_object for filt in args.object_filter)
      if args.require_both:
        selected = fname_matches and objnm_matches
      else:
        selected = fname_matches or objnm_matches
      if not selected:
        continue
    all_fns.append(fn)
  del candidates

  if not all_fns:
    logging.error("no functions to print")
    return

  # put functions back into an object dict (object id -> list of functions)
  objdict = {}
  for fun in all_fns:
    objdict.setdefault(fun.object_id, []).append(fun)

  # Single-argument print(...) behaves the same as the print statement
  # under Python 2, so the output format is unchanged.
  for objectid in objdict:
    obj = fundb.get_object(objectid)
    print(obj.get_path())
    for fun in objdict[objectid]:
      print(">> %s" % (fun.get_shortinfo(obj.get_compileopts()),))
コード例 #2
0
ファイル: main.py プロジェクト: xou/mcmatch
 def repo(self, reponame):
   """Render "repo.tpl.html" with the objects stored for repository *reponame*."""
   # TODO add support for reponame=None
   database = PgFunDB()
   matching_ids = database.get_objectids_matching(repository_is=reponame)
   repo_objects = database.get_objects(matching_ids)
   page = env.get_template("repo.tpl.html")
   page_title = "Repository %s" % (reponame)
   return page.render(title=page_title, reponame=reponame, objects=repo_objects)
コード例 #3
0
ファイル: libc_bench.py プロジェクト: xou/mcmatch
def main():
  """Command-line entry point for the repository benchmark.

  Modes, checked in this order:
    -l/--list            print every repository name and a count, then return.
    -f/--list-functions  print the functions of the given repository and a
                         count, then return.
    -a ... -b ...        run do_knn() and do_dist() with the -a repositories
                         as training sets and the -b repository as test set.

  Logs an error and returns when no mode is selected or when one of the
  two sides contains no functions.
  """
  logging.basicConfig(level=logging.DEBUG)
  parser = argparse.ArgumentParser(description='perform diff actions between functions in the database')
  # The help texts used to be copied from another tool and described
  # function/object name filters; they now describe what the options do here.
  parser.add_argument('-a', '--repository-a', dest='training_sets', default=[], action="append",
      help='repository to use as training set. Can be specified multiple times.')
  parser.add_argument('-b', '--repository-b', dest='test_set', default=None,
      help='repository to use as test set.')
  parser.add_argument('-l', '--list', dest='list',
      action='store_true',
      help='list repositories',
      default=None)

  parser.add_argument('-f', '--list-functions', dest='list_functions_in',
      help='list functions in given repository',
      default=None)
  FeatureArg.apply(parser)
  args = parser.parse_args()

  fdb = DB()
  if args.list:
    count = 0
    for count, repo in enumerate(fdb.get_repository_names(), 1):
      print(repo)
    print("%d repositories." % count)
    return

  if args.list_functions_in:
    count = 0
    for count, fun in enumerate(fdb.get_functions_by_repository(args.list_functions_in), 1):
      print(fun.get_shortinfo(db=fdb))
    print("%d functions in %s." % (count, args.list_functions_in))
    return

  if not args.training_sets or args.test_set is None:
    logging.error("ERROR: Either -l, -f or both -a and -b are required.")
    return

  # Only the counts are needed here; do_knn()/do_dist() get the repository
  # names and query the database themselves.
  functions_a = list(fdb.get_function_texts_by_repository(args.training_sets))
  functions_b = list(fdb.get_function_texts_by_repository(args.test_set))

  logging.info("repository %s: %d functions", args.training_sets, len(functions_a))
  logging.info("repository %s: %d functions", args.test_set, len(functions_b))

  if not functions_a:
    logging.error("repository %s has no functions", args.training_sets)
    return

  if not functions_b:
    logging.error("repository %s has no functions", args.test_set)
    return

  aggr = FeatureArg.get_aggregator(args)
  scale_features = FeatureArg.scale_features(args)

  do_knn(fdb, aggr, scale_features, args.training_sets, args.test_set)
  do_dist(fdb, aggr, scale_features, args.training_sets, args.test_set)
コード例 #4
0
ファイル: fundb_test.py プロジェクト: xou/mcmatch
    def setUp(self):
        """Start a PostgreSQL test instance, run sql/init_bare.psql on it and open ``self.fdb``."""
        sql_init_bare_fname = os.path.join(MCMATCH_BASE, "sql/init_bare.psql")

        self.postgresql = testing.postgresql.Postgresql()
        self.conn = psycopg2.connect(**self.postgresql.dsn())
        # Read the script through a context manager so the file handle is
        # closed right away instead of being left to the garbage collector.
        with open(sql_init_bare_fname, "r") as sql_file:
            init_sql = sql_file.read()
        with self.conn.cursor() as cursor:
            cursor.execute(init_sql)
        self.fdb = PgFunDB(conn=self.conn)
コード例 #5
0
ファイル: main.py プロジェクト: xou/mcmatch
 def repos(self, search=None, submit=None):
   """Render "repos.tpl.html" with the repository names that contain *search*.

   A missing *search* is treated as the empty string, which matches every name.
   """
   needle = "" if search is None else search

   def contains_needle(name):
     return needle in name

   database = PgFunDB()
   matching_names = filter(contains_needle, database.get_repository_names())
   page = env.get_template("repos.tpl.html")
   return page.render(title="repositories", repos=matching_names)
コード例 #6
0
ファイル: main.py プロジェクト: xou/mcmatch
  def fn_search(self, search=None, submit=None):
    """Render "find-fn.html" with up to 300 functions whose signature matches *search*.

    Without a search term the page is rendered with an empty result list
    and an empty search field.
    """
    database = PgFunDB()
    if search is None:
      matches = []
      search_text = ""
    else:
      matches = list(database.get_functions_matching_signature(search, limit=300))
      search_text = search

    page = env.get_template("find-fn.html")
    return page.render(title="Function search", functions=matches, search=search_text)
コード例 #7
0
ファイル: main.py プロジェクト: xou/mcmatch
 def obj(self, id):
   """Render 'obj.tpl.html' for the object with the given id and its functions.

   Raises cherrypy.HTTPError(404) when the database has no such object.
   """
   database = PgFunDB()
   object_id = int(id)
   obj_ = database.get_object(object_id)
   if obj_ is None:
     raise cherrypy.HTTPError(404, "Object with ID %d not found" % object_id)

   assert isinstance(obj_, ObjectInfo)
   page = env.get_template('obj.tpl.html')
   page_title = 'showing object #%d' % (object_id)
   contained = list(database.get_functions_by_objectid(object_id))
   return page.render(title=page_title, obj=obj_, functions=contained)
コード例 #8
0
ファイル: main.py プロジェクト: xou/mcmatch
  def fn(self, id):
    """Render 'fn.tpl.html' for one function, loaded with its disassembly.

    The page receives the features of the function and the features of
    whatever self._optional_code() returns. Raises cherrypy.HTTPError(404)
    when the lookup yields nothing.
    """
    database = PgFunDB()
    function_id = int(id)
    fun = database.get_function_by_id(function_id, include_disassembly=True)
    if not fun:
      raise cherrypy.HTTPError(404, "Function with ID %s not found" % function_id)

    code_features = self._make_features(self._optional_code())
    function_features = self._make_features(fun)

    page = env.get_template('fn.tpl.html')
    page_title = "showing function %d" % (fun.function_id)
    feature_keys = sorted(function_features.keys())
    return page.render(title=page_title,
                       fun=fun,
                       fmetr=function_features,
                       cmetr=code_features,
                       metr_keys=feature_keys)
コード例 #9
0
ファイル: dbutil.py プロジェクト: xou/mcmatch
def main():
  """Delete every object named with -o/--object from the database, then save.

  The option is required and may be repeated; each value is passed to
  delete_objects_by_filename() as given.
  """
  logging.basicConfig(level=logging.INFO)

  parser = argparse.ArgumentParser(description='perform delete actions between functions in the database')
  parser.add_argument('-o', '--object', dest='objects', action='append', default=[],
      help='delete objects by full path', required=True)
  args = parser.parse_args()

  fundb = DB()
  for obj in args.objects:
    logging.info("deleting %s", obj)
    fundb.delete_objects_by_filename(obj)
  # Save once, after all requested deletions have been issued.
  fundb.save()
コード例 #10
0
ファイル: fundb_test.py プロジェクト: xou/mcmatch
 def setUp(self):
   """Start a PostgreSQL test instance, run sql/init_bare.psql on it and open ``self.fdb``."""
   sql_init_bare_fname = os.path.join(MCMATCH_BASE, "sql/init_bare.psql")

   self.postgresql = testing.postgresql.Postgresql()
   self.conn = psycopg2.connect(**self.postgresql.dsn())
   # Read the script through a context manager so the file handle is
   # closed right away instead of being left to the garbage collector.
   with open(sql_init_bare_fname, "r") as sql_file:
     init_sql = sql_file.read()
   with self.conn.cursor() as cursor:
     cursor.execute(init_sql)
   self.fdb = PgFunDB(conn=self.conn)
コード例 #11
0
  def setUp(self):
    """Build a test database from the bundled libc binaries.

    Steps: start PostgreSQL and run sql/init_bare.psql, extract functions
    from test/libc_data/libc-2.20.so and the dietlibc libc.so, tag their
    objects with repository names, recreate one features table per entry of
    counter_features, then calculate and store the features of every
    function text returned by get_function_texts().
    """
    sql_init_bare_fname = os.path.join(MCMATCH_BASE, "sql/init_bare.psql")

    logging.info("setting up database")
    self.postgresql = testing.postgresql.Postgresql()
    self.conn = psycopg2.connect(**self.postgresql.dsn())
    # Read the script through a context manager so the file handle is closed.
    with open(sql_init_bare_fname, "r") as sql_file:
      init_sql = sql_file.read()
    with self.conn.cursor() as cursor:
      cursor.execute(init_sql)
    self.fdb = PgFunDB(conn=self.conn)

    logging.info("extracting functions")
    glibc = os.path.join(MCMATCH_BASE, "test/libc_data/libc-2.20.so")
    process_file(self.fdb, glibc, False, True)
    obj = self.fdb.get_object(self.fdb.get_objectids_matching(filename_is="libc-2.20.so")[0])
    obj.get_compileopts().set_repository("glibc-2.20")
    self.fdb.set_compiler_options(obj)

    dietlibc = os.path.join(MCMATCH_BASE, "test/libc_data/dietlibc/libc.so")
    process_file(self.fdb, dietlibc)
    objids = self.fdb.get_objectids_matching(path_contains="test/libc_data/dietlibc/")
    for objid in objids:
      obj = self.fdb.get_object(objid)
      obj.get_compileopts().set_repository("dietlibc-0.33")
      self.fdb.set_compiler_options(obj)

    logging.info("creating features")

    feature_instances = [counter_features[m] for m in counter_features]
    for feature in feature_instances:
      self.fdb.recreate_features_table(feature)

    function_texts = self.fdb.get_function_texts(with_missing_features=feature_instances)

    for text_id, signature, text in function_texts:
      c = Codeblock()
      c.disassembly_from_text(text)

      # Reuse the instances collected above; they are the same objects, in
      # the same order, that a second walk over counter_features would give.
      for feature in feature_instances:
        feature.calculate(c)
        self.fdb.store_features(text_id, feature)
    self.fdb.save()
コード例 #12
0
ファイル: dbutil.py プロジェクト: xou/mcmatch
def main():
    """Delete every object named with -o/--object from the database, then save.

    The option is required and may be repeated; each value is passed to
    delete_objects_by_filename() as given.
    """
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(
        description='perform delete actions between functions in the database')
    parser.add_argument('-o',
                        '--object',
                        dest='objects',
                        action='append',
                        default=[],
                        help='delete objects by full path',
                        required=True)
    args = parser.parse_args()

    fundb = DB()
    for obj in args.objects:
        logging.info("deleting %s", obj)
        fundb.delete_objects_by_filename(obj)
    # Save once, after all requested deletions have been issued.
    fundb.save()
コード例 #13
0
ファイル: extraction.py プロジェクト: xou/mcmatch
def main():
  """Extract functions from the given paths into the database and save it.

  Each command-line argument is handled by process_dir() if it is a
  directory and by process_file() if it is a regular file; anything else is
  reported and skipped. Without arguments the current directory is scanned.
  """
  logging.basicConfig(level=logging.INFO)
  fdb = DB()
  new_functions = []
  targets = sys.argv[1:]
  if targets:
    for arg in targets:
      if os.path.isdir(arg):
        new_functions += process_dir(fdb, arg)[0]
      elif os.path.isfile(arg):
        file_functions = process_file(fdb, arg, False, True)
        if file_functions is not None:
          new_functions += file_functions
      else:
        # The message used to contain a bare "%s" with no value supplied.
        logging.error("i don't know what to do with argument %s", arg)
  else:
    # Take element [0] exactly as the per-directory branch above does, so
    # the count logged below refers to functions, not to the whole result.
    # NOTE(review): assumes process_dir() returns the same shape when given
    # the third argument - confirm.
    new_functions = process_dir(fdb, ".", 400)[0]

  logging.info("scanning finished. found %d new functions. saving", len(new_functions))

  fdb.save()
コード例 #14
0
ファイル: launch_meld.py プロジェクト: xou/mcmatch
def main():
  if len(sys.argv) < 2 or len(sys.argv) > 3:
    print "Usage:", sys.argv[0], " <function1> [function2]"
    return

  fdb = DB()

  fns = sys.argv[1:]
  funs = fdb.get_functions_by_shortname(fns)
  fnames = []
  for fun in funs:
    if funs[fun] is None:
      logging.error("One or more functions couldn't be found, for example %s." % fun)
      return
    fnames.append(dump_disassembly_to_temp(funs[fun], Fn.DIFF_MNEMONIC | Fn.DIFF_PARAMETERS ))

  if len(fnames) == 1:
    subprocess.call(['gedit', fnames[0]])
  elif len(fnames) == 2:
    print_stats(funs.values()[0], funs.values()[1])
    subprocess.call(['meld', fnames[0], fnames[1]])
  else:
    print "len(fnames) == %d (this is an error)" % len(fnames)
コード例 #15
0
def main():
    """Run knn_file() on every assembly file given with -F/--file.

    Prints an error and returns when no file was given. One
    KNearestNeighbors instance (third argument 30) is built from the
    feature options and shared by all files.
    """
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(description='find k-nearest-neighbours')
    parser.add_argument('-F', '--file', dest='file', action='append',
                        default=[], help='use the given assembly file[s]')
    FeatureArg.apply(parser)
    args = parser.parse_args()

    assembly_files = args.file
    if not assembly_files:
        print("Error: need at least one file.")
        return

    fdb = PgFunDB()
    aggregator = FeatureArg.get_aggregator(args)
    use_scaling = FeatureArg.scale_features(args)
    neighbours = KNearestNeighbors(fdb, aggregator, 30, scale_features=use_scaling)
    # TODO Cluster class should accept more than one Codeblock
    for path in assembly_files:
        knn_file(path, fdb, neighbours)
コード例 #16
0
#!/usr/bin/python2
import sys
import os
from mcmatch.db.pg_database import PgFunDB

# Module-level database handle used by the command functions in this script.
fdb = PgFunDB()
# Maps a command name to the function that implements it.
cmds = {}


def repo(args):
    """Print every repository name when *args* is empty or starts with 'ls'.

    Any other first argument is ignored and nothing is printed.
    """
    wants_listing = len(args) == 0 or args[0] == 'ls'
    if not wants_listing:
        return
    for name in fdb.get_repository_names():
        print(name)


# Make repo() available under the command name 'repo'.
cmds['repo'] = repo


def _objfile_objids(args):
    """Yield the matching object ids for each path in *args*.

    Every path is made absolute before the lookup. A path without any
    match prints an error line and yields nothing.
    """
    for given_path in args:
        absolute = os.path.abspath(given_path)
        matches = fdb.get_objectids_matching(path_is=absolute)
        if len(matches):
            yield matches
        else:
            print("Error: No object matching path %s" % (absolute))


def objfile_show(args):
    for objids in _objfile_objids(args):
コード例 #17
0
ファイル: dump_functions.py プロジェクト: xou/mcmatch
def main():
    """Print stored functions grouped by the object file that contains them.

    Command-line options:
      -o/--objects     keep functions whose object name contains the value
                       (repeatable; any match counts).
      -f/--functions   keep functions whose name contains the value
                       (repeatable; any match counts).
      -b/--both        require an object match AND a function match instead
                       of either one; ignored unless both filter kinds are
                       given.
      -m/--min-length  skip functions whose disassembly is shorter than this.

    Logs an error and returns without printing when no function is selected.
    """
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(
        description='dump objectfile/function structure from the database')
    parser.add_argument(
        '-o',
        '--objects',
        dest='object_filter',
        action='append',
        default=[],
        help=
        'only process functions in objects whose name contains this parameter. Can be specified multiple times to match name against any of the list.'
    )
    parser.add_argument(
        '-f',
        '--functions',
        dest='function_filter',
        action='append',
        default=[],
        help=
        'only process functions with names containing this parameter. Can be specified multiple times (matches any of the parameters)'
    )
    parser.add_argument(
        '-b',
        '--both',
        help=
        """only include functions matching both object and function filter (instead of either/or).
      If there is not at least one filter for each, this option will do nothing.""",
        action='store_true',
        dest='require_both')
    parser.add_argument(
        '-m',
        '--min-length',
        help='ignore functions with less instructions than this',
        default=5,
        type=int,
        action='store',
        dest='min_length')
    args = parser.parse_args()

    # --both only makes sense when each kind of filter has at least one entry.
    if not args.function_filter or not args.object_filter:
        args.require_both = False

    fundb = DB()
    # Materialise the functions before any further database call is made.
    candidates = list(fundb.all_functions())
    name_filter_active = bool(args.function_filter or args.object_filter)

    if args.object_filter:
        # NOTE(review): presumably this populates fn.in_object for the object
        # name check below - confirm against the DB implementation.
        fundb.precache_containing_objects(None)

    all_fns = []
    for fn in candidates:
        # The length limit applies whether or not a name filter is active;
        # previously it was silently skipped when no -o/-f filter was given.
        if len(fn.disassembly) < args.min_length:
            continue
        if name_filter_active:
            fname_matches = any(
                filt in fn.name for filt in args.function_filter)
            objnm_matches = any(
                filt in fn.in_object for filt in args.object_filter)
            if args.require_both:
                selected = fname_matches and objnm_matches
            else:
                selected = fname_matches or objnm_matches
            if not selected:
                continue
        all_fns.append(fn)
    del candidates

    if not all_fns:
        logging.error("no functions to print")
        return

    # put functions back into an object dict (object id -> list of functions)
    objdict = {}
    for fun in all_fns:
        objdict.setdefault(fun.object_id, []).append(fun)

    # Single-argument print(...) behaves the same as the print statement
    # under Python 2, so the output format is unchanged.
    for objectid in objdict:
        obj = fundb.get_object(objectid)
        print(obj.get_path())
        for fun in objdict[objectid]:
            print(">> %s" % (fun.get_shortinfo(obj.get_compileopts()),))
コード例 #18
0
ファイル: main.py プロジェクト: xou/mcmatch
  def knn(self, submit=False, graph=False, **flags):
    """Render the 'knn.tpl.html' page for the code returned by self._require_code().

    Parameters:
      submit: when this and `graph` are both False only the option form is
        rendered, with an empty result.
      graph: when truthy, a distance graph is produced in addition and passed
        to the template as `plotsrc`.
      flags: remaining keyword arguments; used to update the form state, to
        build the per-result checkboxes and, through the optional
        'add_ftdids' entry (comma-separated integers), to add extra ids to
        the graph.

    Returns the rendered template.
    """
    code = self._require_code()
    
    # build options for features
    features = Group("features")
    for feature_group in grouped_features:
      for available_feature in sorted(grouped_features[feature_group].keys()):
        features.add(CheckBox(available_feature), feature_group)

    preprocessing = Group("preprocessing")
    preprocessing.add(CheckBox('ftrscale', 'feature scaling'))
    preprocessing.add(CheckBox('pca', 'PCA'))
    preprocessing.add(CheckBox('randompca', 'Random PCA'))
    preprocessing.add(CheckBox('kernelpca', 'Kernel PCA'))
    #preprocessing.add(CheckBox('lmnn', 'LMNN'))
    #preprocessing.add(CheckBox('nca', 'NCA'))
    
    form = Form()
    form.addGroup(features)
    form.addGroup(preprocessing)
    
    
    template = env.get_template('knn.tpl.html')
    # Nothing submitted and no graph requested: show the bare form only.
    if submit == False and graph == False:
      return template.render(title='K-NearestNeighbors', form_inner=form.getHTML(), result='')
    
    form.updateState(flags)
    
    # Keep the entries of all_features whose form value is truthy.
    selected_features = filter(lambda feature: form.g('features').k(feature).value, all_features)
    # presumably each .value is a bool/0-1, so every product below is either
    # 0 or the corresponding TransformPipeline constant - TODO confirm
    opt_feature_scaling = form.g('preprocessing').k('ftrscale').value * TransformPipeline.TRANSFORM_SCALE
    opt_pca = form.g('preprocessing').k('pca').value * TransformPipeline.TRANSFORM_PCA
    opt_random_pca = form.g('preprocessing').k('randompca').value * TransformPipeline.TRANSFORM_RANDOM_PCA
    opt_kernel_pca = form.g('preprocessing').k('kernelpca').value * TransformPipeline.TRANSFORM_KERNEL_PCA
    #opt_lmnn = form.g('preprocessing').k('lmnn').value * TransformPipeline.TRANSFORM_LMNN
    #opt_nca = form.g('preprocessing').k('nca').value * TransformPipeline.TRANSFORM_NCA
    
    # NOTE(review): `transform` is not read again in this method; only
    # opt_feature_scaling is handed on to the classes below.
    transform = opt_feature_scaling + opt_pca + opt_random_pca + opt_kernel_pca # + opt_lmnn + opt_nca
    
    
    c = Codeblock()
    c.disassembly_from_text(code)
    # Written to stdout, not to the rendered page.
    print c.get_mnemonic_histogram()
    metr = FeatureAggregator([all_features[m] for m in selected_features])
    fdb = PgFunDB()
    repos = list(fdb.get_repository_names())
    # TODO add select mode
    #repos = [r if r is not "None" else None for r in repos]
    # Two repositories are excluded from the training set by name.
    repos = filter(lambda n: n != "musl-1.1.6" and n != 't-glibc', repos)
    knn = KNearestNeighbors(fdb, metr, 100, opt_feature_scaling, training_repositories=repos)
    distances, ft_info = knn.get_neighbours(c)

    function_text_ids = [f[0] for f in ft_info]
    # cache compileroptions for all loaded functions
    fdb.precache_containing_objects(fn_textids=function_text_ids)

    # One result row per neighbour: (checkbox, formatted distance, tree).
    # valueRange collects the (min, max) of distances[0] for the graph below.
    result = []
    valueRange = (1e20, 0)
    for i in range(0, len(distances[0])):
      valueRange = (min(valueRange[0], distances[0][i]), max(valueRange[1], distances[0][i]))
      result.append((self._make_ft_checkbox(function_text_ids[i], flags), ("%6f" % distances[0][i]), FunctionTextTree(fdb, function_text_ids[i])))
    
    plotsrc = None
    if graph:
      nn = DistanceInfo(fdb, metr, opt_feature_scaling, None, "euclidean")
      dists, tb_info = nn.test_codeblock(c)
      function_text_ids_ = [f[0] for f in nn.get_trainingset_infos()]
      additional_ftdids = []
      if 'add_ftdids' in flags:
        additional_ftdids = [int(z) for z in flags['add_ftdids'].split(",")]
      # Ids selected through the flags plus the explicitly added ones are
      # passed to _get_index_from_ftid together with the training-set ids.
      equivalences = self._get_index_from_ftid(self._get_selected_ftids(flags) + additional_ftdids,
                                               function_text_ids_)
      function_names = [nn.get_trainingset_infos()[idx][1] for idx in equivalences]
      # NOTE(review): this replaces the tb_info returned by test_codeblock()
      # with a hard-coded value - confirm this is intended.
      tb_info = (-1, "md5_process_block")
      nn.make_graph_single(dists[0], tb_info, equivalences,
                           valueRange=(valueRange[0]*0.9, valueRange[1]*1.8),
                           equiNames=function_names)
      plotsrc = self._pyplot_to_inline_image()
        
    return template.render(title='K-NearestNeighbors',
                           form_inner=form.getHTML(),
                           result=result,
                           plotsrc=plotsrc)
コード例 #19
0
ファイル: fundb_test.py プロジェクト: xou/mcmatch
class Test(unittest.TestCase):
    """Tests for PgFunDB, run against a PostgreSQL instance started per test."""

    def setUp(self):
        """Start PostgreSQL, run sql/init_bare.psql on it and open ``self.fdb``."""
        sql_init_bare_fname = os.path.join(MCMATCH_BASE, "sql/init_bare.psql")

        self.postgresql = testing.postgresql.Postgresql()
        self.conn = psycopg2.connect(**self.postgresql.dsn())
        # Read the script through a context manager so the file handle is
        # closed right away instead of being left to the garbage collector.
        with open(sql_init_bare_fname, "r") as sql_file:
            init_sql = sql_file.read()
        with self.conn.cursor() as cursor:
            cursor.execute(init_sql)
        self.fdb = PgFunDB(conn=self.conn)

    def tearDown(self):
        """Close the connection and stop the PostgreSQL instance."""
        self.conn.close()
        self.postgresql.stop()

    def testEmptyDB(self):
        """A freshly initialised database has no functions, repositories or objects."""
        self.assertEqual(0, len(list(self.fdb.all_functions(False))))
        self.assertEqual(0, len(list(self.fdb.all_functions(True))))
        self.assertEqual(None, self.fdb.get_function_by_id(1, True))
        self.assertEqual(0, self.fdb.get_function_count())
        self.assertEqual(0, len(list(self.fdb.get_repository_names())))
        self.assertEqual([], self.fdb.get_objects([1]))

    def testOneObject(self):
        """One stored object is found by matching filters and read back intact."""
        fn = Fn(
            "test",
            "void test(int x);",
            "test.c",
        )
        obj = ObjectInfo("/tmp/test.o", time.time(), [fn], False, None)
        obj.set_compileopts(CompilerOptions.from_string("gcc -O2"))
        self.fdb.store_object(obj)
        self.fdb.save()

        # Test valid matchers
        fids = self.fdb.get_objectids_matching(filename_is="test.o")
        self.assertEqual(fids, [1])

        fids = self.fdb.get_objectids_matching(filename_contains="test")
        self.assertEqual(fids, [1])

        fids = self.fdb.get_objectids_matching(path_contains="mp/")
        self.assertEqual(fids, [1])

        fids = self.fdb.get_objectids_matching(path_contains="mp",
                                               filename_contains="test",
                                               filename_is="test.o")
        self.assertEqual(fids, [1])

        # Test invalid combinations
        fids = self.fdb.get_objectids_matching(path_contains="xp")
        self.assertEqual(fids, [])

        fids = self.fdb.get_objectids_matching(path_contains="mp",
                                               filename_contains="test",
                                               filename_is="test")
        self.assertEqual(fids, [])

        # assertEqual replaces the deprecated assertEquals alias.
        obj = self.fdb.get_object(1)
        self.assertEqual(obj.get_path(), "/tmp/test.o")
        self.assertEqual(obj.get_compileopts().get_optlevel(), "2")
        self.assertEqual(obj.get_compileopts().get_compiler(), "gcc")

        self.assertEqual(1, len(list(self.fdb.all_functions(False))))
        fun = self.fdb.get_function_by_id(1, False)
        self.assertEqual(1, fun.get_container_object_id())

    def testTwoObjects(self):
        """Functions of a second, not yet saved object are already counted."""
        fn = Fn(
            "test",
            "void test(int x);",
            "test.c",
        )
        obj = ObjectInfo("/tmp/test.o", time.time(), [fn], False, None)
        obj.set_compileopts(CompilerOptions.from_string("gcc -O2"))
        self.fdb.store_object(obj)
        self.fdb.save()

        fn2 = Fn("test2", "void test2(int x);", "test2.c")
        fn3 = Fn("test", "void test(int x);", "test.c")

        obj = ObjectInfo("/tmp/test2.o", time.time(), [fn2, fn3], False, None)
        co2 = CompilerOptions()
        co2.set_compiler("gcc")
        co2.set_optlevel("1")
        co2.set_repository("test2")
        obj.set_compileopts(co2)
        self.fdb.store_object(obj)
        # don't save this one - functions should be visible
        # even without fdb.save()
        self.assertEqual(3, self.fdb.get_function_count())
        # NOTE(review): this compares 1 with the return value itself; if
        # get_functions_by_shortname() returns a collection, len(...) is
        # probably what was meant - confirm before changing.
        self.assertEqual(
            1, self.fdb.get_functions_by_shortname(['test/O2'], False))
コード例 #20
0
ファイル: libc_bench.py プロジェクト: xou/mcmatch
def main():
    """Command-line entry point for the repository benchmark.

    Modes, checked in this order:
      -l/--list            print every repository name and a count, then
                           return.
      -f/--list-functions  print the functions of the given repository and a
                           count, then return.
      -a ... -b ...        run do_knn() and do_dist() with the -a
                           repositories as training sets and the -b
                           repository as test set.

    Logs an error and returns when no mode is selected or when one of the
    two sides contains no functions.
    """
    logging.basicConfig(level=logging.DEBUG)
    parser = argparse.ArgumentParser(
        description='perform diff actions between functions in the database')
    # The help texts used to be copied from another tool and described
    # function/object name filters; they now describe what the options do
    # here.
    parser.add_argument(
        '-a',
        '--repository-a',
        dest='training_sets',
        default=[],
        action="append",
        help=
        'repository to use as training set. Can be specified multiple times.'
    )
    parser.add_argument(
        '-b',
        '--repository-b',
        dest='test_set',
        default=None,
        help='repository to use as test set.'
    )
    parser.add_argument('-l',
                        '--list',
                        dest='list',
                        action='store_true',
                        help='list repositories',
                        default=None)

    parser.add_argument('-f',
                        '--list-functions',
                        dest='list_functions_in',
                        help='list functions in given repository',
                        default=None)
    FeatureArg.apply(parser)
    args = parser.parse_args()

    fdb = DB()
    if args.list:
        count = 0
        for count, repo in enumerate(fdb.get_repository_names(), 1):
            print(repo)
        print("%d repositories." % count)
        return

    if args.list_functions_in:
        count = 0
        for count, fun in enumerate(
                fdb.get_functions_by_repository(args.list_functions_in), 1):
            print(fun.get_shortinfo(db=fdb))
        print("%d functions in %s." % (count, args.list_functions_in))
        return

    if not args.training_sets or args.test_set is None:
        logging.error("ERROR: Either -l, -f or both -a and -b are required.")
        return

    # Only the counts are needed here; do_knn()/do_dist() get the repository
    # names and query the database themselves.
    functions_a = list(fdb.get_function_texts_by_repository(
        args.training_sets))
    functions_b = list(fdb.get_function_texts_by_repository(args.test_set))

    logging.info("repository %s: %d functions", args.training_sets,
                 len(functions_a))
    logging.info("repository %s: %d functions", args.test_set,
                 len(functions_b))

    if not functions_a:
        logging.error("repository %s has no functions", args.training_sets)
        return

    if not functions_b:
        logging.error("repository %s has no functions", args.test_set)
        return

    aggr = FeatureArg.get_aggregator(args)
    scale_features = FeatureArg.scale_features(args)

    do_knn(fdb, aggr, scale_features, args.training_sets, args.test_set)
    do_dist(fdb, aggr, scale_features, args.training_sets, args.test_set)
コード例 #21
0
ファイル: fundb_test.py プロジェクト: xou/mcmatch
class Test(unittest.TestCase):
  """Tests for PgFunDB, run against a PostgreSQL instance started per test."""

  def setUp(self):
    """Start PostgreSQL, run sql/init_bare.psql on it and open ``self.fdb``."""
    sql_init_bare_fname = os.path.join(MCMATCH_BASE, "sql/init_bare.psql")

    self.postgresql = testing.postgresql.Postgresql()
    self.conn = psycopg2.connect(**self.postgresql.dsn())
    # Read the script through a context manager so the file handle is
    # closed right away instead of being left to the garbage collector.
    with open(sql_init_bare_fname, "r") as sql_file:
      init_sql = sql_file.read()
    with self.conn.cursor() as cursor:
      cursor.execute(init_sql)
    self.fdb = PgFunDB(conn=self.conn)

  def tearDown(self):
    """Close the connection and stop the PostgreSQL instance."""
    self.conn.close()
    self.postgresql.stop()

  def testEmptyDB(self):
    """A freshly initialised database has no functions, repositories or objects."""
    self.assertEqual(0, len(list(self.fdb.all_functions(False))))
    self.assertEqual(0, len(list(self.fdb.all_functions(True))))
    self.assertEqual(None, self.fdb.get_function_by_id(1, True))
    self.assertEqual(0, self.fdb.get_function_count())
    self.assertEqual(0, len(list(self.fdb.get_repository_names())))
    self.assertEqual([], self.fdb.get_objects([1]))

  def testOneObject(self):
    """One stored object is found by matching filters and read back intact."""
    fn = Fn("test", "void test(int x);", "test.c", )
    obj = ObjectInfo("/tmp/test.o", time.time(), [fn], False, None)
    obj.set_compileopts(CompilerOptions.from_string("gcc -O2"))
    self.fdb.store_object(obj)
    self.fdb.save()

    # Test valid matchers
    fids = self.fdb.get_objectids_matching(filename_is="test.o")
    self.assertEqual(fids, [1])

    fids = self.fdb.get_objectids_matching(filename_contains="test")
    self.assertEqual(fids, [1])

    fids = self.fdb.get_objectids_matching(path_contains="mp/")
    self.assertEqual(fids, [1])

    fids = self.fdb.get_objectids_matching(path_contains="mp", filename_contains="test", filename_is="test.o")
    self.assertEqual(fids, [1])

    # Test invalid combinations
    fids = self.fdb.get_objectids_matching(path_contains="xp")
    self.assertEqual(fids, [])

    fids = self.fdb.get_objectids_matching(path_contains="mp", filename_contains="test", filename_is="test")
    self.assertEqual(fids, [])

    # assertEqual replaces the deprecated assertEquals alias.
    obj = self.fdb.get_object(1)
    self.assertEqual(obj.get_path(), "/tmp/test.o")
    self.assertEqual(obj.get_compileopts().get_optlevel(), "2")
    self.assertEqual(obj.get_compileopts().get_compiler(), "gcc")

    self.assertEqual(1, len(list(self.fdb.all_functions(False))))
    fun = self.fdb.get_function_by_id(1, False)
    self.assertEqual(1, fun.get_container_object_id())

  def testTwoObjects(self):
    """Functions of a second, not yet saved object are already counted."""
    fn = Fn("test", "void test(int x);", "test.c", )
    obj = ObjectInfo("/tmp/test.o", time.time(), [fn], False, None)
    obj.set_compileopts(CompilerOptions.from_string("gcc -O2"))
    self.fdb.store_object(obj)
    self.fdb.save()

    fn2 = Fn("test2", "void test2(int x);", "test2.c")
    fn3 = Fn("test", "void test(int x);", "test.c")

    obj = ObjectInfo("/tmp/test2.o", time.time(), [fn2, fn3], False, None)
    co2 = CompilerOptions()
    co2.set_compiler("gcc")
    co2.set_optlevel("1")
    co2.set_repository("test2")
    obj.set_compileopts(co2)
    self.fdb.store_object(obj)
    # don't save this one - functions should be visible
    # even without fdb.save()
    self.assertEqual(3, self.fdb.get_function_count())
    # NOTE(review): this compares 1 with the return value itself; if
    # get_functions_by_shortname() returns a collection, len(...) is
    # probably what was meant - confirm before changing.
    self.assertEqual(1, self.fdb.get_functions_by_shortname(['test/O2'], False))
コード例 #22
0
ファイル: set_compiler_options.py プロジェクト: xou/mcmatch
def main():
    """Store the compiler metadata given on the command line for object files.

    The optimization level, compiler, compiler version and repository are
    copied onto a CompilerOptions instance, which is then applied to each
    listed object file, identified by absolute path and modification time.
    Files for which the database reports no update are logged as warnings;
    the number of updated files is logged at the end.
    """
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)
    parser = argparse.ArgumentParser(
        description='set compile options for the given objects')
    #parser.add_argument('-s', '--opt-string', type=str, dest='optstring', action='store', required=True,
    #    help='compiler command line parameters (for example, gcc -O2 -static)')

    # The first three options differ only in flags, help text and dest.
    string_options = (
        ('-O', '--opt-level', 'optimization level', 'optlevel'),
        ('-c', '--compiler', 'compiler name', 'compiler'),
        ('-v', '--compiler-version', 'compiler version', 'compiler_version'),
    )
    for short_flag, long_flag, help_text, dest_name in string_options:
        parser.add_argument(short_flag, long_flag, type=str, help=help_text,
                            dest=dest_name, default=None)
    parser.add_argument('-r', '--repository', type=str,
                        help='set repository (ex.: git-1.1)', default=None)
    parser.add_argument('objfiles', metavar="f.o", type=str, nargs='+',
                        help='object files to update')
    args = parser.parse_args()

    #compopts = CompilerOptions.from_string(args.optstring)
    compopts = CompilerOptions()
    compopts.compiler = args.compiler
    compopts.compiler_version = args.compiler_version
    compopts.opt = args.optlevel
    compopts.repository = args.repository
    logging.info("setting %s on %d objects" %
                 (compopts.get_shortinfo(), len(args.objfiles)))

    database = DB()

    updated = 0
    for given_path in args.objfiles:
        absolute = os.path.abspath(given_path)
        modified_at = os.stat(absolute).st_mtime
        if database.set_compiler_options_by_path(absolute, modified_at, compopts):
            updated += 1
            continue
        logging.warning("no update performed on object %s." % absolute)

    database.save()

    logging.info("Updated compiler info for %d file(s)" % updated)
コード例 #23
0
ファイル: diff_functions.py プロジェクト: xou/mcmatch
def main():
  """Command-line entry point: compare functions stored in the database.

  Parses the command line, selects the candidate functions (optionally
  restricted by function-name and object-path substring filters), runs either
  an m-to-n comparison (when at least one -a/--function-a is given) or an
  n-to-n comparison, and prints the sorted result list to stdout.

  Note that the --min-length filter is only evaluated while a name/object
  filter is active; without any filter every function is used.

  Raises:
    Exception: if --mode carries a value this function does not know.
  """
  logging.basicConfig(level=logging.INFO)

  parser = argparse.ArgumentParser(description='perform diff actions between functions in the database')
  parser.add_argument('-a', '--function-a', dest='function_a', action='append', default = [],
      help='compare given function to all others (filters apply). Can be specified multiple times.')
  parser.add_argument('-o', '--objects', dest='object_filter', action='append', default = [],
      help='only process functions in objects whose name contains this parameter. Can be specified multiple times to match name against any of the list.')
  parser.add_argument('-f', '--functions', dest='function_filter',
      action='append', default = [],
      help='only process functions with names containing this parameter. Can be specified multiple times (matches any of the parameters)')
  parser.add_argument('-b', '--both', help="""only include functions matching both object and function filter (instead of either/or).
      If there is not at least one filter for each, this option will do nothing.""",
      action='store_true', dest='require_both')
  parser.add_argument('-m', '--min-length', help='ignore functions with less instructions than this', default=5, type=int,
      action='store', dest='min_length')
  parser.add_argument('--mode', help='diff mode to use', choices=['diff-ratio', 'feature-default', 'feature-mncount'],
      action='store', dest='mode')
  parser.add_argument('-s', '--scale', help='use scaling for feature-* modes', action='store_true', dest='scale')
  args = parser.parse_args()

  # --both only makes sense when there is at least one filter of each kind.
  if not args.function_filter or not args.object_filter:
    args.require_both = False

  fundb = DB()
  logging.info("Loading functions")
  x_all_fns = fundb.all_functions()
  all_fns = []
  allfn_namefilter_active = bool(args.function_filter) or bool(args.object_filter)

  if args.object_filter:
    fundb.precache_containing_objects(None)

  # 'diff-ratio' is an advertised choice and is the same as the default mode;
  # previously passing it explicitly ended up in the error branch below.
  if args.mode is None or args.mode == 'diff-ratio':
    mode = MODE_FNDIFF
  elif args.mode == 'feature-default':
    mode = MODE_METRIC_EUCLID
  elif args.mode == 'feature-mncount':
    mode = MODE_HIST_EUCLID
  else:
    raise Exception("something went wrong, got %s as --mode" % args.mode)

  scaling = None
  if args.scale:
    logging.info("collecting feature scaling information...")
    # The function collection is walked twice when scaling is requested (once
    # here, once for filtering below). Materialize it first so that a one-shot
    # iterator from all_functions() is not already exhausted by make_scaling.
    x_all_fns = list(x_all_fns)
    scaling = make_scaling(x_all_fns)

  if allfn_namefilter_active:
    for fn in x_all_fns:
      disassembly = fn.disassembly
      # Functions without any disassembly are deliberately not length-filtered.
      if disassembly and (len(disassembly) < args.min_length):
        continue
      fname_matches = any(filt in fn.name for filt in args.function_filter)
      objnm_matches = False
      if args.object_filter:
        # Look the containing object's path up once per function instead of
        # once per filter string.
        obj_path = fundb.get_object(fn.get_container_object_id()).get_path()
        objnm_matches = any(filt in obj_path for filt in args.object_filter)

      if args.require_both:
        selected = fname_matches and objnm_matches
      else:
        selected = fname_matches or objnm_matches
      if selected:
        all_fns.append(fn)
  else:
    all_fns = list(x_all_fns)
  del x_all_fns

  logging.info("Loaded functions, initializing analysis")

  if not all_fns:
    logging.warning("no functions to analyze")
    return

  if args.function_a:
    fun_dict = fundb.get_functions_by_shortname(args.function_a)
    # verify that there are actually functions to analyze
    if not any(fun_dict[funname] is not None for funname in fun_dict):
      logging.error("could not find any 'function a'")
      return
    results = m_to_n_compare(fundb, fun_dict, all_fns, mode, scaling)
  else:
    results = n_to_n_compare(fundb, all_fns, mode, scaling)

  # Single-argument print(...) produces the same output under the Python 2
  # print statement this file otherwise uses.
  print("done.")
  results.sort(reverse=(mode == MODE_FNDIFF))

  for entry in results:
    print("%.3f %s" % (entry[0], entry[1:]))
コード例 #24
0
class Test(unittest.TestCase):
  """Integration test running against a temporary PostgreSQL instance.

  setUp creates the schema from sql/init_bare.psql, imports the glibc and
  dietlibc test binaries, tags them with a repository name and computes all
  counter features; the tests then check the stored data and the distance
  analysis results against fixed expected numbers.
  """

  def setUp(self):
    """Create the database, import both libc binaries and store features."""
    sql_init_bare_fname = os.path.join(MCMATCH_BASE, "sql/init_bare.psql")

    logging.info("setting up database")
    self.postgresql = testing.postgresql.Postgresql()
    self.conn = psycopg2.connect(**self.postgresql.dsn())
    # Read the schema script through a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open(sql_init_bare_fname, "r") as sql_file:
      init_sql = sql_file.read()
    with self.conn.cursor() as cursor:
      cursor.execute(init_sql)
    self.fdb = PgFunDB(conn=self.conn)

    logging.info("extracting functions")
    glibc = os.path.join(MCMATCH_BASE, "test/libc_data/libc-2.20.so")
    process_file(self.fdb, glibc, False, True)
    glibc_ids = self.fdb.get_objectids_matching(filename_is="libc-2.20.so")
    obj = self.fdb.get_object(glibc_ids[0])
    obj.get_compileopts().set_repository("glibc-2.20")
    self.fdb.set_compiler_options(obj)

    dietlibc = os.path.join(MCMATCH_BASE, "test/libc_data/dietlibc/libc.so")
    process_file(self.fdb, dietlibc)
    objids = self.fdb.get_objectids_matching(path_contains="test/libc_data/dietlibc/")
    for objid in objids:
      obj = self.fdb.get_object(objid)
      obj.get_compileopts().set_repository("dietlibc-0.33")
      self.fdb.set_compiler_options(obj)

    logging.info("creating features")

    feature_instances = [counter_features[m] for m in counter_features]
    for feature in feature_instances:
      self.fdb.recreate_features_table(feature)

    function_texts = self.fdb.get_function_texts(with_missing_features=feature_instances)

    for row in function_texts:
      text_id, signature, text = row

      c = Codeblock()
      c.disassembly_from_text(text)

      # Each feature object is recalculated for this code block and stored
      # before moving on to the next one.
      for mcounter in feature_instances:
        mcounter.calculate(c)
        self.fdb.store_features(text_id, mcounter)
    self.fdb.save()

  def tearDown(self):
    """Close the connection and stop the temporary PostgreSQL instance."""
    self.conn.close()
    self.postgresql.stop()

  def testRepoDB(self):
    """Check object and function counts per repository."""
    self.assertEqual(1, len(self.fdb.get_objectids_matching(repository_is="dietlibc-0.33")))
    self.assertEqual(1, len(self.fdb.get_objectids_matching(repository_is="glibc-2.20")))
    self.assertEqual(640, len(list(self.fdb.get_functions_by_repository("dietlibc-0.33"))))
    self.assertEqual(2468, len(list(self.fdb.get_functions_by_repository("glibc-2.20"))))

  def testAB(self):
    """Train on glibc, test on dietlibc and check the good/bad tallies."""
    all_features = FeatureAggregator([counter_features[c] for c in counter_features])
    di = analyze.DistanceInfo(self.fdb, all_features, training_repositories=['glibc-2.20'])
    pairwise_d, testset_infos = di.test(self.fdb, in_repositories=['dietlibc-0.33'])
    training_infos = di.get_trainingset_infos()
    em = analyze.DistanceInfo.make_equivalence_map(testset_infos, training_infos)
    good, bad, other = 0, 0, 0
    for i in range(len(em)):
      res = analyze.DistanceInfo.get_partition_sizes(pairwise_d[i], None, em[i])
      for el in res:
        # Tally by comparing the first and third entry of each result.
        if el[0] < el[2]:
          good += 1
        elif el[0] > el[2]:
          bad += 1
        else:
          other += 1
    self.assertEqual(101, good)
    self.assertEqual(69, bad)
    # The number of ties is reported but not asserted.
    print(other)
コード例 #25
0
ファイル: calculate_statistics.py プロジェクト: xou/mcmatch
def main():
  """Command-line entry point: compute missing feature data for functions.

  Selects the features to work on (all of them unless -m/--feature names a
  subset), optionally recreates their tables (-X, then exits) or clears their
  data (-x), asks the database for function texts that lack any of the
  selected features, and calculates and stores the selected features for each
  of them.
  """
  logging.basicConfig(level=logging.INFO)

  parser = argparse.ArgumentParser(description='calculate and store missing feature data for functions in the database')
  parser.add_argument('-x', '--force', dest='force', help='clear all statistics', action="store_true")
  parser.add_argument('-X', '--recreate-tables', help='recreate all feature tables and exit',
      action='store_true', dest='recreate_tables')
  parser.add_argument('-m', '--feature', dest='feature', choices=all_features.keys(), nargs='*')
  args = parser.parse_args()

  fundb = DB()

  # No -m option (or -m without values) selects every known feature.
  if args.feature:
    match_features = [name for name in all_features if name in args.feature]
  else:
    match_features = list(all_features)

  if args.recreate_tables:
    for m in match_features:
      logging.info("recreating table for feature %s" % m)
      fundb.recreate_features_table(all_features[m])
    fundb.save()
    return

  if args.force:
    for m in match_features:
      logging.info("clearing data for feature %s" % m)
      fundb.delete_feature_data(all_features[m])

  logging.info("looking for missing features")
  function_texts = fundb.get_function_texts(with_missing_features=[all_features[m] for m in match_features])

  if len(function_texts) == 0:
    logging.warning("seems like everything is already up-to-date.")
    return
  logging.info("done, starting calculations")

  prog = NProgressPrinter(len(function_texts))
  for row in function_texts:
    prog.bump()
    text_id, signature, text = row

    c = Codeblock()
    c.disassembly_from_text(text)

    logging.debug("updating features for %d/%s..." % (text_id, signature))
    # Only touch the selected features: the "missing" query and the clearing
    # above are restricted to match_features, so storing every known feature
    # here would ignore --feature.
    for m in match_features:
      mcounter = all_features[m]
      mcounter.calculate(c)
      fundb.store_features(text_id, mcounter)
    # save() is called after every function; presumably this persists progress
    # incrementally -- TODO confirm against DB.save().
    fundb.save()

  fundb.save()
コード例 #26
0
def main():
    """Command-line entry point: compare functions stored in the database.

    Parses the command line, selects the candidate functions (optionally
    restricted by function-name and object-path substring filters), runs
    either an m-to-n comparison (when at least one -a/--function-a is given)
    or an n-to-n comparison, and prints the sorted result list to stdout.

    Note that the --min-length filter is only evaluated while a name/object
    filter is active; without any filter every function is used.

    Raises:
        Exception: if --mode carries a value this function does not know.
    """
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(
        description='perform diff actions between functions in the database')
    parser.add_argument(
        '-a',
        '--function-a',
        dest='function_a',
        action='append',
        default=[],
        help=
        'compare given function to all others (filters apply). Can be specified multiple times.'
    )
    parser.add_argument(
        '-o',
        '--objects',
        dest='object_filter',
        action='append',
        default=[],
        help=
        'only process functions in objects whose name contains this parameter. Can be specified multiple times to match name against any of the list.'
    )
    parser.add_argument(
        '-f',
        '--functions',
        dest='function_filter',
        action='append',
        default=[],
        help=
        'only process functions with names containing this parameter. Can be specified multiple times (matches any of the parameters)'
    )
    parser.add_argument(
        '-b',
        '--both',
        help=
        """only include functions matching both object and function filter (instead of either/or).
      If there is not at least one filter for each, this option will do nothing.""",
        action='store_true',
        dest='require_both')
    parser.add_argument(
        '-m',
        '--min-length',
        help='ignore functions with less instructions than this',
        default=5,
        type=int,
        action='store',
        dest='min_length')
    parser.add_argument(
        '--mode',
        help='diff mode to use',
        choices=['diff-ratio', 'feature-default', 'feature-mncount'],
        action='store',
        dest='mode')
    parser.add_argument('-s',
                        '--scale',
                        help='use scaling for feature-* modes',
                        action='store_true',
                        dest='scale')
    args = parser.parse_args()

    # --both only makes sense when there is at least one filter of each kind.
    if not args.function_filter or not args.object_filter:
        args.require_both = False

    fundb = DB()
    logging.info("Loading functions")
    x_all_fns = fundb.all_functions()
    all_fns = []
    allfn_namefilter_active = (bool(args.function_filter)
                               or bool(args.object_filter))

    if args.object_filter:
        fundb.precache_containing_objects(None)

    # 'diff-ratio' is an advertised choice and is the same as the default
    # mode; previously passing it explicitly ended up in the error branch.
    if args.mode is None or args.mode == 'diff-ratio':
        mode = MODE_FNDIFF
    elif args.mode == 'feature-default':
        mode = MODE_METRIC_EUCLID
    elif args.mode == 'feature-mncount':
        mode = MODE_HIST_EUCLID
    else:
        raise Exception("something went wrong, got %s as --mode" % args.mode)

    scaling = None
    if args.scale:
        logging.info("collecting feature scaling information...")
        # The function collection is walked twice when scaling is requested
        # (once here, once for filtering below). Materialize it first so that
        # a one-shot iterator from all_functions() is not already exhausted
        # by make_scaling.
        x_all_fns = list(x_all_fns)
        scaling = make_scaling(x_all_fns)

    if allfn_namefilter_active:
        for fn in x_all_fns:
            disassembly = fn.disassembly
            # Functions without any disassembly are deliberately not
            # length-filtered.
            if disassembly and (len(disassembly) < args.min_length):
                continue
            fname_matches = any(
                filt in fn.name for filt in args.function_filter)
            objnm_matches = False
            if args.object_filter:
                # Look the containing object's path up once per function
                # instead of once per filter string.
                obj_path = fundb.get_object(
                    fn.get_container_object_id()).get_path()
                objnm_matches = any(
                    filt in obj_path for filt in args.object_filter)

            if args.require_both:
                selected = fname_matches and objnm_matches
            else:
                selected = fname_matches or objnm_matches
            if selected:
                all_fns.append(fn)
    else:
        all_fns = list(x_all_fns)
    del x_all_fns

    logging.info("Loaded functions, initializing analysis")

    if not all_fns:
        logging.warning("no functions to analyze")
        return

    if args.function_a:
        fun_dict = fundb.get_functions_by_shortname(args.function_a)
        # verify that there are actually functions to analyze
        if not any(fun_dict[funname] is not None for funname in fun_dict):
            logging.error("could not find any 'function a'")
            return
        results = m_to_n_compare(fundb, fun_dict, all_fns, mode, scaling)
    else:
        results = n_to_n_compare(fundb, all_fns, mode, scaling)

    # Single-argument print(...) produces the same output under the Python 2
    # print statement this file otherwise uses.
    print("done.")
    results.sort(reverse=(mode == MODE_FNDIFF))

    for entry in results:
        print("%.3f %s" % (entry[0], entry[1:]))