def main():
    """Print each stored object's path followed by its functions.

    Command-line filters (-o / -f, optionally combined with -b) restrict
    which functions are listed.  Note that --min-length is only applied
    when at least one name filter is given; without filters every function
    returned by the database is printed.
    """
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(description='dump objectfile/function structure from the database')
    parser.add_argument('-o', '--objects', dest='object_filter', action='append', default=[],
                        help='only process functions in objects whose name contains this parameter. Can be specified multiple times to match name against any of the list.')
    parser.add_argument('-f', '--functions', dest='function_filter', action='append', default=[],
                        help='only process functions with names containing this parameter. Can be specified multiple times (matches any of the parameters)')
    parser.add_argument('-b', '--both', dest='require_both', action='store_true',
                        help="""only include functions matching both object and function filter (instead of either/or). If there is not at least one filter for each, this option will do nothing.""")
    parser.add_argument('-m', '--min-length', dest='min_length', action='store', type=int, default=5,
                        help='ignore functions with less instructions than this')
    args = parser.parse_args()

    has_function_filter = len(args.function_filter) > 0
    has_object_filter = len(args.object_filter) > 0

    # "--both" is meaningless unless both kinds of filter were supplied.
    if not (has_function_filter and has_object_filter):
        args.require_both = False

    fundb = DB()
    candidates = list(fundb.all_functions())
    if has_object_filter:
        fundb.precache_containing_objects(None)

    if has_function_filter or has_object_filter:
        selected = []
        for fn in candidates:
            if len(fn.disassembly) < args.min_length:
                continue
            name_hit = any(filt in fn.name for filt in args.function_filter)
            object_hit = any(filt in fn.in_object for filt in args.object_filter)
            if args.require_both:
                wanted = name_hit and object_hit
            else:
                wanted = name_hit or object_hit
            if wanted:
                selected.append(fn)
    else:
        selected = candidates

    if not selected:
        logging.error("no functions to print")
        return

    # Regroup the flat function list by the object that contains each one.
    functions_by_object = {}
    for fun in selected:
        functions_by_object.setdefault(fun.object_id, []).append(fun)

    for objectid in functions_by_object:
        obj = fundb.get_object(objectid)
        print(obj.get_path())
        for fun in functions_by_object[objectid]:
            print(">> %s" % (fun.get_shortinfo(obj.get_compileopts()),))
def repo(self, reponame):
    """Render the page that lists the objects stored for one repository.

    :param reponame: repository name used to select objects.
    :returns: the rendered "repo.tpl.html" template.
    """
    # TODO add support for reponame=None
    fdb = PgFunDB()
    matching_ids = fdb.get_objectids_matching(repository_is=reponame)
    page = env.get_template("repo.tpl.html")
    return page.render(title="Repository %s" % (reponame),
                       reponame=reponame,
                       objects=fdb.get_objects(matching_ids))
def main():
    """Compare the functions of training repositories (-a) with a test repository (-b).

    With -l the repository names are listed, with -f the functions of one
    repository are listed; in both cases the program returns afterwards.
    Otherwise both -a and -b are required and do_knn / do_dist are run on
    the selected repositories.
    """
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser(description='perform diff actions between functions in the database')
    parser.add_argument('-a', '--repository-a', dest='training_sets', action="append", default=[],
                        help='compare given function to all others (filters apply). Can be specified multiple times.')
    parser.add_argument('-b', '--repository-b', dest='test_set', default=None,
                        help='only process functions in objects whose name contains this parameter. Can be specified multiple times to match name against any of the list.')
    parser.add_argument('-l', '--list', dest='list', action='store_true', default=None,
                        help='list repositories')
    parser.add_argument('-f', '--list-functions', dest='list_functions_in', default=None,
                        help='list functions in given repository')
    FeatureArg.apply(parser)
    args = parser.parse_args()

    fdb = DB()

    if args.list:
        count = 0
        for count, repo in enumerate(fdb.get_repository_names(), 1):
            print(repo)
        print("%d repositories." % count)
        return

    if args.list_functions_in:
        count = 0
        for count, fun in enumerate(fdb.get_functions_by_repository(args.list_functions_in), 1):
            print(fun.get_shortinfo(db=fdb))
        print("%d functions in %s." % (count, args.list_functions_in))
        return

    if args.test_set is None or not args.training_sets:
        logging.error("ERROR: Either -l, -f or both -a and -b are required.")
        return

    functions_a = list(fdb.get_function_texts_by_repository(args.training_sets))
    functions_b = list(fdb.get_function_texts_by_repository(args.test_set))
    logging.info("repository %s: %d functions" % (args.training_sets, len(functions_a)))
    logging.info("repository %s: %d functions" % (args.test_set, len(functions_b)))

    # Refuse to continue when either side of the comparison is empty.
    if not functions_a:
        logging.error("repository %s has no functions" % (args.training_sets))
        return
    if not functions_b:
        logging.error("repository %s has no functions" % (args.test_set))
        return

    aggr = FeatureArg.get_aggregator(args)
    scale_features = FeatureArg.scale_features(args)
    do_knn(fdb, aggr, scale_features, args.training_sets, args.test_set)
    do_dist(fdb, aggr, scale_features, args.training_sets, args.test_set)
def setUp(self):
    """Start a temporary PostgreSQL instance and load the bare schema into it.

    Sets ``self.postgresql``, ``self.conn`` and ``self.fdb`` for the tests.
    """
    sql_init_bare_fname = os.path.join(MCMATCH_BASE, "sql/init_bare.psql")
    # Read the schema first and close the file deterministically; a missing
    # schema file now fails before a database server has been started.
    with open(sql_init_bare_fname, "r") as schema_file:
        schema_sql = schema_file.read()

    self.postgresql = testing.postgresql.Postgresql()
    self.conn = psycopg2.connect(**self.postgresql.dsn())
    with self.conn.cursor() as cursor:
        cursor.execute(schema_sql)
    self.fdb = PgFunDB(conn=self.conn)
def repos(self, search=None, submit=None):
    """Render the list of repository names, optionally narrowed by a substring.

    :param search: substring a repository name must contain; ``None`` is
        treated as the empty string, which matches every name.
    :param submit: accepted but not used by this handler.
    :returns: the rendered "repos.tpl.html" template.
    """
    needle = "" if search is None else search
    fdb = PgFunDB()
    matching_names = [name for name in fdb.get_repository_names() if needle in name]
    page = env.get_template("repos.tpl.html")
    return page.render(title="repositories", repos=matching_names)
def fn_search(self, search=None, submit=None):
    """Render the function search page.

    :param search: signature search term; when ``None`` no query is run and
        an empty result list with an empty search box is rendered.
    :param submit: accepted but not used by this handler.
    :returns: the rendered "find-fn.html" template.
    """
    fdb = PgFunDB()
    if search is None:
        functions = []
        shown_search = ""
    else:
        # The query is capped at 300 results.
        functions = list(fdb.get_functions_matching_signature(search, limit=300))
        shown_search = search
    page = env.get_template("find-fn.html")
    return page.render(title="Function search", functions=functions, search=shown_search)
def obj(self, id):
    """Render the detail page of one object together with its functions.

    :param id: object id; converted with ``int()`` before use.
    :raises cherrypy.HTTPError: 404 when no object with that id exists.
    :returns: the rendered "obj.tpl.html" template.
    """
    fdb = PgFunDB()
    object_id = int(id)

    obj_ = fdb.get_object(object_id)
    if obj_ is None:
        raise cherrypy.HTTPError(404, "Object with ID %d not found" % object_id)
    assert isinstance(obj_, ObjectInfo)

    page = env.get_template('obj.tpl.html')
    heading = 'showing object #%d' % (object_id)
    contained = list(fdb.get_functions_by_objectid(object_id))
    return page.render(title=heading, obj=obj_, functions=contained)
def fn(self, id):
    """Render the detail page of one function, including its feature values.

    Features are computed both for the stored function and for whatever
    ``self._optional_code()`` returns, and both sets are handed to the
    template.

    :param id: function id; converted with ``int()`` before use.
    :raises cherrypy.HTTPError: 404 when the lookup yields a falsy result.
    :returns: the rendered "fn.tpl.html" template.
    """
    fdb = PgFunDB()
    function_id = int(id)

    fun = fdb.get_function_by_id(function_id, include_disassembly=True)
    if not fun:
        raise cherrypy.HTTPError(404, "Function with ID %s not found" % function_id)

    code_features = self._make_features(self._optional_code())
    function_features = self._make_features(fun)

    page = env.get_template('fn.tpl.html')
    heading = "showing function %d" % (fun.function_id)
    return page.render(title=heading,
                       fun=fun,
                       fmetr=function_features,
                       cmetr=code_features,
                       metr_keys=sorted(function_features.keys()))
def main():
    """Delete objects from the database by file name.

    Every path given with -o/--object (at least one is required) is passed
    to ``delete_objects_by_filename``; the database is saved once after all
    deletions.
    """
    # The former ``available_features`` local was never read and has been
    # removed.
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(description='perform delete actions between functions in the database')
    parser.add_argument('-o', '--object', dest='objects', action='append', default=[],
                        help='delete objects by full path', required=True)
    args = parser.parse_args()

    fundb = DB()
    for obj in args.objects:
        logging.info("deleting %s", obj)
        fundb.delete_objects_by_filename(obj)
    fundb.save()
def setUp(self):
    """Build a temporary database containing two libc builds and their features.

    Steps: load the bare schema into a fresh PostgreSQL instance, import
    the glibc and dietlibc test binaries, tag their objects with a
    repository name, recreate the feature tables and compute every counter
    feature for the function texts reported as missing features.
    """
    sql_init_bare_fname = os.path.join(MCMATCH_BASE, "sql/init_bare.psql")
    logging.info("setting up database")
    # Read the schema with a context manager so the handle is closed, and
    # do it before the database server is started.
    with open(sql_init_bare_fname, "r") as schema_file:
        schema_sql = schema_file.read()

    self.postgresql = testing.postgresql.Postgresql()
    self.conn = psycopg2.connect(**self.postgresql.dsn())
    with self.conn.cursor() as cursor:
        cursor.execute(schema_sql)
    self.fdb = PgFunDB(conn=self.conn)

    logging.info("extracting functions")
    glibc = os.path.join(MCMATCH_BASE, "test/libc_data/libc-2.20.so")
    process_file(self.fdb, glibc, False, True)
    glibc_ids = self.fdb.get_objectids_matching(filename_is="libc-2.20.so")
    obj = self.fdb.get_object(glibc_ids[0])
    obj.get_compileopts().set_repository("glibc-2.20")
    self.fdb.set_compiler_options(obj)

    dietlibc = os.path.join(MCMATCH_BASE, "test/libc_data/dietlibc/libc.so")
    process_file(self.fdb, dietlibc)
    for objid in self.fdb.get_objectids_matching(path_contains="test/libc_data/dietlibc/"):
        obj = self.fdb.get_object(objid)
        obj.get_compileopts().set_repository("dietlibc-0.33")
        self.fdb.set_compiler_options(obj)

    logging.info("creating features")
    feature_instances = [counter_features[m] for m in counter_features]
    for feature in feature_instances:
        self.fdb.recreate_features_table(feature)

    function_texts = self.fdb.get_function_texts(with_missing_features=feature_instances)
    for text_id, signature, text in function_texts:
        c = Codeblock()
        c.disassembly_from_text(text)
        for mcounter in feature_instances:
            mcounter.calculate(c)
            self.fdb.store_features(text_id, mcounter)
    self.fdb.save()
def main():
    """Delete objects from the database by file name.

    Every path given with -o/--object (at least one is required) is passed
    to ``delete_objects_by_filename``; the database is saved once after all
    deletions.
    """
    # The former ``available_features`` local was never read and has been
    # removed.
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(
        description='perform delete actions between functions in the database')
    parser.add_argument('-o', '--object', dest='objects', action='append', default=[],
                        help='delete objects by full path', required=True)
    args = parser.parse_args()

    fundb = DB()
    for obj in args.objects:
        logging.info("deleting %s", obj)
        fundb.delete_objects_by_filename(obj)
    fundb.save()
def main():
    """Scan files/directories given on the command line and store their functions.

    Directories are handed to ``process_dir`` and regular files to
    ``process_file``; anything else is reported as an error.  Without
    arguments the current directory is scanned.  The database is saved at
    the end.
    """
    logging.basicConfig(level=logging.INFO)
    fdb = DB()
    new_functions = []

    targets = sys.argv[1:]
    if targets:
        for arg in targets:
            if os.path.isdir(arg):
                new_functions += process_dir(fdb, arg)[0]
            elif os.path.isfile(arg):
                file_functions = process_file(fdb, arg, False, True)
                if file_functions is not None:
                    new_functions += file_functions
            else:
                # The message used to contain a bare "%s" with no argument,
                # so the offending path was never shown.
                logging.error("i don't know what to do with argument %s", arg)
    else:
        # NOTE(review): the branch above indexes process_dir(...)[0] while
        # this call uses the whole return value - confirm which is intended.
        new_functions = process_dir(fdb, ".", 400)

    logging.info("scanning finished. found %d new functions. saving" % len(new_functions))
    fdb.save()
def main():
    """Show one function's disassembly, or diff two of them, in an external tool.

    Expects one or two function short names on the command line.  Each
    function's disassembly is dumped to a temporary file; a single file is
    opened in gedit, two files are compared in meld after printing stats.
    """
    argc = len(sys.argv)
    if argc < 2 or argc > 3:
        print("Usage: %s  <function1> [function2]" % (sys.argv[0],))
        return

    fdb = DB()
    funs = fdb.get_functions_by_shortname(sys.argv[1:])

    fnames = []
    for shortname in funs:
        if funs[shortname] is None:
            logging.error("One or more functions couldn't be found, for example %s." % shortname)
            return
        fnames.append(dump_disassembly_to_temp(funs[shortname], Fn.DIFF_MNEMONIC | Fn.DIFF_PARAMETERS))

    dumped = len(fnames)
    if dumped == 1:
        subprocess.call(['gedit', fnames[0]])
    elif dumped == 2:
        resolved = list(funs.values())
        print_stats(resolved[0], resolved[1])
        subprocess.call(['meld', fnames[0], fnames[1]])
    else:
        print("len(fnames) == %d (this is an error)" % dumped)
def main():
    """Run a k-nearest-neighbours lookup for each assembly file given with -F.

    Feature selection and scaling come from the options that ``FeatureArg``
    adds to the parser.  A single KNearestNeighbors instance (constructed
    with the value 30) is shared by all files.
    """
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(description='find k-nearest-neighbours')
    parser.add_argument('-F', '--file', dest='file', action='append', default=[],
                        help='use the given assembly file[s]')
    FeatureArg.apply(parser)
    args = parser.parse_args()

    if not args.file:
        print("Error: need at least one file.")
        return

    fdb = PgFunDB()
    metr = FeatureArg.get_aggregator(args)
    scale_features = FeatureArg.scale_features(args)
    knn = KNearestNeighbors(fdb, metr, 30, scale_features=scale_features)

    # TODO Cluster class should accept more than one Codeblock
    for path in args.file:
        knn_file(path, fdb, knn)
#!/usr/bin/python2 import sys import os from mcmatch.db.pg_database import PgFunDB fdb = PgFunDB() cmds = {} def repo(args): if not len(args) or args[0] == 'ls': rx = fdb.get_repository_names() for r in rx: print r cmds['repo'] = repo def _objfile_objids(args): for arg in args: arg = os.path.abspath(arg) objids = fdb.get_objectids_matching(path_is=arg) if not len(objids): print "Error: No object matching path %s" % (arg) continue yield objids def objfile_show(args): for objids in _objfile_objids(args):
def main():
    """Print each stored object's path followed by its functions.

    Command-line filters (-o / -f, optionally combined with -b) restrict
    which functions are listed.  Note that --min-length is only applied
    when at least one name filter is given; without filters every function
    returned by the database is printed.
    """
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(
        description='dump objectfile/function structure from the database')
    parser.add_argument(
        '-o', '--objects', dest='object_filter', action='append', default=[],
        help='only process functions in objects whose name contains this parameter. Can be specified multiple times to match name against any of the list.')
    parser.add_argument(
        '-f', '--functions', dest='function_filter', action='append', default=[],
        help='only process functions with names containing this parameter. Can be specified multiple times (matches any of the parameters)')
    parser.add_argument(
        '-b', '--both', dest='require_both', action='store_true',
        help="""only include functions matching both object and function filter (instead of either/or). If there is not at least one filter for each, this option will do nothing.""")
    parser.add_argument(
        '-m', '--min-length', dest='min_length', action='store', type=int, default=5,
        help='ignore functions with less instructions than this')
    args = parser.parse_args()

    has_function_filter = len(args.function_filter) > 0
    has_object_filter = len(args.object_filter) > 0

    # "--both" is meaningless unless both kinds of filter were supplied.
    if not (has_function_filter and has_object_filter):
        args.require_both = False

    fundb = DB()
    candidates = list(fundb.all_functions())
    if has_object_filter:
        fundb.precache_containing_objects(None)

    if has_function_filter or has_object_filter:
        selected = []
        for fn in candidates:
            if len(fn.disassembly) < args.min_length:
                continue
            name_hit = any(filt in fn.name for filt in args.function_filter)
            object_hit = any(filt in fn.in_object for filt in args.object_filter)
            if args.require_both:
                wanted = name_hit and object_hit
            else:
                wanted = name_hit or object_hit
            if wanted:
                selected.append(fn)
    else:
        selected = candidates

    if not selected:
        logging.error("no functions to print")
        return

    # Regroup the flat function list by the object that contains each one.
    functions_by_object = {}
    for fun in selected:
        functions_by_object.setdefault(fun.object_id, []).append(fun)

    for objectid in functions_by_object:
        obj = fundb.get_object(objectid)
        print(obj.get_path())
        for fun in functions_by_object[objectid]:
            print(">> %s" % (fun.get_shortinfo(obj.get_compileopts()),))
def knn(self, submit=False, graph=False, **flags):
    """Render the k-nearest-neighbours page for the currently required code.

    Without ``submit`` or ``graph`` only the option form is rendered.
    Otherwise the form state is taken from ``flags``, the neighbours of the
    code block are looked up and listed, and - when ``graph`` is set - a
    distance plot is embedded as an inline image.

    :param submit: set when the form was submitted.
    :param graph: set when a plot is requested.
    :param flags: remaining request parameters (form state, selected
        function-text ids, optional comma-separated 'add_ftdids').
    :returns: the rendered "knn.tpl.html" template.
    """
    code = self._require_code()

    # build options for features
    features = Group("features")
    for feature_group in grouped_features:
        for available_feature in sorted(grouped_features[feature_group].keys()):
            features.add(CheckBox(available_feature), feature_group)

    preprocessing = Group("preprocessing")
    for key, label in (('ftrscale', 'feature scaling'),
                       ('pca', 'PCA'),
                       ('randompca', 'Random PCA'),
                       ('kernelpca', 'Kernel PCA')):
        # 'lmnn' / 'LMNN' and 'nca' / 'NCA' are intentionally not offered.
        preprocessing.add(CheckBox(key, label))

    form = Form()
    form.addGroup(features)
    form.addGroup(preprocessing)
    template = env.get_template('knn.tpl.html')

    # Compared with "== False" on purpose: any other value (even a falsy
    # one such as an empty string) counts as "requested".
    if submit == False and graph == False:
        return template.render(title='K-NearestNeighbors', form_inner=form.getHTML(), result='')

    form.updateState(flags)
    selected_features = [feature for feature in all_features
                         if form.g('features').k(feature).value]

    opt_feature_scaling = form.g('preprocessing').k('ftrscale').value * TransformPipeline.TRANSFORM_SCALE
    opt_pca = form.g('preprocessing').k('pca').value * TransformPipeline.TRANSFORM_PCA
    opt_random_pca = form.g('preprocessing').k('randompca').value * TransformPipeline.TRANSFORM_RANDOM_PCA
    opt_kernel_pca = form.g('preprocessing').k('kernelpca').value * TransformPipeline.TRANSFORM_KERNEL_PCA
    # Combined value is computed but not used further down.
    transform = opt_feature_scaling + opt_pca + opt_random_pca + opt_kernel_pca

    c = Codeblock()
    c.disassembly_from_text(code)
    print(c.get_mnemonic_histogram())

    metr = FeatureAggregator([all_features[m] for m in selected_features])
    fdb = PgFunDB()

    # TODO add select mode
    excluded_repositories = ("musl-1.1.6", 't-glibc')
    repos = [name for name in list(fdb.get_repository_names())
             if name != excluded_repositories[0] and name != excluded_repositories[1]]

    neighbours = KNearestNeighbors(fdb, metr, 100, opt_feature_scaling, training_repositories=repos)
    distances, ft_info = neighbours.get_neighbours(c)
    function_text_ids = [f[0] for f in ft_info]

    # cache compileroptions for all loaded functions
    fdb.precache_containing_objects(fn_textids=function_text_ids)

    result = []
    valueRange = (1e20, 0)
    for i, distance in enumerate(distances[0]):
        valueRange = (min(valueRange[0], distance), max(valueRange[1], distance))
        text_id = function_text_ids[i]
        result.append((self._make_ft_checkbox(text_id, flags),
                       ("%6f" % distance),
                       FunctionTextTree(fdb, text_id)))

    plotsrc = None
    if graph:
        nn = DistanceInfo(fdb, metr, opt_feature_scaling, None, "euclidean")
        dists, tb_info = nn.test_codeblock(c)
        function_text_ids_ = [f[0] for f in nn.get_trainingset_infos()]

        additional_ftdids = []
        if 'add_ftdids' in flags:
            additional_ftdids = [int(z) for z in flags['add_ftdids'].split(",")]

        equivalences = self._get_index_from_ftid(
            self._get_selected_ftids(flags) + additional_ftdids, function_text_ids_)
        function_names = [nn.get_trainingset_infos()[idx][1] for idx in equivalences]

        # The info returned by test_codeblock is replaced by a fixed value.
        tb_info = (-1, "md5_process_block")
        nn.make_graph_single(dists[0], tb_info, equivalences,
                             valueRange=(valueRange[0]*0.9, valueRange[1]*1.8),
                             equiNames=function_names)
        plotsrc = self._pyplot_to_inline_image()

    return template.render(title='K-NearestNeighbors', form_inner=form.getHTML(),
                           result=result, plotsrc=plotsrc)
class Test(unittest.TestCase):
    """Database tests that run against a temporary PostgreSQL instance."""

    def setUp(self):
        """Start a temporary PostgreSQL instance and load the bare schema."""
        sql_init_bare_fname = os.path.join(MCMATCH_BASE, "sql/init_bare.psql")
        # Read the schema with a context manager so the handle is closed,
        # and before the database server is started.
        with open(sql_init_bare_fname, "r") as schema_file:
            schema_sql = schema_file.read()

        self.postgresql = testing.postgresql.Postgresql()
        self.conn = psycopg2.connect(**self.postgresql.dsn())
        with self.conn.cursor() as cursor:
            cursor.execute(schema_sql)
        self.fdb = PgFunDB(conn=self.conn)

    def tearDown(self):
        """Close the connection and stop the temporary server."""
        self.conn.close()
        self.postgresql.stop()

    def testEmptyDB(self):
        """A freshly initialised database reports no content at all."""
        self.assertEqual(0, len(list(self.fdb.all_functions(False))))
        self.assertEqual(0, len(list(self.fdb.all_functions(True))))
        self.assertEqual(None, self.fdb.get_function_by_id(1, True))
        self.assertEqual(0, self.fdb.get_function_count())
        self.assertEqual(0, len(list(self.fdb.get_repository_names())))
        self.assertEqual([], self.fdb.get_objects([1]))

    def testOneObject(self):
        """Store one object and query it through the various matchers."""
        fn = Fn("test", "void test(int x);", "test.c")
        obj = ObjectInfo("/tmp/test.o", time.time(), [fn], False, None)
        obj.set_compileopts(CompilerOptions.from_string("gcc -O2"))
        self.fdb.store_object(obj)
        self.fdb.save()

        # Test valid matchers
        fids = self.fdb.get_objectids_matching(filename_is="test.o")
        self.assertEqual(fids, [1])
        fids = self.fdb.get_objectids_matching(filename_contains="test")
        self.assertEqual(fids, [1])
        fids = self.fdb.get_objectids_matching(path_contains="mp/")
        self.assertEqual(fids, [1])
        fids = self.fdb.get_objectids_matching(path_contains="mp",
                                               filename_contains="test",
                                               filename_is="test.o")
        self.assertEqual(fids, [1])

        # Test invalid combinations
        fids = self.fdb.get_objectids_matching(path_contains="xp")
        self.assertEqual(fids, [])
        fids = self.fdb.get_objectids_matching(path_contains="mp",
                                               filename_contains="test",
                                               filename_is="test")
        self.assertEqual(fids, [])

        # assertEqual replaces the deprecated assertEquals alias.
        obj = self.fdb.get_object(1)
        self.assertEqual(obj.get_path(), "/tmp/test.o")
        self.assertEqual(obj.get_compileopts().get_optlevel(), "2")
        self.assertEqual(obj.get_compileopts().get_compiler(), "gcc")

        self.assertEqual(1, len(list(self.fdb.all_functions(False))))
        fun = self.fdb.get_function_by_id(1, False)
        self.assertEqual(1, fun.get_container_object_id())

    def testTwoObjects(self):
        """Store two objects; the second one is deliberately not saved."""
        fn = Fn("test", "void test(int x);", "test.c")
        obj = ObjectInfo("/tmp/test.o", time.time(), [fn], False, None)
        obj.set_compileopts(CompilerOptions.from_string("gcc -O2"))
        self.fdb.store_object(obj)
        self.fdb.save()

        fn2 = Fn("test2", "void test2(int x);", "test2.c")
        fn3 = Fn("test", "void test(int x);", "test.c")
        obj = ObjectInfo("/tmp/test2.o", time.time(), [fn2, fn3], False, None)
        co2 = CompilerOptions()
        co2.set_compiler("gcc")
        co2.set_optlevel("1")
        co2.set_repository("test2")
        obj.set_compileopts(co2)
        self.fdb.store_object(obj)
        # don't save this one - functions should be visible
        # even without fdb.save()

        self.assertEqual(3, self.fdb.get_function_count())
        # NOTE(review): this compares the return value itself with 1; if
        # get_functions_by_shortname returns a collection, a len() or
        # per-key check may have been intended - confirm.
        self.assertEqual(1, self.fdb.get_functions_by_shortname(['test/O2'], False))
def main():
    """Compare the functions of training repositories (-a) with a test repository (-b).

    With -l the repository names are listed, with -f the functions of one
    repository are listed; in both cases the program returns afterwards.
    Otherwise both -a and -b are required and do_knn / do_dist are run on
    the selected repositories.
    """
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser(
        description='perform diff actions between functions in the database')
    parser.add_argument(
        '-a', '--repository-a', dest='training_sets', action="append", default=[],
        help='compare given function to all others (filters apply). Can be specified multiple times.')
    parser.add_argument(
        '-b', '--repository-b', dest='test_set', default=None,
        help='only process functions in objects whose name contains this parameter. Can be specified multiple times to match name against any of the list.')
    parser.add_argument('-l', '--list', dest='list', action='store_true', default=None,
                        help='list repositories')
    parser.add_argument('-f', '--list-functions', dest='list_functions_in', default=None,
                        help='list functions in given repository')
    FeatureArg.apply(parser)
    args = parser.parse_args()

    fdb = DB()

    if args.list:
        count = 0
        for count, repo in enumerate(fdb.get_repository_names(), 1):
            print(repo)
        print("%d repositories." % count)
        return

    if args.list_functions_in:
        count = 0
        for count, fun in enumerate(fdb.get_functions_by_repository(args.list_functions_in), 1):
            print(fun.get_shortinfo(db=fdb))
        print("%d functions in %s." % (count, args.list_functions_in))
        return

    if args.test_set is None or not args.training_sets:
        logging.error("ERROR: Either -l, -f or both -a and -b are required.")
        return

    functions_a = list(fdb.get_function_texts_by_repository(args.training_sets))
    functions_b = list(fdb.get_function_texts_by_repository(args.test_set))
    logging.info("repository %s: %d functions" % (args.training_sets, len(functions_a)))
    logging.info("repository %s: %d functions" % (args.test_set, len(functions_b)))

    # Refuse to continue when either side of the comparison is empty.
    if not functions_a:
        logging.error("repository %s has no functions" % (args.training_sets))
        return
    if not functions_b:
        logging.error("repository %s has no functions" % (args.test_set))
        return

    aggr = FeatureArg.get_aggregator(args)
    scale_features = FeatureArg.scale_features(args)
    do_knn(fdb, aggr, scale_features, args.training_sets, args.test_set)
    do_dist(fdb, aggr, scale_features, args.training_sets, args.test_set)
class Test(unittest.TestCase):
    """Database tests that run against a temporary PostgreSQL instance."""

    def setUp(self):
        """Start a temporary PostgreSQL instance and load the bare schema."""
        sql_init_bare_fname = os.path.join(MCMATCH_BASE, "sql/init_bare.psql")
        # Read the schema with a context manager so the handle is closed,
        # and before the database server is started.
        with open(sql_init_bare_fname, "r") as schema_file:
            schema_sql = schema_file.read()

        self.postgresql = testing.postgresql.Postgresql()
        self.conn = psycopg2.connect(**self.postgresql.dsn())
        with self.conn.cursor() as cursor:
            cursor.execute(schema_sql)
        self.fdb = PgFunDB(conn=self.conn)

    def tearDown(self):
        """Close the connection and stop the temporary server."""
        self.conn.close()
        self.postgresql.stop()

    def testEmptyDB(self):
        """A freshly initialised database reports no content at all."""
        self.assertEqual(0, len(list(self.fdb.all_functions(False))))
        self.assertEqual(0, len(list(self.fdb.all_functions(True))))
        self.assertEqual(None, self.fdb.get_function_by_id(1, True))
        self.assertEqual(0, self.fdb.get_function_count())
        self.assertEqual(0, len(list(self.fdb.get_repository_names())))
        self.assertEqual([], self.fdb.get_objects([1]))

    def testOneObject(self):
        """Store one object and query it through the various matchers."""
        fn = Fn("test", "void test(int x);", "test.c")
        obj = ObjectInfo("/tmp/test.o", time.time(), [fn], False, None)
        obj.set_compileopts(CompilerOptions.from_string("gcc -O2"))
        self.fdb.store_object(obj)
        self.fdb.save()

        # Test valid matchers
        fids = self.fdb.get_objectids_matching(filename_is="test.o")
        self.assertEqual(fids, [1])
        fids = self.fdb.get_objectids_matching(filename_contains="test")
        self.assertEqual(fids, [1])
        fids = self.fdb.get_objectids_matching(path_contains="mp/")
        self.assertEqual(fids, [1])
        fids = self.fdb.get_objectids_matching(path_contains="mp",
                                               filename_contains="test",
                                               filename_is="test.o")
        self.assertEqual(fids, [1])

        # Test invalid combinations
        fids = self.fdb.get_objectids_matching(path_contains="xp")
        self.assertEqual(fids, [])
        fids = self.fdb.get_objectids_matching(path_contains="mp",
                                               filename_contains="test",
                                               filename_is="test")
        self.assertEqual(fids, [])

        # assertEqual replaces the deprecated assertEquals alias.
        obj = self.fdb.get_object(1)
        self.assertEqual(obj.get_path(), "/tmp/test.o")
        self.assertEqual(obj.get_compileopts().get_optlevel(), "2")
        self.assertEqual(obj.get_compileopts().get_compiler(), "gcc")

        self.assertEqual(1, len(list(self.fdb.all_functions(False))))
        fun = self.fdb.get_function_by_id(1, False)
        self.assertEqual(1, fun.get_container_object_id())

    def testTwoObjects(self):
        """Store two objects; the second one is deliberately not saved."""
        fn = Fn("test", "void test(int x);", "test.c")
        obj = ObjectInfo("/tmp/test.o", time.time(), [fn], False, None)
        obj.set_compileopts(CompilerOptions.from_string("gcc -O2"))
        self.fdb.store_object(obj)
        self.fdb.save()

        fn2 = Fn("test2", "void test2(int x);", "test2.c")
        fn3 = Fn("test", "void test(int x);", "test.c")
        obj = ObjectInfo("/tmp/test2.o", time.time(), [fn2, fn3], False, None)
        co2 = CompilerOptions()
        co2.set_compiler("gcc")
        co2.set_optlevel("1")
        co2.set_repository("test2")
        obj.set_compileopts(co2)
        self.fdb.store_object(obj)
        # don't save this one - functions should be visible
        # even without fdb.save()

        self.assertEqual(3, self.fdb.get_function_count())
        # NOTE(review): this compares the return value itself with 1; if
        # get_functions_by_shortname returns a collection, a len() or
        # per-key check may have been intended - confirm.
        self.assertEqual(1, self.fdb.get_functions_by_shortname(['test/O2'], False))
def main():
    """Attach compiler information to already-stored object files.

    The compiler name, version, optimisation level and repository given on
    the command line are applied to every listed object file, identified by
    absolute path and modification time.  The number of objects actually
    updated is logged after saving.
    """
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    parser = argparse.ArgumentParser(
        description='set compile options for the given objects')
    # A single "-s/--opt-string" option parsed by CompilerOptions.from_string
    # was previously considered here; the individual options below are used
    # instead.
    parser.add_argument('-O', '--opt-level', dest='optlevel', type=str, default=None,
                        help='optimization level')
    parser.add_argument('-c', '--compiler', dest='compiler', type=str, default=None,
                        help='compiler name')
    parser.add_argument('-v', '--compiler-version', dest='compiler_version', type=str, default=None,
                        help='compiler version')
    parser.add_argument('-r', '--repository', type=str, default=None,
                        help='set repository (ex.: git-1.1)')
    parser.add_argument('objfiles', metavar="f.o", type=str, nargs='+',
                        help='object files to update')
    args = parser.parse_args()

    compopts = CompilerOptions()
    compopts.compiler = args.compiler
    compopts.compiler_version = args.compiler_version
    compopts.opt = args.optlevel
    compopts.repository = args.repository

    logging.info("setting %s on %d objects" % (compopts.get_shortinfo(), len(args.objfiles)))

    db = DB()
    updated = 0
    for given_path in args.objfiles:
        path = os.path.abspath(given_path)
        mtime = os.path.getmtime(path)
        if db.set_compiler_options_by_path(path, mtime, compopts):
            updated += 1
        else:
            logging.warning("no update performed on object %s." % path)
    db.save()

    logging.info("Updated compiler info for %d file(s)" % updated)
def main():
    """Compare functions from the database and print the sorted scores.

    With -a the named functions are compared against the (optionally
    filtered) function set via ``m_to_n_compare``; otherwise every selected
    function is compared with every other via ``n_to_n_compare``.  Results
    are printed sorted, in descending order for the diff-ratio mode and in
    ascending order for the feature modes.

    :raises Exception: if --mode carries a value that is not handled.
    """
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(description='perform diff actions between functions in the database')
    parser.add_argument('-a', '--function-a', dest='function_a', action='append', default=[],
                        help='compare given function to all others (filters apply). Can be specified multiple times.')
    parser.add_argument('-o', '--objects', dest='object_filter', action='append', default=[],
                        help='only process functions in objects whose name contains this parameter. Can be specified multiple times to match name against any of the list.')
    parser.add_argument('-f', '--functions', dest='function_filter', action='append', default=[],
                        help='only process functions with names containing this parameter. Can be specified multiple times (matches any of the parameters)')
    parser.add_argument('-b', '--both', dest='require_both', action='store_true',
                        help="""only include functions matching both object and function filter (instead of either/or). If there is not at least one filter for each, this option will do nothing.""")
    parser.add_argument('-m', '--min-length', dest='min_length', action='store', type=int, default=5,
                        help='ignore functions with less instructions than this')
    parser.add_argument('--mode', dest='mode', action='store',
                        choices=['diff-ratio', 'feature-default', 'feature-mncount'],
                        help='diff mode to use')
    parser.add_argument('-s', '--scale', dest='scale', action='store_true',
                        help='use scaling for feature-* modes')
    args = parser.parse_args()

    # "--both" is meaningless unless both kinds of filter were supplied.
    if len(args.function_filter) == 0 or len(args.object_filter) == 0:
        args.require_both = False

    fundb = DB()
    logging.info("Loading functions")
    # Materialise the functions once: they are read by make_scaling() and
    # again by the filter below, which would see nothing the second time if
    # all_functions() hands out a one-shot iterator.
    x_all_fns = list(fundb.all_functions())
    allfn_namefilter_active = len(args.function_filter) > 0 or len(args.object_filter) > 0
    if args.object_filter:
        fundb.precache_containing_objects(None)

    # 'diff-ratio' is an advertised choice but used to fall through to the
    # "something went wrong" exception; it selects the same mode that is
    # used when --mode is omitted.
    mode_by_name = {
        None: MODE_FNDIFF,
        'diff-ratio': MODE_FNDIFF,
        'feature-default': MODE_METRIC_EUCLID,
        'feature-mncount': MODE_HIST_EUCLID,
    }
    if args.mode not in mode_by_name:
        raise Exception("something went wrong, got %s as --mode" % args.mode)
    mode = mode_by_name[args.mode]

    scaling = None
    if args.scale:
        logging.info("collecting feature scaling information...")
        scaling = make_scaling(x_all_fns)

    if allfn_namefilter_active:
        all_fns = []
        for fn in x_all_fns:
            # Functions without disassembly are never dropped by length.
            if fn.disassembly and (len(fn.disassembly) < args.min_length):
                continue
            fname_matches = any(filt in fn.name for filt in args.function_filter)
            objnm_matches = False
            if args.object_filter:
                # Resolve the containing object's path once per function
                # instead of once per filter.
                obj_path = fundb.get_object(fn.get_container_object_id()).get_path()
                objnm_matches = any(filt in obj_path for filt in args.object_filter)
            if args.require_both:
                keep = fname_matches and objnm_matches
            else:
                keep = fname_matches or objnm_matches
            if keep:
                all_fns.append(fn)
    else:
        all_fns = x_all_fns

    logging.info("Loaded functions, initializing analysis")
    if len(all_fns) == 0:
        logging.warning("no functions to analyze")
        return

    if len(args.function_a):
        fun_dict = fundb.get_functions_by_shortname(args.function_a)
        # verify that there are actually functions to analyze
        has_fna = any(fun_dict[funname] is not None for funname in fun_dict)
        if not has_fna:
            logging.error("could not find any 'function a'")
            return
        results = m_to_n_compare(fundb, fun_dict, all_fns, mode, scaling)
    else:
        results = n_to_n_compare(fundb, all_fns, mode, scaling)

    print("done.")
    results.sort(reverse=(mode == MODE_FNDIFF))
    for entry in results:
        print("%s %s" % ("%.3f" % entry[0], entry[1:]))
class Test(unittest.TestCase):
    """Repository-level tests using the bundled glibc and dietlibc binaries."""

    def setUp(self):
        """Build a temporary database containing two libc builds and their features."""
        sql_init_bare_fname = os.path.join(MCMATCH_BASE, "sql/init_bare.psql")
        logging.info("setting up database")
        # Read the schema with a context manager so the handle is closed,
        # and before the database server is started.
        with open(sql_init_bare_fname, "r") as schema_file:
            schema_sql = schema_file.read()

        self.postgresql = testing.postgresql.Postgresql()
        self.conn = psycopg2.connect(**self.postgresql.dsn())
        with self.conn.cursor() as cursor:
            cursor.execute(schema_sql)
        self.fdb = PgFunDB(conn=self.conn)

        logging.info("extracting functions")
        glibc = os.path.join(MCMATCH_BASE, "test/libc_data/libc-2.20.so")
        process_file(self.fdb, glibc, False, True)
        glibc_ids = self.fdb.get_objectids_matching(filename_is="libc-2.20.so")
        obj = self.fdb.get_object(glibc_ids[0])
        obj.get_compileopts().set_repository("glibc-2.20")
        self.fdb.set_compiler_options(obj)

        dietlibc = os.path.join(MCMATCH_BASE, "test/libc_data/dietlibc/libc.so")
        process_file(self.fdb, dietlibc)
        for objid in self.fdb.get_objectids_matching(path_contains="test/libc_data/dietlibc/"):
            obj = self.fdb.get_object(objid)
            obj.get_compileopts().set_repository("dietlibc-0.33")
            self.fdb.set_compiler_options(obj)

        logging.info("creating features")
        feature_instances = [counter_features[m] for m in counter_features]
        for feature in feature_instances:
            self.fdb.recreate_features_table(feature)

        function_texts = self.fdb.get_function_texts(with_missing_features=feature_instances)
        for text_id, signature, text in function_texts:
            c = Codeblock()
            c.disassembly_from_text(text)
            for mcounter in feature_instances:
                mcounter.calculate(c)
                self.fdb.store_features(text_id, mcounter)
        self.fdb.save()

    def tearDown(self):
        """Close the connection and stop the temporary server."""
        self.conn.close()
        self.postgresql.stop()

    def testRepoDB(self):
        """Each repository holds one object with the expected function count."""
        self.assertEqual(1, len(self.fdb.get_objectids_matching(repository_is="dietlibc-0.33")))
        self.assertEqual(1, len(self.fdb.get_objectids_matching(repository_is="glibc-2.20")))
        self.assertEqual(640, len(list(self.fdb.get_functions_by_repository("dietlibc-0.33"))))
        self.assertEqual(2468, len(list(self.fdb.get_functions_by_repository("glibc-2.20"))))

    def testAB(self):
        """Train on glibc, test on dietlibc and check the partition-size tallies."""
        all_features = FeatureAggregator([counter_features[c] for c in counter_features])
        di = analyze.DistanceInfo(self.fdb, all_features, training_repositories=['glibc-2.20'])
        pairwise_d, testset_infos = di.test(self.fdb, in_repositories=['dietlibc-0.33'])
        training_infos = di.get_trainingset_infos()
        em = analyze.DistanceInfo.make_equivalence_map(testset_infos, training_infos)

        good, bad, other = 0, 0, 0
        for i, equivalents in enumerate(em):
            res = analyze.DistanceInfo.get_partition_sizes(pairwise_d[i], None, equivalents)
            for el in res:
                # Tally by comparing the first and third entry of each
                # partition result.
                if el[0] < el[2]:
                    good += 1
                elif el[0] > el[2]:
                    bad += 1
                else:
                    other += 1

        self.assertEqual(101, good)
        self.assertEqual(69, bad)
        print(other)
def main():
    """Command-line entry point: compute and store missing feature data.

    Options:
      -x/--force            clear stored data of the selected features first
      -X/--recreate-tables  recreate the selected feature tables, save, exit
      -m/--feature          restrict processing to the named features
                            (default: every entry of ``all_features``)

    Only functions reported by ``get_function_texts`` as missing at least one
    of the selected features are processed, and only the selected features
    are calculated and stored for them.
    """
    logging.basicConfig(level=logging.INFO)
    # NOTE(review): the description text looks copied from the diff tool;
    # left unchanged because it is user-facing output.
    parser = argparse.ArgumentParser(description='perform diff actions between functions in the database')
    parser.add_argument('-x', '--force', dest='force',
                        help='clear all statistics', action="store_true")
    parser.add_argument('-X', '--recreate-tables',
                        help='recreate all feature tables and exit',
                        action='store_true', dest='recreate_tables')
    parser.add_argument('-m', '--feature', dest='feature',
                        choices=list(all_features), nargs='*')
    args = parser.parse_args()

    fundb = DB()

    # args.feature is None when -m is absent and [] when given without
    # values; both mean "all features".
    if args.feature:
        match_features = [name for name in all_features if name in args.feature]
    else:
        match_features = list(all_features)

    if args.recreate_tables:
        for name in match_features:
            logging.info("recreating table for feature %s", name)
            fundb.recreate_features_table(all_features[name])
        fundb.save()
        return

    if args.force:
        for name in match_features:
            logging.info("clearing data for feature %s", name)
            fundb.delete_feature_data(all_features[name])

    logging.info("looking for missing features")
    function_texts = fundb.get_function_texts(
        with_missing_features=[all_features[name] for name in match_features])
    if len(function_texts) == 0:
        logging.warning("seems like everything is already up-to-date.")
        return

    logging.info("done, starting calculations")
    prog = NProgressPrinter(len(function_texts))
    for row in function_texts:
        prog.bump()
        text_id, signature, text = row
        c = Codeblock()
        c.disassembly_from_text(text)
        logging.debug("updating features for %d/%s...", text_id, signature)
        # Honour the -m selection here as well: previously every feature in
        # all_features was recalculated and stored regardless of the filter.
        for name in match_features:
            mcounter = all_features[name]
            mcounter.calculate(c)
            fundb.store_features(text_id, mcounter)
        # Saved after every function, as before, so progress is kept if the
        # run is interrupted.
        fundb.save()
    fundb.save()
def main():
    """Command-line entry point: compare functions stored in the database.

    The candidate set is all functions, optionally narrowed by object-path
    (-o) and function-name (-f) substring filters; -b requires both kinds of
    filter to match instead of either. With -a the named functions are
    compared against the candidate set (``m_to_n_compare``), otherwise all
    candidates are compared with each other (``n_to_n_compare``). Results are
    sorted and printed, one per line, as a three-decimal score followed by
    the remaining fields of the result entry.

    Raises:
        Exception: if --mode carries a value that is not a known mode name.
    """
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser(
        description='perform diff actions between functions in the database')
    parser.add_argument(
        '-a', '--function-a', dest='function_a', action='append', default=[],
        help='compare given function to all others (filters apply). Can be specified multiple times.')
    parser.add_argument(
        '-o', '--objects', dest='object_filter', action='append', default=[],
        help='only process functions in objects whose name contains this parameter. Can be specified multiple times to match name against any of the list.')
    parser.add_argument(
        '-f', '--functions', dest='function_filter', action='append', default=[],
        help='only process functions with names containing this parameter. Can be specified multiple times (matches any of the parameters)')
    parser.add_argument(
        '-b', '--both',
        help="""only include functions matching both object and function filter (instead of either/or). If there is not at least one filter for each, this option will do nothing.""",
        action='store_true', dest='require_both')
    parser.add_argument(
        '-m', '--min-length',
        help='ignore functions with less instructions than this',
        default=5, type=int, action='store', dest='min_length')
    parser.add_argument(
        '--mode', help='diff mode to use',
        choices=['diff-ratio', 'feature-default', 'feature-mncount'],
        action='store', dest='mode')
    parser.add_argument(
        '-s', '--scale', help='use scaling for feature-* modes',
        action='store_true', dest='scale')
    args = parser.parse_args()

    # -b only makes sense when both kinds of filter are present.
    if len(args.function_filter) == 0 or len(args.object_filter) == 0:
        args.require_both = False

    fundb = DB()
    logging.info("Loading functions")
    x_all_fns = fundb.all_functions()
    all_fns = []
    allfn_namefilter_active = len(args.function_filter) > 0 or len(
        args.object_filter) > 0
    if len(args.object_filter):
        fundb.precache_containing_objects(None)

    # 'diff-ratio' is an advertised choice and is also the default mode;
    # previously passing it explicitly hit the "something went wrong" branch.
    mode_by_name = {
        None: MODE_FNDIFF,
        'diff-ratio': MODE_FNDIFF,
        'feature-default': MODE_METRIC_EUCLID,
        'feature-mncount': MODE_HIST_EUCLID,
    }
    if args.mode not in mode_by_name:
        raise Exception("something went wrong, got %s as --mode" % args.mode)
    mode = mode_by_name[args.mode]

    scaling = None
    if args.scale:
        logging.info("collecting feature scaling information...")
        # The functions are walked once for scaling and once more below.
        # Materialize them first so a one-shot iterator returned by
        # all_functions() is not already exhausted when filtering starts.
        x_all_fns = list(x_all_fns)
        scaling = make_scaling(x_all_fns)

    if allfn_namefilter_active:
        # NOTE(review): --min-length is only applied on this filtered path;
        # without -o/-f every function is kept regardless of length.
        for fn in x_all_fns:
            if fn.disassembly and (len(fn.disassembly) < args.min_length):
                continue
            fname_matches = any(
                filt in fn.name for filt in args.function_filter)
            objnm_matches = False
            if args.object_filter:
                # Resolve the containing object's path once per function
                # rather than once per filter.
                obj_path = fundb.get_object(
                    fn.get_container_object_id()).get_path()
                objnm_matches = any(
                    filt in obj_path for filt in args.object_filter)
            if args.require_both:
                keep = fname_matches and objnm_matches
            else:
                keep = fname_matches or objnm_matches
            if keep:
                all_fns.append(fn)
    else:
        all_fns = list(x_all_fns)
    del x_all_fns

    logging.info("Loaded functions, initializing analysis")
    if len(all_fns) == 0:
        logging.warning("no functions to analyze")
        return

    if len(args.function_a):
        fun_dict = fundb.get_functions_by_shortname(args.function_a)
        # verify that there are actually functions to analyze
        has_fna = any(fun_dict[funname] is not None for funname in fun_dict)
        if not has_fna:
            logging.error("could not find any 'function a'")
            return
        results = m_to_n_compare(fundb, fun_dict, all_fns, mode, scaling)
    else:
        results = n_to_n_compare(fundb, all_fns, mode, scaling)

    # Single-string print calls produce the same output with the Python 2
    # print statement and the Python 3 print function.
    print("done.")
    # Descending order for MODE_FNDIFF, ascending for the other modes.
    results.sort(reverse=(mode == MODE_FNDIFF))
    for entry in results:
        print("%.3f %s" % (entry[0], entry[1:]))