def create_acfg_process(q, lock, counter):
    """Worker loop that turns queued dot files into pickled ACFGs.

    Repeatedly takes a dot-file path from ``q``, builds its ACFG with
    ``create_acfg_from_file`` (architecture fixed to ``'x86_64_O3'``) and
    pickles the result next to the source file as ``<name>.acfg.plk``.
    The loop ends once ``q.get`` times out after 5 seconds.

    Args:
        q: Queue of dot-file paths; ``q.get(True, 5)`` is expected to raise
            ``queue.Empty`` on timeout.
        lock: Lock protecting ``counter``; used as a context manager.
        counter: Shared object whose ``value`` attribute is incremented once
            for every ACFG written.

    Returns:
        None. Unexpected errors are printed with a traceback and end the
        worker instead of propagating.
    """
    try:
        while True:
            try:
                fpath = q.get(True, 5)
            except queue.Empty:
                # No work arrived within the timeout: this worker is done.
                return

            try:
                acfg = create_acfg_from_file(fpath, 'x86_64_O3')
            except Exception:
                # `except Exception` (not a bare `except`) so that
                # KeyboardInterrupt / SystemExit still stop the worker.
                print('!!! Failed to process {}. !!!'.format(fpath))
                # NOTE(review): the message names list_function_names, which
                # is not called here -- looks copy-pasted; kept unchanged.
                print('Unexpected exception in list_function_names: {}'.format(
                    traceback.format_exc()))
                continue

            acfg_path = os.path.splitext(fpath)[0] + '.acfg.plk'
            with open(acfg_path, 'wb') as f:
                pickle.dump(acfg, f)

            # Context manager guarantees the lock is released even if the
            # increment raises.
            with lock:
                counter.value += 1
    except Exception as e:
        print(e)
        traceback.print_exc()
def main(argv):
    """Create a pickled ACFG for every function dot file under a root folder.

    Walks ``RootDir`` and, for each ``*.dot`` file located in a directory
    whose path ends with ``_functions``, builds the ACFG with
    ``create_acfg_from_file`` (architecture fixed to ``'x86_64_O0'``) and
    writes it next to the dot file as ``<name>.acfg.plk``.  Zero-byte dot
    files are deleted.  Files that fail to convert are reported and skipped.

    Args:
        argv: Not consulted; ``parse_args()`` is called without arguments and
            therefore reads ``sys.argv``.
    """
    parser = argparse.ArgumentParser(
        description='Create ACFG for each dot file under _function directory.')
    parser.add_argument('RootDir', help='A root directory to process.')
    args = parser.parse_args()

    if not os.path.isdir(args.RootDir):
        print('{} is not a valid folder.'.format(args.RootDir))
        sys.exit(-1)

    bar = progressbar.ProgressBar(max_value=progressbar.UnknownLength)
    counter = 0
    arch = 'x86_64_O0'
    for dirpath, dirnames, filenames in os.walk(args.RootDir):
        # Only per-binary "<name>_functions" folders hold function dot files.
        if not dirpath.endswith('_functions'):
            continue
        for filename in filenames:
            if not filename.endswith('.dot'):
                continue
            dot = os.path.join(dirpath, filename)
            if os.stat(dot).st_size == 0:
                # Empty dot files carry no graph; remove them.
                os.remove(dot)
                continue

            try:
                acfg = create_acfg_from_file(dot, arch)
                acfg_path = os.path.splitext(dot)[0] + '.acfg.plk'
                with open(acfg_path, 'wb') as f:
                    pickle.dump(acfg, f)
            except Exception:
                # `except Exception` rather than a bare `except`, so Ctrl-C
                # aborts the run instead of being counted as a failed file.
                print('!!! Failed to process {}. !!!'.format(dot))
                continue

            counter += 1
            bar.update(counter)
def build_func_embs(funcs_dot_dict, arch, sess, args, norm_graph_emb_inference,
                    neighbors_test, attributes_test, u_init_test,
                    create_cache=True, cache_path=None):
    """Compute one embedding per function from its dot file.

    For every ``func -> dot path`` entry:

    * a missing or zero-byte dot file yields ``np.zeros(args.EmbeddingSize)``;
    * otherwise the ACFG is loaded from a sibling
      ``.new_maxnode{MaxNodeNum}_emb{EmbeddingSize}.acfg.plk`` pickle when it
      exists, or built with ``create_acfg_from_file`` (and pickled when
      ``create_cache`` is true), then fed through ``sess.run``;
    * a function whose ACFG cannot be built is reported and left out of the
      result.

    Args:
        funcs_dot_dict: Mapping of function name to dot-file path.
        arch: Architecture string forwarded to ``create_acfg_from_file``.
        sess: Object providing ``run(fetches, feed_dict)``; presumably a
            TensorFlow session -- confirm against the caller.
        args: Namespace providing ``EmbeddingSize``, ``MaxNodeNum`` and
            ``AttrDims``.
        norm_graph_emb_inference: Fetch passed to ``sess.run``.
        neighbors_test: Feed key for the neighbors matrix.
        attributes_test: Feed key for the attributes matrix.
        u_init_test: Feed key for the initial embedding matrix.
        create_cache: When true, write per-function ACFG pickles and, if
            ``cache_path`` is given, the resulting embedding dict.
        cache_path: Destination of the pickled embedding dict, or ``None``
            to skip writing it.

    Returns:
        dict mapping function name to its embedding.

    Raises:
        IndexError: If an ACFG has more than ``args.MaxNodeNum`` nodes.
    """
    func_embs = {}
    for func, func_dot in funcs_dot_dict.items():
        if not os.path.isfile(func_dot) or os.stat(func_dot).st_size == 0:
            # No usable graph: fall back to an all-zero embedding.
            func_embs[func] = np.zeros(args.EmbeddingSize)
            continue

        path_without_ext = os.path.splitext(func_dot)[0]
        acfg_plk = path_without_ext + '.new_maxnode{}_emb{}.acfg.plk'.format(
            args.MaxNodeNum, args.EmbeddingSize)

        if os.path.isfile(acfg_plk):
            # NOTE: pickle.load executes arbitrary code from the file; only
            # safe because these caches are produced by this function.
            with open(acfg_plk, 'rb') as f:
                acfg = pickle.load(f)
        else:
            try:
                acfg = create_acfg_from_file(func_dot, arch)
                if create_cache:
                    with open(acfg_plk, 'wb') as f:
                        pickle.dump(acfg, f)
            except Exception as e:
                print('!!! Failed to process {}. !!!'.format(func_dot))
                print('Exception: {}'.format(e))
                print()
                continue

        if len(acfg) > args.MaxNodeNum:
            # The original message formatted row['binary_path'], but `row` is
            # not defined in this function; the dot path identifies the
            # offending input instead.
            raise IndexError(
                '{} contain funciton {} which has more node num than {}. ({} > {})'
                .format(func_dot, func, args.MaxNodeNum, len(acfg),
                        args.MaxNodeNum))

        neighbors, attributes, u_init = get_graph_info_mat(
            {'graph': acfg}, args.MaxNodeNum, args.AttrDims,
            args.EmbeddingSize)
        # Batch of one graph in, first (only) row out.
        func_embs[func] = sess.run(
            norm_graph_emb_inference, {
                neighbors_test: [neighbors],
                attributes_test: [attributes],
                u_init_test: [u_init],
            })[0]

    # Write the cache only when a destination was supplied.  The previous
    # condition (`not cache_path`) was inverted and tried to open None.
    if create_cache and cache_path:
        with open(cache_path, 'wb') as f:
            pickle.dump(func_embs, f)
    return func_embs
def create_acfg_process(q, lock, sqlite_path, counter):
    """Worker loop: build ACFG pickles and record them in a SQLite table.

    Repeatedly takes a work item from ``q``, builds the ACFG for its dot
    file with ``create_acfg_from_file``, pickles it next to the dot file as
    ``<name>.acfg.plk`` and inserts one row into ``flow_graph_acfg``.  The
    loop ends once ``q.get`` times out after 5 seconds.

    Args:
        q: Queue of 7-tuples ``(fpath, arch, binary_path, bin_name,
            function_name, author_name, contest_name)``; ``q.get(True, 5)``
            is expected to raise ``queue.Empty`` on timeout.
        lock: Lock protecting ``counter``; used as a context manager.
        sqlite_path: Path of the SQLite database holding ``flow_graph_acfg``.
        counter: Shared object whose ``value`` attribute is incremented once
            per inserted row.

    Returns:
        None. Unexpected errors are printed with a traceback and end the
        worker instead of propagating.
    """
    TABLE_NAME = 'flow_graph_acfg'
    # Values are bound with `?` placeholders so paths or names containing
    # quotes cannot break (or inject into) the statement.  Only the constant
    # table name is interpolated, since identifiers cannot be bound.
    insert_sql = (
        'INSERT INTO {} (binary_path, question, acfg_path, arch, '
        'function_name, author, contest) VALUES (?, ?, ?, ?, ?, ?, ?);'
    ).format(TABLE_NAME)

    try:
        conn = sqlite3.connect(sqlite_path)
        try:
            cur = conn.cursor()
            try:
                while True:
                    try:
                        (fpath, arch, binary_path, bin_name, function_name,
                         author_name, contest_name) = q.get(True, 5)
                    except queue.Empty:
                        # No work arrived within the timeout: worker is done.
                        return

                    try:
                        acfg = create_acfg_from_file(fpath, arch)
                    except Exception:
                        # `except Exception` (not a bare `except`) so that
                        # KeyboardInterrupt / SystemExit still stop the worker.
                        print('!!! Failed to process {}. !!!'.format(fpath))
                        # NOTE(review): the message names list_function_names,
                        # which is not called here -- looks copy-pasted; kept
                        # unchanged.
                        print('Unexpected exception in list_function_names: {}'
                              .format(traceback.format_exc()))
                        continue

                    acfg_path = os.path.splitext(fpath)[0] + '.acfg.plk'
                    with open(acfg_path, 'wb') as f:
                        pickle.dump(acfg, f)

                    cur.execute(insert_sql,
                                (binary_path, bin_name, acfg_path, arch,
                                 function_name, author_name, contest_name))
                    conn.commit()

                    # Context manager releases the lock even if the
                    # increment raises.
                    with lock:
                        counter.value += 1
            finally:
                cur.close()
        finally:
            # Close the connection on every exit path, not only on timeout.
            conn.close()
    except Exception as e:
        print(e)
        traceback.print_exc()
def main(argv):
    """Build ACFG pickles for listed binaries and index them in SQLite.

    Reads binary paths from ``BinaryListFile`` (one per line, blank lines
    ignored), creates the ``flow_graph_acfg`` table and its indexes in
    ``SQLiteFile``, then for every binary:

    * derives author and contest from the two parent directory names, and
      ``bin_name`` / ``arch`` from the file stem ``<bin_name>.<arch>``;
    * reads ``<binary stem>_functions/valid_func_list.txt`` and takes the
      second space-separated field of each line as a function name;
    * converts each ``<function>.dot`` to ``<function>.acfg.plk`` and
      inserts one row describing it.

    Binaries without a function list, file names without an ``.arch``
    suffix, malformed list lines and functions that fail to convert are
    skipped.

    Args:
        argv: Not consulted; ``parse_args()`` is called without arguments and
            therefore reads ``sys.argv``.
    """
    parser = argparse.ArgumentParser(
        description=
        'Create ACFG for each binary given by list file parameter and output them as pickle file.'
    )
    parser.add_argument(
        'BinaryListFile',
        help='A text file contains a list of binary file path.')
    parser.add_argument(
        'SQLiteFile',
        help='A output sqlite db file to save information about binaries.')
    args = parser.parse_args()

    with open(args.BinaryListFile, 'r') as f:
        files = [line.strip('\n') for line in f if line.strip('\n')]

    TABLE_NAME = 'flow_graph_acfg'
    # Values are bound with `?` placeholders so paths or names containing
    # quotes cannot break (or inject into) the statement.  Only the constant
    # table name is interpolated, since identifiers cannot be bound.
    insert_sql = (
        'INSERT INTO {} (binary_path, question, acfg_path, arch, '
        'function_name, author, contest) VALUES (?, ?, ?, ?, ?, ?, ?);'
    ).format(TABLE_NAME)
    # Hoisted out of the loop: "<bin_name>.<arch>" file stem.
    name_pattern = re.compile(r'(.+)\.(.+)')

    conn = sqlite3.connect(args.SQLiteFile)
    try:
        cur = conn.cursor()
        cur.execute(
            'CREATE TABLE {} (binary_path text, acfg_path text, arch varchar(128), function_name varchar(1024), question varchar(64), author varchar(128), contest varchar(256));'
            .format(TABLE_NAME))
        for column in ('binary_path', 'arch', 'function_name', 'question',
                       'author', 'contest'):
            cur.execute('CREATE INDEX {0} ON {1}({0});'.format(
                column, TABLE_NAME))
        conn.commit()

        bar = progressbar.ProgressBar(max_value=progressbar.UnknownLength)
        counter = 0
        # Parse each file name pattern to extract arch, binary name(problem id)
        for binary_path in files:
            author_name = os.path.basename(
                os.path.abspath(os.path.join(binary_path, os.pardir)))
            contest_name = os.path.basename(
                os.path.abspath(
                    os.path.join(binary_path, os.pardir, os.pardir)))

            file_name = os.path.splitext(os.path.basename(binary_path))[0]
            match = name_pattern.search(file_name)
            if match is None:
                # Previously an IndexError aborted the whole run here.
                print('!!! Failed to process {}. !!!'.format(binary_path))
                continue
            bin_name, arch = match.groups()

            functions_folder = os.path.splitext(binary_path)[0] + '_functions'
            func_list_path = os.path.join(functions_folder,
                                          'valid_func_list.txt')
            if not os.path.isfile(func_list_path):
                continue

            valid_funcs = []
            with open(func_list_path, 'r') as f:
                for line in f:
                    fields = line.strip().split(' ')
                    # Skip blank or short lines instead of raising IndexError.
                    if len(fields) > 1:
                        valid_funcs.append(fields[1])

            # For each dot file of function, transfer it into ACFG
            for function_name in valid_funcs:
                fpath = os.path.join(functions_folder, function_name + '.dot')
                try:
                    acfg = create_acfg_from_file(fpath, arch)
                except Exception:
                    # `except Exception` rather than a bare `except`, so
                    # Ctrl-C aborts the run instead of skipping one file.
                    print('!!! Failed to process {}. !!!'.format(fpath))
                    continue

                acfg_path = os.path.splitext(fpath)[0] + '.acfg.plk'
                with open(acfg_path, 'wb') as f:
                    pickle.dump(acfg, f)

                cur.execute(insert_sql,
                            (binary_path, bin_name, acfg_path, arch,
                             function_name, author_name, contest_name))
                conn.commit()
                counter += 1
                bar.update(counter)
    finally:
        # Close the connection even when a step above raises.
        conn.close()