def extract_features(bin_name):
    """Compute features for every function record of a binary and save them.

    The records are obtained from ``load_func_data``, each one is handed to a
    freshly created ``FeatureManager`` (``get_all``), and the result is kept
    under the record's ``"feature"`` key.  The updated list is then written
    back with ``store_func_data``.

    Args:
        bin_name: Value passed to ``load_func_data``.  The name returned by
            that call is the one used when storing the results.

    Returns:
        None.
    """
    # Kept for compatibility; nothing in this function assigns the global.
    global feature_funcs

    resolved_name, records = load_func_data(bin_name)
    manager = FeatureManager()
    for record in records:
        record["feature"] = manager.get_all(record)
    store_func_data(resolved_name, records)
def extract_func_types(args):
    """Annotate each function record with abstracted return/argument types.

    For every record loaded for the binary, ``fetch_type`` is applied to the
    record's ``"ret_type"`` and to the type of each entry in ``"args"``.  The
    results are saved under ``"abstract_ret_type"`` and
    ``"abstract_args_type"`` and the list is written back with
    ``store_func_data``.

    Args:
        args: A ``(type_map, bin_name)`` pair packed into a single argument
            (presumably so the function can be mapped over a worker pool --
            TODO confirm against the caller).

    Returns:
        None.
    """
    type_map, target = args
    target, records = load_func_data(target)

    for record in records:
        # The return type is resolved first, then the arguments in order.
        abstract_ret = fetch_type(type_map, record["ret_type"])
        # Every argument entry unpacks into four items; only the third (the
        # type) is used here.
        abstract_args = [
            fetch_type(type_map, arg_type)
            for _, _, arg_type, _ in record["args"]
        ]
        record["abstract_args_type"] = abstract_args
        record["abstract_ret_type"] = abstract_ret

    store_func_data(target, records)
def extract_func_types(args):
    """Annotate the "filtered" function records with abstracted types.

    Loads the records stored under the ``"filtered"`` suffix, applies
    ``fetch_type`` to each record's ``"ret_type"`` and to the type of every
    entry in ``"args"``, saves the results under ``"abstract_ret_type"`` and
    ``"abstract_args_type"``, and stores the list back under the same suffix.

    Args:
        args: A ``(type_map, bin_name)`` pair packed into a single argument
            (presumably so the function can be mapped over a worker pool --
            TODO confirm against the caller).

    Returns:
        None.
    """
    # TODO: handle suffix correctly.
    type_map, target = args
    target, records = load_func_data(target, suffix="filtered")

    for record in records:
        # The return type is resolved first, then the arguments in order.
        abstract_ret = fetch_type(type_map, record["ret_type"])
        # Every argument entry unpacks into four items; only the third (the
        # type) is used here.
        abstract_args = [
            fetch_type(type_map, arg_type)
            for _, _, arg_type, _ in record["args"]
        ]
        record["abstract_args_type"] = abstract_args
        record["abstract_ret_type"] = abstract_ret

    store_func_data(target, records, suffix="filtered")
def filter_funcs(bin_path):
    """Filter a binary's function records and store the surviving ones.

    The records are sorted by name and then narrowed step by step:

    1. keep functions whose ``seg_name`` is ``.text``;
    2. keep functions with a non-empty ``src_path``;
    3. keep functions whose ``src_path`` contains the package name (taken
       from the first sorted record);
    4. drop functions whose name starts with ``sub_``;
    5. if the global ``g_oracle`` is truthy, keep only functions whose
       ``(src_file, src_line)`` is listed in the oracle for this package and
       binary name.

    The final list is stored with ``store_func_data`` under the
    ``"filtered"`` suffix.

    Args:
        bin_path: Value passed to ``load_func_data``; the path returned by
            that call is used afterwards.

    Returns:
        tuple: ``(pack_name, bin_path, num_funcs, names, sources)`` where
        ``num_funcs`` holds the record counts after loading and after each
        of the five steps plus a trailing ``0`` (the readelf count is
        disabled), ``names`` is the set of function names after step 4 and
        ``sources`` the set of ``(src_file, src_line)`` pairs after step 4.

    Raises:
        IndexError: If no records were loaded (the package name is read from
            the first record).
    """
    global g_oracle

    bin_path, all_funcs = load_func_data(bin_path)
    all_funcs = sorted(all_funcs, key=lambda rec: rec['name'])
    num_orig_funcs = len(all_funcs)
    pack_name = all_funcs[0]['package']

    # filter functions by segment name (consider functions in code segment)
    kept = [rec for rec in all_funcs if rec['seg_name'] == '.text']
    num_code_funcs = len(kept)

    kept = [rec for rec in kept if 'src_path' in rec and rec['src_path']]
    num_src_funcs = len(kept)

    # To identify functions inserted by compilers
    #for func in funcs:
    #    if func['package'] not in func['src_file']:
    #        print(func['name'], func['src_file'], func['src_line'])

    # filter functions by package name (remove functions inserted by compilers)
    kept = [rec for rec in kept if pack_name in rec['src_path']]
    num_pack_funcs = len(kept)
    if not num_pack_funcs:
        print("No functions: ", pack_name, bin_path, num_orig_funcs)

    kept = [rec for rec in kept if not rec['name'].startswith('sub_')]
    num_sub_funcs = len(kept)

    # Snapshot names and source locations before the oracle step.
    names = {rec['name'] for rec in kept}
    sources = {(rec['src_file'], rec['src_line']) for rec in kept}

    if g_oracle:
        # Only the binary name is needed from the parsed file name.
        _, _, _, _, bin_name = parse_fname(bin_path)
        # The oracle is indexed lazily per record, so nothing is looked up
        # when no records are left.
        kept = [
            rec for rec in kept
            if rec['src_file'] in g_oracle[pack_name][bin_name]
            and rec['src_line'] in g_oracle[pack_name][bin_name][rec['src_file']]
        ]

    # TODO: handle suffix correctly.
    # NOTE(review): written whether or not an oracle is configured -- confirm
    # this is the intended placement.
    store_func_data(bin_path, kept, suffix="filtered")
    num_oracle_funcs = len(kept)

    num_readelf_funcs = 0
    # if g_oracle:
    #     cmd = "readelf -s {} | grep FUNC | grep -v UND | wc -l".format(bin_path)
    #     cmd = " objdump --syms -j .text {} | grep \"F .text\" | ".format(bin_path)
    #     cmd += " cut -d \" \" -f 1 | sort | uniq | wc -l"
    #     num_readelf_funcs = int(system(cmd))

    num_funcs = (
        num_orig_funcs,
        num_code_funcs,
        num_src_funcs,
        num_pack_funcs,
        num_sub_funcs,
        num_oracle_funcs,
        num_readelf_funcs,
    )
    return pack_name, bin_path, num_funcs, names, sources
def extract_features(bin_name):
    """Compute features for the "filtered" function records of a binary.

    Loads the records stored under the ``"filtered"`` suffix, asks a fresh
    ``FeatureManager`` for the features of each record (``get_all``), saves
    them under the record's ``"feature"`` key, and stores the list under the
    ``"filtered2"`` suffix.

    If feature extraction fails for any record, the traceback and an
    ``"Error: "`` line naming the binary are printed and the function returns
    without storing anything.

    Args:
        bin_name: Value passed to ``load_func_data``.  The name returned by
            that call is the one used for reporting and storing.

    Returns:
        None, both on success and on failure.
    """
    # TODO: handle suffix correctly.
    bin_name, func_data_list = load_func_data(bin_name, suffix="filtered")
    fm = FeatureManager()
    for func_data in func_data_list:
        try:
            features = fm.get_all(func_data)
        except Exception:
            # Catch ordinary errors only: a bare ``except`` would also
            # swallow KeyboardInterrupt/SystemExit and make the run
            # impossible to interrupt cleanly.
            import traceback
            traceback.print_exc()
            print("Error: ", bin_name)
            # Abort this binary; partially computed features are not stored.
            return
        func_data["feature"] = features
    store_func_data(bin_name, func_data_list, suffix="filtered2")
def extract_func_lineno(bin_name):
    """Attach source path, file and line information to function records.

    Loads the records for the binary, asks ``fetch_lineno`` for a mapping
    from function start address to a sequence whose first two items are the
    source path and the line, and fills ``"src_path"``, ``"src_file"`` (via
    ``parse_source_path``) and ``"src_line"`` on every record that has a
    non-empty source path in that mapping.  The records are then stored with
    ``store_func_data``.

    Args:
        bin_name: Value passed to ``load_func_data``.

    Returns:
        ``bin_name`` if the records could not be loaded (the name is also
        printed), otherwise None.
    """
    try:
        bin_name, func_data_list = load_func_data(bin_name)
    except Exception:
        # Best-effort: report the binary and hand its name back to the
        # caller.  Only ordinary errors are caught so that
        # KeyboardInterrupt/SystemExit still propagate.
        print(bin_name)
        return bin_name

    # Map each function's start address to its name.
    func_addrs = {func["startEA"]: func["name"] for func in func_data_list}
    line_map = fetch_lineno(bin_name, func_addrs)

    for func in func_data_list:
        func_addr = func["startEA"]
        # Skip functions with no entry or an empty source path.
        if func_addr not in line_map or not line_map[func_addr][0]:
            continue
        entry = line_map[func_addr]
        func["src_path"] = entry[0]
        func["src_file"] = parse_source_path(func["src_path"])
        func["src_line"] = entry[1]
        # Fix ase18 source paths coreutils-6.7-6.5 / coreutils-6.7-6.7
        if 'coreutils-6.7-6.5' in func['src_path']:
            func['src_path'] = func['src_path'].replace('6.7-6.5', '6.5')

    store_func_data(bin_name, func_data_list)
    return
"arch": arch, "opti": opti, "others": other_option, "bin_name": bin_name, "func_type": func_type, "ret_type": ret_type, "args": args, "callers": caller_map[func_name], "callees": callee_map[func_name], "imported_callees": imported_callees, "cfg": edge_map[func_name], "strings": func_strings, "consts": func_consts, "bb_data": bb_data, }) return func_data init_idc() try: func_data = main() except: import traceback traceback.print_exc() ida_pro.qexit(1) else: bin_path = ida_nalt.get_input_file_path() store_func_data(bin_path, func_data) ida_pro.qexit(0)