Esempio n. 1
0
def extract_features(bin_name):
    """Compute and persist the feature set of every function in a binary.

    The function records are loaded with ``load_func_data``, each record
    gets a ``"feature"`` entry holding the result of
    ``FeatureManager.get_all`` for that record, and the updated records are
    written back with ``store_func_data``.

    Args:
        bin_name: Identifier handed to ``load_func_data``; the name returned
            by that call is the one used when storing.

    Returns:
        None.
    """
    bin_name, records = load_func_data(bin_name)
    manager = FeatureManager()
    for record in records:
        record["feature"] = manager.get_all(record)
    store_func_data(bin_name, records)
def extract_func_types(args):
    """Attach abstracted return/argument types to every function record.

    Args:
        args: A ``(type_map, bin_name)`` pair, packed into a single argument.

    Each record's ``"ret_type"`` and the third field of every entry in
    ``"args"`` are passed through ``fetch_type``; the results are saved
    under ``"abstract_ret_type"`` and ``"abstract_args_type"`` and the
    records are stored back with ``store_func_data``.

    Returns:
        None.
    """
    type_map, bin_name = args
    bin_name, records = load_func_data(bin_name)
    for record in records:
        # The return type is resolved before the arguments.
        abstract_ret = fetch_type(type_map, record["ret_type"])
        # Each argument entry is a 4-tuple; only its third field (the type)
        # is needed here.
        abstract_args = [
            fetch_type(type_map, arg_type)
            for _idx, _name, arg_type, _ in record["args"]
        ]
        record["abstract_args_type"] = abstract_args
        record["abstract_ret_type"] = abstract_ret
    store_func_data(bin_name, records)
Esempio n. 3
0
def extract_func_types(args):
    """Attach abstracted return/argument types to the "filtered" records.

    Args:
        args: A ``(type_map, bin_name)`` pair, packed into a single argument.

    Records are loaded and stored with ``suffix="filtered"``. For each
    record, ``"ret_type"`` and the third field of every entry in ``"args"``
    are passed through ``fetch_type`` and saved under
    ``"abstract_ret_type"`` and ``"abstract_args_type"``.

    Returns:
        None.
    """
    # TODO: handle suffix correctly.
    type_map, bin_name = args
    bin_name, records = load_func_data(bin_name, suffix="filtered")
    for record in records:
        # The return type is resolved before the arguments.
        abstract_ret = fetch_type(type_map, record["ret_type"])
        # Each argument entry is a 4-tuple; only its third field (the type)
        # is needed here.
        abstract_args = [
            fetch_type(type_map, arg_type)
            for _idx, _name, arg_type, _ in record["args"]
        ]
        record["abstract_args_type"] = abstract_args
        record["abstract_ret_type"] = abstract_ret
    store_func_data(bin_name, records, suffix="filtered")
Esempio n. 4
0
def filter_funcs(bin_path):
    """Narrow a binary's function records step by step and report the counts.

    The records are sorted by name and then filtered in this order:

    1. records whose ``seg_name`` is ``.text``;
    2. records with a non-empty ``src_path``;
    3. records whose ``src_path`` contains the package name (taken from the
       first sorted record);
    4. records whose name does not start with ``sub_``;
    5. only when the module-level ``g_oracle`` is truthy: records whose
       ``(src_file, src_line)`` is listed in
       ``g_oracle[pack_name][bin_name]``. In that case the surviving records
       are also stored with ``suffix="filtered"``.

    Args:
        bin_path: Identifier handed to ``load_func_data``; the path returned
            by that call is used from then on.

    Returns:
        ``(pack_name, bin_path, num_funcs, names, sources)`` where
        ``num_funcs`` is the 7-tuple of counts (original, code segment, with
        source, in package, non-``sub_``, after oracle, readelf) and
        ``names`` / ``sources`` are the sets of names and
        ``(src_file, src_line)`` pairs collected *before* the oracle step.

    Raises:
        IndexError: if no function records were loaded.
    """
    bin_path, loaded = load_func_data(bin_path)
    all_funcs = sorted(loaded, key=lambda rec: rec['name'])
    total_count = len(all_funcs)
    pack_name = all_funcs[0]['package']

    # Stage 1: keep functions that live in the code segment.
    kept = [rec for rec in all_funcs if rec['seg_name'] == '.text']
    code_count = len(kept)

    # Stage 2: keep functions that carry a non-empty source path.
    kept = [rec for rec in kept if 'src_path' in rec and rec['src_path']]
    src_count = len(kept)

    # To identify functions inserted by compilers
    #for func in funcs:
    #    if func['package'] not in func['src_file']:
    #        print(func['name'], func['src_file'], func['src_line'])

    # Stage 3: keep functions whose source path mentions the package name
    # (removes functions inserted by compilers).
    kept = [rec for rec in kept if pack_name in rec['src_path']]
    pack_count = len(kept)

    if not kept:
        print("No functions: ", pack_name, bin_path, total_count)

    # Stage 4: drop functions named with the 'sub_' prefix.
    kept = [rec for rec in kept if not rec['name'].startswith('sub_')]
    named_count = len(kept)

    # Snapshot names and source locations before the oracle filter runs.
    names = {rec['name'] for rec in kept}
    sources = {(rec['src_file'], rec['src_line']) for rec in kept}

    if g_oracle:
        _, _, _, _, bin_name = parse_fname(bin_path)
        # The oracle lookups stay inside the comprehension on purpose: they
        # are only evaluated when there is at least one record to test.
        kept = [
            rec for rec in kept
            if rec['src_file'] in g_oracle[pack_name][bin_name]
            and rec['src_line']
            in g_oracle[pack_name][bin_name][rec['src_file']]
        ]
        # TODO: handle suffix correctly.
        store_func_data(bin_path, kept, suffix="filtered")
    oracle_count = len(kept)

    # Symbol-table based counting is currently disabled, so this slot is
    # always 0. The disabled commands were:
    #    if g_oracle:
    #        cmd = "readelf -s {} | grep FUNC | grep -v UND | wc -l".format(bin_path)
    #        cmd = " objdump --syms -j .text {} | grep \"F .text\" | ".format(bin_path)
    #        cmd += " cut -d \" \" -f 1 | sort | uniq | wc -l"
    #        num_readelf_funcs = int(system(cmd))
    readelf_count = 0

    num_funcs = (
        total_count,
        code_count,
        src_count,
        pack_count,
        named_count,
        oracle_count,
        readelf_count,
    )
    return pack_name, bin_path, num_funcs, names, sources
Esempio n. 5
0
def extract_features(bin_name):
    """Compute features for the "filtered" records and store them.

    Records are loaded with ``suffix="filtered"``; each one gets a
    ``"feature"`` entry holding the result of ``FeatureManager.get_all``.
    If every record succeeds, the list is stored with
    ``suffix="filtered2"``.

    If feature extraction fails for any record, the traceback and the
    binary name are printed and the function returns without storing
    anything (best-effort: one bad binary must not abort the whole run).

    Args:
        bin_name: Identifier handed to ``load_func_data``; the name returned
            by that call is the one used when storing and reporting.

    Returns:
        None, both on success and on failure.
    """
    import traceback

    # TODO: handle suffix correctly.
    bin_name, func_data_list = load_func_data(bin_name, suffix="filtered")
    fm = FeatureManager()
    for func_data in func_data_list:
        try:
            features = fm.get_all(func_data)
        except Exception:
            # Only ordinary errors are swallowed; KeyboardInterrupt and
            # SystemExit propagate so the process can still be stopped.
            traceback.print_exc()
            print("Error: ", bin_name)
            return
        func_data["feature"] = features
    store_func_data(bin_name, func_data_list, suffix="filtered2")
Esempio n. 6
0
def extract_func_lineno(bin_name):
    """Annotate each function record with its source path, file and line.

    The start address of every record is mapped to its name and handed to
    ``fetch_lineno``. For each record whose address has a non-empty source
    path in the result, ``"src_path"``, ``"src_file"`` (via
    ``parse_source_path``) and ``"src_line"`` are set. The updated records
    are then stored with ``store_func_data``.

    Args:
        bin_name: Identifier handed to ``load_func_data``.

    Returns:
        ``bin_name`` if the function data could not be loaded (the name is
        also printed), otherwise None.
    """
    try:
        bin_name, func_data_list = load_func_data(bin_name)
    except Exception:
        # Loading is best-effort: report the failing binary to the caller.
        # KeyboardInterrupt/SystemExit are deliberately not caught.
        print(bin_name)
        return bin_name

    func_addrs = {func["startEA"]: func["name"] for func in func_data_list}
    line_map = fetch_lineno(bin_name, func_addrs)
    for func in func_data_list:
        func_addr = func["startEA"]
        # Skip functions with no line information or an empty source path.
        if func_addr not in line_map:
            continue
        line_info = line_map[func_addr]
        if not line_info[0]:
            continue
        func["src_path"] = line_info[0]
        # NOTE(review): src_file is derived before the path rewrite below;
        # confirm that this ordering is intended.
        func["src_file"] = parse_source_path(func["src_path"])
        func["src_line"] = line_info[1]
        # Fix ase18 source paths coreutils-6.7-6.5 / coreutils-6.7-6.7
        if 'coreutils-6.7-6.5' in func['src_path']:
            func['src_path'] = func['src_path'].replace('6.7-6.5', '6.5')
    store_func_data(bin_name, func_data_list)
    return
Esempio n. 7
0
            "arch": arch,
            "opti": opti,
            "others": other_option,
            "bin_name": bin_name,
            "func_type": func_type,
            "ret_type": ret_type,
            "args": args,
            "callers": caller_map[func_name],
            "callees": callee_map[func_name],
            "imported_callees": imported_callees,
            "cfg": edge_map[func_name],
            "strings": func_strings,
            "consts": func_consts,
            "bb_data": bb_data,
        })
    return func_data


# Script entry point: run the extraction and leave IDA with an exit code that
# tells the caller whether it worked.
init_idc()
try:
    func_data = main()
except:
    # A bare except is kept here: whatever main() raises, the traceback is
    # printed and IDA is asked to exit with status 1.
    import traceback
    traceback.print_exc()
    ida_pro.qexit(1)
else:
    # Success: store the collected data under the input file's path, then
    # exit with status 0.
    store_func_data(ida_nalt.get_input_file_path(), func_data)
    ida_pro.qexit(0)