Ejemplo n.º 1
0
def load_func_features_helper(bin_paths):
    """Collect numeric feature vectors for the functions of several binaries.

    Args:
        bin_paths: iterable of binary paths understood by ``parse_fname``,
            ``parse_other_options`` and ``load_func_data``.

    Returns:
        ``{function_key: {option_idx: feature_vector}}`` where
        ``function_key`` is ``(package, bin_name, function_name)`` and each
        vector is a float64 array with one slot per entry of ``g_features``.
        Slots of features missing from a function's data stay at zero.
    """
    global g_options, g_features
    collected = {}
    feature_count = len(g_features)
    idx_by_option = get_optionidx_map(g_options)
    for path in bin_paths:
        package, compiler, arch, opti, bin_name = parse_fname(path)
        others = parse_other_options(path)
        _, records = load_func_data(path)
        for record in records:
            # Only named functions located in .text are used for testing.
            if (record["seg_name"] != ".text"
                    or record["name"].startswith("sub_")):
                continue
            option_key = (opti, arch, compiler, others)
            if option_key not in idx_by_option:
                continue
            option_idx = idx_by_option[option_key]
            func_key = (package, bin_name, record["name"])
            per_option = collected.setdefault(func_key, {})
            # NOTE(review): membership is tested with option_key although
            # entries are stored under option_idx; unless the two can be
            # equal, a fresh zero vector is created for every record and the
            # last record for a (function, option) pair wins -- confirm this
            # is intended.
            if option_key not in per_option:
                per_option[option_idx] = np.zeros(
                    feature_count, dtype=np.float64)
            for slot, feature in enumerate(g_features):
                if feature in record["feature"]:
                    per_option[option_idx][slot] = record["feature"][feature]

    return collected
Ejemplo n.º 2
0
def load_func_features_helper(bin_paths):
    """Collect numeric and string features for the functions of binaries.

    Function data is loaded with the ``"filtered2"`` suffix. Functions are
    keyed by their source location rather than by name.

    Args:
        bin_paths: iterable of binary paths understood by ``parse_fname``,
            ``parse_other_options`` and ``load_func_data``.

    Returns:
        A tuple ``(func_features, func_str_features, duplicate_cnt)``:

        * ``func_features``: ``{function_key: {option_idx: np.array}}`` with
          ``function_key = (package, bin_name, src_file, src_line)``. Each
          float64 array has ``len(g_features) + len(g_str_features)`` slots;
          only the first ``len(g_features)`` are filled here.
        * ``func_str_features``: same nesting, holding a list with the
          values of the ``g_str_features`` present in the function data.
        * ``duplicate_cnt``: number of records that shared a function key
          and option with an earlier record (e.g. compiler-generated
          ``.isra``/``.part``/``.cold`` copies) and therefore replaced it.
    """
    # TODO: handle suffix correctly.
    global g_options, g_features, g_str_features
    func_features = {}
    func_str_features = {}
    num_features = len(g_features) + len(g_str_features)
    optionidx_map = get_optionidx_map(g_options)
    # This counts compiler-generated duplicates (.isra, .part, .cold)
    duplicate_cnt = 0
    for bin_path in bin_paths:
        package, compiler, arch, opti, bin_name = parse_fname(bin_path)
        others = parse_other_options(bin_path)
        _, func_data_list = load_func_data(bin_path, suffix="filtered2")
        for func_data in func_data_list:
            # Use only .text functions for testing
            # These are already filtered in filter_functions.py
            if func_data["seg_name"] != ".text":
                continue
            if func_data["name"].startswith("sub_"):
                continue
            func_key = (package, bin_name, func_data["src_file"],
                        func_data["src_line"])
            option_key = (opti, arch, compiler, others)
            if option_key not in optionidx_map:
                continue
            option_idx = optionidx_map[option_key]
            features_by_option = func_features.setdefault(func_key, {})
            str_features_by_option = func_str_features.setdefault(
                func_key, {})
            # Duplicate functions are filtered by keeping only the last one:
            # the entry is always re-created. Entries are keyed by
            # option_idx, so duplicates must be detected with option_idx too;
            # the previous test against option_key never matched, which left
            # duplicate_cnt stuck at zero.
            # TODO: move this filtering to filter_functions.py
            if option_idx in features_by_option:
                duplicate_cnt += 1
            features_by_option[option_idx] = np.zeros(
                num_features, dtype=np.float64)
            str_features_by_option[option_idx] = []

            for feature_idx, feature in enumerate(g_features):
                if feature not in func_data["feature"]:
                    continue
                val = func_data["feature"][feature]
                features_by_option[option_idx][feature_idx] = val

            for str_feature in g_str_features:
                if str_feature not in func_data:
                    continue
                val = func_data[str_feature]
                if "type" in str_feature:
                    # Type features are normalized and stored as
                    # (position, type) pairs.
                    if not isinstance(val, list):
                        val = [val]
                    val = normalize_type(val)
                    val = list(enumerate(val))
                str_features_by_option[option_idx].append(val)

    return func_features, func_str_features, duplicate_cnt
Ejemplo n.º 3
0
def extract_features(bin_name):
    """Compute features for every function of a binary and store them.

    Loads the function data for ``bin_name``, attaches the result of
    ``FeatureManager.get_all`` to each entry under the ``"feature"`` key and
    writes the updated list back with ``store_func_data``.
    """
    bin_name, records = load_func_data(bin_name)
    manager = FeatureManager()
    for record in records:
        record["feature"] = manager.get_all(record)
    store_func_data(bin_name, records)
Ejemplo n.º 4
0
def count_funcs(bin_path):
    """Count the functions and basic blocks recorded for a binary.

    Reads the function data stored with the ``"filtered"`` suffix and
    returns ``(bin_path, num_funcs, num_bbs)``, where ``num_bbs`` is the sum
    of the ``cfg_size`` values of all functions.
    """
    # TODO: handle suffix correctly.
    bin_path, loaded = load_func_data(bin_path, suffix="filtered")

    by_name = sorted(loaded, key=lambda entry: entry['name'])
    total_bbs = sum(entry['cfg_size'] for entry in by_name)

    return bin_path, len(by_name), total_bbs
Ejemplo n.º 5
0
def extract_func_types(args):
    """Attach abstracted return/argument types to a binary's functions.

    Args:
        args: a ``(type_map, bin_name)`` pair, packed into one argument.

    Every function entry gets ``"abstract_ret_type"`` and
    ``"abstract_args_type"`` resolved through ``fetch_type``; the updated
    list is then written back with ``store_func_data``.
    """
    type_map, bin_name = args
    bin_name, entries = load_func_data(bin_name)
    for entry in entries:
        abstract_ret = fetch_type(type_map, entry["ret_type"])
        # Each argument is a 4-tuple whose third element is its type.
        abstract_args = [fetch_type(type_map, arg_type)
                         for _, _, arg_type, _ in entry["args"]]
        entry["abstract_args_type"] = abstract_args
        entry["abstract_ret_type"] = abstract_ret
    store_func_data(bin_name, entries)
Ejemplo n.º 6
0
def extract_func_types(args):
    """Attach abstracted return/argument types to a binary's functions.

    Args:
        args: a ``(type_map, bin_name)`` pair, packed into one argument.

    Works on the function data stored with the ``"filtered"`` suffix: every
    entry gets ``"abstract_ret_type"`` and ``"abstract_args_type"`` resolved
    through ``fetch_type`` and the list is written back under the same
    suffix.
    """
    # TODO: handle suffix correctly.
    type_map, bin_name = args
    bin_name, entries = load_func_data(bin_name, suffix="filtered")
    for entry in entries:
        abstract_ret = fetch_type(type_map, entry["ret_type"])
        # Each argument is a 4-tuple whose third element is its type.
        abstract_args = [fetch_type(type_map, arg_type)
                         for _, _, arg_type, _ in entry["args"]]
        entry["abstract_args_type"] = abstract_args
        entry["abstract_ret_type"] = abstract_ret
    store_func_data(bin_name, entries, suffix="filtered")
Ejemplo n.º 7
0
def filter_funcs(bin_path):
    """Filter a binary's functions step by step and report the counts.

    The functions are sorted by name and then narrowed down in stages:
    code-segment (``.text``) functions, functions with a non-empty
    ``src_path``, functions whose ``src_path`` contains the package name
    (taken from the first sorted entry), and functions whose name does not
    start with ``sub_``. When ``g_oracle`` is set, only functions whose
    ``(src_file, src_line)`` is listed in
    ``g_oracle[package][bin_name]`` are kept, and that result is stored with
    the ``"filtered"`` suffix.

    Returns:
        ``(pack_name, bin_path, num_funcs, names, sources)`` where
        ``num_funcs`` is the tuple of counts after each stage (the last
        element, the readelf count, is always 0), and ``names`` / ``sources``
        are the sets of function names and ``(src_file, src_line)`` pairs
        taken before the oracle stage.
    """
    global g_oracle
    bin_path, loaded = load_func_data(bin_path)
    ordered = sorted(loaded, key=lambda entry: entry['name'])
    num_orig_funcs = len(ordered)
    pack_name = ordered[0]['package']

    # Keep only functions in the code segment.
    in_text = [f for f in ordered if f['seg_name'] == '.text']

    # Keep only functions with a known, non-empty source path.
    with_src = [f for f in in_text if 'src_path' in f and f['src_path']]

    # Keep only functions from the package itself (drops functions inserted
    # by compilers).
    in_package = [f for f in with_src if pack_name in f['src_path']]
    if not in_package:
        print("No functions: ", pack_name, bin_path, num_orig_funcs)

    # Drop functions without a real symbol name.
    named = [f for f in in_package if not f['name'].startswith('sub_')]

    names = {f['name'] for f in named}
    sources = {(f['src_file'], f['src_line']) for f in named}

    kept = named
    if g_oracle:
        _, _, _, _, bin_name = parse_fname(bin_path)
        kept = [
            f for f in named
            if f['src_file'] in g_oracle[pack_name][bin_name]
            and f['src_line'] in g_oracle[pack_name][bin_name][f['src_file']]
        ]
        # TODO: handle suffix correctly.
        store_func_data(bin_path, kept, suffix="filtered")

    # Placeholder: counting functions from the symbol table (readelf/objdump)
    # is currently disabled, so this count is always zero.
    num_readelf_funcs = 0
    num_funcs = (num_orig_funcs, len(in_text), len(with_src),
                 len(in_package), len(named), len(kept), num_readelf_funcs)
    return pack_name, bin_path, num_funcs, names, sources
Ejemplo n.º 8
0
def extract_features(bin_name):
    """Compute features for a binary's filtered functions and store them.

    Loads the function data stored with the ``"filtered"`` suffix, attaches
    the result of ``FeatureManager.get_all`` to each entry under the
    ``"feature"`` key and writes the list back with the ``"filtered2"``
    suffix.

    If feature extraction fails for any function, the traceback and the
    binary name are printed and nothing is stored for this binary.
    """
    import traceback

    # TODO: handle suffix correctly.
    bin_name, func_data_list = load_func_data(bin_name, suffix="filtered")
    fm = FeatureManager()
    for func_data in func_data_list:
        try:
            features = fm.get_all(func_data)
        except Exception:
            # Best effort per binary: report and give up on this binary.
            # Only Exception is caught so that KeyboardInterrupt and
            # SystemExit still stop the process.
            traceback.print_exc()
            print("Error: ", bin_name)
            return
        func_data["feature"] = features
    store_func_data(bin_name, func_data_list, suffix="filtered2")
Ejemplo n.º 9
0
def extract_func_lineno(bin_name):
    """Annotate each function of a binary with its source location.

    For every function whose start address has a non-empty source path in
    the map returned by ``fetch_lineno``, the keys ``"src_path"``,
    ``"src_file"`` and ``"src_line"`` are set. The updated list is written
    back with ``store_func_data``.

    Returns:
        ``None`` on success. If the function data cannot be loaded, the
        given ``bin_name`` is printed and returned instead.
    """
    try:
        bin_name, func_data_list = load_func_data(bin_name)
    except Exception:
        # Only Exception is caught so that KeyboardInterrupt and SystemExit
        # still stop the process.
        print(bin_name)
        return bin_name

    func_addrs = {func["startEA"]: func["name"] for func in func_data_list}
    line_map = fetch_lineno(bin_name, func_addrs)
    for func in func_data_list:
        func_addr = func["startEA"]
        # Skip functions without line information or with an empty path.
        if func_addr not in line_map or not line_map[func_addr][0]:
            continue
        func["src_path"] = line_map[func_addr][0]
        func["src_file"] = parse_source_path(func["src_path"])
        func["src_line"] = line_map[func_addr][1]
        # Fix ase18 source paths coreutils-6.7-6.5 / coreutils-6.7-6.7
        # NOTE(review): src_file was derived above from the path before this
        # fix-up -- confirm it does not need the same correction.
        if 'coreutils-6.7-6.5' in func['src_path']:
            func['src_path'] = func['src_path'].replace('6.7-6.5', '6.5')
    store_func_data(bin_name, func_data_list)
    return None