Ejemplo n.º 1
0
def async_trim_funcs(func_list, file_name):
    """Forward ``func_list`` to ``fhc.trim_funcs`` for the given file.

    Thin module-level wrapper; ``file_name`` is passed through as a keyword
    argument and the helper's result is returned unchanged.
    NOTE(review): the name suggests this is meant as a worker-pool callable;
    confirm against the callers.
    """
    trimmed = fhc.trim_funcs(func_list, file_name=file_name)
    return trimmed
def main():
    """Command-line entry point: cluster functions and rank look-alikes of bugs.

    Command-line arguments:
        Directory          Positional; passed to ``fhc.get_executable_files``
                           and, when no pickle is given, to
                           ``vd.process_file_or_folder``.
        -L / --LD_PATH     Path to libraries to load (default ``""``).
        -F / --Function    If set, only vulnerability records whose ``name``
                           contains this string are processed.
        -V / --Vuln_Pickle If set, vulnerability records are loaded from this
                           pickle file instead of being recomputed.

    Steps performed:
        1. Load or compute the vulnerability records (a fresh computation is
           also written to ``cluster_pickle`` in the working directory).
        2. Gather per-file function data, keeping a full copy and a trimmed
           copy of every function.
        3. Score clusterings of the trimmed functions and keep the entry
           with the lowest score among the inspected candidates.
        4. For each selected vulnerability record, attach a
           ``Similar_Funcs`` list (functions from the same cluster, sorted
           by ascending distance).
        5. Print the first selected record via ``print_function``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("Directory")
    parser.add_argument("-L",
                        "--LD_PATH",
                        default="",
                        help="Path to libraries to load")
    parser.add_argument("-F", "--Function", default="")
    parser.add_argument("-V", "--Vuln_Pickle", default="")
    args = parser.parse_args()

    # Only the executables are analysed; the shared libraries returned by the
    # helper are deliberately left out of the file set.
    executables, _shared_libs = fhc.get_executable_files(args.Directory)
    all_files = executables

    if args.Vuln_Pickle:
        # SECURITY: unpickling can execute arbitrary code. Only pass pickle
        # files that were produced by a trusted run of this tool.
        with open(args.Vuln_Pickle, 'rb') as f:
            file_vulnerabilities = pickle.load(f)
    else:
        file_vulnerabilities = vd.process_file_or_folder(
            args.Directory, args.LD_PATH)
        # Cache the results so a later run can reuse them via -V.
        with open('cluster_pickle', 'wb') as f:
            pickle.dump(file_vulnerabilities, f, pickle.HIGHEST_PROTOCOL)

    print("[+] Getting sparse functions")

    all_functions = []
    all_trim_funcs = []

    for function_list in async_and_iter(async_get_sparse_file_data, all_files):
        if not function_list:
            # A file that produced no functions has no first entry to take
            # the file name from; skip it instead of raising IndexError.
            continue
        # Deep-copy first so the untrimmed data is kept independently of
        # whatever fhc.trim_funcs does with function_list.
        all_functions.extend(copy.deepcopy(function_list))
        all_trim_funcs.extend(
            fhc.trim_funcs(function_list, function_list[0]['file_name']))

    all_functions = fhc.remove_non_needed_functions(all_functions,
                                                    remove_features=False)
    all_trim_funcs = fhc.remove_non_needed_functions(all_trim_funcs)

    print("[+] Clustering and scoring centroid counts")
    all_scores = async_and_iter_clusters(all_trim_funcs, 50)

    # Pick the lowest-scoring entry among indexes 1 .. len(all_scores) - 3.
    # The first entry and the last two are never candidates, and only scores
    # strictly below 200 are accepted; otherwise index 0 is used as-is.
    lowest_score = 200
    best_index = 0
    for idx in range(1, len(all_scores) - 2):
        score = all_scores[idx]['score']
        if score < lowest_score:
            lowest_score = score
            best_index = idx
    print("Largest drop at {} with {}".format(all_scores[best_index]['count'],
                                              lowest_score))

    selected_scores = all_scores[best_index]
    function_distances = fhc.get_cosine_dist(all_trim_funcs)

    if args.Function:
        bugs = [x for x in file_vulnerabilities if args.Function in x['name']]
    else:
        bugs = [x for x in file_vulnerabilities if x['result']]

    for file_vuln in bugs:
        vuln_index = get_function_index(file_vuln, all_functions)
        if vuln_index is None:
            continue
        # NOTE(review): labels come from clustering all_trim_funcs but are
        # indexed with a position in all_functions; this presumably relies on
        # both lists staying aligned - confirm.
        vuln_cluster = selected_scores['labels'][vuln_index]
        similar_list = get_functions_on_cluster(all_functions, vuln_cluster,
                                                selected_scores['labels'])

        reduced_list = [
            {
                'file_name': func['file_name'],
                'func_name': func['name'],
                'distance': get_func_dist(file_vuln, func, function_distances,
                                          all_functions),
            }
            for func in similar_list
        ]
        # Smallest distance first.
        reduced_list.sort(key=lambda entry: entry['distance'])
        file_vuln['Similar_Funcs'] = reduced_list

    if bugs:
        print_function(bugs[0])