Beispiel #1
0
def get_function_store(dataset, is_test=False):
    """Return the module-wide function store, building it on first use.

    The store is kept in the module-level ``_FUNCTION_STORE`` variable. If
    that variable is already set, it is returned as-is; otherwise a
    ``mongo_store.FunctionStore`` is created, remembered and returned.

    NOTE(review): once a store has been created, later calls return it
    regardless of the ``dataset`` / ``is_test`` values they pass -- confirm
    that callers never need two different stores in one process.

    Args:
        dataset: Forwarded to ``mongo_store.FunctionStore`` when the store
            is first created.
        is_test: Forwarded as the ``is_test`` keyword on first creation.

    Returns:
        The cached (or freshly created) function store.

    Raises:
        RuntimeError: If no store is cached yet and ``properties.STORE`` is
            anything other than ``"mongo"``.
    """
    global _FUNCTION_STORE
    if _FUNCTION_STORE is None:
        backend = properties.STORE
        if backend != "mongo":
            raise RuntimeError(
                "Currently supports only mongo store. Not supported for '%s'" %
                backend)
        _FUNCTION_STORE = mongo_store.FunctionStore(dataset, is_test=is_test)
    return _FUNCTION_STORE
Beispiel #2
0
def remove_overlapping_clusters(dataset, language="java_python"):
    """Filter overlapping functions out of saved clusters and persist the result.

    Loads the pickled clusters from
    ``<META_RESULTS_FOLDER>/<dataset>/clusters/<language>.pkl`` and walks the
    functions of every cluster in order. A function is kept unless
    ``overlaps`` reports that its metadata overlaps with a function kept
    earlier; in that case it takes the earlier function's place only when
    ``is_more_succinct`` says so, and is dropped otherwise. Clusters that end
    up with more than one function are saved under
    ``<META_RESULTS_FOLDER>/<dataset>/clusters/non_overlapping/`` as
    ``<language>.pkl``, ``<language>.txt`` and a ``<language>.md`` report,
    and are also stored through ``clusterer.save_clusters_to_db``.

    Args:
        dataset: Dataset name; used for the function store, the input and
            output paths and the database save.
        language: Base name of the cluster files (without extension).

    Returns:
        None. All results are written to disk / the database.
    """
    # TODO: Think about how to remove syntactic equivalence
    store = mongo_store.FunctionStore(dataset)
    clusters_root = os.path.join(properties.META_RESULTS_FOLDER, dataset,
                                 "clusters")
    clusters = cache.load_pickle(
        os.path.join(clusters_root, "%s.pkl" % language))

    non_overlapping_clusters = {}
    for label, members in clusters.items():
        # Label -1 (presumably the clusterer's "unclustered" bucket -- TODO
        # confirm) and single-function clusters are ignored.
        if label == -1 or len(members) == 1:
            continue
        survivors = []
        meta_by_name = {}
        for candidate in members:
            candidate_meta = store.load_metadata({"name": candidate.base_name})
            meta_by_name[candidate.base_name] = candidate_meta
            for slot, kept in enumerate(survivors):
                kept_meta = meta_by_name[kept.base_name]
                if not overlaps(candidate_meta, kept_meta):
                    continue
                # Only the first overlapping survivor is considered: either
                # the candidate replaces it or the candidate is discarded.
                if is_more_succinct(candidate_meta, kept_meta):
                    survivors[slot] = candidate
                break
            else:
                # No overlap with anything kept so far.
                survivors.append(candidate)
        if len(survivors) > 1:
            non_overlapping_clusters[label] = survivors

    write_folder = os.path.join(clusters_root, "non_overlapping")
    cache.mkdir(write_folder)
    clusters_pkl_file = os.path.join(write_folder, "%s.pkl" % language)
    clusters_txt_file = os.path.join(write_folder, "%s.txt" % language)
    clusters_report_file = os.path.join(write_folder, "%s.md" % language)

    cache.save_pickle(clusters_pkl_file, non_overlapping_clusters)
    clusterer.save_clusters_to_db(dataset, non_overlapping_clusters,
                                  "non_overlapping")
    clusterer.save_clusters_to_txt(non_overlapping_clusters, clusters_txt_file)

    sizes = []
    for label, cluster_funcs in non_overlapping_clusters.items():
        if label != -1:
            sizes.append(len(cluster_funcs))
    report_parts = [
        "## Cluster sizes\n",
        "* Number of clusters: %d\n" % len(non_overlapping_clusters),
        "* Number of functions clustered: %d\n" % sum(sizes),
        "## REPORT\n",
        Stat(sizes).report(),
    ]
    cache.write_file(clusters_report_file, "".join(report_parts))
Beispiel #3
0
def get_store(dataset, is_test=False):
  """Build a function store for the backend named by ``properties.STORE``.

  Args:
    dataset: Passed to the selected ``FunctionStore`` constructor.
    is_test: Passed as the ``is_test`` keyword to the mongo store only; the
      json store is constructed without it.

  Returns:
    A ``json_store.FunctionStore`` when ``properties.STORE`` is ``"json"``,
    or a ``mongo_store.FunctionStore`` when it is ``"mongo"``.

  Raises:
    RuntimeError: If ``properties.STORE`` is neither ``"json"`` nor
      ``"mongo"``.
  """
  backend = properties.STORE
  if backend == "json":
    return json_store.FunctionStore(dataset)
  if backend == "mongo":
    return mongo_store.FunctionStore(dataset, is_test=is_test)
  raise RuntimeError("Invalid configuration: %s" % backend)