Beispiel #1
0
 def find_top_k_similar_graphs(self,
                               graph_dot_file,
                               graph_name,
                               k,
                               num_iter,
                               cluster_json=None):
     """Return the stored graphs most similar to the graph in a dot file.

     A ``GraphKernel`` is built for ``graph_dot_file``, optionally relabeled
     with the label map read from ``cluster_json``, and its WL kernel is
     compared against the stored vectors through
     ``compute_similarity_using_stored_vectors``.

     Args:
         graph_dot_file: Dot file handed to ``GraphKernel.read_dot_graph``.
         graph_name: Name passed to the ``GraphKernel`` constructor.
         k: Maximum number of pairs to return.
         num_iter: Iteration count passed to ``compute_wl_kernel`` and to
             the similarity computation.
         cluster_json: Optional cluster file. When truthy, it is read with
             ``read_cluster_info`` and the resulting label map is applied
             with ``relabel_graph`` before the kernel is computed.

     Returns:
         A list of ``(graph, similarity)`` pairs built from ``self.graphs``,
         ordered by descending similarity and truncated to ``k`` entries.
         When ``k`` exceeds the number of available pairs, a warning is
         logged and all pairs are returned.
     """
     gk = GraphKernel(graph_name)
     gk.read_dot_graph(graph_dot_file)
     if cluster_json:
         label_map = gk.read_cluster_info(cluster_json)
         gk.relabel_graph(label_map)
     gk.init_wl_kernel()
     wl = gk.compute_wl_kernel(num_iter)

     similarity_vector = self.compute_similarity_using_stored_vectors(
         wl, num_iter)
     wl_pairs = list(zip(self.graphs, similarity_vector))
     # list.sort is stable, so graphs with equal similarity keep their
     # order from self.graphs.
     wl_pairs.sort(key=lambda x: x[1], reverse=True)

     if k > len(wl_pairs):
         logging.warning(
             "Trying to select {0} programs out of only {1} programs.".
             format(k, len(wl_pairs)))

     # Slicing past the end simply yields every pair, so both the normal and
     # the "k too large" case return a list of pairs (never a bare tuple,
     # and never an IndexError when there are no stored graphs).
     return wl_pairs[:k]
kernel_file = sys.argv[2]  # output file the kernel records are written to

num_iter = 3  # number of WL-kernel iterations

# Running totals; only updated when a cluster file argument is supplied.
total_node_count = 0
total_relabel_count = 0

# NOTE(review): `fo` is not closed within this part of the script --
# confirm it is closed further down.
fo = open(kernel_file, 'w')
for root, _dirs, names in os.walk(repo_dir):
    for dot_name in fnmatch.filter(names, '*.dot'):
        dot_path = os.path.join(root, dot_name)

        # Build the graph kernel for this dot file.
        gk = GraphKernel(dot_name)
        gk.read_dot_graph(dot_path)

        # With exactly three command-line arguments, the third one is read
        # as cluster info and used to relabel the graph.
        if len(sys.argv) == 4:
            label_map = gk.read_cluster_info(sys.argv[3])
            relabel_count = gk.relabel_graph(label_map)
            node_count = gk.g.number_of_nodes()
            total_node_count += node_count
            total_relabel_count += relabel_count
            print("Relabeled {0} out of {1} nodes in {2}.".format(
                relabel_count, node_count, gk.dot_file))

        gk.init_wl_kernel()
        wls = gk.compute_wl_kernel(num_iter)

        # Serialize the kernel: each (x, y) pair becomes "x,,,y", the pairs
        # of one entry are joined by ";;;", and the entries by "###".
        encoded_entries = []
        for wl in wls:
            encoded_pairs = [",,,".join((str(x), str(y))) for (x, y) in wl]
            encoded_entries.append(";;;".join(encoded_pairs))
        wl_str = "###".join(encoded_entries)

        # One tab-separated record per graph:
        # absolute dot path, serialized kernel, node count.
        record = (os.path.abspath(dot_path) + '\t' + wl_str + '\t' +
                  str(gk.g.number_of_nodes()) + '\n')
        fo.write(record)