def find_top_k_similar_graphs(self, graph_dot_file, graph_name, k, num_iter, cluster_json=None):
    """Return the ``k`` stored graphs most similar to the given dot graph.

    A ``GraphKernel`` is built for ``graph_name`` from ``graph_dot_file``,
    optionally relabeled with the cluster information in ``cluster_json``,
    and its WL kernel is computed with ``num_iter`` iterations. That kernel
    is compared against the stored vectors through
    ``self.compute_similarity_using_stored_vectors`` and each score is
    paired with the corresponding entry of ``self.graphs``.

    Args:
        graph_dot_file: Path of the dot file passed to ``read_dot_graph``.
        graph_name: Name handed to the ``GraphKernel`` constructor.
        k: Maximum number of ``(graph, similarity)`` pairs to return.
        num_iter: Iteration count used for the WL kernel and the
            similarity computation.
        cluster_json: Optional cluster description; when truthy the graph
            is relabeled with it before the kernel is computed.

    Returns:
        A list of ``(graph, similarity)`` tuples sorted by decreasing
        similarity, holding at most ``k`` entries. When fewer than ``k``
        graphs are stored a warning is logged and all of them are returned.
    """
    gk = GraphKernel(graph_name)
    gk.read_dot_graph(graph_dot_file)
    if cluster_json:
        label_map = gk.read_cluster_info(cluster_json)
        gk.relabel_graph(label_map)
    gk.init_wl_kernel()
    wl = gk.compute_wl_kernel(num_iter)

    similarity_vector = self.compute_similarity_using_stored_vectors(
        wl, num_iter)
    # sorted() is stable, so equally similar graphs keep their stored order.
    wl_pairs = sorted(
        zip(self.graphs, similarity_vector),
        key=lambda pair: pair[1],
        reverse=True)

    if k > len(wl_pairs):
        logging.warning(
            "Trying to select {0} programs out of only {1} programs.".
            format(k, len(wl_pairs)))
    # Slicing clamps to the available length, so the result is always a list
    # of pairs (previously this case returned a single tuple and raised
    # IndexError when nothing was stored).
    return wl_pairs[:k]
num_iter = 3  # WL-Kernel iteration number
total_node_count = 0
total_relabel_count = 0
# Write one tab-separated record per '*.dot' file found under repo_dir:
#   <absolute dot path> \t <serialized WL kernel> \t <node count>
# The context manager guarantees kernel_file is closed even when reading a
# graph or computing its kernel raises part-way through the walk.
with open(kernel_file, 'w') as fo:
    for r, ds, fs in os.walk(repo_dir):
        for f in fnmatch.filter(fs, '*.dot'):
            # build graph kernel
            dot_path = os.path.join(r, f)
            gk = GraphKernel(f)
            gk.read_dot_graph(dot_path)
            if len(sys.argv) == 4:
                # A third command-line argument is passed to
                # read_cluster_info and the resulting map relabels the graph.
                label_map = gk.read_cluster_info(sys.argv[3])
                relabel_count = gk.relabel_graph(label_map)
                total_node_count += gk.g.number_of_nodes()
                total_relabel_count += relabel_count
                print("Relabeled {0} out of {1} nodes in {2}.".format(
                    relabel_count, gk.g.number_of_nodes(), gk.dot_file))
            gk.init_wl_kernel()
            wls = gk.compute_wl_kernel(num_iter)
            # Serialization: the two members of a pair are joined by ',,,',
            # the pairs of one entry of wls by ';;;', and the entries of wls
            # by '###'.
            wl_str = "###".join(
                ";;;".join(",,,".join([str(x), str(y)]) for (x, y) in wl)
                for wl in wls)
            fo.write(
                os.path.abspath(dot_path) + '\t' + wl_str + '\t' +
                str(gk.g.number_of_nodes()) + '\n')