def get_similar_graphs(inputs):
    """Collect, for each query node, the nodes with a similar neighbourhood.

    The neighbourhood of a node is the subgraph of the module-level graph
    ``kg`` induced by the node, its predecessors and its successors.  A
    candidate is accepted when the graph edit distance between the two
    neighbourhoods (rooted at the query and the candidate) does not exceed
    ``MAX_GRAPH_DISTANCE_RATIO`` times the number of edges in the query
    neighbourhood.

    Args:
        inputs: A ``(query_nodes, pid)`` pair.  ``pid`` is only used in the
            progress message.

    Returns:
        A ``defaultdict(list)`` mapping each query node that has at least one
        match to the list of matching nodes.
    """
    query_nodes, pid = inputs
    ground_truth = defaultdict(list)
    total = len(query_nodes)

    def neighbourhood(node):
        # Induced subgraph on the node plus its direct in/out neighbours.
        return kg.subgraph(
            set(kg.predecessors(node)) | set(kg.successors(node)) | {node})

    for i, q in enumerate(query_nodes):
        print('pid %d: %d / %d' % (pid, i, total))
        query_subgraph = neighbourhood(q)
        # The acceptance threshold depends only on the query, so compute it
        # once instead of once per candidate.
        max_distance = MAX_GRAPH_DISTANCE_RATIO * len(query_subgraph.edges)

        for n in kg.nodes:
            if n == q:
                continue
            if node2neighbor_num[n] != node2neighbor_num[q]:
                continue
            # Cheap pre-filter on precomputed per-node statistics, applied
            # before the expensive edit-distance computation.
            stat_gap = (abs(node2degree[n] - node2degree[q])
                        + abs(node2neighbor_edge_num[n]
                              - node2neighbor_edge_num[q]))
            if stat_gap > max_distance:
                continue

            candidate_subgraph = neighbourhood(n)
            ged = nx.graph_edit_distance(
                query_subgraph, candidate_subgraph,
                edge_match=edge_match, roots=(q, n),
                timeout=TIMEOUT_FOR_CALCULATING_GED)
            # graph_edit_distance yields None when no edit path was completed
            # before the timeout; comparing None with a number would raise.
            if ged is not None and ged <= max_distance:
                ground_truth[q].append(n)
    return ground_truth
def get_triples_score(self, predicted_triples, ground_truth_triples):
    """Return the mean graph edit distance between predicted and true triples.

    Both arguments hold one collection of triples per document.  For every
    document a directed graph is built from each collection and the two
    graphs are compared with ``nx.graph_edit_distance``; the per-document
    distances are then averaged.

    The two arguments must have the same length (checked with ``assert``).
    """

    def jaccard_similarity(list1, list2):
        """Return the size of the intersection over the size of the union."""
        first, second = set(list1), set(list2)
        return len(first & second) / len(first | second)

    # NOTE: a node matcher treating two nodes as the same when the Jaccard
    # similarity of their 'tokens' is at least 0.5 was sketched here but is
    # disabled; it is not passed to graph_edit_distance below.

    def create_graph(triples):
        """Build a DiGraph with one edge (head -> tail) per triple."""
        graph = nx.DiGraph()
        for triple in triples:
            # Element 0 and element 2 of a triple are token sequences.
            graph.add_edge(" ".join(triple[0]), " ".join(triple[2]))
        for node in graph.nodes:
            graph.nodes[node]['tokens'] = node.split()
        return graph

    assert len(predicted_triples) == len(ground_truth_triples)

    # One distance per document.
    scores = [
        nx.graph_edit_distance(create_graph(predicted), create_graph(truth))
        for predicted, truth in zip(predicted_triples, ground_truth_triples)
    ]
    return sum(scores) / len(scores)
def graph_similarity_measure(a, b):
    """Blend several graph similarity measures for two adjacency matrices.

    Args:
        a: Adjacency matrix of the first graph; must expose ``.shape``.
        b: Adjacency matrix of the second graph; must expose ``.shape``.

    Returns:
        ``1`` when the two graphs are isomorphic.  Otherwise the equally
        weighted (0.25 each) sum of four terms: ``1 - steady_sim``,
        ``1 - eig_similarity`` (mean squared difference of the first ``k``
        scaled Laplacian eigenvalues), the graph edit distance divided by the
        larger adjacency sum, and ``1 - steady_sim_neigh``.
    """
    A = np.matrix(a)
    B = np.matrix(b)
    # from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array builds
    # the same graph from a plain ndarray.
    G1 = nx.from_numpy_array(np.asarray(A))
    G2 = nx.from_numpy_array(np.asarray(B))

    # Check isomorphism first: it decides the result on its own, so the far
    # more expensive edit distance is skipped in that case.
    if nx.is_isomorphic(G1, G2):
        return 1

    # Similarity 1: graph edit distance, only computed for equal shapes.
    if a.shape == b.shape:
        edit_d = nx.graph_edit_distance(G1, G2)
    else:
        edit_d = 0
    d1 = np.sum(np.asarray(A))
    d2 = np.sum(np.asarray(B))
    d3 = max([d1, d2])
    # Both matrices can sum to zero (e.g. two edgeless graphs of different
    # size); avoid dividing by zero in that case.
    edit_term = edit_d / float(d3) if d3 else 0.0

    laplacian1 = nx.spectrum.laplacian_spectrum(G1)
    laplacian2 = nx.spectrum.laplacian_spectrum(G2)
    k = min(select_k(laplacian1), select_k(laplacian2))

    # Similarity 3: spectral distance on the first k eigenvalues.
    lap1 = scaling(laplacian1[:k])
    lap2 = scaling(laplacian2[:k])
    eig_similarity = sum((lap1 - lap2)**2) / float(k)

    # Similarity 4
    steady_similarity = steady_sim(A, B)

    # Similarity 5
    steady_similarity1 = steady_sim_neigh(A, B)

    return (0.25 * (1 - steady_similarity)
            + 0.25 * (1 - eig_similarity)
            + 0.25 * edit_term
            + 0.25 * (1 - steady_similarity1))
def edit_distance_yu(arch_1, arch_2):
    """Return the integer graph edit distance between two architectures.

    Each architecture is indexed as ``arch[0]`` (adjacency) and ``arch[1]``
    (operations).  Both are passed through ``preprocess_adj_op`` and turned
    into graphs with ``gen_graph`` before being compared using the
    module-level ``node_match`` and ``edge_match`` predicates.
    """
    # Preprocess both architectures first, then build both graphs.
    prepared = [preprocess_adj_op(arch[0], arch[1])
                for arch in (arch_1, arch_2)]
    first_graph, second_graph = [gen_graph(adj, ops) for adj, ops in prepared]

    distance = nx.graph_edit_distance(
        first_graph, second_graph,
        node_match=node_match, edge_match=edge_match)
    return int(distance)
def calculate(self):
    """Compute the graph edit distance between ``model1`` and ``model2``.

    Frequencies are stripped from the labels of both models first.  The
    distance is stored in ``self.value``; ``self.diff_added`` and
    ``self.diff_removed`` are reset to empty sets.

    Returns:
        The tuple ``(self.value, self.diff_added, self.diff_removed)``.
    """
    strip_frequencies = DfgMetricUtil.remove_frequencies_from_labels
    first = strip_frequencies(self.model1)
    second = strip_frequencies(self.model2)

    # networkx also accepts a timeout here (e.g. timeout=30) if the exact
    # computation turns out to be too slow.
    self.value = nx.graph_edit_distance(first, second)
    self.diff_added, self.diff_removed = set(), set()
    return self.value, self.diff_added, self.diff_removed
def graph_edit_distance(adj_pred, adj_gt):
    """Return the graph edit distance between two adjacency tensors.

    The diagonal of both tensors is zeroed (self-loops are ignored).  The
    prediction is then binarised with a 0.5 threshold; ``adj_gt`` is used as
    given apart from the zeroed diagonal.  Both are converted to undirected
    ``nx.Graph`` objects and compared with ``nx.graph_edit_distance``.

    Args:
        adj_pred: Square tensor of predicted edge scores.
        adj_gt: Ground-truth adjacency tensor (assumed to have the same size
            as ``adj_pred``).

    Returns:
        The value returned by ``nx.graph_edit_distance``.
    """
    # Build the mask on the prediction's device so that non-CPU inputs do
    # not fail on a device mismatch.
    off_diagonal = 1 - torch.eye(adj_pred.size(0), device=adj_pred.device)
    adj_pred = adj_pred * off_diagonal
    adj_gt = adj_gt * off_diagonal.to(adj_gt.device)
    adj_pred = (adj_pred > 0.5).type(torch.float32)

    # .cpu() is required before .numpy() for tensors living on another
    # device.  from_numpy_matrix was removed in NetworkX 3.0;
    # from_numpy_array builds the same graph.
    g1 = nx.from_numpy_array(adj_pred.detach().cpu().numpy(),
                             create_using=nx.Graph)
    g2 = nx.from_numpy_array(adj_gt.detach().cpu().numpy(),
                             create_using=nx.Graph)
    return nx.graph_edit_distance(g1, g2)
def test_get_di_graph(self):
    """``repeated.to_di_graph()`` must match the expected four-node chain."""
    # Expected graph: start -> move_0 -> move_1 -> end, every edge weight 1.
    chain = ['start', 'move_0', 'move_1', 'end']
    expected = nx.DiGraph()
    expected.add_weighted_edges_from(
        [(src, dst, 1) for src, dst in zip(chain, chain[1:])])

    distance = nx.graph_edit_distance(expected, repeated.to_di_graph())
    self.assertEqual(distance, 0)
def topology(G_true, G_pred):
    '''Evaluate topology metrics.

    Parameters
    ----------
    G_true : nx.Graph
        The reference graph.
    G_pred : nx.Graph
        The estimated graph.

    Returns
    ----------
    res : dict
        A dict with the keys 'ISO score', 'GED score' and 'IM score'.
    '''

    def comparison(N1, N2):
        # Two nodes match when their 'is_init' attributes do not differ.
        return not (N1['is_init'] != N2['is_init'])

    # 1. Isomorphism with the same initial node: 1 if isomorphic, else 0.
    iso_score = int(nx.is_isomorphic(G_true, G_pred, node_match=comparison))

    # 2. GED (graph edit distance), skipped for references above 10 nodes.
    if len(G_true) > 10:
        warnings.warn(
            "Didn't calculate graph edit distances for large graphs.")
        ged_score = np.nan
    else:
        max_num_oper = len(G_true)
        GED = nx.graph_edit_distance(G_pred, G_true,
                                     node_match=comparison,
                                     upper_bound=max_num_oper)
        # None means no edit path within the upper bound was found.
        ged_score = 0 if GED is None else 1 - GED / max_num_oper

    # 3. Ipsen-Mikhailov distance, only when both graphs have equal size.
    if len(G_true) == len(G_pred):
        im_score = np.maximum(0, 1 - IM_dist(G_true, G_pred))
    else:
        im_score = 0

    return {
        'ISO score': iso_score,
        'GED score': ged_score,
        'IM score': im_score,
    }
def getJsonData(graph_1, graph_2):
    """Describe two graphs and their edit distance as a JSON-ready dict.

    Node labels are first converted to integers (starting at 0, in sorted
    order) without touching the node attributes.

    Returns:
        A dict with the keys ``graph_1`` / ``graph_2`` (edge lists as
        two-element lists), ``labels_1`` / ``labels_2`` (each node's 'label'
        attribute with double quotes removed) and ``ged`` (the graph edit
        distance, using ``return_eq`` as node matcher, as an int).
    """

    def as_sorted_int_graph(graph):
        return nx.relabel.convert_node_labels_to_integers(
            graph, first_label=0, ordering='sorted', label_attribute=None)

    def edge_pairs(graph):
        return [list(edge) for edge in graph.edges(data=False)]

    def quoted_labels_removed(graph):
        # Labels are collected in node order; a node contributes only when
        # its integer id equals its position in the node list.
        return [
            attrs.get('label').replace('"', '')
            for position, (node, attrs) in enumerate(graph.nodes(data=True))
            if node == position
        ]

    sortedIntGraph_1 = as_sorted_int_graph(graph_1)
    sortedIntGraph_2 = as_sorted_int_graph(graph_2)

    edges_1 = edge_pairs(sortedIntGraph_1)
    edges_2 = edge_pairs(sortedIntGraph_2)

    labels_1 = quoted_labels_removed(sortedIntGraph_1)
    labels_2 = quoted_labels_removed(sortedIntGraph_2)

    ged = nx.graph_edit_distance(sortedIntGraph_1, sortedIntGraph_2,
                                 node_match=return_eq)

    return {
        "graph_1": edges_1,
        "graph_2": edges_2,
        "labels_1": labels_1,
        "labels_2": labels_2,
        "ged": int(ged),
    }
def get_edit_distance(self, path1, path2):
    """Returns the minimum cost of editing one path into another.

    Only edge operations are counted, each weighted by the edge's distance
    attribute; node operations cost nothing.

    Args:
      path1: A list of node ids in the graph representing the first path.
      path2: A list of node ids in the graph representing the second path.

    Returns:
      The edit distance, analogous to Levenshtein distance for strings,
      weighted by the distances of each edge in the graph.
    """

    def get_nx_subgraph(path):
        """Creates a networkx graph from consecutive node pairs of a path."""
        graph = nx.Graph()
        for cur_node, next_node in zip(path, path[1:]):
            connection_info = self.get_connection(cur_node, next_node)
            graph.add_edge(
                cur_node,
                next_node,
                weight=connection_info.distance,
                src=cur_node,
                tgt=next_node)
        return graph

    def free_node_op(*args, **kwargs):
        """Node operations are free: only edge similarity is measured."""
        return 0

    def edge_del_or_ins_cost(edge):
        """Deleting or inserting an edge costs its absolute weight."""
        return abs(edge['weight'])

    def edge_subst_cost(edge1, edge2):
        """Substitution is free for identical endpoints, else both weights."""
        if edge1['src'] == edge2['src'] and edge1['tgt'] == edge2['tgt']:
            return 0
        return abs(edge1['weight']) + abs(edge2['weight'])

    graph1 = get_nx_subgraph(path1)
    graph2 = get_nx_subgraph(path2)

    return nx.graph_edit_distance(
        graph1,
        graph2,
        node_subst_cost=free_node_op,
        node_del_cost=free_node_op,
        node_ins_cost=free_node_op,
        edge_subst_cost=edge_subst_cost,
        edge_del_cost=edge_del_or_ins_cost,
        edge_ins_cost=edge_del_or_ins_cost)
def show_edit_distance():
    """Print the graph edit distance between two networks named on argv.

    Expects two network paths as command-line arguments; prints a usage
    line and returns when fewer are given.  The elapsed wall-clock time,
    including reading both networks, is printed alongside the distance.
    """
    if len(sys.argv) < 3:
        print(f'Usage: {sys.argv[0]} <network 0> <network 1>')
        return

    start_time = time.time()
    first_path, second_path = sys.argv[1], sys.argv[2]

    net0 = fio.read_network(first_path)
    net1 = fio.read_network(second_path)
    distance = nx.graph_edit_distance(net0.G, net1.G)

    name0 = fio.get_network_name(first_path)
    name1 = fio.get_network_name(second_path)
    elapsed = time.time() - start_time
    print(f'Distance between {name0} and {name1}: {distance} ({elapsed} s)')
def get_graph_edit_distance():
    """Compute the graph edit distance between every pair of trees.

    Iterates over the module-level ``pat_id_list`` and compares each tree in
    ``tree_dict`` with every tree that appears earlier in the list, so each
    unordered pair is computed once.  Exact graph edit distance is extremely
    slow; per the original note this is only practical for small trees
    (about 12 nodes at most).

    Returns:
        A dict mapping ``"<pa>~<pb>"`` to the graph edit distance between
        ``tree_dict[pa]`` and ``tree_dict[pb]``.
    """
    ged_dict = {}
    for pa in pat_id_list:
        for pb in pat_id_list:
            # Stop at the diagonal: only pairs with pb before pa are needed.
            if pa == pb:
                break
            print(f"\ncalc ged from {pa} - {pb}")
            print(
                f"nodes: {tree_dict[pa].number_of_nodes()} <---> {tree_dict[pb].number_of_nodes()}"
            )
            key = f"{pa}~{pb}"
            ged_dict[key] = nx.graph_edit_distance(tree_dict[pa],
                                                   tree_dict[pb])
            # Report the value just stored (the result lives in ged_dict,
            # not in tree_dict).
            print(f"{key} --> {ged_dict[key]}")
    return ged_dict
def similarity(graphs):
    """Compute the pairwise graph edit distance matrix of ``graphs``.

    Despite the name, the stored values are distances as returned by
    ``nx.graph_edit_distance`` (0.0 on the diagonal).  Each unordered pair is
    computed once and mirrored, and progress is printed for every pair.

    Args:
        graphs: Sequence of graphs accepted by ``nx.graph_edit_distance``.

    Returns:
        An ``n x n`` list of lists holding the symmetric distance matrix.
    """
    n = len(graphs)
    sim_mat = [[0] * n for _ in range(n)]
    for i in range(n):
        for j in range(i, n):
            print("Calculating similarity for ", i, j)
            if i == j:
                sim_mat[i][j] = 0.0
                continue
            # An anytime approximation via nx.optimize_graph_edit_distance
            # was considered here; the exact distance is used instead.
            sim = nx.graph_edit_distance(graphs[i], graphs[j])
            sim_mat[i][j] = sim
            sim_mat[j][i] = sim
            print("similarity for ", i, j, sim)
    # Hand the matrix back to the caller instead of discarding it.
    return sim_mat
def get_edit_distance(matrix1, ops1, matrix2, ops2, upper_bound):
    """Return the graph edit distance between two (matrix, ops) graphs.

    Each adjacency matrix becomes a ``nx.DiGraph`` whose node ``idx``
    carries ``ops[idx]`` in its ``operation`` attribute.  Nodes match when
    their operations are equal; edges match when their attribute dicts are
    equal.

    NOTE(review): a falsy result is reported as ``0``.  That covers a
    genuine distance of zero, but also ``None``, which networkx returns when
    no edit path within ``upper_bound`` exists - confirm callers expect this.
    """

    def to_nx_graph(matrix, ops):
        graph = nx.from_numpy_array(matrix, create_using=nx.DiGraph)
        # Attach the operation label to every node (creating it if absent).
        graph.add_nodes_from(
            (idx, {"operation": op}) for idx, op in enumerate(ops))
        return graph

    def node_match(node1, node2):
        return node1["operation"] == node2["operation"]

    def edge_match(edge1, edge2):
        return edge1 == edge2

    first_graph = to_nx_graph(matrix1, ops1)
    second_graph = to_nx_graph(matrix2, ops2)

    distance = nx.graph_edit_distance(first_graph,
                                      second_graph,
                                      node_match=node_match,
                                      edge_match=edge_match,
                                      upper_bound=upper_bound)
    return distance if distance else 0
def compute_similarity_matrix(self, word_list, neighbor_size):
    """Build a word-by-word similarity matrix from neighbourhood graphs.

    For every word a neighbourhood graph is obtained from
    ``self.get_sized_neighbor(word, neighbor_size)``.  The similarity of two
    words is ``1 / (1 + graph_edit_distance)`` rounded to three decimals.

    Only the diagonal and the entries above it are filled; entries below the
    diagonal stay 0.

    Args:
        word_list: Words to compare; also used as row and column labels.
        neighbor_size: Passed through to ``self.get_sized_neighbor``.

    Returns:
        A ``(table, similarity_matrix)`` pair: a ``pandas.DataFrame`` labelled
        with ``word_list`` and the underlying ``numpy`` array.
    """
    graph_list = [self.get_sized_neighbor(word, neighbor_size)
                  for word in word_list]
    size = len(word_list)
    similarity_matrix = np.zeros((size, size), float)

    for i in range(size):
        # A graph is at distance 0 from itself, i.e. similarity 1.0; there
        # is no need to run the expensive edit-distance search for it.
        similarity_matrix[i][i] = 1.0
        for j in range(i + 1, size):
            distance = nx.graph_edit_distance(graph_list[i], graph_list[j])
            similarity_matrix[i][j] = round(1 / (1 + distance), 3)

    table = pandas.DataFrame(similarity_matrix, word_list, word_list)
    return table, similarity_matrix
def ged(self, g1, g2, node_weight_mode='proportional'):
    """Return the graph edit distance between ``g1`` and ``g2``.

    Edge substitution costs 0 when both edges have the same ``relation``
    attribute and 1 otherwise (edge endpoints are not part of the
    comparison).  Node substitution depends on ``node_weight_mode``:

    * ``'proportional'``: the fraction of the first node's attributes whose
      value differs on the second node.
    * ``'atleastone'``: 1 if any attribute of the first node differs on the
      second node, otherwise 0.

    An attribute that is absent on the second node counts as different, and
    a node without attributes has substitution cost 0.

    Raises:
        ValueError: If ``node_weight_mode`` is not one of the modes above.
    """
    # Sentinel so that a missing attribute never compares equal to a real
    # value (including None).
    missing = object()

    def edge_subst_cost(gattr, hattr):
        return 0 if gattr['relation'] == hattr['relation'] else 1

    def count_mismatches(uattr, vattr):
        return sum(1 for attr, value in uattr.items()
                   if value != vattr.get(attr, missing))

    def node_subst_cost_proportional(uattr, vattr):
        # Nothing to compare: avoid dividing by zero.
        if not uattr:
            return 0
        # One division instead of a running sum avoids float drift.
        return count_mismatches(uattr, vattr) / len(uattr)

    def node_subst_cost_atleastone(uattr, vattr):
        differs = any(value != vattr.get(attr, missing)
                      for attr, value in uattr.items())
        return 1 if differs else 0

    if node_weight_mode == 'proportional':
        node_subst_cost = node_subst_cost_proportional
    elif node_weight_mode == 'atleastone':
        node_subst_cost = node_subst_cost_atleastone
    else:
        raise ValueError(
            'Node weight mode {} not known'.format(node_weight_mode))

    return nx.graph_edit_distance(g1,
                                  g2,
                                  edge_subst_cost=edge_subst_cost,
                                  node_subst_cost=node_subst_cost)
def compare_with_other_graph_edit(self, other_syn_tree, upper_bound=10):
    """Return the graph edit distance between this tree and another one.

    Nodes are matched by name: copies of both trees are made and every
    node's own identifier is stored in its ``name`` attribute so the node
    matcher can compare them.

    Returns:
        The edit distance, or ``upper_bound + 1`` when networkx finds no
        edit path within ``upper_bound`` (it returns ``None`` in that case).
    """

    def copy_with_names(tree):
        # Put the node name into the node attribute dict of a copy.
        named = tree.copy()
        for node in named.nodes:
            named.nodes[node]['name'] = node
        return named

    def same_name(node1_dict, node2_dict):
        return node1_dict['name'] == node2_dict['name']

    this_tree = copy_with_names(self.tree)
    other_tree = copy_with_names(other_syn_tree.tree)

    edit_distance = nx.graph_edit_distance(this_tree,
                                           other_tree,
                                           node_match=same_name,
                                           upper_bound=upper_bound)
    return upper_bound + 1 if edit_distance is None else edit_distance
if __name__ == "__main__": args = parse_args() if (args.g1 is None or args.g2 is None): print("g1 or g2 is not initialized") else: GREC = GRECCostFunctions() g1 = loadGXL(args.g1) g2 = loadGXL(args.g2) # if you want to only output the distance, you can use the graph_edit_distance as follows: distance = nx.graph_edit_distance( g1, g2, node_subst_cost=GREC.node_substitution_cost, node_del_cost=GREC.node_deletion_cost, node_ins_cost=GREC.node_insertion_cost, edge_subst_cost=GREC.edge_substitution_cost, edge_del_cost=GREC.edge_deletion_cost, edge_ins_cost=GREC.node_insertion_cost) print("optimal distance :::", distance) # if you want to output the distance and all the possible optimal pathz, you can use the optimal_edit_paths function paths, cost = nx.optimal_edit_paths( g1, g2, node_subst_cost=GREC.node_substitution_cost, node_del_cost=GREC.node_deletion_cost, node_ins_cost=GREC.node_insertion_cost,
# Fourth example architecture: 6x6 adjacency matrix and 6x5 operation matrix.
adj4 = np.array([
    [0, 1, 1, 1, 0, 0],
    [0, 0, 1, 0, 0, 0],
    [0, 0, 0, 0, 1, 0],
    [0, 0, 0, 0, 1, 0],
    [0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0],
])
op4 = np.array([
    [1, 0, 0, 0, 0],
    [0, 1, 0, 0, 0],
    [0, 0, 1, 0, 0],
    [0, 0, 0, 1, 0],
    [0, 0, 0, 0, 1],
    [0, 0, 0, 0, 0],
])
adj4, op4 = preprocess_adj_op(adj4, op4)

# Build one graph per example architecture.
G1 = gen_graph(adj1, op1)
G2 = gen_graph(adj2, op2)
G3 = gen_graph(adj3, op3)
G4 = gen_graph(adj4, op4)

# Draw the four graphs side by side (subplot codes 141 .. 144).
for _subplot_code, _arch_graph in enumerate((G1, G2, G3, G4), start=141):
    plt.subplot(_subplot_code)
    nx.draw(_arch_graph, with_labels=True, font_weight='bold')

# The two distances below are computed but neither stored nor printed here.
nx.graph_edit_distance(G1, G2, node_match=node_match, edge_match=edge_match)
nx.graph_edit_distance(G2, G3, node_match=node_match, edge_match=edge_match)
def my_func(g1, g2):
    """Return the graph edit distance of two graphs as an int.

    The graph with strictly fewer edges (``size()``) is passed first;
    otherwise ``g2`` is passed first.
    """
    ordered = (g1, g2) if g1.size() < g2.size() else (g2, g1)
    return int(nx.graph_edit_distance(*ordered))
def test_build_graph(name, edges, fd_in, fd_out, exp_paths):
    """``ap.build_graph`` must produce a graph with exactly the given edges.

    ``name`` and ``exp_paths`` are accepted but not used by this test.
    """
    expected = nx.MultiDiGraph()
    expected.add_edges_from(edges)

    observed = ap.build_graph(fd_in, fd_out)

    # A zero edit distance means no node or edge edits are needed.
    distance = nx.graph_edit_distance(expected, observed)
    assert distance == 0
G1.add_edges_from(edges_indices) dir = os.path.join(load_dir, "{}/free_{}_fix_8/{}".format(i, node, last_data)) nodes_pos = np.load(os.path.join(dir, 'nodes_pos.npy')) edges_indices = np.load(os.path.join(dir, 'edges_indices.npy')) edges_thickness = np.load(os.path.join(dir, 'edges_thickness.npy')) input_nodes = np.load(os.path.join(dir, 'input_nodes.npy')).tolist() input_vectors = np.load(os.path.join(dir, 'input_vectors.npy')) frozen_nodes = np.load(os.path.join(dir, 'frozen_nodes.npy')).tolist() output_nodes = np.load(os.path.join(dir, 'output_nodes.npy')).tolist() output_vectors = np.load(os.path.join(dir, 'output_vectors.npy')) G2 = nx.Graph() G2.add_nodes_from(np.arange(len(nodes_pos))) edge_info = np.concatenate( [edges_indices, edges_thickness.reshape((-1, 1))], axis=1) G2.add_edges_from(edges_indices) print("計算中") # for v in nx.optimize_graph_edit_distance(G1, G2): # minv = v # print(minv) minv = nx.graph_edit_distance(G1, G2) data_GED_list.append(minv) np.save(os.path.join(load_dir, "GED_{}_{}.npy".format(i, node)), minv) GED_list.append(data_GED_list) print(GED_list) np.save(os.path.join(load_dir, "GED.npy"), GED_list)
node = q.pop() if node and node['element'].name == "body": graph.add_node(node_id, element=node['element'].name) node_id += 1 root_id = node['root_id'] labels[root_id] = node['element'].name for t in node['element'].contents: if t and t.name: graph.add_node(node_id, element=t.name) graph.add_edge(root_id, node_id) q.appendleft({"element": t, "root_id": node_id}) node_id += 1 return graph, labels graph1, labels = html_to_dom_tree(soup.find("body")) graph2, _ = html_to_dom_tree(soup.find("body")) #nx.draw(graph1, labels=labels, with_labels = True) #plt.show() #https://networkx.github.io/documentation/latest/reference/algorithms/generated/networkx.algorithms.similarity.graph_edit_distance.html dist = nx.graph_edit_distance(graph1, graph2) print(dist)
mng1_mng2_head.extend(
    matrixDistance(mng1_3, other) for other in (mng2_3, mng2_4))
# Sum of the upper-row distances between mng1 and mng2.
dist_mng1_mng2_head = sum(mng1_mng2_head)

# Lower row: distances between every lower-row matrix of mng1 and of mng2.
mng1_mng2_bottom = [
    matrixDistance(left, right)
    for left in (mng1_4, mng1_5)
    for right in (mng2_5, mng2_6, mng2_7)
]
# Sum of the lower-row distances between mng1 and mng2.
dist_mng1_mng2_bottom = sum(mng1_mng2_bottom)

print("mng1とmng2の距離")
# Per the original note: divide the upper and lower sums by their number of
# pairwise comparisons (12 and 6) to get averages; the two averages are then
# averaged again when printed.
d_m1m2 = (dist_mng1_mng2_head / 12) + (dist_mng1_mng2_bottom / 6)
print(d_m1m2 / 2)

# Graph edit distance between graph g and graph g2.
distance1 = nx.graph_edit_distance(g, g2)
print("mng1とmng2のグラフの距離")
print(distance1)
print("\n\n\n")

# Experiment 2: similarity between mng1 and mng3 -----------------------------
# Upper row: distances between every upper-row matrix of mng1 and of mng3.
mng1_mng3_head = [
    matrixDistance(left, right)
    for left in (mng1_1, mng1_2, mng1_3)
    for right in (mng3_1, mng3_2)
]
# Sum of the upper-row distances between mng1 and mng3.
dist_mng1_mng3_head = sum(mng1_mng3_head)
def calculate_ged_NX(g1, g2, aids=False):
    """Return the networkx graph edit distance between ``g1`` and ``g2``.

    When ``aids`` is truthy, nodes are compared with the module-level
    ``node_match_equality``; otherwise no node matcher is used.
    """
    matcher = node_match_equality if aids else None
    return nx.graph_edit_distance(g1, g2, node_match=matcher)
def edit_distance_exact(g1, g2):
    """Return the exact graph edit distance between two adjacency matrices.

    Args:
        g1: Adjacency matrix of the first graph (NumPy array).
        g2: Adjacency matrix of the second graph (NumPy array).

    Returns:
        The value returned by ``networkx.graph_edit_distance``.
    """
    # from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array builds
    # the same graph and is available in older releases as well.
    first_graph = networkx.from_numpy_array(g1)
    second_graph = networkx.from_numpy_array(g2)
    return networkx.graph_edit_distance(first_graph, second_graph)
def _single_distance(self, g1, g2, verbose=False):
    """Return the graph edit distance between ``g1`` and ``g2``.

    Substitution costs come from ``self.node_cost`` and ``self.edge_cost``.
    ``verbose`` is accepted but not used.
    """
    # networkx is imported lazily, only when a distance is requested.
    import networkx

    substitution_costs = {
        'node_subst_cost': self.node_cost,
        'edge_subst_cost': self.edge_cost,
    }
    return networkx.graph_edit_distance(g1, g2, **substitution_costs)
u2 = np.sort(eigvals(adj2)) return np.abs(np.sqrt(np.sum(np.square(u1-u2)+np.square(v1-v2)))) # %% Init repeat_n = 10 G1 = [] G2 = [] for i in range(repeat_n): G1.append(nx.erdos_renyi_graph(50,0.8)) G2.append(nx.erdos_renyi_graph(50,0.8)) # %% edit distance start_time = time() for i in range(repeat_n): print(i) nx.graph_edit_distance(G1[i], G2[i]) end_time = time() print((end_time-start_time)/repeat_n) # %% spectral distance start_time = time() dist = netrd.distance.IpsenMikhailov() for i in range(repeat_n): print(i) dist.dist(G1[i], G2[i]) end_time = time() print((end_time-start_time)/repeat_n) # %% correlation distance start_time = time() for i in range(repeat_n): print(i) correlation_distance(G1[i], G2[i])
def __eq__(self, other):
    """Two objects are equal when they share a type and a zero edit distance.

    The comparison is done on the directed graphs returned by
    ``to_di_graph()``; it is only attempted when both types are equal.
    """
    if not (type(self) == type(other)):
        return False
    distance = nx.graph_edit_distance(self.to_di_graph(),
                                      other.to_di_graph())
    return distance == 0
def edge_match(e1, e2):
    """Two edges match when their 'type' attributes are equal."""
    return e1["type"] == e2["type"]


# Compare every test graph with every train graph and record the pairs for
# which a non-zero edit distance was obtained.
output = []
print(len(train_graphs), len(test_graphs))
for graph, elem in test_graphs:
    # A size cap (skip graphs with more than 500 nodes) was sketched here
    # and in the inner loop but is disabled.
    print("Test graph is size:", len(graph.nodes))
    for graph2, elem2 in tqdm(train_graphs):
        edit_distance = nx.graph_edit_distance(graph,
                                               graph2,
                                               node_match,
                                               edge_match,
                                               timeout=10)
        # Skips falsy results: None (nothing found within the timeout) and
        # also a distance of 0.
        if not edit_distance:
            continue
        print("(train: %s %s, test: %s %s), %.2f" %
              (elem2["fname"], elem2["og_expr"], elem["fname"],
               elem["og_expr"], edit_distance))
        output.append({
            "fname1": elem["fname"],
            "fname2": elem2["fname"],
            "expr1": elem["og_expr"],
            # expr2 belongs to the train item, like fname2.
            "expr2": elem2["og_expr"],
            "edit_distance": edit_distance
        })