def test_pagerank_by_hand():
    """Compare ``PageRank`` on a 4-node path graph with the networkx reference.

    The same path (three consecutive edges) is built once with the project's
    ``Graph`` and once with ``nx.Graph``; the two score vectors must agree up
    to an L2 distance of 1e-6.
    """
    num_nodes = 4
    path_edges = [(0, 1), (1, 2), (2, 3)]

    graph = Graph('gr', 'gr.xml', num_nodes)
    for idx in range(num_nodes):
        graph.add_node(Node(idx, LabelNodeLetter(0, 0)))
    for start, end in path_edges:
        graph.add_edge(Edge(start, end, LabelEdge(0)))

    pagerank = PageRank()
    results = np.asarray(pagerank.calc_centrality_score(graph))

    # The reference graph uses 1-based node ids; only the topology matters.
    graph2 = nx.Graph()
    for idx in range(1, num_nodes + 1):
        graph2.add_node(idx)
    for start, end in path_edges:
        graph2.add_edge(start + 1, end + 1)

    # ``nx.pagerank_scipy`` was removed in networkx 3.0; fall back to
    # ``nx.pagerank`` there so the test still runs on recent versions.
    reference_pagerank = getattr(nx, 'pagerank_scipy', nx.pagerank)
    scores = reference_pagerank(graph2)
    # Index the scores explicitly by node id instead of relying on dict order.
    expected = np.array([scores[idx] for idx in range(1, num_nodes + 1)])

    print(results)
    assert np.linalg.norm(results - expected) < 1e-6
def test_betweenness_by_hand():
    """Compare ``Betweenness`` on a 4-node path graph with networkx.

    The reference is ``nx.betweenness_centrality(..., normalized=False)`` on a
    path with the same topology; both score vectors must agree up to an L2
    distance of 1e-6.
    """
    graph = Graph('gr', 'gr.xml', 4)
    for node_idx in range(4):
        graph.add_node(Node(node_idx, LabelNodeLetter(0, 0)))
    for start in range(3):
        graph.add_edge(Edge(start, start + 1, LabelEdge(0)))

    scorer = Betweenness()
    results = np.asarray(scorer.calc_centrality_score(graph))

    # Same path in networkx, with 1-based node ids.
    graph2 = nx.Graph()
    for node_id in (1, 2, 3, 4):
        graph2.add_node(node_id)
    for start in (1, 2, 3):
        graph2.add_edge(start, start + 1)

    reference_scores = nx.betweenness_centrality(graph2, normalized=False)
    expected = np.array(list(reference_scores.values()))

    print(results)
    assert np.linalg.norm(results - expected) < 1e-6
def test_dirac_mutagenicity_norm(coord1, coord2, e_cost, expected):
    """Check the node-substitution cost of ``EditCostMutagenicity``.

    ``coord1`` / ``coord2`` are unpacked into ``LabelNodeMutagenicity`` and
    ``e_cost`` into ``EditCostMutagenicity``; the substitution cost between
    the two resulting nodes must equal ``expected``.
    """
    source_node = Node(0, LabelNodeMutagenicity(*coord1))
    target_node = Node(1, LabelNodeMutagenicity(*coord2))
    cost_model = EditCostMutagenicity(*e_cost)

    assert cost_model.cost_substitute_node(source_node, target_node) == expected
def test_dirac_nci1_norm(coord1, coord2, e_cost, expected):
    """Check the node-substitution cost of ``EditCostNCI1``.

    ``coord1`` / ``coord2`` are unpacked into ``LabelNodeNCI1`` and ``e_cost``
    into ``EditCostNCI1``; the substitution cost between the two resulting
    nodes must equal ``expected``.
    """
    source_node = Node(0, LabelNodeNCI1(*coord1))
    target_node = Node(1, LabelNodeNCI1(*coord2))
    cost_model = EditCostNCI1(*e_cost)

    assert cost_model.cost_substitute_node(source_node, target_node) == expected
def test_dirac_proteins_tu_substitution(coord1, coord2, e_cost, expected):
    """Check the node-substitution cost of ``EditCostProteinsTU``.

    ``coord1`` / ``coord2`` are unpacked into ``LabelNodeProteinsTU`` and
    ``e_cost`` into ``EditCostProteinsTU``; the substitution cost between the
    two resulting nodes must equal ``expected``.
    """
    source_node = Node(0, LabelNodeProteinsTU(*coord1))
    target_node = Node(1, LabelNodeProteinsTU(*coord2))
    cost_model = EditCostProteinsTU(*e_cost)

    assert cost_model.cost_substitute_node(source_node, target_node) == expected
def test_dirac_collab_substitution(coord1, coord2, e_cost, expected):
    """Check the node-substitution cost of ``EditCostCollab``.

    ``coord1`` / ``coord2`` are unpacked into ``LabelNodeCollab`` and
    ``e_cost`` into ``EditCostCollab``; the substitution cost between the two
    resulting nodes must equal ``expected``.
    """
    source_node = Node(0, LabelNodeCollab(*coord1))
    target_node = Node(1, LabelNodeCollab(*coord2))
    cost_model = EditCostCollab(*e_cost)

    assert cost_model.cost_substitute_node(source_node, target_node) == expected
def test_dirac_reddit_binary_substitution(coord1, coord2, e_cost, expected):
    """Check the node-substitution cost of ``EditCostRedditBinary``.

    ``coord1`` / ``coord2`` are unpacked into ``LabelNodeRedditBinary`` and
    ``e_cost`` into ``EditCostRedditBinary``; the substitution cost between
    the two resulting nodes must equal ``expected``.
    """
    source_node = Node(0, LabelNodeRedditBinary(*coord1))
    target_node = Node(1, LabelNodeRedditBinary(*coord2))
    cost_model = EditCostRedditBinary(*e_cost)

    assert cost_model.cost_substitute_node(source_node, target_node) == expected
def test_dirac_enzymes_substitution(vec1, vec2, e_cost, expected):
    """Check the node-substitution cost of ``EditCostGNNEmbedding``.

    ``vec1`` / ``vec2`` are converted to float64 arrays and wrapped in
    ``LabelNodeEmbedding``; ``e_cost`` is unpacked into
    ``EditCostGNNEmbedding``. The substitution cost between the two resulting
    nodes must equal ``expected``.
    """
    source_embedding = np.array(vec1, dtype=np.float64)
    source_node = Node(0, LabelNodeEmbedding(source_embedding))
    target_embedding = np.array(vec2, dtype=np.float64)
    target_node = Node(1, LabelNodeEmbedding(target_embedding))
    cost_model = EditCostGNNEmbedding(*e_cost)

    assert cost_model.cost_substitute_node(source_node, target_node) == expected
def test_dirac_protein_norm(coord1, coord2, e_cost, expected):
    """Check substitution, insertion and deletion costs of ``EditCostProtein``.

    ``coord1`` / ``coord2`` are unpacked into ``LabelNodeProtein`` and
    ``e_cost`` into ``EditCostProtein``. The substitution cost must equal
    ``expected``; the insertion and deletion costs must equal ``e_cost[0]``
    and ``e_cost[1]`` respectively.
    """
    source_node = Node(0, LabelNodeProtein(*coord1))
    target_node = Node(1, LabelNodeProtein(*coord2))
    cost_model = EditCostProtein(*e_cost)

    substitution_cost = cost_model.cost_substitute_node(source_node, target_node)
    assert substitution_cost == expected

    insertion_cost, deletion_cost = e_cost[0], e_cost[1]
    assert cost_model.cost_insert_node(source_node) == insertion_cost
    assert cost_model.cost_delete_node(target_node) == deletion_cost
def test_copy_graph(my_graph):
    """Check that a deep copy is not affected by changes to the original graph.

    Two nodes and one edge are added to the ``my_graph`` fixture, the graph is
    deep-copied, and then a node is removed from the original only. The copy
    must keep its size while the original shrinks by one node.
    """
    import copy

    my_graph.add_node(Node(0, LabelNodeLetter(1, 1)))
    my_graph.add_node(Node(1, LabelNodeLetter(2, 3)))
    my_graph.add_edge(Edge(0, 1, LabelEdge(0)))
    # Measured rather than hard-coded: the fixture's initial content is not
    # visible from this test.
    size_before_removal = len(my_graph)

    new_graph = copy.deepcopy(my_graph)
    my_graph.remove_node_by_idx(0)

    # Kept for debugging; pytest shows this output when the test fails.
    print(new_graph)
    print(my_graph)

    assert new_graph is not my_graph
    assert len(my_graph) == size_before_removal - 1
    assert len(new_graph) == size_before_removal
def test_manhattan_norm(coord1, coord2, epsilon):
    """Check the 'manhattan' substitution cost of ``EditCostLetter``.

    The cost of substituting two ``LabelNodeLetter`` nodes built from
    ``coord1`` and ``coord2`` is compared with the 1-norm of the coordinate
    difference, first within ``epsilon`` and then exactly.
    """
    source_node = Node(0, LabelNodeLetter(*coord1))
    target_node = Node(1, LabelNodeLetter(*coord2))
    cost_model = EditCostLetter(1., 1., 1., 1., 'manhattan')
    result = cost_model.cost_substitute_node(source_node, target_node)

    difference = np.array(coord1) - np.array(coord2)
    reference = np.linalg.norm(difference, 1)

    # NOTE(review): the exact comparison below is stricter than the epsilon
    # check, which makes the tolerance redundant - confirm which is intended.
    assert abs(result - reference) < epsilon
    assert result == reference
def test_1_node_same(ged):
    """Two single-node graphs with identical labels must be at distance 0."""
    label_args = ('C', 1, 1, 2., 2.)

    gr_src = Graph('gr1', 'gr1.gxl', 1)
    gr_trgt = Graph('gr2', 'gr2.gxl', 1)
    gr_src.add_node(Node(0, LabelNodeAIDS(*label_args)))
    gr_trgt.add_node(Node(0, LabelNodeAIDS(*label_args)))

    assert ged.compute_edit_distance(gr_src, gr_trgt) == 0.
def test_mutagenicity_with_deleted_nodes(mutagenicity_graphs,
                                         dataframe_mutagenicity,
                                         graph_name_source_target):
    """Check GED on a source graph that had extra nodes added and then removed.

    The source graph is rebuilt inside a larger ``Graph``, two extra nodes with
    random edges are appended and removed again, and the edit distance to the
    target graph must equal the value stored in ``dataframe_mutagenicity``.
    """
    def _graph_named(wanted_name):
        # First graph of the fixture whose name matches.
        return [candidate for candidate in mutagenicity_graphs
                if candidate.name == wanted_name][0]

    graph_name_source, graph_name_target = graph_name_source_target
    gr_name_src = 'mutagen/' + graph_name_source
    gr_name_trgt = 'mutagen/' + graph_name_target

    graph_source = _graph_named(graph_name_source)
    graph_target = _graph_named(graph_name_target)

    cst_cost_node = 11.0
    cst_cost_edge = 1.1
    edit_cost = EditCostMutagenicity(cst_cost_node, cst_cost_node,
                                     cst_cost_edge, cst_cost_edge,
                                     'dirac')
    ged = GED(edit_cost)

    # Reproduce the source graph with room for two more nodes.
    num_src_nodes = len(graph_source)
    new_gr_src = Graph(gr_name_src, 'gr.xls', num_src_nodes + 2)
    for node in graph_source.nodes:
        new_gr_src.add_node(node)
    for edges in graph_source.get_edges().values():
        for edge in edges:
            if edge is not None:
                new_gr_src.add_edge(edge)

    first_extra_idx = num_src_nodes
    second_extra_idx = num_src_nodes + 1
    new_gr_src.add_node(Node(first_extra_idx, LabelNodeMutagenicity('C')))
    new_gr_src.add_node(Node(second_extra_idx, LabelNodeMutagenicity('N')))

    # Attach the extra nodes with random edges.
    for _ in range(4):
        random_end = random.randint(0, num_src_nodes - 1)
        new_gr_src.add_edge(Edge(first_extra_idx, random_end, LabelEdge(0)))
    for _ in range(6):
        random_end = random.randint(0, num_src_nodes)
        new_gr_src.add_edge(Edge(second_extra_idx, random_end, LabelEdge(0)))

    # Remove both extra nodes again; the same index is used for both calls.
    for _ in range(2):
        new_gr_src.remove_node_by_idx(first_extra_idx)

    results = ged.compute_edit_distance(new_gr_src, graph_target)
    expected = dataframe_mutagenicity.loc[gr_name_src, gr_name_trgt]

    # Debugging hint: the cost matrices ``ged.C`` and ``ged.C_star`` can be
    # dumped with ``np.savetxt(path, np.asarray(...), fmt='%10.3f', delimiter=';')``.
    print(f'###### diff {results - expected}')
    print(f'{graph_name_source_target}: new dist {results} - old dist {expected}')
    print(f'exp {expected}')
    assert results == expected
def test_remove_node(num_nodes, idx_to_remove, expected_adj):
    """Removing one node from a fully connected graph shrinks it by one.

    A graph with ``num_nodes`` nodes and an edge between every pair is built,
    the node at ``idx_to_remove`` is removed, and the remaining size is
    checked.
    """
    my_graph = Graph(f'gr{num_nodes}', f'gr{num_nodes}.gxl', num_nodes)

    nodes = [Node(i, LabelNodeLetter(1+i, 1)) for i in range(num_nodes)]
    for node in nodes:
        my_graph.add_node(node)

    for idx_start, idx_end in combinations(range(num_nodes), 2):
        my_graph.add_edge(Edge(idx_start, idx_end, LabelEdge(0)))

    my_graph.remove_node_by_idx(idx_to_remove)

    # NOTE(review): ``expected_adj`` is never used, and the remaining node
    # list is only printed, not asserted - confirm whether checks are missing.
    print(f'--{nodes}')
    print(my_graph.get_nodes())

    remaining = len(my_graph)
    assert remaining == num_nodes - 1
def test_1_node(ged):
    """Two single-node graphs whose labels differ ('C' vs 'O'): distance 2.2."""
    gr_src = Graph('gr1', 'gr1.gxl', 1)
    gr_trgt = Graph('gr2', 'gr2.gxl', 1)

    carbon = LabelNodeAIDS('C', 1, 1, 2., 2.)
    gr_src.add_node(Node(0, carbon))
    oxygen = LabelNodeAIDS('O', 1, 1, 2., 2.)
    gr_trgt.add_node(Node(0, oxygen))

    dist = ged.compute_edit_distance(gr_src, gr_trgt)

    # Dump the distance and the solver's internal arrays for debugging.
    print(dist)
    for internal_array in (ged.C, ged.C_star, ged.phi):
        print(internal_array.base)

    assert dist == 2.2
def test_dirac_enzymes_delete_node(coord1, e_cost, expected):
    """Check the node-deletion cost of ``EditCostEnzymes``.

    ``coord1`` is unpacked into ``LabelNodeEnzymes`` and ``e_cost`` into
    ``EditCostEnzymes``; deleting the resulting node must cost ``expected``.
    """
    # NOTE(review): another function with this exact name appears in this
    # source; if both live in one module, only the later definition is kept.
    deleted_node = Node(0, LabelNodeEnzymes(*coord1))
    cost_model = EditCostEnzymes(*e_cost)

    assert cost_model.cost_delete_node(deleted_node) == expected
def test_dirac_proteins_tu_delete_node(coord1, e_cost, expected):
    """Check the node-deletion cost of ``EditCostProteinsTU``.

    ``coord1`` is unpacked into ``LabelNodeProteinsTU`` and ``e_cost`` into
    ``EditCostProteinsTU``; deleting the resulting node must cost ``expected``.
    """
    deleted_node = Node(0, LabelNodeProteinsTU(*coord1))
    cost_model = EditCostProteinsTU(*e_cost)

    assert cost_model.cost_delete_node(deleted_node) == expected
def test_dirac_enzymes_delete_node(e_cost, expected):
    """Check the node-deletion cost of ``EditCostGNNEmbedding``.

    The deleted node carries a ``LabelNodeEmbedding`` built from a vector of
    five ones; ``e_cost`` is unpacked into ``EditCostGNNEmbedding`` and the
    deletion cost must equal ``expected``.
    """
    # NOTE(review): another function with this exact name appears in this
    # source; if both live in one module, only the later definition is kept.
    embedding = np.ones(5)
    deleted_node = Node(0, LabelNodeEmbedding(embedding))
    cost_model = EditCostGNNEmbedding(*e_cost)

    assert cost_model.cost_delete_node(deleted_node) == expected
def test_dirac_reddit_binary_delete_node(coord1, e_cost, expected):
    """Check the node-deletion cost of ``EditCostRedditBinary``.

    ``coord1`` is unpacked into ``LabelNodeRedditBinary`` and ``e_cost`` into
    ``EditCostRedditBinary``; deleting the resulting node must cost
    ``expected``.
    """
    deleted_node = Node(0, LabelNodeRedditBinary(*coord1))
    cost_model = EditCostRedditBinary(*e_cost)

    assert cost_model.cost_delete_node(deleted_node) == expected
def test_dirac_collab_delete_node(coord1, e_cost, expected):
    """Check the node-deletion cost of ``EditCostCollab``.

    ``coord1`` is unpacked into ``LabelNodeCollab`` and ``e_cost`` into
    ``EditCostCollab``; deleting the resulting node must cost ``expected``.
    """
    deleted_node = Node(0, LabelNodeCollab(*coord1))
    cost_model = EditCostCollab(*e_cost)

    assert cost_model.cost_delete_node(deleted_node) == expected
def define_graphs():
    """Build a small source graph (4 nodes) and target graph (3 nodes).

    Returns:
        The tuple ``(graph_source, graph_target)``. Every node carries a
        ``LabelNodeLetter(x, 0)`` label and every edge a ``LabelEdge(0)``.
    """
    # First label coordinate of each node, in node-index order.
    source_xs = (1, 2, 1, 3)
    source_links = ((0, 1), (1, 2), (1, 3), (2, 3))
    target_xs = (3, 2, 2)
    target_links = ((0, 1), (1, 2))

    graph_source = Graph('gr_source', 'gr_source.gxl', len(source_xs))
    # NOTE(review): 'gr_targe.gxl' looks like a typo for 'gr_target.gxl';
    # kept as-is because the string is passed to Graph at runtime.
    graph_target = Graph('gr_target', 'gr_targe.gxl', len(target_xs))

    # Populate the source graph.
    for idx, x in enumerate(source_xs):
        graph_source.add_node(Node(idx, LabelNodeLetter(x, 0)))
    for start, end in source_links:
        graph_source.add_edge(Edge(start, end, LabelEdge(0)))

    # Populate the target graph.
    for idx, x in enumerate(target_xs):
        graph_target.add_node(Node(idx, LabelNodeLetter(x, 0)))
    for start, end in target_links:
        graph_target.add_edge(Edge(start, end, LabelEdge(0)))

    return graph_source, graph_target
def test_2_node(ged):
    """Check the cost matrix and distance for two 2-node graphs without edges.

    The source nodes are labelled 'C' and 'O', the target nodes 'C' and 'C'.
    Both the full cost matrix ``ged.C`` and the resulting distance (2.2) are
    verified.
    """
    gr_src = Graph('gr1', 'gr1.gxl', 2)
    gr_trgt = Graph('gr2', 'gr2.gxl', 2)

    for idx, symbol in enumerate(('C', 'O')):
        gr_src.add_node(Node(idx, LabelNodeAIDS(symbol, 1, 1, 2., 2.)))
    for idx, symbol in enumerate(('C', 'C')):
        gr_trgt.add_node(Node(idx, LabelNodeAIDS(symbol, 1, 1, 2., 2.)))

    dist = ged.compute_edit_distance(gr_src, gr_trgt)

    inf = np.inf
    expected_C = np.array([
        [0., 0, 1.1, inf],
        [2.2, 2.2, inf, 1.1],
        [1.1, inf, 0., 0.],
        [inf, 1.1, 0., 0.],
    ])
    assert np.array_equal(np.asarray(ged.C), expected_C)

    assert dist == 2.2
def test_3_nodes_1_node(ged):
    """Distance between a 3-node path (O-C-O) and a single 'H' node is 4.6.

    The computed distance is rounded to two decimals before the comparison.
    """
    gr_src = Graph('gr1', 'gr1.gxl', 3)
    gr_trgt = Graph('gr2', 'gr2.gxl', 1)

    for idx, symbol in enumerate(('O', 'C', 'O')):
        gr_src.add_node(Node(idx, LabelNodeAIDS(symbol, 1, 1, 2., 2.)))
    for start, end in ((0, 1), (1, 2)):
        gr_src.add_edge(Edge(start, end, LabelEdge(0)))

    gr_trgt.add_node(Node(0, LabelNodeAIDS('H', 1, 1, 2., 2.)))

    dist = ged.compute_edit_distance(gr_src, gr_trgt)

    # Dump the solver's cost matrices for debugging.
    for cost_matrix in (ged.C, ged.C_star):
        print(cost_matrix.base)

    assert round(dist, 2) == 4.6
def test_add_node_higher_than_num_nodes(num_nodes, error_idx):
    """Adding a node at ``error_idx`` must raise a descriptive AssertionError.

    The graph is created with ``num_nodes`` slots and the message of the
    raised ``AssertionError`` is compared with the expected text.
    """
    my_graph = Graph(f'gr{num_nodes}', f'gr{num_nodes}.gxl', num_nodes)
    offending_node = Node(error_idx, LabelNodeLetter(1, 1))

    with pytest.raises(AssertionError) as execinfo:
        my_graph.add_node(offending_node)

    expected_error_msg = f'The idx of the node {error_idx} exceed the number of nodes {num_nodes} authorized!'
    assert execinfo.value.args[0] == expected_error_msg
def test_add_node(num_nodes):
    """Adding ``num_nodes`` nodes yields exactly those nodes, in order."""
    my_graph = Graph(f'gr{num_nodes}', f'gr{num_nodes}.gxl', num_nodes)

    nodes = [Node(i, LabelNodeLetter(1, 1)) for i in range(num_nodes)]
    for node in nodes:
        my_graph.add_node(node)

    assert my_graph.get_nodes() == nodes
    assert len(my_graph) == num_nodes
def test_out_in_degrees(num_nodes, expected_matrix):
    """Check ``Graph.out_degrees()`` on a fully connected graph.

    A graph with ``num_nodes`` nodes and an edge between every pair of nodes
    is built; its out-degrees must equal ``expected_matrix``.
    """
    my_graph = Graph(f'gr{num_nodes}', f'gr{num_nodes}.gxl', num_nodes)

    for node_idx in range(num_nodes):
        my_graph.add_node(Node(node_idx, LabelNodeLetter(node_idx, node_idx)))

    for idx_start, idx_end in combinations(range(num_nodes), 2):
        my_graph.add_edge(Edge(idx_start, idx_end, LabelEdge(0)))

    out_degrees = np.asarray(my_graph.out_degrees())
    assert np.array_equal(out_degrees, expected_matrix)
def test_add_clique_edge(num_nodes, expected_edges):
    """Check the edges stored after connecting every pair of nodes.

    A graph with ``num_nodes`` nodes and an edge between every pair is built.
    ``get_edges()`` must equal ``expected_edges``, ``has_edge`` must report
    the edge between the first and last node in both directions, and it must
    report no edge towards an index beyond the graph.
    """
    my_graph = Graph(f'gr{num_nodes}', f'gr{num_nodes}.gxl', num_nodes)

    for i in range(num_nodes):
        my_graph.add_node(Node(i, LabelNodeLetter(i, i)))

    for idx_start, idx_end in combinations(range(num_nodes), 2):
        my_graph.add_edge(Edge(idx_start, idx_end, LabelEdge(0)))

    assert my_graph.get_edges() == expected_edges

    last_idx = num_nodes - 1
    # Plain truthiness checks replace the former ``== True`` / ``== False``.
    assert my_graph.has_edge(0, last_idx)
    assert my_graph.has_edge(last_idx, 0)
    # ``num_nodes + 1`` is outside the valid index range of the graph.
    assert not my_graph.has_edge(0, num_nodes + 1)
def test_adjacency_matrix(num_nodes, expected_matrix):
    """Check ``Graph.adjacency_matrix`` on a fully connected graph.

    A graph with ``num_nodes`` nodes and an edge between every pair of nodes
    is built; its adjacency matrix must equal ``expected_matrix``.
    """
    my_graph = Graph(f'gr{num_nodes}', f'gr{num_nodes}.gxl', num_nodes)

    for node_idx in range(num_nodes):
        my_graph.add_node(Node(node_idx, LabelNodeLetter(node_idx, node_idx)))

    for idx_start, idx_end in combinations(range(num_nodes), 2):
        my_graph.add_edge(Edge(idx_start, idx_end, LabelEdge(0)))

    # ``np.asarray`` converts the stored matrix to an ndarray for comparison
    # (``my_graph.adjacency_matrix.base`` was noted as an alternative).
    adjacency = np.asarray(my_graph.adjacency_matrix)
    assert np.array_equal(adjacency, expected_matrix)
def test_pagerank_by_hand_big():
    """Compare ``PageRank`` on a 6-node graph with the networkx reference.

    The project graph and the ``nx.Graph`` are built from the same edge list,
    so both always describe the same topology; the two score vectors must
    agree up to an L2 distance of 1e-6.
    """
    num_nodes = 6
    edges = [
        (0, 1), (0, 2), (0, 4), (0, 5),
        (1, 2),
        (2, 3), (2, 5),
        (3, 4), (3, 5),
    ]

    graph = Graph('gr', 'gr.xml', num_nodes)
    graph2 = nx.Graph()
    for i in range(num_nodes):
        graph.add_node(Node(i, LabelNodeLetter(0, 0)))
        graph2.add_node(i)

    # Add the edges to the project graph and compute its scores.
    for start, end in edges:
        graph.add_edge(Edge(start, end, LabelEdge(0)))

    pagerank = PageRank()
    results = np.asarray(pagerank.calc_centrality_score(graph))

    # Add the same edges to the networkx graph.
    for start, end in edges:
        graph2.add_edge(start, end)

    # ``nx.pagerank_scipy`` was removed in networkx 3.0; fall back to
    # ``nx.pagerank`` there so the test still runs on recent versions.
    reference_pagerank = getattr(nx, 'pagerank_scipy', nx.pagerank)
    scores = reference_pagerank(graph2)
    # Index the scores explicitly by node id instead of relying on dict order.
    expected = np.array([scores[i] for i in range(num_nodes)])

    assert np.linalg.norm(results - expected) < 1e-6
def test_with_deleted_node(ged):
    """Check GED when the source graph had two of its nodes removed.

    The source graph is created with five nodes and four edges; nodes 4 and 3
    are then removed, leaving three nodes. The target graph is a triangle of
    three nodes. The distance, rounded to two decimals, must be 4.7.
    """
    # The source graph needs room for five nodes. An earlier 3-node ``Graph``
    # that was created here and immediately overwritten has been dropped.
    gr_src = Graph('gr1', 'gr1.gxl', 5)
    gr_trgt = Graph('gr2', 'gr2.gxl', 3)

    gr_src.add_node(Node(0, LabelNodeAIDS('O', 1, 1, 2., 2.)))
    gr_src.add_node(Node(1, LabelNodeAIDS('C', 1, 1, 2., 2.)))
    gr_src.add_node(Node(2, LabelNodeAIDS('O', 1, 1, 2., 2.)))
    gr_src.add_node(Node(3, LabelNodeAIDS('Cl', 1, 1, 2., 2.)))
    gr_src.add_node(Node(4, LabelNodeAIDS('N', 1, 1, 2.4, 2.)))

    gr_src.add_edge(Edge(0, 3, LabelEdge(0)))
    gr_src.add_edge(Edge(1, 3, LabelEdge(0)))
    gr_src.add_edge(Edge(4, 2, LabelEdge(0)))
    gr_src.add_edge(Edge(3, 4, LabelEdge(0)))

    # Remove the highest index first, then the next one down.
    gr_src.remove_node_by_idx(4)
    gr_src.remove_node_by_idx(3)
    print(gr_src)

    gr_trgt.add_node(Node(0, LabelNodeAIDS('H', 1, 1, 2., 2.)))
    gr_trgt.add_node(Node(1, LabelNodeAIDS('Ca', 1, 1, 2., 2.)))
    gr_trgt.add_node(Node(2, LabelNodeAIDS('C', 1, 1, 2., 2.)))

    gr_trgt.add_edge(Edge(0, 1, LabelEdge(0)))
    gr_trgt.add_edge(Edge(1, 2, LabelEdge(0)))
    gr_trgt.add_edge(Edge(2, 0, LabelEdge(0)))

    dist = ged.compute_edit_distance(gr_src, gr_trgt)

    # Dump the solver's cost matrices and the distance for debugging.
    print(ged.C.base)
    print(ged.C_star.base)

    expected_dist = 4.7
    print(dist)
    assert round(dist, 2) == expected_dist