def test_mutagenicity_with_deleted_nodes(mutagenicity_graphs, dataframe_mutagenicity, graph_name_source_target):
    """Edit distance of a padded-then-pruned copy must match the reference value.

    The source graph is copied into a graph allocated with two extra node
    slots, two extra nodes ('C' and 'N') with randomly attached edges are
    added, and both extra nodes are removed again. The distance from that
    copy to the target graph is then compared with the value stored in
    ``dataframe_mutagenicity``.

    Args:
        mutagenicity_graphs: iterable of graphs exposing a ``name`` attribute.
        dataframe_mutagenicity: reference distances, indexed with
            ``.loc['mutagen/<source>', 'mutagen/<target>']``.
        graph_name_source_target: pair ``(source_name, target_name)``.
    """
    name_src, name_trgt = graph_name_source_target
    gr_name_src, gr_name_trgt = ['mutagen/' + name for name in graph_name_source_target]

    graph_source = [gr for gr in mutagenicity_graphs if gr.name == name_src][0]
    graph_target = [gr for gr in mutagenicity_graphs if gr.name == name_trgt][0]

    cst_cost_node, cst_cost_edge = 11.0, 1.1
    edit_cost = EditCostMutagenicity(cst_cost_node, cst_cost_node,
                                     cst_cost_edge, cst_cost_edge,
                                     'dirac')
    ged = GED(edit_cost)

    # The source graph itself is never modified, so its size is constant.
    n_src = len(graph_source)

    # Reproduce the source graph with room for two more nodes.
    new_gr_src = Graph(gr_name_src, 'gr.xls', n_src + 2)
    for node in graph_source.nodes:
        new_gr_src.add_node(node)
    for _, edges in graph_source.get_edges().items():
        for edge in edges:
            # Empty slots in the edge container are skipped.
            if edge is not None:
                new_gr_src.add_edge(edge)

    # Two extra nodes appended after the original ones.
    new_gr_src.add_node(Node(n_src, LabelNodeMutagenicity('C')))
    new_gr_src.add_node(Node(n_src + 1, LabelNodeMutagenicity('N')))

    # Attach the extra nodes with random edges: the first one only to
    # original nodes, the second one to original nodes or the first extra one.
    for _ in range(4):
        new_gr_src.add_edge(Edge(n_src, random.randint(0, n_src - 1), LabelEdge(0)))
    for _ in range(6):
        new_gr_src.add_edge(Edge(n_src + 1, random.randint(0, n_src), LabelEdge(0)))

    # Remove both extra nodes again; the same index is used twice
    # (presumably the second extra node moves down after the first removal
    # -- TODO confirm against Graph.remove_node_by_idx).
    new_gr_src.remove_node_by_idx(n_src)
    new_gr_src.remove_node_by_idx(n_src)

    results = ged.compute_edit_distance(new_gr_src, graph_target)
    expected = dataframe_mutagenicity.loc[gr_name_src, gr_name_trgt]

    print(f'###### diff {results - expected}')
    print(f'{graph_name_source_target}: new dist {results} - old dist {expected}')
    print(f'exp {expected}')
    assert results == expected
def define_graphs():
    """Build a small hand-made source/target graph pair and a matching GED.

    Returns:
        tuple: ``(ged, graph_source, graph_target)`` where ``ged`` uses
        ``EditCostLetter(1., 1., 1., 1., 'manhattan')``, the source graph has
        4 nodes and 4 edges, and the target graph has 3 nodes and 2 edges.
    """
    # Arguments passed to LabelNodeLetter, one entry per node (index = position).
    source_labels = [(1, 0), (2, 0), (1, 0), (3, 0)]
    target_labels = [(3, 0), (2, 0), (2, 0)]
    # Node index pairs passed to Edge; every edge carries LabelEdge(0).
    source_edges = [(0, 1), (1, 2), (1, 3), (2, 3)]
    target_edges = [(0, 1), (1, 2)]

    ged = GED(EditCostLetter(1., 1., 1., 1., 'manhattan'))
    graph_source = Graph('gr_source', 'gr_source.gxl', len(source_labels))
    graph_target = Graph('gr_target', 'gr_targe.gxl', len(target_labels))

    # Fill the source graph first, then the target graph; nodes before edges.
    blueprints = (
        (graph_source, source_labels, source_edges),
        (graph_target, target_labels, target_edges),
    )
    for graph, labels, edges in blueprints:
        for idx, (first, second) in enumerate(labels):
            graph.add_node(Node(idx, LabelNodeLetter(first, second)))
        for idx_start, idx_end in edges:
            graph.add_edge(Edge(idx_start, idx_end, LabelEdge(0)))

    return ged, graph_source, graph_target
def test_NCI1(NCI1_graphs, graph_name_source_target, expected):
    """Check the NCI1 edit distance (``heuristic=True``) against ``expected``.

    Args:
        NCI1_graphs: iterable of graphs exposing a ``name`` attribute.
        graph_name_source_target: pair ``(source_name, target_name)``.
        expected: reference distance, compared after rounding the computed
            value to 5 decimals.
    """
    name_src, name_trgt = graph_name_source_target
    candidates_src = [gr for gr in NCI1_graphs if gr.name == name_src]
    candidates_trgt = [gr for gr in NCI1_graphs if gr.name == name_trgt]
    graph_source = candidates_src[0]
    graph_target = candidates_trgt[0]

    cst_cost_node = cst_cost_edge = 1.0
    edit_cost = EditCostNCI1(cst_cost_node, cst_cost_node,
                             cst_cost_edge, cst_cost_edge,
                             'dirac', alpha=0.9)
    ged = GED(edit_cost)

    results = ged.compute_edit_distance(graph_source, graph_target, heuristic=True)

    print(f'ged : {results}')
    assert round(results, 5) == expected
def test_heuristic_inverse(mutagenicity_graphs, graph_name_source_target):
    """With ``heuristic=True`` the distance must not depend on argument order.

    Args:
        mutagenicity_graphs: iterable of graphs exposing a ``name`` attribute.
        graph_name_source_target: pair ``(source_name, target_name)``.
    """
    name_src, name_trgt = graph_name_source_target
    graph_source = [gr for gr in mutagenicity_graphs if gr.name == name_src][0]
    graph_target = [gr for gr in mutagenicity_graphs if gr.name == name_trgt][0]

    cst_cost_node, cst_cost_edge = 11.0, 1.1
    edit_cost = EditCostMutagenicity(cst_cost_node, cst_cost_node,
                                     cst_cost_edge, cst_cost_edge,
                                     'dirac')
    ged = GED(edit_cost)

    # Same GED instance, both directions.
    results = ged.compute_edit_distance(graph_source, graph_target, heuristic=True)
    results_inv = ged.compute_edit_distance(graph_target, graph_source, heuristic=True)

    print(f'result: {results}')
    print(f'result_inv: {results_inv}')
    assert results == results_inv
def test_aids(aids_graphs, dataframe_aids, graph_name_source, graph_name_target, gr_name_src, gr_name_trgt):
    """Compare the AIDS edit distance against the value in ``dataframe_aids``.

    Args:
        aids_graphs: iterable of graphs exposing a ``name`` attribute.
        dataframe_aids: reference distances, indexed with
            ``.loc[gr_name_src, gr_name_trgt]``.
        graph_name_source: name of the source graph in ``aids_graphs``.
        graph_name_target: name of the target graph in ``aids_graphs``.
        gr_name_src: row label of the source graph in ``dataframe_aids``.
        gr_name_trgt: column label of the target graph in ``dataframe_aids``.
    """
    graph_source = [graph for graph in aids_graphs if graph.name == graph_name_source][0]
    graph_target = [graph for graph in aids_graphs if graph.name == graph_name_target][0]

    cst_cost_node = 1.1
    cst_cost_edge = 0.1
    ged = GED(EditCostAIDS(cst_cost_node, cst_cost_node, cst_cost_edge, cst_cost_edge, 'dirac'))

    results = ged.compute_edit_distance(graph_source, graph_target)
    expected = dataframe_aids.loc[gr_name_src, gr_name_trgt]

    # Cost matrices are printed for inspection; pytest shows them on failure.
    print(ged.C.base)
    print(ged.C_star.base)

    # The former trailing ``assert False`` (a debugging aid that forced the
    # captured output to be displayed) made this test fail unconditionally
    # and has been removed.
    assert results == expected
def test_mutagenicity_alpha(mutagenicity_graphs, graph_name_source_target):
    """Distance computed with ``alpha=0.5`` must equal half the default distance.

    Args:
        mutagenicity_graphs: iterable of graphs exposing a ``name`` attribute.
        graph_name_source_target: pair ``(source_name, target_name)``.
    """
    name_src, name_trgt = graph_name_source_target
    graph_source = [gr for gr in mutagenicity_graphs if gr.name == name_src][0]
    graph_target = [gr for gr in mutagenicity_graphs if gr.name == name_trgt][0]

    cst_cost_node, cst_cost_edge = 11.0, 1.1
    cost_args = (cst_cost_node, cst_cost_node, cst_cost_edge, cst_cost_edge, 'dirac')

    # Baseline (no alpha given) and alpha-weighted GED instances.
    ged = GED(EditCostMutagenicity(*cost_args))
    ged_alpha = GED(EditCostMutagenicity(*cost_args, alpha=0.5))

    results = ged_alpha.compute_edit_distance(graph_source, graph_target)
    baseline = ged.compute_edit_distance(graph_source, graph_target)
    expected = baseline / 2.

    assert results == expected
def test_aids_alpha(aids_graphs, graph_name_source, graph_name_target):
    """Distance computed with ``alpha=0.5`` must equal half the default distance.

    Args:
        aids_graphs: iterable of graphs exposing a ``name`` attribute.
        graph_name_source: name of the source graph.
        graph_name_target: name of the target graph.
    """
    graph_source = [gr for gr in aids_graphs if gr.name == graph_name_source][0]
    graph_target = [gr for gr in aids_graphs if gr.name == graph_name_target][0]

    cst_cost_node, cst_cost_edge = 1.1, 0.1
    cost_args = (cst_cost_node, cst_cost_node, cst_cost_edge, cst_cost_edge, 'dirac')

    # Baseline (no alpha given) and alpha-weighted GED instances.
    ged = GED(EditCostAIDS(*cost_args))
    ged_alpha = GED(EditCostAIDS(*cost_args, alpha=0.5))

    results = ged_alpha.compute_edit_distance(graph_source, graph_target)
    baseline = ged.compute_edit_distance(graph_source, graph_target)
    expected = baseline / 2.

    assert results == expected
def test_with_verified_data(letter_graphs, dataframe_letter, graph_source_target, accuracy):
    """Compare the Letter edit distance against the value in ``dataframe_letter``.

    Args:
        letter_graphs: iterable of graphs exposing a ``name`` attribute.
        dataframe_letter: reference distances, indexed with
            ``.loc['<c>/<name>', '<c>/<name>']`` where ``<c>`` is the first
            character of the graph name.
        graph_source_target: pair of graph names ``(source, target)``.
        accuracy: maximum tolerated absolute difference between the computed
            and the reference distance.
    """
    gr_name_src, gr_name_trgt = [name[0] + '/' + name for name in graph_source_target]
    # NOTE(review): the two graphs are taken in ``letter_graphs`` order, not
    # in ``graph_source_target`` order, and two distinct names are required
    # for the unpacking to succeed -- confirm this matches the lookup below.
    graph_source, graph_target = [graph for graph in letter_graphs if graph.name in graph_source_target]

    cst_cost_node = 0.9
    cst_cost_edge = 2.3
    ged = GED(EditCostLetter(cst_cost_node, cst_cost_node, cst_cost_edge, cst_cost_edge, 'euclidean'))

    results = ged.compute_edit_distance(graph_source, graph_target)
    expected = dataframe_letter.loc[gr_name_src, gr_name_trgt]

    np.set_printoptions(precision=2)
    print(np.asarray(ged.C))
    print(np.asarray(ged.C_star))
    print(f'res {results}')
    print(f'exp {expected}')
    print(f'###### diff {results - expected}')

    # Use the absolute difference: the signed difference would let any
    # result far *below* the reference pass the tolerance check.
    assert abs(results - expected) < accuracy
def test_mutagenicity(mutagenicity_graphs, dataframe_mutagenicity, graph_name_source_target):
    """Compare the Mutagenicity edit distance against the reference dataframe.

    Args:
        mutagenicity_graphs: iterable of graphs exposing a ``name`` attribute.
        dataframe_mutagenicity: reference distances, indexed with
            ``.loc['mutagen/<source>', 'mutagen/<target>']``.
        graph_name_source_target: pair ``(source_name, target_name)``.
    """
    name_src, name_trgt = graph_name_source_target
    gr_name_src, gr_name_trgt = ['mutagen/' + name for name in graph_name_source_target]

    graph_source = [gr for gr in mutagenicity_graphs if gr.name == name_src][0]
    graph_target = [gr for gr in mutagenicity_graphs if gr.name == name_trgt][0]

    cst_cost_node, cst_cost_edge = 11.0, 1.1
    edit_cost = EditCostMutagenicity(cst_cost_node, cst_cost_node,
                                     cst_cost_edge, cst_cost_edge,
                                     'dirac')
    ged = GED(edit_cost)

    results = ged.compute_edit_distance(graph_source, graph_target)
    expected = dataframe_mutagenicity.loc[gr_name_src, gr_name_trgt]

    assert results == expected
def test_simple_alpha(define_graphs):
    """Check cost matrices and distance on the hand-made graphs with ``alpha=0.5``.

    The expected ``C`` and ``C_star`` matrices are the hard-coded reference
    matrices divided by 2, and the expected distance is 2.

    Args:
        define_graphs: fixture value whose last two items are
            ``graph_source`` and ``graph_target``.
    """
    # Take the last two items so this works whether the fixture provides
    # (graph_source, graph_target) or (ged, graph_source, graph_target);
    # a plain two-name unpacking raises ValueError on the 3-tuple form.
    *_, graph_source, graph_target = define_graphs

    edit_cost_alpha = EditCostLetter(1., 1., 1., 1., 'euclidean', alpha=0.5)
    ged_alpha = GED(edit_cost_alpha)

    cost_alpha = ged_alpha.compute_edit_distance(graph_source, graph_target)

    expected_cost_alpha = 2.
    inf = np.inf
    expected_C = np.array([
        [2., 1., 1., 1., inf, inf, inf],
        [1., 0., 0., inf, 1., inf, inf],
        [2., 1., 1., inf, inf, 1., inf],
        [0., 1., 1., inf, inf, inf, 1.],
        [1., inf, inf, 0., 0., 0., 0.],
        [inf, 1., inf, 0., 0., 0., 0.],
        [inf, inf, 1., 0., 0., 0., 0.],
    ])
    expected_C_alpha = expected_C / 2

    expected_C_star = np.array([
        [2., 2., 1., 2., inf, inf, inf],
        [3., 1., 2., inf, 4., inf, inf],
        [3., 1., 2., inf, inf, 3., inf],
        [1., 1., 2., inf, inf, inf, 3.],
        [2., inf, inf, 0., 0., 0., 0.],
        [inf, 3., inf, 0., 0., 0., 0.],
        [inf, inf, 2., 0., 0., 0., 0.],
    ])
    expected_C_star_alpha = expected_C_star / 2

    print(ged_alpha.C.base)
    assert np.array_equal(np.asarray(ged_alpha.C), expected_C_alpha)
    assert np.array_equal(np.asarray(ged_alpha.C_star), expected_C_star_alpha)
    assert cost_alpha == expected_cost_alpha
def test_letter_alpha_0_5(graphs, graph_source_target, accuracy):
    """Distance with ``alpha=0.5`` must be half the default distance, within ``accuracy``.

    Args:
        graphs: iterable of graphs exposing a ``name`` attribute.
        graph_source_target: pair of graph names ``(source, target)``.
        accuracy: maximum tolerated absolute difference between the
            alpha-weighted distance and half the baseline distance.
    """
    # NOTE(review): the two graphs are taken in ``graphs`` order, not in
    # ``graph_source_target`` order, and two distinct names are required
    # for the unpacking to succeed.
    graph_source, graph_target = [
        graph for graph in graphs if graph.name in graph_source_target
    ]

    cst_cost_node = 0.9
    cst_cost_edge = 2.3
    edit_cost = EditCostLetter(cst_cost_node, cst_cost_node,
                               cst_cost_edge, cst_cost_edge,
                               'euclidean')
    edit_cost_alpha = EditCostLetter(cst_cost_node, cst_cost_node,
                                     cst_cost_edge, cst_cost_edge,
                                     'euclidean', alpha=0.5)
    ged = GED(edit_cost)
    ged_alpha = GED(edit_cost_alpha)

    results = ged_alpha.compute_edit_distance(graph_source, graph_target)
    expected = ged.compute_edit_distance(graph_source, graph_target) / 2.

    # Use the absolute difference: the signed difference would let any
    # result far *below* the expected value pass the tolerance check.
    assert abs(results - expected) < accuracy
def test_alpha_0_25(define_graphs):
    """Check cost matrices and distance on the hand-made graphs with ``alpha=0.25``.

    The expected ``C`` matrix is the hard-coded reference matrix divided by
    4, ``C_star`` is compared with an explicit reference matrix, and the
    expected distance is 2.

    Args:
        define_graphs: fixture value whose last two items are
            ``graph_source`` and ``graph_target``.
    """
    # Take the last two items so this works whether the fixture provides
    # (graph_source, graph_target) or (ged, graph_source, graph_target);
    # a plain two-name unpacking raises ValueError on the 3-tuple form.
    *_, graph_source, graph_target = define_graphs

    # Only the alpha-weighted GED is needed: the previous baseline
    # (no-alpha) computation was never read by any assertion.
    edit_cost_alpha = EditCostLetter(1., 1., 1., 1., 'euclidean', alpha=0.25)
    ged_alpha = GED(edit_cost_alpha)

    cost_alpha = ged_alpha.compute_edit_distance(graph_source, graph_target)

    expected_cost_alpha = 2.
    inf = np.inf
    expected_C = np.array([
        [2., 1., 1., 1., inf, inf, inf],
        [1., 0., 0., inf, 1., inf, inf],
        [2., 1., 1., inf, inf, 1., inf],
        [0., 1., 1., inf, inf, inf, 1.],
        [1., inf, inf, 0., 0., 0., 0.],
        [inf, 1., inf, 0., 0., 0., 0.],
        [inf, inf, 1., 0., 0., 0., 0.],
    ])
    expected_C_alpha = expected_C / 4

    expected_C_star_alpha = np.array([
        [0.5, 1., 0.25, 1., inf, inf, inf],
        [1.75, 0.75, 1.5, inf, 2.5, inf, inf],
        [1.25, 0.25, 1., inf, inf, 1.75, inf],
        [0.75, 0.25, 1., inf, inf, inf, 1.75],
        [1., inf, inf, 0., 0., 0., 0.],
        [inf, 1.75, inf, 0., 0., 0., 0.],
        [inf, inf, 1., 0., 0., 0., 0.],
    ])

    print(ged_alpha.C_star.base)
    assert np.array_equal(np.asarray(ged_alpha.C), expected_C_alpha)
    assert np.array_equal(np.asarray(ged_alpha.C_star), expected_C_star_alpha)
    assert cost_alpha == expected_cost_alpha
def ged():
    """Return a ``GED`` built on ``EditCostAIDS(1.1, 1.1, 0.1, 0.1, 'dirac')``."""
    return GED(EditCostAIDS(1.1, 1.1, 0.1, 0.1, 'dirac'))