def test_clique_merge():
    """
    Run clique merge on the ``cm_nodes.csv`` / ``cm_edges.csv`` fixtures.

    Expects exactly two nodes to carry a ``clique_leader`` attribute, both
    elected via ``PREFIX_PRIORITIZATION``, with the expected identifiers
    present in (or absent from) their ``same_as`` lists.
    """
    fixture_paths = [
        os.path.join(RESOURCE_DIR, "cm_nodes.csv"),
        os.path.join(RESOURCE_DIR, "cm_edges.csv"),
    ]
    transformer = Transformer()
    transformer.transform({"filename": fixture_paths, "format": "csv"})

    # Only the merged graph is inspected here; the clique graph is ignored.
    merged, _ = clique_merge(
        target_graph=transformer.store.graph,
        prefix_prioritization_map=prefix_prioritization_map,
    )

    leader_attrs = NxGraph.get_node_attributes(merged, "clique_leader")
    # Sort the leader identifiers so the two leaders are checked in a
    # deterministic order.
    leader_ids = sorted(leader_attrs.keys())
    assert len(leader_ids) == 2

    first_leader = merged.nodes()[leader_ids[0]]
    assert first_leader["election_strategy"] == "PREFIX_PRIORITIZATION"
    assert "NCBIGene:100302240" in first_leader["same_as"]
    assert "ENSEMBL:ENSG00000284458" in first_leader["same_as"]

    second_leader = merged.nodes()[leader_ids[1]]
    assert second_leader["election_strategy"] == "PREFIX_PRIORITIZATION"
    assert "NCBIGene:8202" in second_leader["same_as"]
    assert "OMIM:601937" in second_leader["same_as"]
    assert "ENSEMBL:ENSG00000124151" not in second_leader["same_as"]
def test_clique_merge():
    """
    Clique merge over the CSV fixtures ``cm_nodes.csv`` and ``cm_edges.csv``.

    Verifies that two clique leaders are annotated on the merged graph, that
    each was elected by ``PREFIX_PRIORITIZATION``, and that their ``same_as``
    lists hold the expected members.
    """
    csv_files = [
        os.path.join(RESOURCE_DIR, name)
        for name in ("cm_nodes.csv", "cm_edges.csv")
    ]
    loader = Transformer()
    loader.transform({"filename": csv_files, "format": "csv"})

    result_graph, _ = clique_merge(
        target_graph=loader.store.graph,
        prefix_prioritization_map=prefix_prioritization_map,
    )

    # Leaders are looked up by sorted identifier so the index-based checks
    # below always address the same node.
    ordered_leaders = sorted(
        NxGraph.get_node_attributes(result_graph, "clique_leader").keys()
    )
    assert len(ordered_leaders) == 2

    leader_a = result_graph.nodes()[ordered_leaders[0]]
    assert leader_a["election_strategy"] == "PREFIX_PRIORITIZATION"
    for member in ("NCBIGene:100302240", "ENSEMBL:ENSG00000284458"):
        assert member in leader_a["same_as"]

    leader_b = result_graph.nodes()[ordered_leaders[1]]
    assert leader_b["election_strategy"] == "PREFIX_PRIORITIZATION"
    for member in ("NCBIGene:8202", "OMIM:601937"):
        assert member in leader_b["same_as"]
    assert "ENSEMBL:ENSG00000124151" not in leader_b["same_as"]
def test_clique_merge8():
    """
    Clique merge where ``same_as`` is given both as a node property and as
    explicit ``biolink:same_as`` edges.

    The graph is expected to collapse to the two nodes ``HGNC:1`` and
    ``HGNC:7`` with no edges left, each listing its merged members in
    ``same_as``.
    """
    ppm = {"biolink:Gene": ["HGNC", "NCBIGene", "ENSEMBL", "OMIM"]}

    # (node id, node attributes) in insertion order.
    node_table = [
        ("HGNC:1", {"category": ["biolink:Gene"]}),
        ("OMIM:2", {"category": ["biolink:Gene"], "same_as": ["HGNC:1"]}),
        ("NCBIGene:3", {"category": ["biolink:NamedThing"]}),
        ("ENSEMBL:4", {"category": ["biolink:Gene"], "same_as": ["HGNC:1"]}),
        ("ENSEMBL:6", {"category": ["biolink:Gene"], "same_as": ["NCBIGene:8"]}),
        ("HGNC:7", {"category": ["biolink:Gene"]}),
        ("NCBIGene:8", {"category": ["biolink:Gene"]}),
    ]
    # (subject, object) pairs joined by a biolink:same_as edge.
    same_as_pairs = [
        ("NCBIGene:3", "HGNC:1"),
        ("ENSEMBL:6", "NCBIGene:8"),
        ("HGNC:7", "NCBIGene:8"),
    ]

    graph = NxGraph()
    for node_id, attributes in node_table:
        graph.add_node(node_id, **attributes)
    for subject, obj in same_as_pairs:
        graph.add_edge(
            subject,
            obj,
            edge_key=generate_edge_key(subject, "biolink:same_as", obj),
            predicate="biolink:same_as",
            relation="owl:equivalentClass",
        )

    merged, _ = clique_merge(target_graph=graph, prefix_prioritization_map=ppm)

    assert merged.number_of_nodes() == 2
    assert merged.number_of_edges() == 0
    assert merged.has_node("HGNC:1")
    assert merged.has_node("HGNC:7")

    hgnc1 = merged.nodes()["HGNC:1"]
    for member in ("OMIM:2", "NCBIGene:3", "ENSEMBL:4"):
        assert member in hgnc1["same_as"]

    hgnc7 = merged.nodes()["HGNC:7"]
    for member in ("ENSEMBL:6", "NCBIGene:8"):
        assert member in hgnc7["same_as"]

    # Every non-leader member must have been removed from the graph.
    for absorbed in ("OMIM:2", "NCBIGene:3", "ENSEMBL:4", "ENSEMBL:6", "NCBIGene:8"):
        assert not merged.has_node(absorbed)
def test_clique_generation():
    """
    Check the clique graph built by clique merge for the CSV fixtures.

    The clique graph returned alongside the merged graph is expected to
    contain exactly two strongly connected components.
    """
    fixture_paths = [
        os.path.join(RESOURCE_DIR, "cm_nodes.csv"),
        os.path.join(RESOURCE_DIR, "cm_edges.csv"),
    ]
    transformer = Transformer()
    transformer.transform({"filename": fixture_paths, "format": "csv"})

    # Only the clique graph matters for this test.
    _, cliques_graph = clique_merge(
        target_graph=transformer.store.graph,
        prefix_prioritization_map=prefix_prioritization_map,
    )

    components = list(nx.strongly_connected_components(cliques_graph))
    assert len(components) == 2
def test_clique_merge_edge_consolidation():
    """
    Clique merge with edge consolidation on the ``cm_test2`` TSV fixtures.

    Besides the leader checks (two leaders, both elected by
    ``LEADER_ANNOTATION``), the test counts the incoming and outgoing edges
    of ``HGNC:7670`` in the merged graph.
    """
    tsv_files = [
        os.path.join(RESOURCE_DIR, name)
        for name in ("cm_test2_nodes.tsv", "cm_test2_edges.tsv")
    ]
    loader = Transformer()
    loader.transform({"filename": tsv_files, "format": "tsv"})

    result_graph, _ = clique_merge(
        target_graph=loader.store.graph,
        prefix_prioritization_map=prefix_prioritization_map,
    )

    # Sorted so that index 0 / 1 always refer to the same leaders.
    ordered_leaders = sorted(
        NxGraph.get_node_attributes(result_graph, "clique_leader").keys()
    )
    assert len(ordered_leaders) == 2

    leader_a = result_graph.nodes()[ordered_leaders[0]]
    assert leader_a["election_strategy"] == "LEADER_ANNOTATION"
    for member in ("NCBIGene:100302240", "ENSEMBL:ENSG00000284458"):
        assert member in leader_a["same_as"]

    leader_b = result_graph.nodes()[ordered_leaders[1]]
    assert leader_b["election_strategy"] == "LEADER_ANNOTATION"
    for member in ("NCBIGene:8202", "OMIM:601937"):
        assert member in leader_b["same_as"]
    assert "ENSEMBL:ENSG00000124151" not in leader_b["same_as"]

    # Edge counts on HGNC:7670 after consolidation.
    incoming = result_graph.in_edges("HGNC:7670", data=True)
    assert len(incoming) == 3
    outgoing = result_graph.out_edges("HGNC:7670", data=True)
    assert len(outgoing) == 6
def test_clique_merge_edge_consolidation():
    """
    Run clique merge on ``cm_test2_nodes.tsv`` / ``cm_test2_edges.tsv`` and
    check edge consolidation.

    Two ``LEADER_ANNOTATION``-elected leaders are expected, and ``HGNC:7670``
    must end up with 3 incoming and 6 outgoing edges.
    """
    fixture_paths = [
        os.path.join(RESOURCE_DIR, "cm_test2_nodes.tsv"),
        os.path.join(RESOURCE_DIR, "cm_test2_edges.tsv"),
    ]
    transformer = Transformer()
    transformer.transform({"filename": fixture_paths, "format": "tsv"})

    # The clique graph is not inspected by this test.
    merged, _ = clique_merge(
        target_graph=transformer.store.graph,
        prefix_prioritization_map=prefix_prioritization_map,
    )

    leader_attrs = NxGraph.get_node_attributes(merged, "clique_leader")
    leader_ids = sorted(leader_attrs.keys())
    assert len(leader_ids) == 2

    first_leader = merged.nodes()[leader_ids[0]]
    assert first_leader["election_strategy"] == "LEADER_ANNOTATION"
    assert "NCBIGene:100302240" in first_leader["same_as"]
    assert "ENSEMBL:ENSG00000284458" in first_leader["same_as"]

    second_leader = merged.nodes()[leader_ids[1]]
    assert second_leader["election_strategy"] == "LEADER_ANNOTATION"
    assert "NCBIGene:8202" in second_leader["same_as"]
    assert "OMIM:601937" in second_leader["same_as"]
    assert "ENSEMBL:ENSG00000124151" not in second_leader["same_as"]

    assert len(merged.in_edges("HGNC:7670", data=True)) == 3
    assert len(merged.out_edges("HGNC:7670", data=True)) == 6
def test_clique_merge1():
    """
    Clique merge where every node in each clique has category
    ``biolink:Gene``.

    Two cliques are wired up with ``biolink:same_as`` edges; after merging,
    only the leaders ``HGNC:1`` and ``HGNC:7`` should remain, with no edges
    and with all other members recorded in ``same_as``.
    """
    ppm = {"biolink:Gene": ["HGNC", "NCBIGene", "ENSEMBL", "OMIM"]}

    gene_ids = (
        "HGNC:1",
        "OMIM:2",
        "NCBIGene:3",
        "ENSEMBL:4",
        "ENSEMBL:6",
        "HGNC:7",
        "NCBIGene:8",
    )
    # (subject, object) pairs joined by a biolink:same_as edge.
    same_as_pairs = (
        ("ENSEMBL:4", "HGNC:1"),
        ("NCBIGene:3", "HGNC:1"),
        ("OMIM:2", "HGNC:1"),
        ("ENSEMBL:6", "NCBIGene:8"),
        ("HGNC:7", "NCBIGene:8"),
    )

    graph = NxGraph()
    for gene_id in gene_ids:
        # A fresh category list per node, as in separate add_node calls.
        graph.add_node(gene_id, category=["biolink:Gene"])
    for subject, obj in same_as_pairs:
        graph.add_edge(
            subject,
            obj,
            edge_key=generate_edge_key(subject, "biolink:same_as", obj),
            predicate="biolink:same_as",
            relation="owl:equivalentClass",
        )

    merged, _ = clique_merge(target_graph=graph, prefix_prioritization_map=ppm)
    print_graph(merged)

    assert merged.number_of_nodes() == 2
    assert merged.number_of_edges() == 0
    assert merged.has_node("HGNC:1")
    assert merged.has_node("HGNC:7")

    hgnc1 = merged.nodes()["HGNC:1"]
    for member in ("OMIM:2", "NCBIGene:3", "ENSEMBL:4"):
        assert member in hgnc1["same_as"]

    hgnc7 = merged.nodes()["HGNC:7"]
    for member in ("ENSEMBL:6", "NCBIGene:8"):
        assert member in hgnc7["same_as"]

    # Non-leader members must no longer exist as nodes.
    for absorbed in ("OMIM:2", "NCBIGene:3", "ENSEMBL:4", "ENSEMBL:6", "NCBIGene:8"):
        assert not merged.has_node(absorbed)
def test_clique_merge9():
    """
    Clique merge where ``same_as`` appears as both node property and edge,
    and a node of a different category (``OMIM:2``, ``biolink:Disease``)
    also carries a ``same_as`` property and is the object of a ``same_as``
    edge.

    ``OMIM:2`` is expected to stay in the graph and to be excluded from the
    ``same_as`` list of ``HGNC:1``; the merged graph should have 4 nodes and
    1 edge.
    """
    ppm = {"biolink:Gene": ["HGNC", "NCBIGene", "ENSEMBL", "OMIM"]}

    # (node id, node attributes) in insertion order.
    node_table = [
        ("HGNC:1", {"category": ["biolink:Gene"]}),
        ("OMIM:2", {"category": ["biolink:Disease"], "same_as": ["HGNC:1"]}),
        ("NCBIGene:3", {"category": ["biolink:NamedThing"]}),
        ("ENSEMBL:4", {"category": ["biolink:Gene"], "same_as": ["HGNC:1"]}),
        ("ENSEMBL:6", {"category": ["biolink:Gene"], "same_as": ["NCBIGene:8"]}),
        ("HGNC:7", {"category": ["biolink:Gene"]}),
        ("NCBIGene:8", {"category": ["biolink:Gene"]}),
    ]
    # (subject, object) pairs joined by a biolink:same_as edge. X:00001 is
    # never added through add_node; it only appears as an edge subject.
    same_as_pairs = [
        ("X:00001", "OMIM:2"),
        ("NCBIGene:3", "HGNC:1"),
        ("ENSEMBL:6", "NCBIGene:8"),
        ("HGNC:7", "NCBIGene:8"),
    ]

    graph = NxGraph()
    for node_id, attributes in node_table:
        graph.add_node(node_id, **attributes)
    for subject, obj in same_as_pairs:
        graph.add_edge(
            subject,
            obj,
            edge_key=generate_edge_key(subject, "biolink:same_as", obj),
            predicate="biolink:same_as",
            relation="owl:equivalentClass",
        )

    merged, _ = clique_merge(target_graph=graph, prefix_prioritization_map=ppm)

    assert merged.number_of_nodes() == 4
    assert merged.number_of_edges() == 1
    assert merged.has_node("HGNC:1")
    assert merged.has_node("HGNC:7")

    hgnc1 = merged.nodes()["HGNC:1"]
    assert "OMIM:2" not in hgnc1["same_as"]
    assert "NCBIGene:3" in hgnc1["same_as"]
    assert "ENSEMBL:4" in hgnc1["same_as"]

    hgnc7 = merged.nodes()["HGNC:7"]
    assert "ENSEMBL:6" in hgnc7["same_as"]
    assert "NCBIGene:8" in hgnc7["same_as"]

    assert merged.has_node("OMIM:2")
def test_clique_merge7():
    """
    Clique merge where each clique contains one node whose category
    (``biolink:Disease``) is disjoint from the other members.

    In this fixture the disjoint nodes ``OMIM:2`` and ``HGNC:7`` are linked
    to their cliques by ``biolink:same_as`` edges. They are expected to
    survive as separate nodes, leaving 4 nodes and 2 edges, with ``HGNC:1``
    and ``NCBIGene:8`` holding the merged members in ``same_as``.
    """
    ppm = {"biolink:Gene": ["HGNC", "NCBIGene", "ENSEMBL", "OMIM"]}

    # (node id, single category) in insertion order.
    node_categories = [
        ("HGNC:1", "biolink:Gene"),
        ("OMIM:2", "biolink:Disease"),
        ("NCBIGene:3", "biolink:NamedThing"),
        ("ENSEMBL:4", "biolink:Gene"),
        ("ENSEMBL:6", "biolink:Gene"),
        ("HGNC:7", "biolink:Disease"),
        ("NCBIGene:8", "biolink:Gene"),
    ]
    # (subject, object) pairs joined by a biolink:same_as edge.
    same_as_pairs = [
        ("ENSEMBL:4", "HGNC:1"),
        ("NCBIGene:3", "HGNC:1"),
        ("OMIM:2", "HGNC:1"),
        ("ENSEMBL:6", "NCBIGene:8"),
        ("HGNC:7", "NCBIGene:8"),
    ]

    graph = NxGraph()
    for node_id, category in node_categories:
        graph.add_node(node_id, category=[category])
    for subject, obj in same_as_pairs:
        graph.add_edge(
            subject,
            obj,
            edge_key=generate_edge_key(subject, "biolink:same_as", obj),
            predicate="biolink:same_as",
            relation="owl:equivalentClass",
        )

    merged, _ = clique_merge(target_graph=graph, prefix_prioritization_map=ppm)

    assert merged.number_of_nodes() == 4
    assert merged.number_of_edges() == 2
    assert merged.has_node("HGNC:1")
    assert merged.has_node("NCBIGene:8")

    hgnc1 = merged.nodes()["HGNC:1"]
    assert "NCBIGene:3" in hgnc1["same_as"]
    assert "ENSEMBL:4" in hgnc1["same_as"]
    assert "OMIM:2" not in hgnc1["same_as"]

    ncbi8 = merged.nodes()["NCBIGene:8"]
    assert "ENSEMBL:6" in ncbi8["same_as"]

    assert merged.has_node("OMIM:2")
    assert not merged.has_node("NCBIGene:3")
    assert not merged.has_node("ENSEMBL:4")
    assert merged.has_node("HGNC:7")
def test_clique_merge7():
    """
    Clique merge with one category-disjoint node per clique.

    ``OMIM:2`` and ``HGNC:7`` are ``biolink:Disease`` nodes attached by
    ``biolink:same_as`` edges to cliques of ``biolink:Gene`` nodes. The test
    expects them to remain in the merged graph (4 nodes, 2 edges) and to be
    left out of the leaders' ``same_as`` lists.
    """
    ppm = {"biolink:Gene": ["HGNC", "NCBIGene", "ENSEMBL", "OMIM"]}
    same_as_attrs = {
        "predicate": "biolink:same_as",
        "relation": "owl:equivalentClass",
    }

    graph = NxGraph()
    graph.add_node("HGNC:1", category=["biolink:Gene"])
    graph.add_node("OMIM:2", category=["biolink:Disease"])
    graph.add_node("NCBIGene:3", category=["biolink:NamedThing"])
    graph.add_node("ENSEMBL:4", category=["biolink:Gene"])
    graph.add_node("ENSEMBL:6", category=["biolink:Gene"])
    graph.add_node("HGNC:7", category=["biolink:Disease"])
    graph.add_node("NCBIGene:8", category=["biolink:Gene"])

    # (subject, object) pairs joined by a biolink:same_as edge.
    for subject, obj in (
        ("ENSEMBL:4", "HGNC:1"),
        ("NCBIGene:3", "HGNC:1"),
        ("OMIM:2", "HGNC:1"),
        ("ENSEMBL:6", "NCBIGene:8"),
        ("HGNC:7", "NCBIGene:8"),
    ):
        key = generate_edge_key(subject, "biolink:same_as", obj)
        # Unpacking hands add_edge the same keyword arguments on every call
        # without sharing the dict object itself.
        graph.add_edge(subject, obj, edge_key=key, **same_as_attrs)

    merged, _ = clique_merge(target_graph=graph, prefix_prioritization_map=ppm)

    assert merged.number_of_nodes() == 4
    assert merged.number_of_edges() == 2
    for leader in ("HGNC:1", "NCBIGene:8"):
        assert merged.has_node(leader)

    hgnc1 = merged.nodes()["HGNC:1"]
    for member in ("NCBIGene:3", "ENSEMBL:4"):
        assert member in hgnc1["same_as"]
    assert "OMIM:2" not in hgnc1["same_as"]

    ncbi8 = merged.nodes()["NCBIGene:8"]
    assert "ENSEMBL:6" in ncbi8["same_as"]

    assert merged.has_node("OMIM:2")
    assert not merged.has_node("NCBIGene:3")
    assert not merged.has_node("ENSEMBL:4")
    assert merged.has_node("HGNC:7")