def test_clique_merge():
    """
    Run clique merge on the ``cm_nodes.csv`` / ``cm_edges.csv`` fixtures and
    check the resulting clique leaders.

    Exactly two nodes are expected to carry a ``clique_leader`` attribute.
    Taken in sorted identifier order, each must report the
    ``PREFIX_PRIORITIZATION`` election strategy and list the expected
    identifiers under ``same_as``.
    """
    fixture_names = ("cm_nodes.csv", "cm_edges.csv")
    input_args = {
        "filename": [os.path.join(RESOURCE_DIR, name) for name in fixture_names],
        "format": "csv",
    }
    t = Transformer()
    t.transform(input_args)
    updated_graph, _ = clique_merge(
        target_graph=t.store.graph,
        prefix_prioritization_map=prefix_prioritization_map,
    )

    # Sort the leader identifiers so the positional checks below are stable.
    leader_attrs = NxGraph.get_node_attributes(updated_graph, "clique_leader")
    leader_ids = sorted(leader_attrs.keys())
    assert len(leader_ids) == 2

    first_leader = updated_graph.nodes()[leader_ids[0]]
    assert first_leader["election_strategy"] == "PREFIX_PRIORITIZATION"
    for member in ("NCBIGene:100302240", "ENSEMBL:ENSG00000284458"):
        assert member in first_leader["same_as"]

    second_leader = updated_graph.nodes()[leader_ids[1]]
    assert second_leader["election_strategy"] == "PREFIX_PRIORITIZATION"
    for member in ("NCBIGene:8202", "OMIM:601937"):
        assert member in second_leader["same_as"]
    assert "ENSEMBL:ENSG00000124151" not in second_leader["same_as"]
def test_clique_merge():
    """
    Clique merge over the CSV fixtures ``cm_nodes.csv`` and ``cm_edges.csv``.

    Verifies that two clique leaders are produced, that both were elected
    via ``PREFIX_PRIORITIZATION``, and that their ``same_as`` values contain
    (or exclude) the expected identifiers.
    """
    nodes_file = os.path.join(RESOURCE_DIR, 'cm_nodes.csv')
    edges_file = os.path.join(RESOURCE_DIR, 'cm_edges.csv')
    input_args = {'filename': [nodes_file, edges_file], 'format': 'csv'}

    t = Transformer()
    t.transform(input_args)
    merge_result = clique_merge(
        target_graph=t.store.graph,
        prefix_prioritization_map=prefix_prioritization_map)
    updated_graph, clique_graph = merge_result

    leaders = NxGraph.get_node_attributes(updated_graph, 'clique_leader')
    leader_list = sorted(leaders.keys())
    assert len(leader_list) == 2

    # First leader (lowest identifier after sorting).
    leader_a = updated_graph.nodes()[leader_list[0]]
    assert leader_a['election_strategy'] == 'PREFIX_PRIORITIZATION'
    assert 'NCBIGene:100302240' in leader_a['same_as']
    assert 'ENSEMBL:ENSG00000284458' in leader_a['same_as']

    # Second leader.
    leader_b = updated_graph.nodes()[leader_list[1]]
    assert leader_b['election_strategy'] == 'PREFIX_PRIORITIZATION'
    assert 'NCBIGene:8202' in leader_b['same_as']
    assert 'OMIM:601937' in leader_b['same_as']
    assert 'ENSEMBL:ENSG00000124151' not in leader_b['same_as']
Esempio n. 3
0
def test_clique_merge8():
    """
    Clique merge where ``same_as`` is supplied both as a node property and
    as ``biolink:same_as`` edges.

    The graph holds seven nodes; after merging only ``HGNC:1`` and
    ``HGNC:7`` are expected to remain, with no edges, and with every other
    identifier recorded in the ``same_as`` of one of those two nodes.
    """
    ppm = {"biolink:Gene": ["HGNC", "NCBIGene", "ENSEMBL", "OMIM"]}
    g1 = NxGraph()

    # First group of nodes; some declare same_as directly on the node.
    g1.add_node("HGNC:1", category=["biolink:Gene"])
    g1.add_node("OMIM:2", category=["biolink:Gene"], same_as=["HGNC:1"])
    g1.add_node("NCBIGene:3", category=["biolink:NamedThing"])
    g1.add_node("ENSEMBL:4", category=["biolink:Gene"], same_as=["HGNC:1"])

    # Second group of nodes.
    g1.add_node("ENSEMBL:6", category=["biolink:Gene"], same_as=["NCBIGene:8"])
    g1.add_node("HGNC:7", category=["biolink:Gene"])
    g1.add_node("NCBIGene:8", category=["biolink:Gene"])

    # same_as relationships expressed as edges.
    same_as_pairs = (
        ("NCBIGene:3", "HGNC:1"),
        ("ENSEMBL:6", "NCBIGene:8"),
        ("HGNC:7", "NCBIGene:8"),
    )
    for subject_id, object_id in same_as_pairs:
        g1.add_edge(
            subject_id,
            object_id,
            edge_key=generate_edge_key(subject_id, "biolink:same_as", object_id),
            predicate="biolink:same_as",
            relation="owl:equivalentClass",
        )

    updated_graph, _ = clique_merge(target_graph=g1, prefix_prioritization_map=ppm)
    assert updated_graph.number_of_nodes() == 2
    assert updated_graph.number_of_edges() == 0
    for leader_id in ("HGNC:1", "HGNC:7"):
        assert updated_graph.has_node(leader_id)

    first_leader = updated_graph.nodes()["HGNC:1"]
    for member in ("OMIM:2", "NCBIGene:3", "ENSEMBL:4"):
        assert member in first_leader["same_as"]

    second_leader = updated_graph.nodes()["HGNC:7"]
    for member in ("ENSEMBL:6", "NCBIGene:8"):
        assert member in second_leader["same_as"]

    # Every non-leader node must have been folded into its leader.
    for merged_id in ("OMIM:2", "NCBIGene:3", "ENSEMBL:4", "ENSEMBL:6", "NCBIGene:8"):
        assert not updated_graph.has_node(merged_id)
def test_clique_generation():
    """
    Check how many cliques are generated from the CSV fixtures.

    The clique graph returned by ``clique_merge`` is expected to contain
    exactly two strongly connected components.
    """
    fixture_names = ("cm_nodes.csv", "cm_edges.csv")
    input_args = {
        "filename": [os.path.join(RESOURCE_DIR, name) for name in fixture_names],
        "format": "csv",
    }
    t = Transformer()
    t.transform(input_args)
    _, clique_graph = clique_merge(
        target_graph=t.store.graph,
        prefix_prioritization_map=prefix_prioritization_map,
    )
    components = nx.strongly_connected_components(clique_graph)
    assert len(list(components)) == 2
def test_clique_merge_edge_consolidation():
    """
    Clique merge over the ``cm_test2`` TSV fixtures, including a check on
    the edges attached to ``HGNC:7670`` after merging.

    Two clique leaders are expected, both elected via ``LEADER_ANNOTATION``.
    ``HGNC:7670`` is expected to end up with 3 incoming and 6 outgoing edges.
    """
    nodes_file = os.path.join(RESOURCE_DIR, 'cm_test2_nodes.tsv')
    edges_file = os.path.join(RESOURCE_DIR, 'cm_test2_edges.tsv')
    input_args = {'filename': [nodes_file, edges_file], 'format': 'tsv'}

    t = Transformer()
    t.transform(input_args)
    updated_graph, _ = clique_merge(
        target_graph=t.store.graph,
        prefix_prioritization_map=prefix_prioritization_map)

    leader_attrs = NxGraph.get_node_attributes(updated_graph, 'clique_leader')
    leader_ids = sorted(leader_attrs.keys())
    assert len(leader_ids) == 2

    first_leader = updated_graph.nodes()[leader_ids[0]]
    assert first_leader['election_strategy'] == 'LEADER_ANNOTATION'
    for member in ('NCBIGene:100302240', 'ENSEMBL:ENSG00000284458'):
        assert member in first_leader['same_as']

    second_leader = updated_graph.nodes()[leader_ids[1]]
    assert second_leader['election_strategy'] == 'LEADER_ANNOTATION'
    for member in ('NCBIGene:8202', 'OMIM:601937'):
        assert member in second_leader['same_as']
    assert 'ENSEMBL:ENSG00000124151' not in second_leader['same_as']

    # Edge counts on HGNC:7670 after the merge.
    incoming = updated_graph.in_edges('HGNC:7670', data=True)
    assert len(incoming) == 3

    outgoing = updated_graph.out_edges('HGNC:7670', data=True)
    assert len(outgoing) == 6
def test_clique_merge_edge_consolidation():
    """
    Run clique merge on ``cm_test2_nodes.tsv`` / ``cm_test2_edges.tsv`` and
    verify leaders plus the edge counts on ``HGNC:7670``.

    Both leaders must report the ``LEADER_ANNOTATION`` election strategy;
    ``HGNC:7670`` must have 3 incoming and 6 outgoing edges afterwards.
    """
    fixture_names = ("cm_test2_nodes.tsv", "cm_test2_edges.tsv")
    input_args = {
        "filename": [os.path.join(RESOURCE_DIR, name) for name in fixture_names],
        "format": "tsv",
    }
    t = Transformer()
    t.transform(input_args)
    merge_result = clique_merge(
        target_graph=t.store.graph,
        prefix_prioritization_map=prefix_prioritization_map,
    )
    updated_graph, clique_graph = merge_result

    leaders = NxGraph.get_node_attributes(updated_graph, "clique_leader")
    leader_list = sorted(leaders.keys())
    assert len(leader_list) == 2

    leader_a = updated_graph.nodes()[leader_list[0]]
    assert leader_a["election_strategy"] == "LEADER_ANNOTATION"
    assert "NCBIGene:100302240" in leader_a["same_as"]
    assert "ENSEMBL:ENSG00000284458" in leader_a["same_as"]

    leader_b = updated_graph.nodes()[leader_list[1]]
    assert leader_b["election_strategy"] == "LEADER_ANNOTATION"
    assert "NCBIGene:8202" in leader_b["same_as"]
    assert "OMIM:601937" in leader_b["same_as"]
    assert "ENSEMBL:ENSG00000124151" not in leader_b["same_as"]

    hub = "HGNC:7670"
    assert len(updated_graph.in_edges(hub, data=True)) == 3
    assert len(updated_graph.out_edges(hub, data=True)) == 6
Esempio n. 7
0
def test_clique_merge1():
    """
    Clique merge where every node in each clique has the category
    ``biolink:Gene``.

    Seven gene nodes are connected by ``biolink:same_as`` edges into two
    groups. After merging, only ``HGNC:1`` and ``HGNC:7`` are expected to
    remain, with no edges, and with the other identifiers of each group
    listed under ``same_as``.
    """
    ppm = {'biolink:Gene': ['HGNC', 'NCBIGene', 'ENSEMBL', 'OMIM']}
    g1 = NxGraph()

    node_ids = (
        'HGNC:1', 'OMIM:2', 'NCBIGene:3', 'ENSEMBL:4',
        'ENSEMBL:6', 'HGNC:7', 'NCBIGene:8',
    )
    for node_id in node_ids:
        # A fresh category list is built for every node.
        g1.add_node(node_id, category=['biolink:Gene'])

    same_as_pairs = (
        ('ENSEMBL:4', 'HGNC:1'),
        ('NCBIGene:3', 'HGNC:1'),
        ('OMIM:2', 'HGNC:1'),
        ('ENSEMBL:6', 'NCBIGene:8'),
        ('HGNC:7', 'NCBIGene:8'),
    )
    for subject_id, object_id in same_as_pairs:
        key = generate_edge_key(subject_id, 'biolink:same_as', object_id)
        g1.add_edge(subject_id,
                    object_id,
                    edge_key=key,
                    predicate='biolink:same_as',
                    relation='owl:equivalentClass')

    updated_graph, _ = clique_merge(target_graph=g1,
                                    prefix_prioritization_map=ppm)
    print_graph(updated_graph)
    assert updated_graph.number_of_nodes() == 2
    assert updated_graph.number_of_edges() == 0
    for leader_id in ('HGNC:1', 'HGNC:7'):
        assert updated_graph.has_node(leader_id)

    first_leader = updated_graph.nodes()['HGNC:1']
    for member in ('OMIM:2', 'NCBIGene:3', 'ENSEMBL:4'):
        assert member in first_leader['same_as']

    second_leader = updated_graph.nodes()['HGNC:7']
    for member in ('ENSEMBL:6', 'NCBIGene:8'):
        assert member in second_leader['same_as']

    # Non-leader nodes must no longer exist in the merged graph.
    for merged_id in ('OMIM:2', 'NCBIGene:3', 'ENSEMBL:4', 'ENSEMBL:6',
                      'NCBIGene:8'):
        assert not updated_graph.has_node(merged_id)
Esempio n. 8
0
def test_clique_merge9():
    """
    Clique merge where ``same_as`` appears both as a node property and as
    edges, and where a node of a different category (``OMIM:2``, a
    ``biolink:Disease``) also has a ``same_as`` property and takes part in
    a ``biolink:same_as`` edge.

    ``OMIM:2`` is expected to survive the merge as its own node and to be
    absent from the ``same_as`` of ``HGNC:1``.
    """
    ppm = {'biolink:Gene': ['HGNC', 'NCBIGene', 'ENSEMBL', 'OMIM']}
    g1 = NxGraph()

    g1.add_node('HGNC:1', category=['biolink:Gene'])
    g1.add_node('OMIM:2', category=['biolink:Disease'], same_as=['HGNC:1'])
    g1.add_node('NCBIGene:3', category=['biolink:NamedThing'])
    g1.add_node('ENSEMBL:4', category=['biolink:Gene'], same_as=['HGNC:1'])

    g1.add_node('ENSEMBL:6', category=['biolink:Gene'], same_as=['NCBIGene:8'])
    g1.add_node('HGNC:7', category=['biolink:Gene'])
    g1.add_node('NCBIGene:8', category=['biolink:Gene'])

    # The first pair starts at 'X:00001', which is never added via add_node.
    same_as_pairs = (
        ('X:00001', 'OMIM:2'),
        ('NCBIGene:3', 'HGNC:1'),
        ('ENSEMBL:6', 'NCBIGene:8'),
        ('HGNC:7', 'NCBIGene:8'),
    )
    for subject_id, object_id in same_as_pairs:
        key = generate_edge_key(subject_id, 'biolink:same_as', object_id)
        g1.add_edge(subject_id,
                    object_id,
                    edge_key=key,
                    predicate='biolink:same_as',
                    relation='owl:equivalentClass')

    updated_graph, _ = clique_merge(target_graph=g1,
                                    prefix_prioritization_map=ppm)
    assert updated_graph.number_of_nodes() == 4
    assert updated_graph.number_of_edges() == 1
    for leader_id in ('HGNC:1', 'HGNC:7'):
        assert updated_graph.has_node(leader_id)

    first_leader = updated_graph.nodes()['HGNC:1']
    assert 'OMIM:2' not in first_leader['same_as']
    for member in ('NCBIGene:3', 'ENSEMBL:4'):
        assert member in first_leader['same_as']

    second_leader = updated_graph.nodes()['HGNC:7']
    for member in ('ENSEMBL:6', 'NCBIGene:8'):
        assert member in second_leader['same_as']

    assert updated_graph.has_node('OMIM:2')
Esempio n. 9
0
def test_clique_merge7():
    """
    Clique merge where each group of same_as-linked nodes contains one node
    whose category (``biolink:Disease``) differs from the ``biolink:Gene``
    nodes around it.

    The differing nodes (``OMIM:2`` and ``HGNC:7``) are connected to the
    others through ``biolink:same_as`` edges. They are expected to remain
    in the graph as separate nodes, while the remaining nodes are merged
    into ``HGNC:1`` and ``NCBIGene:8``.
    """
    ppm = {'biolink:Gene': ['HGNC', 'NCBIGene', 'ENSEMBL', 'OMIM']}
    g1 = NxGraph()

    node_categories = (
        ('HGNC:1', 'biolink:Gene'),
        ('OMIM:2', 'biolink:Disease'),
        ('NCBIGene:3', 'biolink:NamedThing'),
        ('ENSEMBL:4', 'biolink:Gene'),
        ('ENSEMBL:6', 'biolink:Gene'),
        ('HGNC:7', 'biolink:Disease'),
        ('NCBIGene:8', 'biolink:Gene'),
    )
    for node_id, category in node_categories:
        # Wrap the category in a new list for each node.
        g1.add_node(node_id, category=[category])

    same_as_pairs = (
        ('ENSEMBL:4', 'HGNC:1'),
        ('NCBIGene:3', 'HGNC:1'),
        ('OMIM:2', 'HGNC:1'),
        ('ENSEMBL:6', 'NCBIGene:8'),
        ('HGNC:7', 'NCBIGene:8'),
    )
    for subject_id, object_id in same_as_pairs:
        key = generate_edge_key(subject_id, 'biolink:same_as', object_id)
        g1.add_edge(subject_id,
                    object_id,
                    edge_key=key,
                    predicate='biolink:same_as',
                    relation='owl:equivalentClass')

    updated_graph, _ = clique_merge(target_graph=g1,
                                    prefix_prioritization_map=ppm)
    assert updated_graph.number_of_nodes() == 4
    assert updated_graph.number_of_edges() == 2
    for leader_id in ('HGNC:1', 'NCBIGene:8'):
        assert updated_graph.has_node(leader_id)

    first_leader = updated_graph.nodes()['HGNC:1']
    for member in ('NCBIGene:3', 'ENSEMBL:4'):
        assert member in first_leader['same_as']
    assert 'OMIM:2' not in first_leader['same_as']

    second_leader = updated_graph.nodes()['NCBIGene:8']
    assert 'ENSEMBL:6' in second_leader['same_as']

    assert updated_graph.has_node('OMIM:2')
    for merged_id in ('NCBIGene:3', 'ENSEMBL:4'):
        assert not updated_graph.has_node(merged_id)
    assert updated_graph.has_node('HGNC:7')
Esempio n. 10
0
def test_clique_merge7():
    """
    Clique merge with one category-mismatched node per same_as group.

    ``OMIM:2`` and ``HGNC:7`` are ``biolink:Disease`` nodes linked by
    ``biolink:same_as`` edges to ``biolink:Gene`` nodes. After merging,
    four nodes and two edges are expected: the two disease nodes stay,
    and ``HGNC:1`` / ``NCBIGene:8`` absorb the remaining nodes.
    """
    ppm = {"biolink:Gene": ["HGNC", "NCBIGene", "ENSEMBL", "OMIM"]}
    g1 = NxGraph()

    # Nodes of the first group.
    g1.add_node("HGNC:1", category=["biolink:Gene"])
    g1.add_node("OMIM:2", category=["biolink:Disease"])
    g1.add_node("NCBIGene:3", category=["biolink:NamedThing"])
    g1.add_node("ENSEMBL:4", category=["biolink:Gene"])

    # Nodes of the second group.
    g1.add_node("ENSEMBL:6", category=["biolink:Gene"])
    g1.add_node("HGNC:7", category=["biolink:Disease"])
    g1.add_node("NCBIGene:8", category=["biolink:Gene"])

    def link_same_as(subject_id, object_id):
        # Add one biolink:same_as edge from subject_id to object_id.
        g1.add_edge(
            subject_id,
            object_id,
            edge_key=generate_edge_key(subject_id, "biolink:same_as", object_id),
            predicate="biolink:same_as",
            relation="owl:equivalentClass",
        )

    link_same_as("ENSEMBL:4", "HGNC:1")
    link_same_as("NCBIGene:3", "HGNC:1")
    link_same_as("OMIM:2", "HGNC:1")

    link_same_as("ENSEMBL:6", "NCBIGene:8")
    link_same_as("HGNC:7", "NCBIGene:8")

    merge_result = clique_merge(target_graph=g1, prefix_prioritization_map=ppm)
    updated_graph, clique_graph = merge_result
    assert updated_graph.number_of_nodes() == 4
    assert updated_graph.number_of_edges() == 2
    assert updated_graph.has_node("HGNC:1")
    assert updated_graph.has_node("NCBIGene:8")

    leader_a = updated_graph.nodes()["HGNC:1"]
    assert "NCBIGene:3" in leader_a["same_as"]
    assert "ENSEMBL:4" in leader_a["same_as"]
    assert "OMIM:2" not in leader_a["same_as"]

    leader_b = updated_graph.nodes()["NCBIGene:8"]
    assert "ENSEMBL:6" in leader_b["same_as"]

    assert updated_graph.has_node("OMIM:2")
    assert not updated_graph.has_node("NCBIGene:3")
    assert not updated_graph.has_node("ENSEMBL:4")
    assert updated_graph.has_node("HGNC:7")