def test_read_nt4():
    """
    Read from an RDF N-Triple file using RdfSource, with user defined
    node property predicates.

    Parses ``rdf/test3.nt`` after registering four custom predicates via
    ``set_node_property_predicates`` and checks the expected node/edge
    counts, the properties of two gene nodes and the properties of three
    edges (including the custom predicate values on the first edge).
    """
    node_property_predicates = {
        f"https://www.example.org/UNKNOWN/{x}"
        for x in ['fusion', 'homology', 'combined_score', 'cooccurence']
    }
    source = RdfSource()
    source.set_node_property_predicates(node_property_predicates)
    g = source.parse(filename=os.path.join(RESOURCE_DIR, 'rdf', 'test3.nt'),
                     format='nt')
    nodes, edges = process_stream(g)
    assert len(nodes.keys()) == 7
    assert len(edges.keys()) == 6

    n1 = nodes['ENSEMBL:ENSG0000000000001']
    assert n1['type'] == 'SO:0000704'
    assert len(n1['category']) == 4
    assert 'biolink:Gene' in n1['category']
    assert 'biolink:GenomicEntity' in n1['category']
    assert 'biolink:NamedThing' in n1['category']
    assert n1['name'] == 'Test Gene 123'
    assert n1['description'] == 'This is a Test Gene 123'
    assert 'Test Dataset' in n1['provided_by']

    n2 = nodes['ENSEMBL:ENSG0000000000002']
    assert n2['type'] == 'SO:0000704'
    assert len(n2['category']) == 4
    assert 'biolink:Gene' in n2['category']
    assert 'biolink:GenomicEntity' in n2['category']
    # Every category check in this section must target n2, not n1.
    assert 'biolink:NamedThing' in n2['category']
    assert n2['name'] == 'Test Gene 456'
    assert n2['description'] == 'This is a Test Gene 456'
    assert 'Test Dataset' in n2['provided_by']

    # Edges are keyed by (subject, object); each key maps to a list of edges.
    e1 = edges['ENSEMBL:ENSP0000000000001', 'ENSEMBL:ENSP0000000000002'][0]
    assert e1['subject'] == 'ENSEMBL:ENSP0000000000001'
    assert e1['object'] == 'ENSEMBL:ENSP0000000000002'
    assert e1['predicate'] == 'biolink:interacts_with'
    assert e1['relation'] == 'biolink:interacts_with'
    assert e1['type'] == 'biolink:Association'
    assert e1['id'] == 'urn:uuid:fcf76807-f909-4ccb-b40a-3b79b49aa518'
    # Values of the user-registered predicates are expected as strings.
    assert e1['fusion'] == '0'
    assert e1['homology'] == '0.0'
    assert e1['combined_score'] == '490.0'
    assert e1['cooccurence'] == '332'

    e2 = edges['ENSEMBL:ENSP0000000000001', 'UniProtKB:X0000001'][0]
    assert e2['subject'] == 'ENSEMBL:ENSP0000000000001'
    assert e2['object'] == 'UniProtKB:X0000001'
    assert e2['predicate'] == 'biolink:same_as'
    assert e2['relation'] == 'owl:equivalentClass'

    e3 = edges['ENSEMBL:ENSP0000000000001', 'MONDO:0000001'][0]
    assert e3['subject'] == 'ENSEMBL:ENSP0000000000001'
    assert e3['object'] == 'MONDO:0000001'
    assert e3['predicate'] == 'biolink:treats'
    assert e3['relation'] == 'RO:0002606'
def test_read_nt1():
    """
    Read from an RDF N-Triple file using RdfSource.

    Parses ``rdf/test1.nt`` with default settings and checks that it
    yields two gene nodes and a single edge with the expected properties.
    """
    s = RdfSource()
    g = s.parse(os.path.join(RESOURCE_DIR, 'rdf', 'test1.nt'))
    nodes, edges = process_stream(g)
    assert len(nodes) == 2
    assert len(edges) == 1

    n1 = nodes['ENSEMBL:ENSG0000000000001']
    assert n1['type'] == 'SO:0000704'
    assert len(n1['category']) == 4
    assert 'biolink:Gene' in n1['category']
    assert 'biolink:GenomicEntity' in n1['category']
    assert 'biolink:NamedThing' in n1['category']
    assert n1['name'] == 'Test Gene 123'
    assert n1['description'] == 'This is a Test Gene 123'
    assert 'Test Dataset' in n1['provided_by']

    n2 = nodes['ENSEMBL:ENSG0000000000002']
    assert n2['type'] == 'SO:0000704'
    assert len(n2['category']) == 4
    assert 'biolink:Gene' in n2['category']
    assert 'biolink:GenomicEntity' in n2['category']
    # Every category check in this section must target n2, not n1.
    assert 'biolink:NamedThing' in n2['category']
    assert n2['name'] == 'Test Gene 456'
    assert n2['description'] == 'This is a Test Gene 456'
    assert 'Test Dataset' in n2['provided_by']

    # Only one edge key is expected, so take the first edge of the first entry.
    e = list(edges.values())[0][0]
    assert e['subject'] == 'ENSEMBL:ENSG0000000000001'
    assert e['object'] == 'ENSEMBL:ENSG0000000000002'
    assert e['predicate'] == 'biolink:interacts_with'
    assert e['relation'] == 'biolink:interacts_with'
def test_prepare_data_dict(query):
    """
    Exercise the internal ``_prepare_data_dict`` method of RdfSource.

    ``query[0]`` and ``query[1]`` are passed to ``_prepare_data_dict``;
    ``query[2]`` and ``query[3]`` are mappings whose key/value pairs must
    all be present in the returned dictionary.
    """
    rdf_source = RdfSource()
    prepared = rdf_source._prepare_data_dict(query[0], query[1])
    # Both expectation mappings are checked the same way, in order.
    for position in (2, 3):
        for key, expected in query[position].items():
            assert prepared[key] == expected
Example #4
0
def test_read_nt3():
    """
    Read from an RDF N-Triple file using RdfSource, with user defined
    node property predicates.

    Parses ``rdf/test2.nt`` after registering four custom predicates via
    ``set_node_property_predicates`` and checks the expected node/edge
    counts, the properties of two gene nodes and the properties of one
    edge (including the custom predicate values).
    """
    node_property_predicates = {
        f"https://www.example.org/UNKNOWN/{x}"
        for x in ["fusion", "homology", "combined_score", "cooccurence"]
    }

    t = Transformer()
    source = RdfSource(t)

    source.set_node_property_predicates(node_property_predicates)
    g = source.parse(filename=os.path.join(RESOURCE_DIR, "rdf", "test2.nt"),
                     format="nt")
    nodes, edges = load_graph_dictionary(g)
    assert len(nodes) == 4
    assert len(edges) == 3

    n1 = nodes["ENSEMBL:ENSG0000000000001"]
    assert n1["type"] == "SO:0000704"
    assert len(n1["category"]) == 4
    assert "biolink:Gene" in n1["category"]
    assert "biolink:GenomicEntity" in n1["category"]
    assert "biolink:NamedThing" in n1["category"]
    assert n1["name"] == "Test Gene 123"
    assert n1["description"] == "This is a Test Gene 123"
    assert "Test Dataset" in n1["provided_by"]

    n2 = nodes["ENSEMBL:ENSG0000000000002"]
    assert n2["type"] == "SO:0000704"
    assert len(n2["category"]) == 4
    assert "biolink:Gene" in n2["category"]
    assert "biolink:GenomicEntity" in n2["category"]
    # Every category check in this section must target n2, not n1.
    assert "biolink:NamedThing" in n2["category"]
    assert n2["name"] == "Test Gene 456"
    assert n2["description"] == "This is a Test Gene 456"
    assert "Test Dataset" in n2["provided_by"]

    # Edges are keyed by (subject, object); each key maps to a list of edges.
    e1 = edges["ENSEMBL:ENSP0000000000001", "ENSEMBL:ENSP0000000000002"][0]
    assert e1["subject"] == "ENSEMBL:ENSP0000000000001"
    assert e1["object"] == "ENSEMBL:ENSP0000000000002"
    assert e1["predicate"] == "biolink:interacts_with"
    assert e1["relation"] == "biolink:interacts_with"
    assert e1["type"] == "biolink:Association"
    assert e1["id"] == "urn:uuid:fcf76807-f909-4ccb-b40a-3b79b49aa518"
    # Values of the user-registered predicates are expected as strings.
    assert e1["fusion"] == "0"
    assert e1["homology"] == "0.0"
    assert e1["combined_score"] == "490.0"
    assert e1["cooccurence"] == "332"
Example #5
0
def test_read_nt2():
    """
    Read from an RDF N-Triple file using RdfSource.
    This test also supplies the knowledge_source parameter.

    Parses ``rdf/test1.nt`` with ``provided_by`` and ``knowledge_source``
    both set to "Test Dataset" and checks the two gene nodes and the
    single edge, including that the edge carries the knowledge source.
    """
    t = Transformer()
    s = RdfSource(t)

    g = s.parse(
        os.path.join(RESOURCE_DIR, "rdf", "test1.nt"),
        provided_by="Test Dataset",
        knowledge_source="Test Dataset",
    )
    nodes, edges = load_graph_dictionary(g)
    assert len(nodes) == 2
    assert len(edges) == 1

    n1 = nodes["ENSEMBL:ENSG0000000000001"]
    assert n1["type"] == "SO:0000704"
    assert len(n1["category"]) == 4
    assert "biolink:Gene" in n1["category"]
    assert "biolink:GenomicEntity" in n1["category"]
    assert "biolink:NamedThing" in n1["category"]
    assert n1["name"] == "Test Gene 123"
    assert n1["description"] == "This is a Test Gene 123"
    assert "Test Dataset" in n1["provided_by"]

    n2 = nodes["ENSEMBL:ENSG0000000000002"]
    assert n2["type"] == "SO:0000704"
    assert len(n2["category"]) == 4
    assert "biolink:Gene" in n2["category"]
    assert "biolink:GenomicEntity" in n2["category"]
    # Every category check in this section must target n2, not n1.
    assert "biolink:NamedThing" in n2["category"]
    assert n2["name"] == "Test Gene 456"
    assert n2["description"] == "This is a Test Gene 456"
    assert "Test Dataset" in n2["provided_by"]

    # Only one edge key is expected, so take the first edge of the first entry.
    e = list(edges.values())[0][0]
    assert e["subject"] == "ENSEMBL:ENSG0000000000001"
    assert e["object"] == "ENSEMBL:ENSG0000000000002"
    assert e["predicate"] == "biolink:interacts_with"
    assert e["relation"] == "biolink:interacts_with"
    assert "Test Dataset" in e["knowledge_source"]
Example #6
0
def test_read_nt6():
    """
    Parse ``rdf/oban-test.nt`` with a custom prefix map, custom node
    property predicates and a predicate mapping, then verify the node and
    edge counts, one node's categories and the properties of two edges.
    """
    nt_path = os.path.join(RESOURCE_DIR, "rdf", "oban-test.nt")
    prefixes = {
        "HGNC":
        "https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/",
        "OMIM": "http://omim.org/entry/",
    }
    property_predicates = {
        "http://purl.obolibrary.org/obo/RO_0002558",
        "http://purl.org/dc/elements/1.1/source",
        "https://monarchinitiative.org/frequencyOfPhenotype",
    }
    # Maps the predicate IRI to the property name asserted on the edge below.
    mapping = {
        "https://monarchinitiative.org/frequencyOfPhenotype":
        "frequency_of_phenotype"
    }

    transformer = Transformer()
    rdf_source = RdfSource(transformer)

    rdf_source.set_prefix_map(prefixes)
    rdf_source.set_node_property_predicates(property_predicates)
    rdf_source.set_predicate_mapping(mapping)

    stream = rdf_source.parse(filename=nt_path, format="nt")
    nodes, edges = load_graph_dictionary(stream)

    assert len(nodes.keys()) == 14
    assert len(edges.keys()) == 7

    phenotype = nodes["HP:0000505"]
    assert len(phenotype["category"]) == 1
    assert "biolink:NamedThing" in phenotype["category"]

    # Edges are looked up by (subject, object); take the first edge per key.
    omim_edge = edges[("OMIM:166400", "HP:0000006")][0]
    omim_expected = {
        "subject": "OMIM:166400",
        "object": "HP:0000006",
        "relation": "RO:0000091",
        "type": "OBAN:association",
        "has_evidence": "ECO:0000501",
    }
    for field, value in omim_expected.items():
        assert omim_edge[field] == value

    orpha_edge = edges[("ORPHA:93262", "HP:0000505")][0]
    orpha_expected = {
        "subject": "ORPHA:93262",
        "object": "HP:0000505",
        "relation": "RO:0002200",
        "type": "OBAN:association",
        "frequency_of_phenotype": "HP:0040283",
    }
    for field, value in orpha_expected.items():
        assert orpha_edge[field] == value
def test_read_nt6():
    """
    Parse ``rdf/oban-test.nt`` with a custom prefix map, custom node
    property predicates and a predicate mapping, then verify the node and
    edge counts, one node's categories and the properties of two edges.
    """
    nt_path = os.path.join(RESOURCE_DIR, 'rdf', 'oban-test.nt')
    prefixes = {
        'HGNC':
        'https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/',
        'OMIM': 'http://omim.org/entry/',
    }
    property_predicates = {
        'http://purl.obolibrary.org/obo/RO_0002558',
        'http://purl.org/dc/elements/1.1/source',
        'https://monarchinitiative.org/frequencyOfPhenotype',
    }
    # Maps the predicate IRI to the property name asserted on the edge below.
    mapping = {
        'https://monarchinitiative.org/frequencyOfPhenotype':
        'frequency_of_phenotype'
    }

    rdf_source = RdfSource()
    rdf_source.set_prefix_map(prefixes)
    rdf_source.set_node_property_predicates(property_predicates)
    rdf_source.set_predicate_mapping(mapping)

    stream = rdf_source.parse(filename=nt_path, format='nt')
    nodes, edges = process_stream(stream)

    assert len(nodes.keys()) == 14
    assert len(edges.keys()) == 7

    phenotype = nodes['HP:0000505']
    assert len(phenotype['category']) == 1
    assert 'biolink:NamedThing' in phenotype['category']

    # Edges are looked up by (subject, object); take the first edge per key.
    omim_edge = edges[('OMIM:166400', 'HP:0000006')][0]
    omim_expected = {
        'subject': 'OMIM:166400',
        'object': 'HP:0000006',
        'relation': 'RO:0000091',
        'type': 'OBAN:association',
        'has_evidence': 'ECO:0000501',
    }
    for field, value in omim_expected.items():
        assert omim_edge[field] == value

    orpha_edge = edges[('ORPHA:93262', 'HP:0000505')][0]
    orpha_expected = {
        'subject': 'ORPHA:93262',
        'object': 'HP:0000505',
        'relation': 'RO:0002200',
        'type': 'OBAN:association',
        'frequency_of_phenotype': 'HP:0040283',
    }
    for field, value in orpha_expected.items():
        assert orpha_edge[field] == value