Esempio n. 1
0
def test_read_nt4():
    """
    Read from an RDF N-Triple file using RdfSource, with user defined
    node property predicates.

    Verifies the number of nodes and edges produced from ``test3.nt``,
    the properties of two gene nodes, and the properties of three edges.
    """
    node_property_predicates = {
        f"https://www.example.org/UNKNOWN/{x}"
        for x in ['fusion', 'homology', 'combined_score', 'cooccurence']
    }
    source = RdfSource()
    source.set_node_property_predicates(node_property_predicates)
    g = source.parse(filename=os.path.join(RESOURCE_DIR, 'rdf', 'test3.nt'),
                     format='nt')
    nodes, edges = process_stream(g)
    assert len(nodes) == 7
    assert len(edges) == 6

    n1 = nodes['ENSEMBL:ENSG0000000000001']
    assert n1['type'] == 'SO:0000704'
    assert len(n1['category']) == 4
    assert 'biolink:Gene' in n1['category']
    assert 'biolink:GenomicEntity' in n1['category']
    assert 'biolink:NamedThing' in n1['category']
    assert n1['name'] == 'Test Gene 123'
    assert n1['description'] == 'This is a Test Gene 123'
    assert 'Test Dataset' in n1['provided_by']

    n2 = nodes['ENSEMBL:ENSG0000000000002']
    assert n2['type'] == 'SO:0000704'
    assert len(n2['category']) == 4
    assert 'biolink:Gene' in n2['category']
    assert 'biolink:GenomicEntity' in n2['category']
    # Must inspect n2 here; checking n1 again would leave n2 unverified.
    assert 'biolink:NamedThing' in n2['category']
    assert n2['name'] == 'Test Gene 456'
    assert n2['description'] == 'This is a Test Gene 456'
    assert 'Test Dataset' in n2['provided_by']

    # Edges are looked up by a (subject, object) tuple; the first entry
    # stored under each key is the one inspected.
    e1 = edges['ENSEMBL:ENSP0000000000001', 'ENSEMBL:ENSP0000000000002'][0]
    assert e1['subject'] == 'ENSEMBL:ENSP0000000000001'
    assert e1['object'] == 'ENSEMBL:ENSP0000000000002'
    assert e1['predicate'] == 'biolink:interacts_with'
    assert e1['relation'] == 'biolink:interacts_with'
    assert e1['type'] == 'biolink:Association'
    assert e1['id'] == 'urn:uuid:fcf76807-f909-4ccb-b40a-3b79b49aa518'
    # The user defined property predicates are expected as string values.
    assert e1['fusion'] == '0'
    assert e1['homology'] == '0.0'
    assert e1['combined_score'] == '490.0'
    assert e1['cooccurence'] == '332'

    e2 = edges['ENSEMBL:ENSP0000000000001', 'UniProtKB:X0000001'][0]
    assert e2['subject'] == 'ENSEMBL:ENSP0000000000001'
    assert e2['object'] == 'UniProtKB:X0000001'
    assert e2['predicate'] == 'biolink:same_as'
    assert e2['relation'] == 'owl:equivalentClass'

    e3 = edges['ENSEMBL:ENSP0000000000001', 'MONDO:0000001'][0]
    assert e3['subject'] == 'ENSEMBL:ENSP0000000000001'
    assert e3['object'] == 'MONDO:0000001'
    assert e3['predicate'] == 'biolink:treats'
    assert e3['relation'] == 'RO:0002606'
Esempio n. 2
0
def test_read_neo(clean_slate):
    """
    Load the statements in ``queries`` into a Neo4j instance, then read the
    graph back through NeoSource and check the resulting nodes and edges.
    """
    credentials = {
        'username': DEFAULT_NEO4J_USERNAME,
        'password': DEFAULT_NEO4J_PASSWORD,
    }

    # Populate the database before reading from it.
    database = GraphDatabase(DEFAULT_NEO4J_URL, **credentials)
    for statement in queries:
        database.query(statement)

    neo_source = NeoSource()
    stream = neo_source.parse(uri=DEFAULT_NEO4J_URL, **credentials)
    nodes, edges = process_stream(stream)
    assert len(nodes.keys()) == 3
    assert len(edges.keys()) == 2

    node_a = nodes['A']
    assert node_a['id'] == 'A'
    assert node_a['name'] == 'A'
    assert 'category' in node_a
    assert 'biolink:NamedThing' in node_a['category']

    # Edges are looked up by a (subject, object) tuple.
    edge_ac = edges['A', 'C'][0]
    assert edge_ac['subject'] == 'A'
    assert edge_ac['object'] == 'C'
    assert edge_ac['predicate'] == 'biolink:related_to'
    assert edge_ac['relation'] == 'biolink:related_to'
Esempio n. 3
0
def test_read_nt1():
    """
    Read from an RDF N-Triple file using RdfSource.

    Verifies the number of nodes and edges produced from ``test1.nt``,
    the properties of both gene nodes, and the single edge between them.
    """
    s = RdfSource()
    g = s.parse(os.path.join(RESOURCE_DIR, 'rdf', 'test1.nt'))
    nodes, edges = process_stream(g)
    assert len(nodes) == 2
    assert len(edges) == 1

    n1 = nodes['ENSEMBL:ENSG0000000000001']
    assert n1['type'] == 'SO:0000704'
    assert len(n1['category']) == 4
    assert 'biolink:Gene' in n1['category']
    assert 'biolink:GenomicEntity' in n1['category']
    assert 'biolink:NamedThing' in n1['category']
    assert n1['name'] == 'Test Gene 123'
    assert n1['description'] == 'This is a Test Gene 123'
    assert 'Test Dataset' in n1['provided_by']

    n2 = nodes['ENSEMBL:ENSG0000000000002']
    assert n2['type'] == 'SO:0000704'
    assert len(n2['category']) == 4
    assert 'biolink:Gene' in n2['category']
    assert 'biolink:GenomicEntity' in n2['category']
    # Must inspect n2 here; checking n1 again would leave n2 unverified.
    assert 'biolink:NamedThing' in n2['category']
    assert n2['name'] == 'Test Gene 456'
    assert n2['description'] == 'This is a Test Gene 456'
    assert 'Test Dataset' in n2['provided_by']

    # Only one edge key exists (asserted above), so take the first entry
    # of the first value without building an intermediate list.
    e = next(iter(edges.values()))[0]
    assert e['subject'] == 'ENSEMBL:ENSG0000000000001'
    assert e['object'] == 'ENSEMBL:ENSG0000000000002'
    assert e['predicate'] == 'biolink:interacts_with'
    assert e['relation'] == 'biolink:interacts_with'
Esempio n. 4
0
def test_read_nt6():
    """
    Read ``oban-test.nt`` using RdfSource configured with a custom prefix
    map, node property predicates, and a predicate mapping, then check the
    resulting nodes and edges.
    """
    frequency_iri = 'https://monarchinitiative.org/frequencyOfPhenotype'
    nt_path = os.path.join(RESOURCE_DIR, 'rdf', 'oban-test.nt')

    rdf_source = RdfSource()
    rdf_source.set_prefix_map(
        {
            'HGNC': 'https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/',
            'OMIM': 'http://omim.org/entry/',
        }
    )
    rdf_source.set_node_property_predicates(
        {
            'http://purl.obolibrary.org/obo/RO_0002558',
            'http://purl.org/dc/elements/1.1/source',
            frequency_iri,
        }
    )
    rdf_source.set_predicate_mapping({frequency_iri: 'frequency_of_phenotype'})

    stream = rdf_source.parse(filename=nt_path, format='nt')
    nodes, edges = process_stream(stream)

    assert len(nodes.keys()) == 14
    assert len(edges.keys()) == 7

    phenotype_node = nodes['HP:0000505']
    assert len(phenotype_node['category']) == 1
    assert 'biolink:NamedThing' in phenotype_node['category']

    # Expected properties per edge, keyed by the (subject, object) lookup.
    expected_edges = {
        ('OMIM:166400', 'HP:0000006'): {
            'subject': 'OMIM:166400',
            'object': 'HP:0000006',
            'relation': 'RO:0000091',
            'type': 'OBAN:association',
            'has_evidence': 'ECO:0000501',
        },
        ('ORPHA:93262', 'HP:0000505'): {
            'subject': 'ORPHA:93262',
            'object': 'HP:0000505',
            'relation': 'RO:0002200',
            'type': 'OBAN:association',
            'frequency_of_phenotype': 'HP:0040283',
        },
    }
    for edge_key, expected_properties in expected_edges.items():
        edge = edges[edge_key][0]
        for field, expected_value in expected_properties.items():
            assert edge[field] == expected_value
Esempio n. 5
0
def test_load1():
    """
    Parse ``sssom_example1.tsv`` with SssomSource and check the node and
    edge counts plus one mapping edge.
    """
    sssom_path = os.path.join(RESOURCE_DIR, 'sssom_example1.tsv')
    stream = SssomSource().parse(filename=sssom_path, format='sssom')
    nodes, edges = process_stream(stream)
    assert len(nodes.keys()) == 18
    assert len(edges.keys()) == 9

    subject_id, object_id = 'MP:0012051', 'HP:0001257'
    assert nodes[subject_id]['id'] == 'MP:0012051'
    assert nodes[object_id]['id'] == 'HP:0001257'

    # Edges are looked up by a (subject, object) tuple.
    mapping = edges[subject_id, object_id][0]
    assert mapping['subject'] == 'MP:0012051'
    assert mapping['object'] == 'HP:0001257'
    assert mapping['predicate'] == 'biolink:same_as'
    assert mapping['confidence'] == '1.0'
Esempio n. 6
0
def test_load3():
    """
    Parse ``sssom_example3.tsv``, a SSSOM file whose metadata is provided in
    headers, and check that the metadata fields appear on a mapping edge.
    """
    sssom_path = os.path.join(RESOURCE_DIR, 'sssom_example3.tsv')
    stream = SssomSource().parse(filename=sssom_path, format='sssom')
    nodes, edges = process_stream(stream)
    assert len(nodes) == 20
    assert len(edges) == 10

    mapping = edges['MA:0000168', 'UBERON:0000955'][0]
    expected_metadata = {
        'mapping_provider': 'https://www.mousephenotype.org',
        'mapping_set_group': 'impc_mouse_morphology',
        'mapping_set_id': 'ma_uberon_impc_pat',
        'mapping_set_title': (
            'The IMPC Mouse Morphology Mappings: Gross Pathology & Tissue Collection Test (Anatomy)'
        ),
        'creator_id': 'https://orcid.org/0000-0000-0000-0000',
        'license': 'https://creativecommons.org/publicdomain/zero/1.0/',
    }
    # Each field must be present on the edge and carry the expected value.
    for field, expected_value in expected_metadata.items():
        assert field in mapping and mapping[field] == expected_value
    assert 'curie_map' not in mapping
Esempio n. 7
0
def test_load2():
    """
    Parse ``sssom_example2.tsv``, a SSSOM file with more metadata on
    mappings, and check the counts, two nodes, and one mapping edge.
    """
    sssom_path = os.path.join(RESOURCE_DIR, 'sssom_example2.tsv')
    stream = SssomSource().parse(filename=sssom_path, format='sssom')
    nodes, edges = process_stream(stream)
    assert len(nodes.keys()) == 18
    assert len(edges.keys()) == 9

    assert nodes['MP:0002152']['id'] == 'MP:0002152'
    assert nodes['HP:0012443']['id'] == 'HP:0012443'

    mapping = edges['MP:0002152', 'HP:0012443'][0]
    expected_properties = {
        'subject': 'MP:0002152',
        'subject_label': 'abnormal brain morphology',
        'object': 'HP:0012443',
        'object_label': 'Abnormality of brain morphology',
        'predicate': 'biolink:exact_match',
        'match_type': 'SSSOMC:Lexical',
        'reviewer_id': 'orcid:0000-0000-0000-0000',
    }
    for field, expected_value in expected_properties.items():
        assert mapping[field] == expected_value