def test_read_nt4():
    """
    Read from an RDF N-Triple file using RdfSource, with user defined
    node property predicates.

    Parses ``rdf/test3.nt`` and checks the node/edge counts, two gene
    nodes, and three edges of the resulting stream.
    """
    # Predicates registered on the source before parsing; the e1 assertions
    # below expect each of these names as a key on the edge record.
    node_property_predicates = {
        f"https://www.example.org/UNKNOWN/{x}"
        for x in ['fusion', 'homology', 'combined_score', 'cooccurence']
    }
    source = RdfSource()
    source.set_node_property_predicates(node_property_predicates)
    g = source.parse(filename=os.path.join(RESOURCE_DIR, 'rdf', 'test3.nt'), format='nt')
    nodes, edges = process_stream(g)
    assert len(nodes.keys()) == 7
    assert len(edges.keys()) == 6

    n1 = nodes['ENSEMBL:ENSG0000000000001']
    assert n1['type'] == 'SO:0000704'
    assert len(n1['category']) == 4
    assert 'biolink:Gene' in n1['category']
    assert 'biolink:GenomicEntity' in n1['category']
    assert 'biolink:NamedThing' in n1['category']
    assert n1['name'] == 'Test Gene 123'
    assert n1['description'] == 'This is a Test Gene 123'
    assert 'Test Dataset' in n1['provided_by']

    n2 = nodes['ENSEMBL:ENSG0000000000002']
    assert n2['type'] == 'SO:0000704'
    assert len(n2['category']) == 4
    assert 'biolink:Gene' in n2['category']
    assert 'biolink:GenomicEntity' in n2['category']
    # Verify n2's own categories; checking n1 here would leave n2 untested.
    assert 'biolink:NamedThing' in n2['category']
    assert n2['name'] == 'Test Gene 456'
    assert n2['description'] == 'This is a Test Gene 456'
    assert 'Test Dataset' in n2['provided_by']

    # Edges are keyed by (subject, object); each key maps to a list of records.
    e1 = edges['ENSEMBL:ENSP0000000000001', 'ENSEMBL:ENSP0000000000002'][0]
    assert e1['subject'] == 'ENSEMBL:ENSP0000000000001'
    assert e1['object'] == 'ENSEMBL:ENSP0000000000002'
    assert e1['predicate'] == 'biolink:interacts_with'
    assert e1['relation'] == 'biolink:interacts_with'
    assert e1['type'] == 'biolink:Association'
    assert e1['id'] == 'urn:uuid:fcf76807-f909-4ccb-b40a-3b79b49aa518'
    # The user defined properties are compared as strings.
    assert e1['fusion'] == '0'
    assert e1['homology'] == '0.0'
    assert e1['combined_score'] == '490.0'
    assert e1['cooccurence'] == '332'

    e2 = edges['ENSEMBL:ENSP0000000000001', 'UniProtKB:X0000001'][0]
    assert e2['subject'] == 'ENSEMBL:ENSP0000000000001'
    assert e2['object'] == 'UniProtKB:X0000001'
    assert e2['predicate'] == 'biolink:same_as'
    assert e2['relation'] == 'owl:equivalentClass'

    e3 = edges['ENSEMBL:ENSP0000000000001', 'MONDO:0000001'][0]
    assert e3['subject'] == 'ENSEMBL:ENSP0000000000001'
    assert e3['object'] == 'MONDO:0000001'
    assert e3['predicate'] == 'biolink:treats'
    assert e3['relation'] == 'RO:0002606'
def test_read_neo(clean_slate):
    """
    Read a graph from a Neo4j instance.

    Runs every statement in the module-level ``queries`` against the
    database, then reads the graph back through NeoSource and checks the
    resulting nodes and edges.

    ``clean_slate`` is a fixture that is not referenced in the body;
    presumably it resets the database beforehand (confirm against the
    fixture definition).
    """
    credentials = {
        'username': DEFAULT_NEO4J_USERNAME,
        'password': DEFAULT_NEO4J_PASSWORD,
    }
    database = GraphDatabase(DEFAULT_NEO4J_URL, **credentials)
    for statement in queries:
        database.query(statement)

    reader = NeoSource()
    stream = reader.parse(uri=DEFAULT_NEO4J_URL, **credentials)
    nodes, edges = process_stream(stream)

    assert len(nodes.keys()) == 3
    assert len(edges.keys()) == 2

    node_a = nodes['A']
    assert node_a['id'] == 'A'
    assert node_a['name'] == 'A'
    assert 'category' in node_a
    assert 'biolink:NamedThing' in node_a['category']

    # Edges are keyed by (subject, object); take the first record for A -> C.
    edge_ac = edges['A', 'C'][0]
    expected_edge = {
        'subject': 'A',
        'object': 'C',
        'predicate': 'biolink:related_to',
        'relation': 'biolink:related_to',
    }
    for field, value in expected_edge.items():
        assert edge_ac[field] == value
def test_read_nt1():
    """
    Read from an RDF N-Triple file using RdfSource.

    Parses ``rdf/test1.nt`` and checks the two gene nodes and the single
    edge of the resulting stream.
    """
    s = RdfSource()
    g = s.parse(os.path.join(RESOURCE_DIR, 'rdf', 'test1.nt'))
    nodes, edges = process_stream(g)
    assert len(nodes) == 2
    assert len(edges) == 1

    n1 = nodes['ENSEMBL:ENSG0000000000001']
    assert n1['type'] == 'SO:0000704'
    assert len(n1['category']) == 4
    assert 'biolink:Gene' in n1['category']
    assert 'biolink:GenomicEntity' in n1['category']
    assert 'biolink:NamedThing' in n1['category']
    assert n1['name'] == 'Test Gene 123'
    assert n1['description'] == 'This is a Test Gene 123'
    assert 'Test Dataset' in n1['provided_by']

    n2 = nodes['ENSEMBL:ENSG0000000000002']
    assert n2['type'] == 'SO:0000704'
    assert len(n2['category']) == 4
    assert 'biolink:Gene' in n2['category']
    assert 'biolink:GenomicEntity' in n2['category']
    # Verify n2's own categories; checking n1 here would leave n2 untested.
    assert 'biolink:NamedThing' in n2['category']
    assert n2['name'] == 'Test Gene 456'
    assert n2['description'] == 'This is a Test Gene 456'
    assert 'Test Dataset' in n2['provided_by']

    # Exactly one edge key is expected (asserted above), so take the first
    # record stored under the first entry.
    e = list(edges.values())[0][0]
    assert e['subject'] == 'ENSEMBL:ENSG0000000000001'
    assert e['object'] == 'ENSEMBL:ENSG0000000000002'
    assert e['predicate'] == 'biolink:interacts_with'
    assert e['relation'] == 'biolink:interacts_with'
def test_read_nt6():
    """
    Read from an RDF N-Triple file using RdfSource, with a user defined
    prefix map, node property predicates and predicate mapping.

    Parses ``rdf/oban-test.nt`` and checks the node/edge counts, one
    phenotype node, and two association edges.
    """
    nt_path = os.path.join(RESOURCE_DIR, 'rdf', 'oban-test.nt')

    reader = RdfSource()
    reader.set_prefix_map(
        {
            'HGNC': 'https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/',
            'OMIM': 'http://omim.org/entry/',
        }
    )
    reader.set_node_property_predicates(
        {
            'http://purl.obolibrary.org/obo/RO_0002558',
            'http://purl.org/dc/elements/1.1/source',
            'https://monarchinitiative.org/frequencyOfPhenotype',
        }
    )
    # The mapped name is the key asserted on the second edge below.
    reader.set_predicate_mapping(
        {'https://monarchinitiative.org/frequencyOfPhenotype': 'frequency_of_phenotype'}
    )

    stream = reader.parse(filename=nt_path, format='nt')
    nodes, edges = process_stream(stream)
    assert len(nodes.keys()) == 14
    assert len(edges.keys()) == 7

    phenotype = nodes['HP:0000505']
    assert len(phenotype['category']) == 1
    assert 'biolink:NamedThing' in phenotype['category']

    # Each case: (subject, object) edge key -> expected fields on its first record.
    expected_edges = {
        ('OMIM:166400', 'HP:0000006'): {
            'subject': 'OMIM:166400',
            'object': 'HP:0000006',
            'relation': 'RO:0000091',
            'type': 'OBAN:association',
            'has_evidence': 'ECO:0000501',
        },
        ('ORPHA:93262', 'HP:0000505'): {
            'subject': 'ORPHA:93262',
            'object': 'HP:0000505',
            'relation': 'RO:0002200',
            'type': 'OBAN:association',
            'frequency_of_phenotype': 'HP:0040283',
        },
    }
    for edge_key, expected_fields in expected_edges.items():
        record = edges[edge_key][0]
        for field, value in expected_fields.items():
            assert record[field] == value
def test_load1():
    """
    Read a SSSOM formatted file.

    Parses ``sssom_example1.tsv`` and checks the node/edge counts, two
    nodes, and the mapping edge between them.
    """
    reader = SssomSource()
    sssom_path = os.path.join(RESOURCE_DIR, 'sssom_example1.tsv')
    stream = reader.parse(filename=sssom_path, format='sssom')
    nodes, edges = process_stream(stream)

    assert len(nodes.keys()) == 18
    assert len(edges.keys()) == 9

    subject_id, object_id = 'MP:0012051', 'HP:0001257'
    for node_id in (subject_id, object_id):
        assert nodes[node_id]['id'] == node_id

    # Edges are keyed by (subject, object); inspect the first record.
    mapping = edges[(subject_id, object_id)][0]
    assert mapping['subject'] == subject_id
    assert mapping['object'] == object_id
    assert mapping['predicate'] == 'biolink:same_as'
    assert mapping['confidence'] == '1.0'
def test_load3():
    """
    Read a SSSOM formatted file that has metadata provided in headers.

    Parses ``sssom_example3.tsv`` and checks that the expected metadata
    fields are present on a mapping edge, and that ``curie_map`` is not.
    """
    reader = SssomSource()
    sssom_path = os.path.join(RESOURCE_DIR, 'sssom_example3.tsv')
    stream = reader.parse(filename=sssom_path, format='sssom')
    nodes, edges = process_stream(stream)

    assert len(nodes) == 20
    assert len(edges) == 10

    mapping = edges[('MA:0000168', 'UBERON:0000955')][0]

    # Metadata fields that must be present on the edge with these exact values.
    expected_metadata = {
        'mapping_provider': 'https://www.mousephenotype.org',
        'mapping_set_group': 'impc_mouse_morphology',
        'mapping_set_id': 'ma_uberon_impc_pat',
        'mapping_set_title': 'The IMPC Mouse Morphology Mappings: Gross Pathology & Tissue Collection Test (Anatomy)',
        'creator_id': 'https://orcid.org/0000-0000-0000-0000',
        'license': 'https://creativecommons.org/publicdomain/zero/1.0/',
    }
    for field, value in expected_metadata.items():
        assert field in mapping and mapping[field] == value

    assert 'curie_map' not in mapping
def test_load2():
    """
    Read a SSSOM formatted file, with more metadata on mappings.

    Parses ``sssom_example2.tsv`` and checks the node/edge counts, two
    nodes, and the label/match metadata on the edge between them.
    """
    reader = SssomSource()
    sssom_path = os.path.join(RESOURCE_DIR, 'sssom_example2.tsv')
    stream = reader.parse(filename=sssom_path, format='sssom')
    nodes, edges = process_stream(stream)

    assert len(nodes.keys()) == 18
    assert len(edges.keys()) == 9

    subject_id, object_id = 'MP:0002152', 'HP:0012443'
    subject_node = nodes[subject_id]
    assert subject_node['id'] == subject_id
    object_node = nodes[object_id]
    assert object_node['id'] == object_id

    # Edges are keyed by (subject, object); inspect the first record.
    mapping = edges[(subject_id, object_id)][0]
    expected_fields = {
        'subject': 'MP:0002152',
        'subject_label': 'abnormal brain morphology',
        'object': 'HP:0012443',
        'object_label': 'Abnormality of brain morphology',
        'predicate': 'biolink:exact_match',
        'match_type': 'SSSOMC:Lexical',
        'reviewer_id': 'orcid:0000-0000-0000-0000',
    }
    for field, value in expected_fields.items():
        assert mapping[field] == value