def test_read_nt3(): """ Read from an RDF N-Triple file using RdfSource, with user defined node property predicates. """ node_property_predicates = { f"https://www.example.org/UNKNOWN/{x}" for x in ["fusion", "homology", "combined_score", "cooccurence"] } t = Transformer() source = RdfSource(t) source.set_node_property_predicates(node_property_predicates) g = source.parse(filename=os.path.join(RESOURCE_DIR, "rdf", "test2.nt"), format="nt") nodes, edges = load_graph_dictionary(g) assert len(nodes) == 4 assert len(edges) == 3 n1 = nodes["ENSEMBL:ENSG0000000000001"] assert n1["type"] == "SO:0000704" assert len(n1["category"]) == 4 assert "biolink:Gene" in n1["category"] assert "biolink:GenomicEntity" in n1["category"] assert "biolink:NamedThing" in n1["category"] assert n1["name"] == "Test Gene 123" assert n1["description"] == "This is a Test Gene 123" assert "Test Dataset" in n1["provided_by"] n2 = nodes["ENSEMBL:ENSG0000000000002"] assert n2["type"] == "SO:0000704" assert len(n2["category"]) == 4 assert "biolink:Gene" in n2["category"] assert "biolink:GenomicEntity" in n2["category"] assert "biolink:NamedThing" in n1["category"] assert n2["name"] == "Test Gene 456" assert n2["description"] == "This is a Test Gene 456" assert "Test Dataset" in n2["provided_by"] e1 = edges["ENSEMBL:ENSP0000000000001", "ENSEMBL:ENSP0000000000002"][0] assert e1["subject"] == "ENSEMBL:ENSP0000000000001" assert e1["object"] == "ENSEMBL:ENSP0000000000002" assert e1["predicate"] == "biolink:interacts_with" assert e1["relation"] == "biolink:interacts_with" assert e1["type"] == "biolink:Association" assert e1["id"] == "urn:uuid:fcf76807-f909-4ccb-b40a-3b79b49aa518" assert e1["fusion"] == "0" assert e1["homology"] == "0.0" assert e1["combined_score"] == "490.0" assert e1["cooccurence"] == "332"
def test_read_nt6(): prefix_map = { "HGNC": "https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/", "OMIM": "http://omim.org/entry/", } node_property_predicates = { "http://purl.obolibrary.org/obo/RO_0002558", "http://purl.org/dc/elements/1.1/source", "https://monarchinitiative.org/frequencyOfPhenotype", } predicate_mapping = { "https://monarchinitiative.org/frequencyOfPhenotype": "frequency_of_phenotype" } filename = os.path.join(RESOURCE_DIR, "rdf", "oban-test.nt") t = Transformer() source = RdfSource(t) source.set_prefix_map(prefix_map) source.set_node_property_predicates(node_property_predicates) source.set_predicate_mapping(predicate_mapping) g = source.parse(filename=filename, format="nt") nodes, edges = load_graph_dictionary(g) assert len(nodes.keys()) == 14 assert len(edges.keys()) == 7 n1 = nodes["HP:0000505"] assert len(n1["category"]) == 1 assert "biolink:NamedThing" in n1["category"] e1 = edges["OMIM:166400", "HP:0000006"][0] assert e1["subject"] == "OMIM:166400" assert e1["object"] == "HP:0000006" assert e1["relation"] == "RO:0000091" assert e1["type"] == "OBAN:association" assert e1["has_evidence"] == "ECO:0000501" e2 = edges["ORPHA:93262", "HP:0000505"][0] assert e2["subject"] == "ORPHA:93262" assert e2["object"] == "HP:0000505" assert e2["relation"] == "RO:0002200" assert e2["type"] == "OBAN:association" assert e2["frequency_of_phenotype"] == "HP:0040283"
def test_read_nt2(): """ Read from an RDF N-Triple file using RdfSource. This test also supplies the knowledge_source parameter. """ t = Transformer() s = RdfSource(t) g = s.parse( os.path.join(RESOURCE_DIR, "rdf", "test1.nt"), provided_by="Test Dataset", knowledge_source="Test Dataset", ) nodes, edges = load_graph_dictionary(g) assert len(nodes) == 2 assert len(edges) == 1 n1 = nodes["ENSEMBL:ENSG0000000000001"] assert n1["type"] == "SO:0000704" assert len(n1["category"]) == 4 assert "biolink:Gene" in n1["category"] assert "biolink:GenomicEntity" in n1["category"] assert "biolink:NamedThing" in n1["category"] assert n1["name"] == "Test Gene 123" assert n1["description"] == "This is a Test Gene 123" assert "Test Dataset" in n1["provided_by"] n2 = nodes["ENSEMBL:ENSG0000000000002"] assert n2["type"] == "SO:0000704" assert len(n2["category"]) == 4 assert "biolink:Gene" in n2["category"] assert "biolink:GenomicEntity" in n2["category"] assert "biolink:NamedThing" in n1["category"] assert n2["name"] == "Test Gene 456" assert n2["description"] == "This is a Test Gene 456" assert "Test Dataset" in n2["provided_by"] e = list(edges.values())[0][0] assert e["subject"] == "ENSEMBL:ENSG0000000000001" assert e["object"] == "ENSEMBL:ENSG0000000000002" assert e["predicate"] == "biolink:interacts_with" assert e["relation"] == "biolink:interacts_with" assert "Test Dataset" in e["knowledge_source"]