def __init__(self, identifier, title, url, description=None,
             license_url=None, data_rights=None, graph_type=None,
             file_handle=None):
    """
    Create the backing graph for a dataset and record its core metadata.

    :param identifier: local id of the dataset; emitted as the
        dct:identifier literal and, prefixed with ':', used as the subject
        of every triple written here
    :param title: object of the dct:title triple
    :param url: object of the foaf:page triple
    :param description: optional text handed to Model.addDescription
    :param license_url: optional object of the dct:license triple;
        also stored on self.license
    :param data_rights: optional literal object of the dct:rights triple
    :param graph_type: None or 'rdf_graph' for an RDFGraph,
        'streamed_graph' for a StreamedGraph
    :param file_handle: forwarded to StreamedGraph; unused otherwise
    :raises ValueError: if graph_type is not one of the supported values
    """
    if graph_type is None or graph_type == 'rdf_graph':
        self.graph = RDFGraph()
    elif graph_type == 'streamed_graph':
        self.graph = StreamedGraph(True, file_handle=file_handle)
    else:
        # Previously self.graph was simply left unset here, which surfaced
        # later as an AttributeError far from the real cause.
        raise ValueError(
            "Unsupported graph_type %r; expected None, 'rdf_graph' or "
            "'streamed_graph'" % (graph_type,))

    self.model = Model(self.graph)
    self.identifier = ':' + identifier
    self.version = None
    self.date_issued = None

    # The date_accessed value is later used as an object literal of
    # properties such as dct:issued, which needs to conform to the
    # xsd:dateTime format (hence not the older '%Y-%m-%d-%H-%M' layout).
    self.date_accessed = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
    self.citation = set()
    self.license = license_url

    self.model.addType(self.identifier, 'dctypes:Dataset')
    self.graph.addTriple(self.identifier, 'dct:title', title, True)
    self.graph.addTriple(
        self.identifier, 'dct:identifier', identifier,
        object_is_literal=True)
    self.graph.addTriple(self.identifier, 'foaf:page', url)
    # maybe in the future add the logo here:
    # schemaorg:logo <http://www.ebi.ac.uk/rdf/sites/ebi.ac.uk.rdf/files/resize/images/rdf/chembl_service_logo-146x48.gif> .

    # TODO add the licence info
    # FIXME: Temporarily making this an IF statement,
    # can revert after all current resources are updated.
    if license_url is not None:
        self.graph.addTriple(self.identifier, 'dct:license', license_url)
    else:
        logger.debug('No license provided.')

    if data_rights is not None:
        self.graph.addTriple(
            self.identifier, 'dct:rights', data_rights,
            object_is_literal=True)
    else:
        logger.debug('No rights provided.')

    if description is not None:
        self.model.addDescription(self.identifier, description)
def test_germline_variant_to_disease(self):
    """
    Process the 'orph-germline.xml' fixture and compare the resulting
    graph against the expected triples.
    """
    orphanet = self.orphanet
    orphanet.graph = RDFGraph()  # Reset graph
    orphanet.files['disease-gene']['file'] = 'orph-germline.xml'
    orphanet._process_diseasegene(limit=None)

    turtle_dump = orphanet.graph.serialize(format="turtle").decode("utf-8")
    LOG.debug("Reference graph: %s", turtle_dump)

    expected_triples = """
    MONARCH:ba2ac5d2153c70e2bb98 a OBAN:association ;
        RO:0002558 ECO:0000322 ;
        OBAN:association_has_object ORPHA:938475 ;
        OBAN:association_has_predicate RO:0004013 ;
        OBAN:association_has_subject HGNC:30497 .

    ENSEMBL:ENSG00000166813 a owl:Class .

    HGNC:30497 a owl:Class ;
        RO:0004013 ORPHA:938475 ;
        oboInOwl:hasExactSynonym "KAS1" ;
        owl:equivalentClass ENSEMBL:ENSG00000166813,
            ORPHA:268061 .

    ORPHA:268061 a owl:Class .

    ORPHA:938475 a owl:Class ;
        rdfs:label "too much unit testing disorder" .
    """
    graphs_match = self.test_util.test_graph_equality(
        expected_triples, orphanet.graph)
    self.assertTrue(graphs_match)
def setUp(self):
    """
    Prepare a GWASCatalog source with a fresh RDFGraph(True) and a
    test record whose fields carry several ';'-separated SNP entries.
    """
    self.test_util = TestUtils()

    gwas_source = GWASCatalog('rdf_graph', True)
    gwas_source.graph = RDFGraph(True)
    self.source = gwas_source

    self.test_data = dict(
        snp_label='rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?',
        chrom_num='9;9;9;9',
        chrom_pos='36998996;37002118;37000690;36997420',
        context='intron_variant; intron_variant; '
                'intron_variant; intron_variant',
        allele_freq='NR',
        trait='Intelligence',
        trait_uri='http://www.ebi.ac.uk/efo/EFO_0004337',
        pvalue='0.00000004',
        merged='0',
        snp_id_current='',
        mapped_gene='PAX5; PAX5; PAX5; PAX5',
        snp_gene_nums='',
        upstream_gene_num='107986179',
        downstream_gene_num='107986180',
        init_sample_desc='656 European ancestry individuals '
                         'from ADHD families',
        replicated_sample_desc='NA',
        platform='Illumina [795637]',
        pubmed='22449649',
    )
def test_parse(self):
    """
    Run parse() on the source starting from an empty graph and compare
    the resulting graph against the expected triples.
    """
    self.source.graph = RDFGraph(True)  # Reset graph
    starting_triples = list(self.source.graph)
    self.assertTrue(len(starting_triples) == 0)

    self.source.parse()

    triples = """
    UNII:46U771ERWK RO:0002606 SNOMED:386761002 ;
        rdfs:subClassOf CHEBI:23367 .

    SNOMED:386761002 rdfs:label "Local anesthesia" ;
        rdfs:subClassOf MONDO:0000001 .

    UNII:46U771ERWK biolink:category biolink:ChemicalSubstance .

    SNOMED:386761002 biolink:category biolink:Disease .
    """

    # dbg
    turtle_dump = self.source.graph.serialize(
        format="turtle").decode("utf-8")
    logger.debug("Reference graph: %s", turtle_dump)

    graphs_match = self.test_util.test_graph_equality(
        triples, self.source.graph)
    self.assertTrue(graphs_match)
def test_gene_to_disease(self):
    """
    Process the 'orph-no-variant.xml' fixture and compare the resulting
    graph against the expected triples.
    """
    orphanet = self.orphanet
    orphanet.graph = RDFGraph()  # Reset graph
    orphanet.files['disease-gene']['file'] = 'orph-no-variant.xml'
    orphanet._process_diseasegene(limit=None)

    turtle_dump = orphanet.graph.serialize(format="turtle").decode("utf-8")
    LOG.debug("Reference graph: %s", turtle_dump)

    expected_triples = """
    MONARCH:bdbeb077e365ddedda20 a OBAN:association ;
        RO:0002558 ECO:0000322 ;
        OBAN:association_has_object ORPHA:938475 ;
        OBAN:association_has_predicate RO:0004015 ;
        OBAN:association_has_subject ORPHA:268061 .

    ORPHA:268061 RO:0004015 ORPHA:938475 ;
        oboInOwl:hasExactSynonym "KAS1" .

    ORPHA:938475 a owl:Class ;
        rdfs:label "too much unit testing disorder" .
    """
    graphs_match = self.test_util.test_graph_equality(
        expected_triples, orphanet.graph)
    self.assertTrue(graphs_match)
def test_patient_phenotype_model(self):
    """
    Functional test for _parse_patient_phenotypes(): feed two mocked
    tab-separated lines and compare the graph with the expected triples.
    """
    udp = UDP('rdf_graph', True)
    udp.graph = RDFGraph(True)

    # the graph must start out empty
    starting_triples = list(udp.graph)
    self.assertTrue(len(starting_triples) == 0)

    mock_lines = [
        "\t".join(('patient_1', 'HP:000001', 'yes')),
        "\t".join(('patient_1', 'HP:000002', 'no')),
    ]
    # a mock whose iteration yields the lines above, wrapped by mock_open
    mock_data = MagicMock()
    mock_data.__iter__.return_value = iter(mock_lines)
    mock_file = mock_open(mock=mock_data)

    udp._parse_patient_phenotypes(mock_file)

    triples = """
    :patient_1 a foaf:Person ;
        rdfs:label "patient_1" ;
        RO:0002200 DOID:4,
            HP:000001 .
    """
    graphs_match = self.test_util.test_graph_equality(triples, udp.graph)
    self.assertTrue(graphs_match)
def testSGDParser(self):
    """
    Build an association from test_set_1 with SGD.make_association() and
    compare the graph with the expected triples, whose description
    literal is filled in from SGD._make_description().
    """
    sgd = SGD('rdf_graph', True)
    sgd.graph = RDFGraph(True)

    record = self.test_set_1
    sgd.make_association(record)
    description = sgd._make_description(record)

    triples_template = """
    :MONARCH_ba748c98c0f167739128 a OBAN:association ;
        OBO:RO_0002558 OBO:APO_0000020 ;
        dcterms:description "{0}";
        dcterms:source PMID:21715656 ;
        OBAN:association_has_object MONARCH:APO_0000309APO_0000245 ;
        OBAN:association_has_predicate OBO:RO_0002200 ;
        OBAN:association_has_subject SGD:S000007268 .

    SGD:S000007268 rdfs:label "ATP6" ;
        RO:0002200 MONARCH:APO_0000309APO_0000245 .

    APO:0000020 rdfs:label "classical genetics" .

    PMID:21715656 a OBO:IAO_0000311 ;
        owl:sameAs SGD_REF:S000145858 .

    MONARCH:APO_0000309APO_0000245 rdfs:label "respiratory growth:decreased rate" ;
        rdfs:subClassOf UPHENO:0001001 .
    """
    triples = triples_template.format(description)

    # test exact contents of graph
    graphs_match = self.test_util.test_graph_equality(triples, sgd.graph)
    self.assertTrue(graphs_match)
def test_evidence_model(self):
    """
    Functional test for _add_evidence(): add evidence built from columns
    23-25 of test_set_1 and compare the graph with the expected triples.
    """
    impc = IMPC('rdf_graph', True)
    impc.graph = RDFGraph(True)  # Reset graph

    # Test graph is empty
    starting_triples = list(impc.graph)
    self.assertTrue(len(starting_triples) == 0)

    p_value, percentage_change, effect_size = self.test_set_1[23:26]

    impc._add_evidence(
        self.assoc_curie, self.eco_id, p_value, percentage_change,
        effect_size, self.study_curie)

    triples = """
    :MONARCH_test_association SEPIO:0000007 <https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> .

    <https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> a ECO:0000015 ;
        SEPIO:0000084 <https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888>,
            <https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> ;
        SEPIO:0000085 <https://monarchinitiative.org/.well-known/genid/study> .

    <https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> a OBI:0000175 ;
        RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
        STATO:0000129 1.637023e-10 .

    <https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888> a STATO:0000085 ;
        RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
        STATO:0000129 "8.885439E-007" .
    """
    graphs_match = self.test_util.test_graph_equality(triples, impc.graph)
    self.assertTrue(graphs_match)
def test_parse(self):
    """
    For every record id in RCVS, run clinvar_parse() on the matching
    '<rcv>.xml.gz' input and compare the N-Triples it writes against the
    '<rcv>.ttl' reference file. A '<rcv>.dot' rendering of the output
    graph is written as a side effect.
    """
    for rcv in RCVS:
        output_nt = rcv + '.nt'
        input_xml = rcv + '.xml.gz'
        reference_ttl = TTL_PATH + rcv + '.ttl'

        with self.subTest(rcv=rcv):
            mock_args = [
                "test_clinvar.py",
                "--inputdir", XML_PATH,
                "--filename", input_xml,
                "--mapfile", MAP_FILE,
                "--destination", NT_PATH,
                "--output", output_nt
            ]

            # Patch sys.argv only for the duration of the parse; the
            # former patch(...).start() was never stopped, so the fake
            # argv leaked into later subtests and tests.
            with patch('sys.argv', mock_args):
                clinvar_parse()

            query_graph = RDFGraph()
            query_graph.bind_all_namespaces()
            query_graph.parse(NT_PATH + output_nt, format='nt')

            with open(reference_ttl, 'r') as ref_fh:
                ref_graph = "\n".join(ref_fh.readlines())

            # debug
            LOG.debug(
                "Reference graph: %s",
                query_graph.serialize(format="turtle").decode("utf-8"))

            # Convert output from ClinVar parse to dot then png
            dot_file_path = DOT_PATH + rcv + ".dot"
            with open(dot_file_path, 'w') as dot_file:
                rdf2dot(query_graph, dot_file)

            self.assertTrue(
                TestUtils.test_graph_equality(ref_graph, query_graph))
def test_graph_equality(turtlish, graph):
    """
    Compare a headless turtle snippet with a graph, triple for triple.

    The snippet is prefixed with an @prefix line for every namespace
    bound by RDFGraph.bind_all_namespaces(), parsed, and both sides are
    reduced to sets of triples. On a mismatch both set differences are
    logged at warning level.

    :param turtlish: String of triples in turtle format without
        prefix header
    :param graph: Graph object to test against
    :return: Boolean, True if graphs contain same set of triples
    """
    turtle_graph = RDFGraph()
    turtle_graph.bind_all_namespaces()
    prefixes = "\n".join(
        "@prefix {}: <{}> .".format(n[0], n[1])
        for n in turtle_graph.namespace_manager.namespaces())

    # Separate header and body explicitly so that a snippet starting
    # directly with a term is not glued onto the last '@prefix ... .'
    turtle_string = prefixes + "\n" + turtlish
    turtle_graph.parse(io.StringIO(turtle_string), format="turtle")

    turtle_triples = set(turtle_graph)
    ref_triples = set(graph)
    equality = turtle_triples == ref_triples
    if not equality:
        LOG.warning(
            "Triples do not match\n"
            "\tLeft hand difference: %s\n"
            "\tRight hand difference: %s",
            sorted(turtle_triples - ref_triples),
            sorted(ref_triples - turtle_triples))
    return equality
def test_assertion_model(self):
    """
    Functional test for _add_assertion_provenance(): add provenance for
    the test association/evidence curies and compare the graph with the
    expected triples.
    """
    impc = IMPC('rdf_graph', True)
    impc.graph = RDFGraph(True)
    starting_triples = list(impc.graph)
    self.assertTrue(len(starting_triples) == 0)

    impc._add_assertion_provenance(self.assoc_curie, self.evidence_curie)

    triples = """
    MONARCH:test_association SEPIO:0000015 <https://monarchinitiative.org/.well-known/genid/bf92df374a884963e805> .

    <https://monarchinitiative.org/.well-known/genid/bf92df374a884963e805> a SEPIO:0000001 ;
        SEPIO:0000018 <https://www.mousephenotype.org/> ;
        SEPIO:0000111 <https://monarchinitiative.org/.well-known/genid/evidence> .

    <https://www.mousephenotype.org/> a foaf:organization ;
        rdfs:label "International Mouse Phenotyping Consortium" .
    """

    # dbg
    turtle_dump = impc.graph.serialize(format="turtle").decode("utf-8")
    LOG.info("Assertion graph:\n %s\n", turtle_dump)

    graphs_match = self.test_util.test_graph_equality(triples, impc.graph)
    self.assertTrue(graphs_match)
def testEnsemblReactomeParser(self):
    """
    Add one ENSEMBL component-to-pathway association built from
    test_set_1 and compare the graph with the expected triples.
    """
    reactome = Reactome('rdf_graph', True)
    reactome.graph = RDFGraph(True)
    starting_triples = list(reactome.graph)
    self.assertTrue(len(starting_triples) == 0)

    eco_map = Reactome.get_eco_map(Reactome.map_files['eco_map'])

    # pathway_iri and species_name are part of the record but not
    # needed by the call under test
    (gene, pathway_id, pathway_iri, pathway_label,
     go_ecode, species_name) = self.test_set_1

    reactome._add_component_pathway_association(
        eco_map, gene, 'ENSEMBL', pathway_id, 'REACT',
        pathway_label, go_ecode)

    triples = """
    ENSEMBL:ENSBTAP00000013354 RO:0002331 REACT:R-BTA-3000480 .

    :MONARCH_b582c188b7ec20016206 a OBAN:association ;
        OBO:RO_0002558 ECO:0000501 ;
        OBAN:association_has_object REACT:R-BTA-3000480 ;
        OBAN:association_has_predicate RO:0002331 ;
        OBAN:association_has_subject ENSEMBL:ENSBTAP00000013354 .

    REACT:R-BTA-3000480 a owl:Class ;
        rdfs:label "Scavenging by Class A Receptors" ;
        rdfs:subClassOf GO:0009987,
            PW:0000001 .
    """
    graphs_match = self.test_util.test_graph_equality(
        triples, reactome.graph)
    self.assertTrue(graphs_match)
def test_germline_lof_variant_to_disease(self):
    """
    Process the 'orph-germline-lof.xml' fixture and compare the
    resulting graph against the expected triples.
    """
    self.orphanet.graph = RDFGraph()  # Reset graph
    self.orphanet.files['disease-gene']['file'] = 'orph-germline-lof.xml'

    self.orphanet._process_diseasegene(limit=None)

    logger.debug(
        "Reference graph: %s",
        self.orphanet.graph.serialize(format="turtle").decode("utf-8"))

    # The association's type statement needs the 'a' (rdf:type)
    # predicate; without it the first statement has a predicate but no
    # object and the whole snippet is invalid turtle.
    expected_triples = """
    MONARCH:b53dada0eb229a75e705 a OBAN:association ;
        RO:0002558 ECO:0000322 ;
        OBAN:association_has_object Orphanet:938475 ;
        OBAN:association_has_predicate RO:0003303 ;
        OBAN:association_has_subject <https://monarchinitiative.org/.well-known/genid/ba0884fb61004110> .

    Orphanet:268061 a owl:Class ;
        rdfs:label "KS1" ;
        dc:description "kinesin family member 7" ;
        oboInOwl:hasExactSynonym "KAS1" ;
        rdfs:subClassOf SO:0001217 .

    <https://monarchinitiative.org/.well-known/genid/ba0884fb61004110> a GENO:0000002 ;
        rdfs:label "germline loss of function variant of KS1" ;
        GENO:0000418 Orphanet:268061 ;
        RO:0003303 Orphanet:938475 ;
        :MONARCH_anonymous true ;
        :has_cell_origin GENO:0000900 ;
        :has_functional_consequence SO:0002054 .

    Orphanet:938475 a owl:Class ;
        rdfs:label "too much unit testing disorder" .
    """

    self.assertTrue(
        self.test_util.test_graph_equality(
            expected_triples, self.orphanet.graph))
def test_germline_lof_variant_to_disease(self):
    """
    Process the 'orph-germline-lof.xml' fixture and compare the
    resulting graph, including its biolink:category statements, against
    the expected triples.
    """
    orphanet = self.orphanet
    orphanet.graph = RDFGraph()  # Reset graph
    orphanet.files['disease-gene']['file'] = 'orph-germline-lof.xml'
    orphanet._process_diseasegene(limit=None)

    turtle_dump = orphanet.graph.serialize(format="turtle").decode("utf-8")
    LOG.debug("Reference graph: %s", turtle_dump)

    expected_triples = """
    MONARCH:b9ad1b0c562ad4db3f1e a OBAN:association ;
        RO:0002558 ECO:0000322 ;
        OBAN:association_has_object ORPHA:938475 ;
        OBAN:association_has_predicate RO:0004012 ;
        OBAN:association_has_subject ORPHA:268061 .

    ORPHA:268061 RO:0004012 ORPHA:938475 ;
        oboInOwl:hasExactSynonym "KAS1" .

    ORPHA:938475 a owl:Class ;
        rdfs:label "too much unit testing disorder" .

    ECO:0000322 biolink:category biolink:EvidenceType .
    ORPHA:268061 biolink:category biolink:Gene .
    ORPHA:268061 biolink:category biolink:Genotype .
    ORPHA:938475 biolink:category biolink:Disease .
    MONARCH:b9ad1b0c562ad4db3f1e biolink:category biolink:Association .
    """
    graphs_match = self.test_util.test_graph_equality(
        expected_triples, orphanet.graph)
    self.assertTrue(graphs_match)
def test_gene_to_disease(self):
    """
    Process the 'orph-no-variant.xml' fixture and compare the resulting
    graph against the expected triples.
    """
    orphanet = self.orphanet
    orphanet.graph = RDFGraph()  # Reset graph
    orphanet.files['disease-gene']['file'] = 'orph-no-variant.xml'
    orphanet._process_diseasegene(limit=None)

    turtle_dump = orphanet.graph.serialize(format="turtle").decode("utf-8")
    logger.debug("Reference graph: %s", turtle_dump)

    expected_triples = """
    MONARCH:b64684a0ea6ae59fdb09 a OBAN:association ;
        RO:0002558 ECO:0000322 ;
        OBAN:association_has_object Orphanet:938475 ;
        OBAN:association_has_predicate RO:0003304 ;
        OBAN:association_has_subject Orphanet:268061 .

    Orphanet:268061 a owl:Class ;
        rdfs:label "KS1" ;
        RO:0003304 Orphanet:938475 ;
        dc:description "kinesin family member 7" ;
        oboInOwl:hasExactSynonym "KAS1" ;
        rdfs:subClassOf SO:0001217 .

    Orphanet:938475 a owl:Class ;
        rdfs:label "too much unit testing disorder" .
    """
    graphs_match = self.test_util.test_graph_equality(
        expected_triples, orphanet.graph)
    self.assertTrue(graphs_match)
def setUp(self):
    """
    Prepare a GWASCatalog source with a fresh RDFGraph(True) that has
    all namespaces bound, plus a single-SNP test record whose trait_uri
    field lists two comma-separated EFO IRIs.
    """
    self.test_util = TestUtils()

    gwas_source = GWASCatalog('rdf_graph', True)
    gwas_source.graph = RDFGraph(True)  # Reset graph
    gwas_source.graph.bind_all_namespaces()
    self.source = gwas_source

    efo_iris = (
        'http://www.ebi.ac.uk/efo/EFO_0006995',
        'http://www.ebi.ac.uk/efo/EFO_0003949',
    )
    self.test_data = dict(
        snp_label='rs1491921-C',
        chrom_num='5',
        chrom_pos='21259029',
        context='intergenic_variant',
        allele_freq='0.013',
        trait='Diisocyanate-induced asthma',
        trait_uri=', '.join(efo_iris),
        pvalue='0.0000007',
        merged='0',
        snp_id_current='1491921',
        mapped_gene='LOC102723561 - GUSBP1',
        snp_gene_nums='',
        upstream_gene_num='107986179',
        downstream_gene_num='107986180',
        init_sample_desc='74 European ancestry cases, '
                         '824 European ancestry controls',
        replicated_sample_desc='NA',
        platform='Illumina [1556551]',
        pubmed='25918132',
    )
def testFakeDataSet1(self):
    """
    Process test_set_1 through StringDB._process_protein_links() using
    the protein-to-gene map fetched from Ensembl for taxon '9606', and
    expect a single gene-to-gene RO:0002434 triple.
    """
    string_db = StringDB('rdf_graph', True)
    string_db.graph = RDFGraph(True)
    self.assertEqual(len(string_db.graph), 0)

    ensembl = Ensembl('rdf_graph', True)
    prot_map = ensembl.fetch_protein_gene_map('9606')
    # Wrap every gene id in a one-element list of ENSEMBL curies.
    # Only values of existing keys are replaced, so updating the dict
    # while iterating over it is safe; a plain loop avoids building a
    # throwaway list of None as the old comprehension did.
    for protein_id, gene_id in prot_map.items():
        prot_map[protein_id] = ['ENSEMBL:' + gene_id]

    print("Finished fetching ENSP IDs, fetched {} proteins".format(
        len(prot_map)))

    # just looking
    # for key in prot_map:
    #     if string_db.graph.curie_regexp.match(prot_map[key]) is None:
    #         print("INVALID curie for %s from %s", prot_map[key], key)

    dataframe = pd.DataFrame(data=self.test_set_1, columns=self.columns)

    string_db._process_protein_links(dataframe, prot_map, '9606')

    # g1 <interacts with> g2
    triples = """
    ENSEMBL:ENSG00000001626 RO:0002434 ENSEMBL:ENSG00000004059 .
    """

    self.assertTrue(
        self.test_util.test_graph_equality(triples, string_db.graph))
def test_snp_trait_association(self):
    """
    Test _add_variant_trait_association(): load the EFO ontology into a
    separate graph, add the association for the test record, then run a
    SPARQL query over the source graph that must bind ?snp to the
    dbSNP:rs1491921 node exactly once.
    :return:
    """
    record = self.test_data

    efo_ontology = RDFGraph()
    logger.info("Loading EFO ontology in separate rdf graph")
    efo_ontology.parse(self.source.files['efo']['url'], format='xml')
    efo_ontology.bind_all_namespaces()
    logger.info("Finished loading EFO ontology")

    variant_curie, variant_type = self.source._get_curie_and_type_from_id(
        record['snp_label'])

    description = self.source._make_description(
        record['trait'],
        record['init_sample_desc'],
        record['replicated_sample_desc'],
        record['platform'],
        record['pvalue'])

    self.source._add_variant_trait_association(
        variant_curie, record['trait_uri'], efo_ontology,
        record['pubmed'], description)

    # doubled braces are literal braces once .format() has run
    query_template = """
    SELECT ?snp
    WHERE {{
        <https://monarchinitiative.org/MONARCH_b46cdf48950cb00d> a OBAN:association ;
            dc:description "{}" ;
            OBO:RO_0002558 OBO:ECO_0000213 ;
            dc:source PMID:25918132 ;
            OBAN:association_has_object EFO:0003949 ;
            OBAN:association_has_predicate OBO:RO_0002326 ;
            OBAN:association_has_subject ?snp .

        <https://monarchinitiative.org/MONARCH_70a05d8eb1c3d4b0> a OBAN:association ;
            OBO:RO_0002558 OBO:ECO_0000213 ;
            dc:source PMID:25918132 ;
            OBAN:association_has_object EFO:0006995 ;
            OBAN:association_has_predicate OBO:RO_0002326 ;
            OBAN:association_has_subject ?snp .

        EFO:0003949 a owl:Class ;
            rdfs:label "eye color"^^xsd:string ;
            rdfs:subClassOf UPHENO:0001001 .

        ?snp OBO:RO_0002326 EFO:0003949,
            EFO:0006995 .

        PMID:25918132 a OBO:IAO_0000013 .
    }}
    """
    sparql_query = query_template.format(description)

    # Test that query passes and returns one row
    results = list(self.source.graph.query(sparql_query))
    snp_node = URIRef(self.source.graph._getNode("dbSNP:rs1491921"))
    expected = [(snp_node, )]
    self.assertEqual(results, expected)
def test_snp_trait_association(self):
    """
    Test _add_variant_trait_association(): starting from an empty source
    graph, load the EFO ontology into a separate graph, add the
    association for the test record and compare the source graph with
    the expected triples.
    :return:
    """
    starting_triples = list(self.source.graph)
    self.assertTrue(len(starting_triples) == 0)

    record = self.test_data

    efo_ontology = RDFGraph()
    LOG.info("Loading EFO ontology in separate rdf graph")
    efo_ontology.parse(self.source.files['efo']['url'], format='xml')
    efo_ontology.bind_all_namespaces()
    LOG.info("Finished loading EFO ontology")

    variant_curie, variant_type = self.source._get_curie_and_type_from_id(
        record['snp_label'])

    description = self.source._make_description(
        record['trait'],
        record['init_sample_desc'],
        record['replicated_sample_desc'],
        record['platform'],
        record['pvalue'])

    self.source._add_variant_trait_association(
        variant_curie, record['trait_uri'], efo_ontology,
        record['pubmed'], description)

    triples_template = """
    MONARCH:bffc7a930c08cc8fe931 a OBAN:association ;
        dc:description "{0}" ;
        OBO:RO_0002558 OBO:ECO_0000213 ;
        dc:source PMID:25918132 ;
        OBAN:association_has_object EFO:0003949 ;
        OBAN:association_has_predicate RO:0003304 ;
        OBAN:association_has_subject dbSNP:rs1491921 .

    MONARCH:bff9b97458d67ed7f517 a OBAN:association ;
        dc:description "{0}" ;
        OBO:RO_0002558 OBO:ECO_0000213 ;
        dc:source PMID:25918132 ;
        OBAN:association_has_object EFO:0006995 ;
        OBAN:association_has_predicate RO:0003304 ;
        OBAN:association_has_subject dbSNP:rs1491921 .

    EFO:0003949 a owl:Class ;
        rdfs:label "eye color"^^xsd:string ;
        rdfs:subClassOf UPHENO:0001001 .

    dbSNP:rs1491921 RO:0003304 EFO:0003949,
        EFO:0006995 .

    PMID:25918132 a OBO:IAO_0000013 .
    """
    triples = triples_template.format(description)

    # dbg
    # LOG.debug(
    #     "Reference graph: %s",
    #     self.source.graph.serialize(format="turtle").decode("utf-8"))

    graphs_match = self.test_util.test_graph_equality(
        triples, self.source.graph)
    self.assertTrue(graphs_match)
def test_provenance_model(self):
    """
    Functional test for _add_study_provenance(): feed it the relevant
    columns of test_set_1 and compare the graph with the expected
    triples.
    """
    impc = IMPC('rdf_graph', True)
    impc.graph = RDFGraph(True)
    starting_triples = list(impc.graph)
    self.assertTrue(len(starting_triples) == 0)

    row = self.test_set_1
    phenotyping_center, colony = row[2:4]
    (project_fullname, pipeline_name, pipeline_stable_id,
     procedure_stable_id, procedure_name, parameter_stable_id,
     parameter_name) = row[12:19]
    statistical_method, resource_name = row[26:28]

    impc._add_study_provenance(
        phenotyping_center, colony, project_fullname, pipeline_name,
        pipeline_stable_id, procedure_stable_id, procedure_name,
        parameter_stable_id, parameter_name, statistical_method,
        resource_name)

    # dbg
    LOG.info(
        "Provenance graph as turtle:\n%s\n",
        impc.graph.serialize(format="turtle").decode("utf-8"))

    triples = """
    <https://monarchinitiative.org/.well-known/genid/bdd05a8ca155ddaf415e> a OBI:0000471 ;
        BFO:0000051 OBO:STATO_0000076,
            <https://www.mousephenotype.org/impress/protocol/175/15> ;
        BFO:0000050 IMPRESS-procedure:15 ,
            <http://www.sanger.ac.uk/science/data/mouse-genomes-project> ;
        SEPIO:0000114 <https://www.mousephenotype.org/impress/parameterontologies/1867/91> ;
        SEPIO:0000017 <http://www.sanger.ac.uk/> .

    <https://monarchinitiative.org/.well-known/genid/b0b26361b8687b5ad9ef> a owl:NamedIndividual ;
        rdfs:label "MEFW" .

    <http://www.sanger.ac.uk/> a foaf:organization ;
        rdfs:label "WTSI" .

    <http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
        rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" .

    <https://www.mousephenotype.org/impress/parameterontologies/1867/91> a owl:NamedIndividual ;
        rdfs:label "Number of ribs right (X-ray)" .

    IMPRESS-procedure:15 a owl:NamedIndividual ;
        rdfs:label "MGP Select Pipeline" .

    <https://www.mousephenotype.org/impress/protocol/175/15> a owl:NamedIndividual ;
        rdfs:label "X-ray" .
    """

    # dbg
    LOG.debug(
        "Reference graph: %s",
        impc.graph.serialize(format="turtle").decode("utf-8"))

    graphs_match = self.test_util.test_graph_equality(triples, impc.graph)
    self.assertTrue(graphs_match)
def setUp(self):
    """
    Prepare a CTD source with a fresh RDFGraph(True) and one ten-column
    test row.
    """
    self.test_util = TestUtils()

    ctd_source = CTD('rdf_graph', True)
    ctd_source.graph = RDFGraph(True)
    self.source = ctd_source

    self.test_row = [
        'Nicotine',
        'D009538',
        '',
        'TOBACCO ADDICTION, SUSCEPTIBILITY TO',
        'OMIM:188890',
        'therapeutic',
        '',
        '',
        '',
        '12345|56789',
    ]
def test_variant_model(self):
    """
    Functional test for _parse_patient_variants(): feed one mocked
    tab-separated variant line and compare the graph with the expected
    triples.
    """
    udp = UDP('rdf_graph', True)
    udp.graph = RDFGraph(True)

    # the graph must start out empty
    starting_triples = list(udp.graph)
    self.assertTrue(len(starting_triples) == 0)

    data = [
        'patient_1', 'family_1', '1', 'HG19', '155230432', 'G', 'A',
        'Maternal', 'Biallelic', 'Non-synonymous;DOWNSTREAM', 'CLK2',
        '', '', '', '', '', '', '',
        'Compound heterozygous', 'Heterozygous', '', '0.002747253', '']

    # a mock whose iteration yields the single joined line, wrapped by
    # mock_open
    mock_lines = ["\t".join(data)]
    mock_data = MagicMock()
    mock_data.__iter__.return_value = iter(mock_lines)
    mock_file = mock_open(mock=mock_data)

    udp._parse_patient_variants(mock_file)

    triples = """
    :patient_1 GENO:0000222 <https://monarchinitiative.org/.well-known/genid/ba5f377fc8c95d4a6d7a> .

    <https://monarchinitiative.org/.well-known/genid/b41e8da0787b45e24c4f> a SO:0001059 ;
        rdfs:label "hg19chr1(CLK2):g.155230432G>A" ;
        GENO:0000418 HGNC:2069 ;
        RO:0002162 NCBITaxon:9606 ;
        owl:sameAs dbSNP:rs11557757 .

    <https://monarchinitiative.org/.well-known/genid/ba5f377fc8c95d4a6d7a> a GENO:0000000 ;
        rdfs:label "patient_1 genotype" ;
        GENO:0000382 <https://monarchinitiative.org/.well-known/genid/b41e8da0787b45e24c4f> .
    """
    graphs_match = self.test_util.test_graph_equality(triples, udp.graph)
    self.assertTrue(graphs_match)
def test_unmapped_disease_assoc_type(self):
    """
    Test that a gene disease type that we have not mapped in
    translationtable/orphanet.yaml raises a ValueError
    """
    orphanet = self.orphanet
    orphanet.graph = RDFGraph()  # Reset graph
    orphanet.files['disease-gene']['file'] = 'orph-no-mapping.xml'

    with self.assertRaises(ValueError):
        orphanet._process_diseasegene(limit=None)
def setUp(self):
    """
    Create an empty graph with a Genotype model on top of it and resolve
    the IRIs of the biolink category predicate and of the Genotype and
    PopulationOfIndividualOrganisms categories.
    """
    self.graph = RDFGraph()
    self.curie_map = curie_map.get()
    self.genotype = Genotype(self.graph)
    self.cutil = CurieUtil(self.curie_map)

    to_uri = self.cutil.get_uri
    self.test_cat_pred = to_uri(blv.terms['category'])
    self.test_cat_genotype_category = to_uri(blv.terms['Genotype'])
    self.test_cat_background_category = to_uri(
        blv.terms['PopulationOfIndividualOrganisms'])
def testFakeDataSet2(self):
    """
    Dataset contains a deprecated protein ID that we expect
    is filtered out by ensembl biomart
    We test that this returns an empty graph
    :return:
    """
    string_db = StringDB('rdf_graph', True)
    string_db.graph = RDFGraph()
    self.assertEqual(len(string_db.graph), 0)

    links = pd.DataFrame(data=self.test_set_2, columns=self.columns)
    string_db._process_protein_links(links, self.protein_list, 9606)

    # nothing may have been added for the deprecated protein
    self.assertEqual(len(string_db.graph), 0)
def test_sex_specificity_model(self):
    """
    Run MGI._process_evidence_view() on a fresh graph and compare the
    result with the expected triples.
    """
    mgi = self.mgi
    mgi.graph = RDFGraph(True)  # Reset graph
    mgi._process_evidence_view(limit=None)

    turtle_dump = mgi.graph.serialize(format="turtle").decode("utf-8")
    logger.debug("Reference graph: %s", turtle_dump)

    expected_triples = """
    :association RO:0002558 ECO:0000006 ;
        dc:source J:74619 ;
        :has_sex_specificity PATO:0000384 .

    J:74619 a IAO:0000310 .
    """
    graphs_match = self.test_util.test_graph_equality(
        expected_triples, mgi.graph)
    self.assertTrue(graphs_match)
def read_graph_from_turtle_file(self, f):
    """
    Parse the given turtle file into a fresh RDFGraph and assert that
    the graph is not empty. A simple parsing test.
    :param f: path of the turtle file to read
    :return:
    """
    absolute_path = os.path.abspath(f)
    logger.info("Testing reading turtle file from %s", absolute_path)

    parsed_graph = RDFGraph()
    parsed_graph.parse(f, format="turtle")

    logger.info(
        'Found %s graph nodes in %s', len(parsed_graph), absolute_path)
    self.assertTrue(
        len(parsed_graph) > 0, "No nodes found in " + absolute_path)
def setUp(self):
    """
    Create an empty graph and a CurieUtil, then resolve the IRIs the
    category tests use as subject, predicates, categories, type and
    class.
    """
    self.graph = RDFGraph()
    self.cutil = CurieUtil(curie_map.get())
    to_uri = self.cutil.get_uri

    # stuff to make test triples
    self.test_cat_subj = "http://www.google.com"
    self.test_cat_default_pred = to_uri("biolink:category")
    self.test_cat_nondefault_pred = to_uri("rdf:type")
    self.test_cat_default_category = to_uri("biolink:NamedThing")
    self.test_cat_nondefault_category = to_uri("biolink:Gene")
    self.test_cat_type = to_uri("rdf:type")
    self.test_cat_class = to_uri("rdf:class")
def setUp(self):
    """
    Create a Model over an empty graph and a CurieUtil, then prepare
    the subject, predicates, label and comment values the model tests
    use.
    """
    self.model = Model(RDFGraph())
    self.cutil = CurieUtil(curie_map.get())
    to_uri = self.cutil.get_uri

    # stuff to make test triples
    self.test_cat_subj_curie = "MGI:1234"
    self.test_cat_subj = to_uri(self.test_cat_subj_curie)
    self.test_cat_default_pred = to_uri("biolink:category")
    self.test_named_indiv = to_uri("owl:NamedIndividual")
    self.test_label_pred = to_uri("rdfs:label")
    self.test_label = "some label"
    self.test_comment_IRI = to_uri("rdfs:comment")
    self.test_comment = 'bonus eruptus'
def test_graph_equality(turtlish, graph):
    """
    Compare headless turtle (given inline or as a file) with a graph,
    triple for triple.

    The turtle is prefixed with an @prefix line for every namespace
    bound by RDFGraph.bind_all_namespaces() and parsed.
    TestUtils.remove_ontology_axioms() is applied to ``graph`` itself
    before both sides are reduced to sets of triples. On a mismatch
    both set differences are logged at warning level.

    :param turtlish: file path or string of triples in turtle format
        without prefix header
    :param graph: Graph object to test against
    :return: Boolean, True if graphs contain same set of triples
    :raises ValueError: if turtlish is neither an existing file path
        nor a string
    """
    turtle_graph = RDFGraph()
    turtle_graph.bind_all_namespaces()
    prefixes = "\n".join(
        "@prefix {}: <{}> .".format(n[0], n[1])
        for n in turtle_graph.namespace_manager.namespaces())

    # First try to interpret the argument as a path. Long turtle
    # strings (OSError), strings with NUL bytes (ValueError on some
    # Python versions) and non path-like objects (TypeError) are simply
    # "not a file" rather than errors.
    try:
        ttl_path = Path(turtlish)
        is_existing = ttl_path.exists()
    except (OSError, TypeError, ValueError):
        ttl_path, is_existing = None, False

    headless_ttl = None
    if is_existing:
        try:
            headless_ttl = ttl_path.read_text()
        except OSError:
            # e.g. the string names a directory; fall back to treating
            # it as inline turtle below
            headless_ttl = None

    if headless_ttl is None:
        if not isinstance(turtlish, str):
            raise ValueError("turtlish must be filepath or string")
        headless_ttl = turtlish

    # Separate header and body explicitly so that content starting
    # directly with a term is not glued onto the last '@prefix ... .'
    turtle_string = prefixes + "\n" + headless_ttl
    turtle_graph.parse(io.StringIO(turtle_string), format="turtle")

    TestUtils.remove_ontology_axioms(graph)

    turtle_triples = set(turtle_graph)
    ref_triples = set(graph)
    equality = turtle_triples == ref_triples
    if not equality:
        LOG.warning(
            "Triples do not match\n"
            "\tLeft hand difference: %s\n"
            "\tRight hand difference: %s",
            sorted(turtle_triples - ref_triples),
            sorted(ref_triples - turtle_triples))
    return equality