def test_assertion_model(self):
    """
    Functional test for _add_assertion_provenance()

    Starts from a fresh, empty graph, adds the assertion provenance for
    the test association and evidence curies, and checks the result
    against a reference turtle document with
    test_util.test_graph_equality().
    """
    impc = IMPC('rdf_graph', True)
    impc.graph = RDFGraph(True)

    # assertEqual (rather than assertTrue on a comparison) reports the
    # actual number of triples if the graph is unexpectedly not empty
    self.assertEqual(len(list(impc.graph)), 0)

    impc._add_assertion_provenance(self.assoc_curie, self.evidence_curie)

    # Reference turtle; the fragments concatenate to a single string
    triples = (
        ' MONARCH:test_association SEPIO:0000015 '
        '<https://monarchinitiative.org/.well-known/genid/bf92df374a884963e805> . '
        '<https://monarchinitiative.org/.well-known/genid/bf92df374a884963e805> '
        'a SEPIO:0000001 ; '
        'SEPIO:0000018 <https://www.mousephenotype.org/> ; '
        'SEPIO:0000111 <https://monarchinitiative.org/.well-known/genid/evidence> . '
        '<https://www.mousephenotype.org/> a foaf:organization ; '
        'rdfs:label "International Mouse Phenotyping Consortium" . '
    )

    # dbg
    logger.info(
        "Assertion graph:\n %s\n",
        impc.graph.serialize(format="turtle").decode("utf-8"))

    self.assertTrue(self.test_util.test_graph_equality(triples, impc.graph))
def test_assertion_model(self):
    """
    Functional test for _add_assertion_provenance()

    Adds assertion provenance for the test association/evidence curies
    to an empty graph and compares the graph with the expected turtle
    using test_util.test_graph_equality().
    """
    impc = IMPC('rdf_graph', True)
    impc.graph = RDFGraph(True)

    # assertEqual shows the actual triple count on failure, which
    # assertTrue(... == 0) would hide
    self.assertEqual(len(list(impc.graph)), 0)

    impc._add_assertion_provenance(self.assoc_curie, self.evidence_curie)

    # Expected turtle, split into adjacent literals for readability
    triples = (
        ' MONARCH:test_association SEPIO:0000015 '
        '<https://monarchinitiative.org/.well-known/genid/bf92df374a884963e805> . '
        '<https://monarchinitiative.org/.well-known/genid/bf92df374a884963e805> '
        'a SEPIO:0000001 ; '
        'SEPIO:0000018 <https://www.mousephenotype.org/> ; '
        'SEPIO:0000111 <https://monarchinitiative.org/.well-known/genid/evidence> . '
        '<https://www.mousephenotype.org/> a foaf:organization ; '
        'rdfs:label "International Mouse Phenotyping Consortium" . '
    )

    # dbg
    LOG.info(
        "Assertion graph:\n %s\n",
        impc.graph.serialize(format="turtle").decode("utf-8"))

    self.assertTrue(self.test_util.test_graph_equality(triples, impc.graph))
def test_assertion_model(self):
    """
    Functional test for _add_assertion_provenance()

    Adds assertion provenance for the test association/evidence curies
    (using the parsed impc_map) and checks that a SPARQL query describing
    the expected assertion pattern returns exactly one row.
    """
    impc = IMPC('rdf_graph', True)
    impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

    impc._add_assertion_provenance(
        self.assoc_curie, self.evidence_curie, impc_map)

    # Query text is assembled from adjacent literals into one string
    sparql_query = (
        ' SELECT * WHERE { '
        'MONARCH:test_association OBO:SEPIO_0000015 ?assertion. '
        '?assertion a OBO:SEPIO_0000001 ; '
        'OBO:SEPIO_0000018 <http://www.mousephenotype.org/> ; '
        'OBO:SEPIO_0000111 <https://monarchinitiative.org/.well-known/genid/evidence> . '
        '<http://www.mousephenotype.org/> a foaf:organization ; '
        'rdfs:label "International Mouse Phenotyping Consortium" . '
        '} '
    )

    # Test that query passes and returns one row
    matched_rows = list(impc.graph.query(sparql_query))
    self.assertEqual(len(matched_rows), 1)
def test_evidence_model(self):
    """
    Functional test for _add_evidence()

    Builds the evidence line for the test association on a fresh, empty
    graph, using the p-value, percentage change and effect size taken
    from columns 23-25 of test_set_1, then checks the graph against a
    reference turtle document with test_util.test_graph_equality().
    """
    impc = IMPC('rdf_graph', True)
    impc.graph = RDFGraph(True)  # Reset graph

    # Test graph is empty; assertEqual reports the actual triple count
    # on failure, which assertTrue(... == 0) would hide
    self.assertEqual(len(list(impc.graph)), 0)

    (p_value, percentage_change, effect_size) = self.test_set_1[23:26]

    impc._add_evidence(
        self.assoc_curie, self.eco_id, p_value, percentage_change,
        effect_size, self.study_curie)

    # Reference turtle; the fragments concatenate to a single string
    triples = (
        ' :MONARCH_test_association SEPIO:0000007 '
        '<https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> . '
        '<https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> '
        'a ECO:0000015 ; '
        'SEPIO:0000084 '
        '<https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888>, '
        '<https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> ; '
        'SEPIO:0000085 <https://monarchinitiative.org/.well-known/genid/study> . '
        '<https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> '
        'a OBI:0000175 ; '
        'RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ; '
        'STATO:0000129 1.637023e-10 . '
        '<https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888> '
        'a STATO:0000085 ; '
        'RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ; '
        'STATO:0000129 "8.885439E-007" . '
    )

    self.assertTrue(self.test_util.test_graph_equality(triples, impc.graph))
def test_evidence_model(self):
    """
    Functional test for _add_evidence()

    Adds the evidence line for the test association (statistics come
    from columns 23-25 of test_set_1) and checks that the SPARQL pattern
    for the expected evidence structure selects exactly the test
    association iri.
    """
    impc = IMPC('rdf_graph', False)
    impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

    (p_value, percentage_change, effect_size) = self.test_set_1[23:26]

    impc._add_evidence(
        self.assoc_curie, self.eco_id, impc_map, p_value,
        percentage_change, effect_size, self.study_curie)

    # Query text is assembled from adjacent literals into one string
    sparql_query = (
        ' SELECT ?assoc WHERE { '
        '?assoc OBO:SEPIO_0000007 ?evidenceline . '
        '?evidenceline a OBO:ECO_0000015 ; '
        'OBO:SEPIO_0000084 ?measure1 ; '
        'OBO:SEPIO_0000084 ?measure2 ; '
        'OBO:SEPIO_0000085 _:study . '
        '?measure1 a OBO:OBI_0000175 ; '
        'OBO:RO_0002353 _:study ; '
        'OBO:STATO_0000129 1.637023e-10 . '
        '?measure2 a OBO:STATO_0000085 ; '
        'OBO:RO_0002353 _:study ; '
        'OBO:STATO_0000129 "8.885439E-007" . '
        '} '
    )

    actual_rows = list(impc.graph.query(sparql_query))
    self.assertEqual(actual_rows, [(self.assoc_iri,)])
def test_evidence_model(self):
    """
    Functional test for _add_evidence()

    Builds the evidence line for the test association on a fresh, empty
    graph (statistics come from columns 23-25 of test_set_1, the curie
    mapping from the parsed impc_map) and checks the graph against a
    reference turtle document with test_util.test_graph_equality().
    """
    impc = IMPC('rdf_graph', True)
    impc.graph = RDFGraph(True)  # Reset graph

    # Test graph is empty; assertEqual reports the actual triple count
    # on failure, which assertTrue(... == 0) would hide
    self.assertEqual(len(list(impc.graph)), 0)

    impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])
    (p_value, percentage_change, effect_size) = self.test_set_1[23:26]

    impc._add_evidence(
        self.assoc_curie, self.eco_id, impc_map, p_value,
        percentage_change, effect_size, self.study_curie)

    # Reference turtle; the fragments concatenate to a single string
    triples = (
        ' :MONARCH_test_association SEPIO:0000007 '
        '<https://monarchinitiative.org/.well-known/genid/b097a98087df7a99> . '
        '<https://monarchinitiative.org/.well-known/genid/b097a98087df7a99> '
        'a ECO:0000015 ; '
        'SEPIO:0000084 '
        '<https://monarchinitiative.org/.well-known/genid/b89ee584330837c9>, '
        '<https://monarchinitiative.org/.well-known/genid/bc0eeccdea27a1d8> ; '
        'SEPIO:0000085 <https://monarchinitiative.org/.well-known/genid/study> . '
        '<https://monarchinitiative.org/.well-known/genid/bc0eeccdea27a1d8> '
        'a OBI:0000175 ; '
        'RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ; '
        'STATO:0000129 1.637023e-10 . '
        '<https://monarchinitiative.org/.well-known/genid/b89ee584330837c9> '
        'a STATO:0000085 ; '
        'RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ; '
        'STATO:0000129 "8.885439E-007" . '
    )

    self.assertTrue(self.test_util.test_graph_equality(triples, impc.graph))
def test_assertion_model(self):
    """
    Functional test for _add_assertion_provenance()

    Starts from a fresh, empty graph, adds the assertion provenance for
    the test association and evidence curies (using the parsed
    impc_map), and checks the result against a reference turtle document
    with test_util.test_graph_equality().
    """
    impc = IMPC('rdf_graph', True)
    impc.graph = RDFGraph(True)

    # assertEqual (rather than assertTrue on a comparison) reports the
    # actual number of triples if the graph is unexpectedly not empty
    self.assertEqual(len(list(impc.graph)), 0)

    impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])
    impc._add_assertion_provenance(
        self.assoc_curie, self.evidence_curie, impc_map)

    # Reference turtle; the fragments concatenate to a single string
    triples = (
        ' MONARCH:test_association SEPIO:0000015 '
        '<https://monarchinitiative.org/.well-known/genid/bcb2c00a5c2f9c43> . '
        '<https://monarchinitiative.org/.well-known/genid/bcb2c00a5c2f9c43> '
        'a SEPIO:0000001 ; '
        'SEPIO:0000018 <http://www.mousephenotype.org/> ; '
        'SEPIO:0000111 <https://monarchinitiative.org/.well-known/genid/evidence> . '
        '<http://www.mousephenotype.org/> a foaf:organization ; '
        'rdfs:label "International Mouse Phenotyping Consortium" . '
    )

    # dbg: note this serializes the graph under test, not `triples`
    logger.debug(
        "Reference graph: %s",
        impc.graph.serialize(format="turtle").decode("utf-8"))

    self.assertTrue(self.test_util.test_graph_equality(triples, impc.graph))
def test_evidence_model(self):
    """
    Functional test for _add_evidence()

    Feeds the statistics from columns 23-25 of test_set_1 into
    _add_evidence() and verifies, through a SPARQL query over the
    resulting graph, that the only matching association is assoc_iri.
    """
    impc = IMPC('rdf_graph', False)
    impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

    (p_value, percentage_change, effect_size) = self.test_set_1[23:26]
    impc._add_evidence(
        self.assoc_curie,
        self.eco_id,
        impc_map,
        p_value,
        percentage_change,
        effect_size,
        self.study_curie)

    # Evidence line with two measures, all tied to the same study node
    sparql_query = (
        ' SELECT ?assoc WHERE { '
        '?assoc OBO:SEPIO_0000007 ?evidenceline . '
        '?evidenceline a OBO:ECO_0000015 ; '
        'OBO:SEPIO_0000084 ?measure1 ; '
        'OBO:SEPIO_0000084 ?measure2 ; '
        'OBO:SEPIO_0000085 _:study . '
        '?measure1 a OBO:OBI_0000175 ; '
        'OBO:RO_0002353 _:study ; '
        'OBO:STATO_0000129 1.637023e-10 . '
        '?measure2 a OBO:STATO_0000085 ; '
        'OBO:RO_0002353 _:study ; '
        'OBO:STATO_0000129 "8.885439E-007" . '
        '} '
    )

    query_result = impc.graph.query(sparql_query)
    wanted = [(self.assoc_iri,)]
    self.assertEqual(list(query_result), wanted)
def test_provenance_model(self):
    """
    Functional test for _add_study_provenance()

    Starts from a fresh, empty graph, adds the study provenance built
    from selected columns of test_set_1, and checks the result against
    a reference turtle document with test_util.test_graph_equality().
    """
    impc = IMPC('rdf_graph', True)
    impc.graph = RDFGraph(True)

    # assertEqual (rather than assertTrue on a comparison) reports the
    # actual number of triples if the graph is unexpectedly not empty
    self.assertEqual(len(list(impc.graph)), 0)

    # Column slices of the test row; each unpack also checks the width
    (phenotyping_center, colony) = self.test_set_1[2:4]
    (project_fullname, pipeline_name, pipeline_stable_id,
     procedure_stable_id, procedure_name, parameter_stable_id,
     parameter_name) = self.test_set_1[12:19]
    (statistical_method, resource_name) = self.test_set_1[26:28]

    impc._add_study_provenance(
        phenotyping_center, colony, project_fullname, pipeline_name,
        pipeline_stable_id, procedure_stable_id, procedure_name,
        parameter_stable_id, parameter_name, statistical_method,
        resource_name)

    # dbg
    LOG.info(
        "Provenance graph as turtle:\n%s\n",
        impc.graph.serialize(format="turtle").decode("utf-8"))

    # Reference turtle; the fragments concatenate to a single string
    triples = (
        ' <https://monarchinitiative.org/.well-known/genid/bdd05a8ca155ddaf415e> '
        'a OBI:0000471 ; '
        'BFO:0000051 OBO:STATO_0000076, '
        '<https://www.mousephenotype.org/impress/protocol/175/15> ; '
        'BFO:0000050 IMPRESS-procedure:15 , '
        '<http://www.sanger.ac.uk/science/data/mouse-genomes-project> ; '
        'SEPIO:0000114 '
        '<https://www.mousephenotype.org/impress/parameterontologies/1867/91> ; '
        'SEPIO:0000017 <http://www.sanger.ac.uk/> . '
        '<https://monarchinitiative.org/.well-known/genid/b0b26361b8687b5ad9ef> '
        'a owl:NamedIndividual ; '
        'rdfs:label "MEFW" . '
        '<http://www.sanger.ac.uk/> a foaf:organization ; '
        'rdfs:label "WTSI" . '
        '<http://www.sanger.ac.uk/science/data/mouse-genomes-project> '
        'a VIVO:Project ; '
        'rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" . '
        '<https://www.mousephenotype.org/impress/parameterontologies/1867/91> '
        'a owl:NamedIndividual ; '
        'rdfs:label "Number of ribs right (X-ray)" . '
        'IMPRESS-procedure:15 a owl:NamedIndividual ; '
        'rdfs:label "MGP Select Pipeline" . '
        '<https://www.mousephenotype.org/impress/protocol/175/15> '
        'a owl:NamedIndividual ; '
        'rdfs:label "X-ray" . \n'
    )

    # dbg: note this serializes the graph under test, not `triples`
    LOG.debug(
        "Reference graph: %s",
        impc.graph.serialize(format="turtle").decode("utf-8"))

    self.assertTrue(self.test_util.test_graph_equality(triples, impc.graph))
class IMPCTestCase(SourceTestCase):
    """Source test case that runs against an IMPC source in test-only mode."""

    def setUp(self):
        """Create the IMPC source under test and point the case at it."""
        impc_source = IMPC('rdf_graph', True)  # Skolem Yes
        impc_source.settestonly(True)
        self.source = impc_source
        self._setDirToSource()

    def tearDown(self):
        """Drop the reference to the source created in setUp()."""
        self.source = None
class IMPCTestCase(SourceTestCase):
    """Source test case that runs against an IMPC source in test-only mode."""

    def setUp(self):
        """Create the IMPC source under test and point the case at it."""
        impc_source = IMPC('rdf_graph', True)
        impc_source.settestonly(True)
        self.source = impc_source
        self._setDirToSource()

    def tearDown(self):
        """Drop the reference to the source created in setUp()."""
        self.source = None
def test_random_data_set(self):
    """
    Download dataset using fetch(), then take a row of data and
    run through evidence and provenance functions to test the output

    Line of data is hardcoded, but theoretically should work on any line.
    The selected row is also stored on self.test_set_1.  The test fails
    explicitly when the downloaded file has fewer rows than the
    requested line, instead of continuing with an unrelated row.
    """
    line_to_test = 1129

    # init impc (make this a function?)
    impc = IMPC('rdf_graph', True)
    impress_map = json.loads(
        impc.fetch_from_url(
            impc.map_files['impress_map']).read().decode('utf-8'))
    impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

    # fetch file
    impc.fetch(True)
    file_path = '/'.join((impc.rawdir, impc.files['all']['file']))

    # Stream the gzipped csv and stop as soon as the wanted row is read
    row = None
    with gzip.open(file_path, 'rt') as csvfile:
        filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
        for count, candidate in enumerate(filereader, start=1):
            if count == line_to_test:
                row = candidate
                break

    if row is None:
        self.fail(
            "line %d not found in %s" % (line_to_test, file_path))
    self.test_set_1 = row

    # Some DRY violation with the above tests
    (phenotyping_center, colony) = row[2:4]
    (project_fullname, pipeline_name, pipeline_stable_id,
     procedure_stable_id, procedure_name, parameter_stable_id,
     parameter_name) = row[12:19]
    (statistical_method, resource_name) = row[26:28]
    (p_value, percentage_change, effect_size) = row[23:26]

    impc._add_evidence(
        self.assoc_curie, self.eco_id, impc_map, p_value,
        percentage_change, effect_size, self.study_curie)

    impc._add_study_provenance(
        impc_map, impress_map, phenotyping_center, colony,
        project_fullname, pipeline_name, pipeline_stable_id,
        procedure_stable_id, procedure_name, parameter_stable_id,
        parameter_name, statistical_method, resource_name)

    # Note that this doesn't test much since we're dealing with
    # multiple part_of and has_part links to individuals
    # which results in ambiguity = hard to test
    sparql_query = (
        ' SELECT * WHERE { '
        '?assoc OBO:SEPIO_0000007 ?evidenceline . '
        '?evidenceline a OBO:ECO_0000015 ; '
        'OBO:SEPIO_0000085 <https://monarchinitiative.org/.well-known/genid/study> . '
        '?study a OBO:OBI_0000471 ; '
        'OBO:SEPIO_0000114 ?param ; '
        'OBO:SEPIO_0000017 ?agent . \n'
        '} '
    )

    sparql_output = impc.graph.query(sparql_query)

    # Test that query passes and returns one row
    self.assertEqual(len(list(sparql_output)), 1)
def test_provenance_model(self):
    """
    Functional test for _add_study_provenance()

    Starts from a fresh, empty graph, adds the study provenance built
    from selected columns of test_set_1 (the short project name is
    passed; the full name in the same slice is unpacked but not used),
    and checks the result against a reference turtle document with
    test_util.test_graph_equality().
    """
    impc = IMPC('rdf_graph', True)
    impc.graph = RDFGraph(True)

    # assertEqual (rather than assertTrue on a comparison) reports the
    # actual number of triples if the graph is unexpectedly not empty
    self.assertEqual(len(list(impc.graph)), 0)

    # Column slices of the test row; each unpack also checks the width
    (phenotyping_center, colony) = self.test_set_1[2:4]
    (project_name, project_fullname, pipeline_name, pipeline_stable_id,
     procedure_stable_id, procedure_name, parameter_stable_id,
     parameter_name) = self.test_set_1[11:19]
    (statistical_method, resource_name) = self.test_set_1[26:28]

    impc._add_study_provenance(
        phenotyping_center, colony, project_name, pipeline_name,
        pipeline_stable_id, procedure_stable_id, procedure_name,
        parameter_stable_id, parameter_name, statistical_method,
        resource_name)

    # dbg
    LOG.info(
        "Provenance graph as turtle:\n%s\n",
        impc.graph.serialize(format="turtle").decode("utf-8"))

    # Reference turtle; the fragments concatenate to a single string
    triples = (
        ' <https://monarchinitiative.org/.well-known/genid/b0b26361b8687b5ad9ef> '
        'a owl:NamedIndividual ; '
        'rdfs:label "MEFW" . '
        '<https://monarchinitiative.org/.well-known/genid/b6f14f763c8d0629360e> '
        'a OBI:0000471 ; '
        'BFO:0000050 '
        '<http://www.sanger.ac.uk/science/data/mouse-genomes-project>, '
        'IMPC-pipe:MGP_001 ; '
        'BFO:0000051 STATO:0000076, '
        'IMPC-proc:MGP_XRY_001 ; '
        'SEPIO:0000017 <http://www.sanger.ac.uk/> ; '
        'SEPIO:0000114 '
        '<https://www.mousephenotype.org/impress/OntologyInfo?action=list&procID=MGP_XRY_001#IMPC_XRY_008_001> . '
        '<http://www.sanger.ac.uk/> a foaf:organization ; '
        'rdfs:label "WTSI" . '
        '<http://www.sanger.ac.uk/science/data/mouse-genomes-project> '
        'a VIVO:Project ; '
        'rdfs:label "MGP" . '
        '<https://www.mousephenotype.org/impress/OntologyInfo?action=list&procID=MGP_XRY_001#IMPC_XRY_008_001> '
        'a owl:NamedIndividual ; '
        'rdfs:label "Number of ribs right (X-ray)" . '
        'IMPC-pipe:MGP_001 a owl:NamedIndividual ; '
        'rdfs:label "MGP Select Pipeline" . '
        'IMPC-proc:MGP_XRY_001 a owl:NamedIndividual ; '
        'rdfs:label "X-ray" . '
    )

    # dbg: note this serializes the graph under test, not `triples`
    LOG.info(
        "Reference graph: %s",
        impc.graph.serialize(format="turtle").decode("utf-8"))

    self.assertTrue(self.test_util.test_graph_equality(triples, impc.graph))
def test_provenance_model(self):
    """
    Functional test for _add_study_provenance()

    Starts from a fresh, empty graph, adds the study provenance built
    from selected columns of test_set_1 (with 0 as the trailing
    argument), and checks the result against a reference turtle document
    with test_util.test_graph_equality().
    """
    impc = IMPC('rdf_graph', True)
    impc.graph = RDFGraph(True)

    # assertEqual (rather than assertTrue on a comparison) reports the
    # actual number of triples if the graph is unexpectedly not empty
    self.assertEqual(len(list(impc.graph)), 0)

    # Column slices of the test row; each unpack also checks the width
    (phenotyping_center, colony) = self.test_set_1[2:4]
    (project_fullname, pipeline_name, pipeline_stable_id,
     procedure_stable_id, procedure_name, parameter_stable_id,
     parameter_name) = self.test_set_1[12:19]
    (statistical_method, resource_name) = self.test_set_1[26:28]

    impc._add_study_provenance(
        phenotyping_center, colony, project_fullname, pipeline_name,
        pipeline_stable_id, procedure_stable_id, procedure_name,
        parameter_stable_id, parameter_name, statistical_method,
        resource_name, 0)

    # dbg
    logger.info(
        "Provenance graph as turtle:\n%s\n",
        impc.graph.serialize(format="turtle").decode("utf-8"))

    # Reference turtle; the fragments concatenate to a single string
    triples = (
        ' <https://monarchinitiative.org/.well-known/genid/bdd05a8ca155ddaf415e> '
        'a OBI:0000471 ; '
        'BFO:0000051 OBO:STATO_0000076, '
        '<https://www.mousephenotype.org/impress/protocol/175/15> ; '
        'BFO:0000050 IMPRESS-procedure:15 , '
        '<http://www.sanger.ac.uk/science/data/mouse-genomes-project> ; '
        'SEPIO:0000114 '
        '<https://www.mousephenotype.org/impress/parameterontologies/1867/91> ; '
        'SEPIO:0000017 <http://www.sanger.ac.uk/> . '
        '<https://monarchinitiative.org/.well-known/genid/b0b26361b8687b5ad9ef> '
        'a owl:NamedIndividual ; '
        'rdfs:label "MEFW" . '
        '<http://www.sanger.ac.uk/> a foaf:organization ; '
        'rdfs:label "WTSI" . '
        '<http://www.sanger.ac.uk/science/data/mouse-genomes-project> '
        'a VIVO:Project ; '
        'rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" . '
        '<https://www.mousephenotype.org/impress/parameterontologies/1867/91> '
        'a owl:NamedIndividual ; '
        'rdfs:label "Number of ribs right (X-ray)" . '
        'IMPRESS-procedure:15 a owl:NamedIndividual ; '
        'rdfs:label "MGP Select Pipeline" . '
        '<https://www.mousephenotype.org/impress/protocol/175/15> '
        'a owl:NamedIndividual ; '
        'rdfs:label "X-ray" . \n'
    )

    # dbg: note this serializes the graph under test, not `triples`
    logger.debug(
        "Reference graph: %s",
        impc.graph.serialize(format="turtle").decode("utf-8"))

    self.assertTrue(self.test_util.test_graph_equality(triples, impc.graph))
def setUp(self):
    """Create the IMPC source in test-only mode and point the case at it."""
    impc_source = IMPC('rdf_graph', True)
    impc_source.settestonly(True)
    self.source = impc_source
    self._setDirToSource()
def test_random_data_set(self):
    """
    Download dataset using fetch(), then take a row of data and
    run through evidence and provenance functions to test the output

    Line of data is hardcoded, but theoretically should work on any line.
    The chosen row is kept on self.test_set_1.  If the downloaded file
    does not reach the requested line the test fails with a clear
    message rather than silently using whatever row was read last.
    """
    line_to_test = 1129

    # init impc (make this a function?)
    impc = IMPC('rdf_graph', True)
    impress_map = json.loads(
        impc.fetch_from_url(impc.map_files['impress_map']).read().decode('utf-8'))
    impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

    # fetch file
    impc.fetch(True)
    file_path = '/'.join((impc.rawdir, impc.files['all']['file']))

    # Walk the gzipped csv with a 1-based counter until the target row
    selected_row = None
    with gzip.open(file_path, 'rt') as csvfile:
        filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
        for line_number, current in enumerate(filereader, start=1):
            if line_number == line_to_test:
                selected_row = current
                break

    if selected_row is None:
        self.fail(
            "line %d not found in %s" % (line_to_test, file_path))
    self.test_set_1 = selected_row

    # Some DRY violation with the above tests
    (phenotyping_center, colony) = selected_row[2:4]
    (project_fullname, pipeline_name, pipeline_stable_id,
     procedure_stable_id, procedure_name, parameter_stable_id,
     parameter_name) = selected_row[12:19]
    (statistical_method, resource_name) = selected_row[26:28]
    (p_value, percentage_change, effect_size) = selected_row[23:26]

    impc._add_evidence(
        self.assoc_curie, self.eco_id, impc_map, p_value,
        percentage_change, effect_size, self.study_curie)

    impc._add_study_provenance(
        impc_map, impress_map, phenotyping_center, colony,
        project_fullname, pipeline_name, pipeline_stable_id,
        procedure_stable_id, procedure_name, parameter_stable_id,
        parameter_name, statistical_method, resource_name)

    # Note that this doesn't test much since we're dealing with
    # multiple part_of and has_part links to individuals
    # which results in ambiguity = hard to test
    sparql_query = (
        ' SELECT * WHERE { '
        '?assoc OBO:SEPIO_0000007 ?evidenceline . '
        '?evidenceline a OBO:ECO_0000015 ; '
        'OBO:SEPIO_0000085 <https://monarchinitiative.org/.well-known/genid/study> . '
        '?study a OBO:OBI_0000471 ; '
        'OBO:SEPIO_0000114 ?param ; '
        'OBO:SEPIO_0000017 ?agent . \n'
        '} '
    )

    sparql_output = impc.graph.query(sparql_query)

    # Test that query passes and returns one row
    self.assertEqual(len(list(sparql_output)), 1)
def setUp(self):
    """Create the IMPC source in test-only mode and point the case at it."""
    impc_source = IMPC('rdf_graph', True)  # Skolem Yes
    impc_source.settestonly(True)
    self.source = impc_source
    self._setDirToSource()
def test_provenance_model(self):
    """
    Functional test for _add_study_provenance()

    Adds the study provenance built from selected columns of test_set_1
    (using the parsed impc_map and the fetched impress_map) and checks
    that the SPARQL pattern for the expected study selects exactly one
    specific blank node.
    """
    impc = IMPC('rdf_graph', False)
    impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])
    impress_response = impc.fetch_from_url(impc.map_files['impress_map'])
    impress_map = json.loads(impress_response.read().decode('utf-8'))

    # Column slices of the test row; each unpack also checks the width
    (phenotyping_center, colony) = self.test_set_1[2:4]
    (project_fullname, pipeline_name, pipeline_stable_id,
     procedure_stable_id, procedure_name, parameter_stable_id,
     parameter_name) = self.test_set_1[12:19]
    (statistical_method, resource_name) = self.test_set_1[26:28]

    impc._add_study_provenance(
        impc_map, impress_map, phenotyping_center, colony,
        project_fullname, pipeline_name, pipeline_stable_id,
        procedure_stable_id, procedure_name, parameter_stable_id,
        parameter_name, statistical_method, resource_name)

    # Query text is assembled from adjacent literals into one string
    sparql_query = (
        ' SELECT ?study WHERE { '
        '<https://www.mousephenotype.org/impress/procedures/15> '
        'a owl:NamedIndividual ; '
        'rdfs:label "MGP Select Pipeline" . '
        '<https://www.mousephenotype.org/impress/protocol/175/15> '
        'a owl:NamedIndividual ; '
        'rdfs:label "X-ray" . '
        '<http://www.sanger.ac.uk/> a foaf:organization ; '
        'rdfs:label "WTSI" . '
        '<http://www.sanger.ac.uk/science/data/mouse-genomes-project> '
        'a VIVO:Project ; '
        'rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" . '
        '<https://www.mousephenotype.org/impress/parameterontologies/1867/91> '
        'a owl:NamedIndividual ; '
        'rdfs:label "Number of ribs right (X-ray)" . '
        '?study a OBO:OBI_0000471 ; '
        'OBO:BFO_0000051 OBO:STATO_0000076 ; '
        'OBO:BFO_0000050 <https://www.mousephenotype.org/impress/procedures/15> ; '
        'OBO:BFO_0000051 <https://www.mousephenotype.org/impress/protocol/175/15> ; '
        'OBO:SEPIO_0000114 '
        '<https://www.mousephenotype.org/impress/parameterontologies/1867/91> ; '
        'OBO:BFO_0000050 '
        '<http://www.sanger.ac.uk/science/data/mouse-genomes-project> ; '
        'OBO:SEPIO_0000017 <http://www.sanger.ac.uk/> . '
        '?colony a owl:NamedIndividual ; '
        'rdfs:label "MEFW" . \n'
        '} '
    )

    found = list(impc.graph.query(sparql_query))

    # This will fail if we change our approach for making blank node
    # iris; checking only the number of rows returned might be more
    # robust
    expected_output = [(BNode('bbdd05a8ca155dda'),)]
    self.assertEqual(found, expected_output)
def setUp(self):
    """Create a default IMPC source with the test-only and no-bnodes
    setters applied, then point the case at it."""
    impc_source = IMPC()
    impc_source.settestonly(True)
    impc_source.setnobnodes(True)
    self.source = impc_source
    self._setDirToSource()
def test_random_data_set(self):
    """
    Take one row of the already downloaded dataset and run it through
    the evidence and provenance functions to test the output

    The row number is hardcoded, but theoretically any row should work.
    The file is not fetched here (the fetch call is disabled), so it
    must already be present under impc.rawdir.
    """
    line_to_test = 1129
    impc = IMPC('rdf_graph', False)  # Not Skolem
    self.test_set_N = []

    # fetch file (disabled): impc.fetch(True)
    file_path = '/'.join((impc.rawdir, impc.files['all']['file']))
    with gzip.open(file_path, 'rt') as csvfile:
        filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
        for count, row in enumerate(filereader, start=1):
            if count > line_to_test:
                LOG.info("stopped at line:\t%s\n", count)
                break
            if count == line_to_test:
                self.test_set_N = row

    # Some DRY violation with the above tests
    chosen = self.test_set_N
    (phenotyping_center, colony) = chosen[2:4]
    (project_name, _project_fullname, pipeline_name, pipeline_stable_id,
     procedure_stable_id, procedure_name, parameter_stable_id,
     parameter_name) = chosen[11:19]
    (statistical_method, resource_name) = chosen[26:28]
    (p_value, percentage_change, effect_size) = chosen[23:26]

    # adding evidence
    impc._add_evidence(
        self.assoc_curie, self.eco_id, p_value, percentage_change,
        effect_size, self.study_curie)

    # adding study
    impc._add_study_provenance(
        phenotyping_center, colony, project_name, pipeline_name,
        pipeline_stable_id, procedure_stable_id, procedure_name,
        parameter_stable_id, parameter_name, statistical_method,
        resource_name, line_to_test)

    # Note that this doesn't test much since we're dealing with
    # multiple part_of and has_part links to individuals
    # which results in ambiguity = hard to test

    # dbg
    graph_dump = impc.graph.serialize(format="ntriples").decode("utf-8")
    LOG.info("Row %i graph as ntriples:\n%s\n", line_to_test, graph_dump)

    sparql_query = (
        ' SELECT * WHERE { '
        '?assoc SEPIO:0000007 ?evidenceline . '
        '?evidenceline a ECO:0000015 ; '
        'SEPIO:0000085 _:study . '
        '?study a OBI:0000471 ; '
        'SEPIO:0000114 ?param ; '
        'SEPIO:0000017 ?agent . \n'
        '} '
    )

    sparql_output = impc.graph.query(sparql_query)

    LOG.info(
        "Test that query for row %i passes and returns one row",
        line_to_test)

    # the output is expected to hold a single row of five variables
    self.assertEqual(len(list(sparql_output)), 1)
def test_random_data_set(self):
    """
    Take one row of the already downloaded dataset and run it through
    the evidence and provenance functions to test the output

    The row number is hardcoded, but theoretically any row should work.
    The file is not fetched here (the fetch call is disabled), so it
    must already be present under impc.rawdir.
    """
    line_to_test = 1129
    impc = IMPC('rdf_graph', False)  # Not Skolem
    self.test_set_N = []

    # fetch file (disabled): impc.fetch(True)
    file_path = '/'.join((impc.rawdir, impc.files['all']['file']))
    with gzip.open(file_path, 'rt') as csvfile:
        filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
        for count, row in enumerate(filereader, start=1):
            if count > line_to_test:
                logger.info("stopped at line:\t%s\n", count)
                break
            if count == line_to_test:
                self.test_set_N = row

    # Some DRY violation with the above tests
    chosen = self.test_set_N
    (phenotyping_center, colony) = chosen[2:4]
    (project_fullname, pipeline_name, pipeline_stable_id,
     procedure_stable_id, procedure_name, parameter_stable_id,
     parameter_name) = chosen[12:19]
    (statistical_method, resource_name) = chosen[26:28]
    (p_value, percentage_change, effect_size) = chosen[23:26]

    # adding evidence
    impc._add_evidence(
        self.assoc_curie, self.eco_id, p_value, percentage_change,
        effect_size, self.study_curie)

    # adding study
    impc._add_study_provenance(
        phenotyping_center, colony, project_fullname, pipeline_name,
        pipeline_stable_id, procedure_stable_id, procedure_name,
        parameter_stable_id, parameter_name, statistical_method,
        resource_name, line_to_test)

    # Note that this doesn't test much since we're dealing with
    # multiple part_of and has_part links to individuals
    # which results in ambiguity = hard to test

    # dbg
    graph_dump = impc.graph.serialize(format="ntriples").decode("utf-8")
    logger.info("Row %i graph as ntriples:\n%s\n", line_to_test, graph_dump)

    sparql_query = (
        ' SELECT * WHERE { '
        '?assoc SEPIO:0000007 ?evidenceline . '
        '?evidenceline a ECO:0000015 ; '
        'SEPIO:0000085 _:study . '
        '?study a OBI:0000471 ; '
        'SEPIO:0000114 ?param ; '
        'SEPIO:0000017 ?agent . \n'
        '} '
    )

    sparql_output = impc.graph.query(sparql_query)

    logger.info(
        "Test that query for row %i passes and returns one row",
        line_to_test)

    # the output is expected to hold a single row of five variables
    self.assertEqual(len(list(sparql_output)), 1)
def test_provenance_model(self):
    """
    Functional test for _add_study_provenance()

    Builds the study provenance from selected columns of test_set_1,
    with the parsed impc_map and the fetched impress_map, then queries
    the graph for the expected study pattern and compares the single
    selected node with a fixed blank node id.
    """
    impc = IMPC('rdf_graph', False)
    impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])
    raw_impress = impc.fetch_from_url(impc.map_files['impress_map']).read()
    impress_map = json.loads(raw_impress.decode('utf-8'))

    # Column slices of the test row; each unpack also checks the width
    (phenotyping_center, colony) = self.test_set_1[2:4]
    (project_fullname, pipeline_name, pipeline_stable_id,
     procedure_stable_id, procedure_name, parameter_stable_id,
     parameter_name) = self.test_set_1[12:19]
    (statistical_method, resource_name) = self.test_set_1[26:28]

    impc._add_study_provenance(
        impc_map,
        impress_map,
        phenotyping_center,
        colony,
        project_fullname,
        pipeline_name,
        pipeline_stable_id,
        procedure_stable_id,
        procedure_name,
        parameter_stable_id,
        parameter_name,
        statistical_method,
        resource_name)

    # Query text is assembled from adjacent literals into one string
    sparql_query = (
        ' SELECT ?study WHERE { '
        '<https://www.mousephenotype.org/impress/procedures/15> '
        'a owl:NamedIndividual ; '
        'rdfs:label "MGP Select Pipeline" . '
        '<https://www.mousephenotype.org/impress/protocol/175/15> '
        'a owl:NamedIndividual ; '
        'rdfs:label "X-ray" . '
        '<http://www.sanger.ac.uk/> a foaf:organization ; '
        'rdfs:label "WTSI" . '
        '<http://www.sanger.ac.uk/science/data/mouse-genomes-project> '
        'a VIVO:Project ; '
        'rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" . '
        '<https://www.mousephenotype.org/impress/parameterontologies/1867/91> '
        'a owl:NamedIndividual ; '
        'rdfs:label "Number of ribs right (X-ray)" . '
        '?study a OBO:OBI_0000471 ; '
        'OBO:BFO_0000051 OBO:STATO_0000076 ; '
        'OBO:BFO_0000050 <https://www.mousephenotype.org/impress/procedures/15> ; '
        'OBO:BFO_0000051 <https://www.mousephenotype.org/impress/protocol/175/15> ; '
        'OBO:SEPIO_0000114 '
        '<https://www.mousephenotype.org/impress/parameterontologies/1867/91> ; '
        'OBO:BFO_0000050 '
        '<http://www.sanger.ac.uk/science/data/mouse-genomes-project> ; '
        'OBO:SEPIO_0000017 <http://www.sanger.ac.uk/> . '
        '?colony a owl:NamedIndividual ; '
        'rdfs:label "MEFW" . \n'
        '} '
    )

    query_rows = list(impc.graph.query(sparql_query))

    # This will fail if we change our approach for making blank node
    # iris; checking only the number of rows returned might be more
    # robust
    study_node = BNode('bbdd05a8ca155dda')
    self.assertEqual(query_rows, [(study_node,)])