예제 #1
0
    def test_random_data_set(self):
        """
        Download dataset using fetch(), then take a row of data and
        run through evidence and provenance functions to test the output

        Line of data is hardcoded, but theoretically should work on any line

        The selected row is also stored on self.test_set_1.  The test fails
        explicitly when the downloaded file holds fewer rows than the
        requested line, instead of silently continuing with whichever row
        happened to be read last.
        """
        line_to_test = 1129
        # init impc (make this a function?)
        impc = IMPC('rdf_graph', True)
        impress_map = json.loads(
            impc.fetch_from_url(impc.map_files['impress_map']).read().decode('utf-8'))
        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

        # fetch file
        impc.fetch(True)
        file_path = '/'.join((impc.rawdir, impc.files['all']['file']))
        with gzip.open(file_path, 'rt') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
            # Rows are counted from 1 so that the counter matches line_to_test
            for count, row in enumerate(filereader, start=1):
                if count == line_to_test:
                    self.test_set_1 = row
                    break
            else:
                # Loop ran to completion without reaching the requested line
                self.fail("Line {} not found in {}".format(
                    line_to_test, file_path))

        # Some DRY violation with the above tests
        # All columns are taken from the same row so the evidence and
        # provenance calls are guaranteed to describe one record
        (phenotyping_center, colony) = row[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = row[12:19]
        (p_value, percentage_change, effect_size) = row[23:26]
        (statistical_method, resource_name) = row[26:28]

        # assoc_curie, eco_id and study_curie are read from the test
        # instance (presumably set in setUp; verify against the class)
        impc._add_evidence(self.assoc_curie, self.eco_id, impc_map, p_value,
                           percentage_change, effect_size, self.study_curie)

        impc._add_study_provenance(
            impc_map, impress_map, phenotyping_center, colony,
            project_fullname, pipeline_name, pipeline_stable_id,
            procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name,
            statistical_method, resource_name)

        # Note that this doesn't test much since we're dealing with
        # multiple part_of  and has_part links to individuals
        # which results in ambiguity = hard to test
        sparql_query = """
                      SELECT *
                      WHERE {
                          ?assoc OBO:SEPIO_0000007 ?evidenceline .
                          ?evidenceline a OBO:ECO_0000015 ;
                              OBO:SEPIO_0000085 <https://monarchinitiative.org/.well-known/genid/study> .

                          ?study a OBO:OBI_0000471 ;
                              OBO:SEPIO_0000114 ?param ;
                              OBO:SEPIO_0000017 ?agent .
                      }
                      """

        sparql_output = impc.graph.query(sparql_query)
        # Test that query passes and returns one row
        self.assertEqual(len(list(sparql_output)), 1)
예제 #2
0
    def test_provenance_model(self):
        """
        Functional test for _add_study_provenance()

        Passes selected columns of self.test_set_1 to the method, then
        runs a SPARQL query over the resulting graph and compares the
        result with a single expected study node.
        """
        impc = IMPC('rdf_graph', False)
        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])
        impress_response = impc.fetch_from_url(impc.map_files['impress_map'])
        impress_map = json.loads(impress_response.read().decode('utf-8'))

        # Slice the fixture row into the arguments the method expects
        record = self.test_set_1
        phenotyping_center, colony = record[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = record[12:19]
        statistical_method, resource_name = record[26:28]

        impc._add_study_provenance(
            impc_map, impress_map,
            phenotyping_center, colony,
            project_fullname,
            pipeline_name, pipeline_stable_id,
            procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name,
            statistical_method, resource_name)

        query = """
                      SELECT ?study
                      WHERE {
                          <https://www.mousephenotype.org/impress/procedures/15> a owl:NamedIndividual ;
                              rdfs:label "MGP Select Pipeline" .

                          <https://www.mousephenotype.org/impress/protocol/175/15> a owl:NamedIndividual ;
                              rdfs:label "X-ray" .

                          <http://www.sanger.ac.uk/> a foaf:organization ;
                              rdfs:label "WTSI" .

                          <http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
                              rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" .

                          <https://www.mousephenotype.org/impress/parameterontologies/1867/91> a owl:NamedIndividual ;
                              rdfs:label "Number of ribs right (X-ray)" .

                          ?study a OBO:OBI_0000471 ;
                              OBO:BFO_0000051 OBO:STATO_0000076 ;
                              OBO:BFO_0000050 <https://www.mousephenotype.org/impress/procedures/15> ;
                              OBO:BFO_0000051 <https://www.mousephenotype.org/impress/protocol/175/15> ;
                              OBO:SEPIO_0000114 <https://www.mousephenotype.org/impress/parameterontologies/1867/91> ;
                              OBO:BFO_0000050 <http://www.sanger.ac.uk/science/data/mouse-genomes-project> ;
                              OBO:SEPIO_0000017 <http://www.sanger.ac.uk/> .

                          ?colony a owl:NamedIndividual ;
                              rdfs:label "MEFW" .
                      }
                      """

        result = impc.graph.query(query)

        # Brittle by design: the expected blank node id is hard-coded, so
        # this breaks if the way blank node iris are made ever changes.
        # Checking only the number of result rows would be more robust.
        expected_study = BNode('bbdd05a8ca155dda')

        self.assertEqual(list(result), [(expected_study,)])
예제 #3
0
    def test_random_data_set(self):
        """
        Download dataset using fetch(), then take a row of data and
        run through evidence and provenance functions to test the output

        Line of data is hardcoded, but theoretically should work on any line

        The chosen row is also saved to self.test_set_1.  If the downloaded
        file is shorter than the requested line the test fails with an
        explicit message rather than carrying on with an unrelated row.
        """
        line_to_test = 1129
        # init impc (make this a function?)
        impc = IMPC('rdf_graph', True)
        impress_map = json.loads(
            impc.fetch_from_url(
                impc.map_files['impress_map']).read().decode('utf-8'))
        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

        # fetch file
        impc.fetch(True)
        file_path = '/'.join((impc.rawdir, impc.files['all']['file']))
        with gzip.open(file_path, 'rt') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
            # 1-based numbering keeps the counter aligned with line_to_test
            for count, row in enumerate(filereader, start=1):
                if count == line_to_test:
                    self.test_set_1 = row
                    break
            else:
                # The reader was exhausted before the requested line
                self.fail("Line {} not found in {}".format(
                    line_to_test, file_path))

        # Some DRY violation with the above tests
        # Every value below comes from the one selected row, so evidence
        # and provenance cannot end up describing different records
        (phenotyping_center, colony) = row[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = row[12:19]
        (p_value, percentage_change, effect_size) = row[23:26]
        (statistical_method, resource_name) = row[26:28]

        # assoc_curie, eco_id and study_curie come from the test instance
        # (presumably assigned in setUp; verify against the class)
        impc._add_evidence(self.assoc_curie, self.eco_id, impc_map, p_value,
                           percentage_change, effect_size, self.study_curie)

        impc._add_study_provenance(impc_map, impress_map, phenotyping_center,
                                   colony, project_fullname, pipeline_name,
                                   pipeline_stable_id, procedure_stable_id,
                                   procedure_name, parameter_stable_id,
                                   parameter_name, statistical_method,
                                   resource_name)

        # Note that this doesn't test much since we're dealing with
        # multiple part_of  and has_part links to individuals
        # which results in ambiguity = hard to test
        sparql_query = """
                      SELECT *
                      WHERE {
                          ?assoc OBO:SEPIO_0000007 ?evidenceline .
                          ?evidenceline a OBO:ECO_0000015 ;
                              OBO:SEPIO_0000085 <https://monarchinitiative.org/.well-known/genid/study> .

                          ?study a OBO:OBI_0000471 ;
                              OBO:SEPIO_0000114 ?param ;
                              OBO:SEPIO_0000017 ?agent .
                      }
                      """

        sparql_output = impc.graph.query(sparql_query)
        # Test that query passes and returns one row
        self.assertEqual(len(list(sparql_output)), 1)
예제 #4
0
    def test_provenance_model(self):
        """
        Functional test for _add_study_provenance()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        self.assertTrue(len(list(impc.graph)) == 0)

        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])
        impress_map = json.loads(
            impc.fetch_from_url(
                impc.map_files['impress_map']).read().decode('utf-8'))

        (phenotyping_center, colony) = self.test_set_1[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = self.test_set_1[12:19]
        (statistical_method, resource_name) = self.test_set_1[26:28]

        impc._add_study_provenance(
            impc_map, impress_map, phenotyping_center, colony,
            project_fullname, pipeline_name, pipeline_stable_id,
            procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name,
            statistical_method, resource_name)

        triples = """
    <https://monarchinitiative.org/.well-known/genid/bbdd05a8ca155dda> a OBI:0000471 ;
      BFO:0000051 OBO:STATO_0000076,
          <https://www.mousephenotype.org/impress/protocol/175/15> ;
      BFO:0000050  IMPRESS-procedure:15 ,
          <http://www.sanger.ac.uk/science/data/mouse-genomes-project> ;
      SEPIO:0000114 <https://www.mousephenotype.org/impress/parameterontologies/1867/91> ;
      SEPIO:0000017 <http://www.sanger.ac.uk/>  .
      
    <https://monarchinitiative.org/.well-known/genid/bc0b26361b8687b5> a owl:NamedIndividual ;
        rdfs:label "MEFW" .

    <http://www.sanger.ac.uk/> a foaf:organization ;
        rdfs:label "WTSI" .

    <http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
        rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" .

    <https://www.mousephenotype.org/impress/parameterontologies/1867/91> a owl:NamedIndividual ;
        rdfs:label "Number of ribs right (X-ray)" .

    IMPRESS-procedure:15 a owl:NamedIndividual ;
        rdfs:label "MGP Select Pipeline" .

    <https://www.mousephenotype.org/impress/protocol/175/15> a owl:NamedIndividual ;
        rdfs:label "X-ray" .
"""
        # dbg
        logger.debug("Reference graph: %s",
                     impc.graph.serialize(format="turtle")
                               .decode("utf-8")
        )

        self.assertTrue(self.test_util.test_graph_equality(
            triples, impc.graph))
예제 #5
0
    def test_provenance_model(self):
        """
        Functional test for _add_study_provenance()

        Builds the provenance triples from columns of self.test_set_1 and
        checks, with a SPARQL query over the source's graph, that the one
        expected study node is returned.
        """
        impc = IMPC('rdf_graph', False)
        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])
        impress_url = impc.map_files['impress_map']
        impress_text = impc.fetch_from_url(impress_url).read().decode('utf-8')
        impress_map = json.loads(impress_text)

        # Pull the needed columns out of the fixture row
        data_row = self.test_set_1
        phenotyping_center, colony = data_row[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = data_row[12:19]
        statistical_method, resource_name = data_row[26:28]

        impc._add_study_provenance(
            impc_map, impress_map, phenotyping_center, colony,
            project_fullname, pipeline_name, pipeline_stable_id,
            procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name,
            statistical_method, resource_name)

        study_query = """
                      SELECT ?study
                      WHERE {
                          <https://www.mousephenotype.org/impress/procedures/15> a owl:NamedIndividual ;
                              rdfs:label "MGP Select Pipeline" .

                          <https://www.mousephenotype.org/impress/protocol/175/15> a owl:NamedIndividual ;
                              rdfs:label "X-ray" .

                          <http://www.sanger.ac.uk/> a foaf:organization ;
                              rdfs:label "WTSI" .

                          <http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
                              rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" .

                          <https://www.mousephenotype.org/impress/parameterontologies/1867/91> a owl:NamedIndividual ;
                              rdfs:label "Number of ribs right (X-ray)" .

                          ?study a OBO:OBI_0000471 ;
                              OBO:BFO_0000051 OBO:STATO_0000076 ;
                              OBO:BFO_0000050 <https://www.mousephenotype.org/impress/procedures/15> ;
                              OBO:BFO_0000051 <https://www.mousephenotype.org/impress/protocol/175/15> ;
                              OBO:SEPIO_0000114 <https://www.mousephenotype.org/impress/parameterontologies/1867/91> ;
                              OBO:BFO_0000050 <http://www.sanger.ac.uk/science/data/mouse-genomes-project> ;
                              OBO:SEPIO_0000017 <http://www.sanger.ac.uk/> .

                          ?colony a owl:NamedIndividual ;
                              rdfs:label "MEFW" .
                      }
                      """

        query_result = impc.graph.query(study_query)

        # The blank node id is hard-coded, so this assertion breaks if the
        # approach for making blank node iris changes; comparing only the
        # length of the output would be a more tolerant check.
        expected_rows = [(BNode('bbdd05a8ca155dda'), )]

        self.assertEqual(list(query_result), expected_rows)