예제 #1
0
    def test_assertion_model(self):
        """
        Functional test for _add_assertion_provenance()

        Replaces impc.graph with a fresh RDFGraph, adds the assertion
        provenance for self.assoc_curie / self.evidence_curie and compares
        the graph with the expected turtle via test_util.test_graph_equality().
        """

        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        # assertEqual reports the actual triple count when the graph is not empty
        self.assertEqual(len(list(impc.graph)), 0)

        impc._add_assertion_provenance(self.assoc_curie, self.evidence_curie)

        triples = """
    MONARCH:test_association SEPIO:0000015 <https://monarchinitiative.org/.well-known/genid/bf92df374a884963e805> .
    <https://monarchinitiative.org/.well-known/genid/bf92df374a884963e805> a SEPIO:0000001 ;
        SEPIO:0000018 <https://www.mousephenotype.org/> ;
        SEPIO:0000111 <https://monarchinitiative.org/.well-known/genid/evidence>  .

    <https://www.mousephenotype.org/> a foaf:organization ;
        rdfs:label "International Mouse Phenotyping Consortium" .

        """
        # dbg: dump the generated graph as turtle
        logger.info(
            "Assertion graph:\n %s\n",
            impc.graph.serialize(format="turtle").decode("utf-8"))

        self.assertTrue(self.test_util.test_graph_equality(triples, impc.graph))
예제 #2
0
    def test_assertion_model(self):
        """
        Functional test for _add_assertion_provenance()

        Replaces impc.graph with a fresh RDFGraph, adds the assertion
        provenance for self.assoc_curie / self.evidence_curie and compares
        the graph with the expected turtle via test_util.test_graph_equality().
        """

        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        # assertEqual reports the actual triple count when the graph is not empty
        self.assertEqual(len(list(impc.graph)), 0)

        impc._add_assertion_provenance(self.assoc_curie, self.evidence_curie)

        triples = """
    MONARCH:test_association SEPIO:0000015 <https://monarchinitiative.org/.well-known/genid/bf92df374a884963e805> .
    <https://monarchinitiative.org/.well-known/genid/bf92df374a884963e805> a SEPIO:0000001 ;
        SEPIO:0000018 <https://www.mousephenotype.org/> ;
        SEPIO:0000111 <https://monarchinitiative.org/.well-known/genid/evidence>  .

    <https://www.mousephenotype.org/> a foaf:organization ;
        rdfs:label "International Mouse Phenotyping Consortium" .
        """
        # dbg: dump the generated graph as turtle
        LOG.info(
            "Assertion graph:\n %s\n",
            impc.graph.serialize(format="turtle").decode("utf-8"))

        self.assertTrue(self.test_util.test_graph_equality(triples, impc.graph))
예제 #3
0
    def test_assertion_model(self):
        """
        Functional test for _add_assertion_provenance()

        Adds assertion provenance for self.assoc_curie / self.evidence_curie
        using the parsed impc_map, then checks that the SPARQL pattern below
        matches exactly one row of the resulting graph.
        """
        impc = IMPC('rdf_graph', True)
        mapping_file = impc.map_files['impc_map']
        impc_map = impc.open_and_parse_yaml(mapping_file)

        impc._add_assertion_provenance(
            self.assoc_curie, self.evidence_curie, impc_map)

        sparql_query = """
                      SELECT *
                      WHERE {
                          MONARCH:test_association OBO:SEPIO_0000015 ?assertion.
                          ?assertion a OBO:SEPIO_0000001 ;
                              OBO:SEPIO_0000018 <http://www.mousephenotype.org/> ;
                              OBO:SEPIO_0000111 <https://monarchinitiative.org/.well-known/genid/evidence>  .

                          <http://www.mousephenotype.org/> a foaf:organization ;
                              rdfs:label "International Mouse Phenotyping Consortium" .
                      }
                      """

        # The query has to run cleanly and match a single row
        matched_rows = list(impc.graph.query(sparql_query))
        self.assertEqual(len(matched_rows), 1)
예제 #4
0
    def test_evidence_model(self):
        """
        Functional test for _add_evidence()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)  # Reset graph
        # Test graph is empty
        self.assertTrue(len(list(impc.graph)) == 0)

        (p_value, percentage_change, effect_size) = self.test_set_1[23:26]

        impc._add_evidence(
            self.assoc_curie, self.eco_id, p_value, percentage_change, effect_size,
            self.study_curie)

        triples = """
:MONARCH_test_association SEPIO:0000007 <https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> .

<https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> a ECO:0000015 ;
    SEPIO:0000084 <https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888>,
        <https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> ;
    SEPIO:0000085 <https://monarchinitiative.org/.well-known/genid/study> .

<https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> a OBI:0000175 ;
    RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
    STATO:0000129 1.637023e-10 .

<https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888> a STATO:0000085 ;
    RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
    STATO:0000129 "8.885439E-007" .
        """

        self.assertTrue(self.test_util.test_graph_equality(
            triples, impc.graph))
예제 #5
0
    def test_evidence_model(self):
        """
        Functional test for _add_evidence()

        Adds evidence built from columns 23-25 of self.test_set_1 and expects
        the SPARQL query below to select only the test association.
        """
        impc = IMPC('rdf_graph', False)
        mapping_file = impc.map_files['impc_map']
        impc_map = impc.open_and_parse_yaml(mapping_file)

        # Statistics columns of the test row that are passed on as evidence
        p_value, percentage_change, effect_size = self.test_set_1[23:26]

        impc._add_evidence(
            self.assoc_curie, self.eco_id, impc_map,
            p_value, percentage_change, effect_size, self.study_curie)

        sparql_query = """
                      SELECT ?assoc
                      WHERE {
                            ?assoc OBO:SEPIO_0000007 ?evidenceline .
                            ?evidenceline a OBO:ECO_0000015 ;
                                OBO:SEPIO_0000084 ?measure1 ;
                                OBO:SEPIO_0000084 ?measure2 ;
                                OBO:SEPIO_0000085 _:study  .

                            ?measure1 a OBO:OBI_0000175 ;
                                OBO:RO_0002353 _:study ;
                                OBO:STATO_0000129 1.637023e-10 .

                            ?measure2 a OBO:STATO_0000085 ;
                                OBO:RO_0002353 _:study ;
                                OBO:STATO_0000129 "8.885439E-007" .

                      }
                      """
        query_rows = list(impc.graph.query(sparql_query))

        # Exactly one result row, holding the association IRI
        self.assertEqual(query_rows, [(self.assoc_iri,)])
예제 #6
0
    def test_assertion_model(self):
        """
        Functional test for _add_assertion_provenance()

        Adds assertion provenance for self.assoc_curie / self.evidence_curie
        using the parsed impc_map, then checks that the SPARQL pattern below
        matches exactly one row of the resulting graph.
        """
        impc = IMPC('rdf_graph', True)
        mapping_file = impc.map_files['impc_map']
        impc_map = impc.open_and_parse_yaml(mapping_file)

        impc._add_assertion_provenance(
            self.assoc_curie, self.evidence_curie, impc_map)

        sparql_query = """
                      SELECT *
                      WHERE {
                          MONARCH:test_association OBO:SEPIO_0000015 ?assertion.
                          ?assertion a OBO:SEPIO_0000001 ;
                              OBO:SEPIO_0000018 <http://www.mousephenotype.org/> ;
                              OBO:SEPIO_0000111 <https://monarchinitiative.org/.well-known/genid/evidence>  .

                          <http://www.mousephenotype.org/> a foaf:organization ;
                              rdfs:label "International Mouse Phenotyping Consortium" .
                      }
                      """

        # The query has to run cleanly and match a single row
        matched_rows = list(impc.graph.query(sparql_query))
        self.assertEqual(len(matched_rows), 1)
예제 #7
0
    def test_evidence_model(self):
        """
        Functional test for _add_evidence()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)  # Reset graph
        # Test graph is empty
        self.assertTrue(len(list(impc.graph)) == 0)

        (p_value, percentage_change, effect_size) = self.test_set_1[23:26]

        impc._add_evidence(
            self.assoc_curie, self.eco_id, p_value, percentage_change, effect_size,
            self.study_curie)

        triples = """
:MONARCH_test_association SEPIO:0000007 <https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> .

<https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> a ECO:0000015 ;
    SEPIO:0000084 <https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888>,
        <https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> ;
    SEPIO:0000085 <https://monarchinitiative.org/.well-known/genid/study> .

<https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> a OBI:0000175 ;
    RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
    STATO:0000129 1.637023e-10 .

<https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888> a STATO:0000085 ;
    RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
    STATO:0000129 "8.885439E-007" .
        """

        self.assertTrue(self.test_util.test_graph_equality(
            triples, impc.graph))
예제 #8
0
    def test_evidence_model(self):
        """
        Functional test for _add_evidence()

        Replaces impc.graph with a fresh RDFGraph, adds the evidence built
        from columns 23-25 of self.test_set_1 (using the parsed impc_map) and
        compares the graph with the expected turtle via
        test_util.test_graph_equality().
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)  # Reset graph
        # Test graph is empty; assertEqual shows the actual count on failure
        self.assertEqual(len(list(impc.graph)), 0)

        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

        p_value, percentage_change, effect_size = self.test_set_1[23:26]

        impc._add_evidence(self.assoc_curie, self.eco_id, impc_map, p_value,
                           percentage_change, effect_size, self.study_curie)

        triples = """
    :MONARCH_test_association SEPIO:0000007 <https://monarchinitiative.org/.well-known/genid/b097a98087df7a99> .
    
    <https://monarchinitiative.org/.well-known/genid/b097a98087df7a99> a ECO:0000015 ;
        SEPIO:0000084 <https://monarchinitiative.org/.well-known/genid/b89ee584330837c9>,
            <https://monarchinitiative.org/.well-known/genid/bc0eeccdea27a1d8> ;
        SEPIO:0000085 <https://monarchinitiative.org/.well-known/genid/study> .

    <https://monarchinitiative.org/.well-known/genid/bc0eeccdea27a1d8> a OBI:0000175 ;
        RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
        STATO:0000129 1.637023e-10 .

    <https://monarchinitiative.org/.well-known/genid/b89ee584330837c9> a STATO:0000085 ;
        RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
        STATO:0000129 "8.885439E-007" .
        """

        self.assertTrue(self.test_util.test_graph_equality(
            triples, impc.graph))
예제 #9
0
    def test_assertion_model(self):
        """
        Functional test for _add_assertion_provenance()

        Replaces impc.graph with a fresh RDFGraph, adds the assertion
        provenance for self.assoc_curie / self.evidence_curie (using the
        parsed impc_map) and compares the graph with the expected turtle via
        test_util.test_graph_equality().
        """

        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        # assertEqual reports the actual triple count when the graph is not empty
        self.assertEqual(len(list(impc.graph)), 0)

        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

        impc._add_assertion_provenance(
            self.assoc_curie, self.evidence_curie, impc_map)

        triples = """
    MONARCH:test_association SEPIO:0000015 <https://monarchinitiative.org/.well-known/genid/bcb2c00a5c2f9c43> .
    <https://monarchinitiative.org/.well-known/genid/bcb2c00a5c2f9c43> a SEPIO:0000001 ;
        SEPIO:0000018 <http://www.mousephenotype.org/> ;
        SEPIO:0000111 <https://monarchinitiative.org/.well-known/genid/evidence>  .

    <http://www.mousephenotype.org/> a foaf:organization ;
        rdfs:label "International Mouse Phenotyping Consortium" .

        """
        # dbg: this serializes the graph built above (impc.graph)
        logger.debug(
            "Reference graph: %s",
            impc.graph.serialize(format="turtle").decode("utf-8"))

        self.assertTrue(self.test_util.test_graph_equality(
            triples, impc.graph))
예제 #10
0
    def test_evidence_model(self):
        """
        Functional test for _add_evidence()

        Adds evidence built from columns 23-25 of self.test_set_1 and expects
        the SPARQL query below to select only the test association.
        """
        impc = IMPC('rdf_graph', False)
        mapping_file = impc.map_files['impc_map']
        impc_map = impc.open_and_parse_yaml(mapping_file)

        # Statistics columns of the test row that are passed on as evidence
        p_value, percentage_change, effect_size = self.test_set_1[23:26]

        impc._add_evidence(
            self.assoc_curie, self.eco_id, impc_map,
            p_value, percentage_change, effect_size, self.study_curie)

        sparql_query = """
                      SELECT ?assoc
                      WHERE {
                            ?assoc OBO:SEPIO_0000007 ?evidenceline .
                            ?evidenceline a OBO:ECO_0000015 ;
                                OBO:SEPIO_0000084 ?measure1 ;
                                OBO:SEPIO_0000084 ?measure2 ;
                                OBO:SEPIO_0000085 _:study  .

                            ?measure1 a OBO:OBI_0000175 ;
                                OBO:RO_0002353 _:study ;
                                OBO:STATO_0000129 1.637023e-10 .

                            ?measure2 a OBO:STATO_0000085 ;
                                OBO:RO_0002353 _:study ;
                                OBO:STATO_0000129 "8.885439E-007" .

                      }
                      """
        query_rows = list(impc.graph.query(sparql_query))

        # Exactly one result row, holding the association IRI
        self.assertEqual(query_rows, [(self.assoc_iri,)])
예제 #11
0
파일: test_impc.py 프로젝트: sgml/dipper
    def test_provenance_model(self):
        """
        Functional test for _add_study_provenance()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        self.assertTrue(len(list(impc.graph)) == 0)

        (phenotyping_center, colony) = self.test_set_1[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = self.test_set_1[12:19]
        (statistical_method, resource_name) = self.test_set_1[26:28]

        impc._add_study_provenance(phenotyping_center, colony,
                                   project_fullname, pipeline_name,
                                   pipeline_stable_id, procedure_stable_id,
                                   procedure_name, parameter_stable_id,
                                   parameter_name, statistical_method,
                                   resource_name)

        # dbg
        LOG.info("Provenance graph as turtle:\n%s\n",
                 impc.graph.serialize(format="turtle").decode("utf-8"))

        triples = """
<https://monarchinitiative.org/.well-known/genid/bdd05a8ca155ddaf415e> a OBI:0000471 ;
  BFO:0000051 OBO:STATO_0000076,
      <https://www.mousephenotype.org/impress/protocol/175/15> ;
  BFO:0000050  IMPRESS-procedure:15 ,
      <http://www.sanger.ac.uk/science/data/mouse-genomes-project> ;
  SEPIO:0000114 <https://www.mousephenotype.org/impress/parameterontologies/1867/91> ;
  SEPIO:0000017 <http://www.sanger.ac.uk/>  .

<https://monarchinitiative.org/.well-known/genid/b0b26361b8687b5ad9ef> a owl:NamedIndividual ;
    rdfs:label "MEFW" .

<http://www.sanger.ac.uk/> a foaf:organization ;
    rdfs:label "WTSI" .

<http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
    rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" .

<https://www.mousephenotype.org/impress/parameterontologies/1867/91> a owl:NamedIndividual ;
    rdfs:label "Number of ribs right (X-ray)" .

IMPRESS-procedure:15 a owl:NamedIndividual ;
    rdfs:label "MGP Select Pipeline" .

<https://www.mousephenotype.org/impress/protocol/175/15> a owl:NamedIndividual ;
    rdfs:label "X-ray" .
"""

        # dbg
        LOG.debug("Reference graph: %s",
                  impc.graph.serialize(format="turtle").decode("utf-8"))
        self.assertTrue(self.test_util.test_graph_equality(
            triples, impc.graph))
예제 #12
0
class IMPCTestCase(SourceTestCase):
    """SourceTestCase specialisation whose fixture is an IMPC source."""

    def setUp(self):
        """Build the IMPC source in test-only mode and call _setDirToSource()."""
        source = IMPC('rdf_graph', True)  # Skolem Yes
        self.source = source
        source.settestonly(True)
        self._setDirToSource()

    def tearDown(self):
        """Drop the reference to the source created in setUp()."""
        self.source = None
예제 #13
0
class IMPCTestCase(SourceTestCase):
    """SourceTestCase specialisation whose fixture is an IMPC source."""

    def setUp(self):
        """Build the IMPC source in test-only mode and call _setDirToSource()."""
        source = IMPC('rdf_graph', True)
        self.source = source
        source.settestonly(True)
        self._setDirToSource()

    def tearDown(self):
        """Drop the reference to the source created in setUp()."""
        self.source = None
예제 #14
0
    def test_random_data_set(self):
        """
        Download dataset using fetch(), then take a row of data and
        run through evidence and provenance functions to test the output

        Line of data is hardcoded, but theoretically should work on any line.
        The test fails explicitly when the downloaded file has fewer rows
        than the requested line, rather than continuing with another row.
        """
        line_to_test = 1129
        # init impc (make this a function?)
        impc = IMPC('rdf_graph', True)
        impress_map = json.loads(
            impc.fetch_from_url(
                impc.map_files['impress_map']).read().decode('utf-8'))
        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

        # fetch file
        impc.fetch(True)
        file_path = '/'.join((impc.rawdir, impc.files['all']['file']))

        # Pick the 1-based row number line_to_test out of the gzipped CSV
        test_row = None
        with gzip.open(file_path, 'rt') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='"')
            for line_number, row in enumerate(filereader, start=1):
                if line_number == line_to_test:
                    test_row = row
                    break

        if test_row is None:
            self.fail(
                "%s has fewer than %d rows" % (file_path, line_to_test))
        self.test_set_1 = test_row

        # Some DRY violation with the above tests
        phenotyping_center, colony = test_row[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = test_row[12:19]
        statistical_method, resource_name = test_row[26:28]

        p_value, percentage_change, effect_size = test_row[23:26]

        impc._add_evidence(self.assoc_curie, self.eco_id, impc_map, p_value,
                           percentage_change, effect_size, self.study_curie)

        impc._add_study_provenance(impc_map, impress_map, phenotyping_center,
                                   colony, project_fullname, pipeline_name,
                                   pipeline_stable_id, procedure_stable_id,
                                   procedure_name, parameter_stable_id,
                                   parameter_name, statistical_method,
                                   resource_name)

        # Note that this doesn't test much since we're dealing with
        # multiple part_of  and has_part links to individuals
        # which results in ambiguity = hard to test
        sparql_query = """
                      SELECT *
                      WHERE {
                          ?assoc OBO:SEPIO_0000007 ?evidenceline .
                          ?evidenceline a OBO:ECO_0000015 ;
                              OBO:SEPIO_0000085 <https://monarchinitiative.org/.well-known/genid/study> .

                          ?study a OBO:OBI_0000471 ;
                              OBO:SEPIO_0000114 ?param ;
                              OBO:SEPIO_0000017 ?agent .
                      }
                      """

        sparql_output = impc.graph.query(sparql_query)
        # Test that query passes and returns one row
        self.assertEqual(len(list(sparql_output)), 1)
예제 #15
0
    def test_provenance_model(self):
        """
        Functional test for _add_study_provenance()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        self.assertTrue(len(list(impc.graph)) == 0)

        (phenotyping_center,
         colony) = self.test_set_1[2:4]
        (project_name,
         project_fullname,
         pipeline_name,
         pipeline_stable_id,
         procedure_stable_id,
         procedure_name,
         parameter_stable_id,
         parameter_name) = self.test_set_1[11:19]
        (statistical_method, resource_name) = self.test_set_1[26:28]

        impc._add_study_provenance(
            phenotyping_center, colony,
            project_name,
            pipeline_name, pipeline_stable_id,
            procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name,
            statistical_method, resource_name)

        # dbg
        LOG.info(
            "Provenance graph as turtle:\n%s\n",
            impc.graph.serialize(format="turtle").decode("utf-8")
        )

        triples = """
<https://monarchinitiative.org/.well-known/genid/b0b26361b8687b5ad9ef> a owl:NamedIndividual ;
    rdfs:label "MEFW" .

<https://monarchinitiative.org/.well-known/genid/b6f14f763c8d0629360e> a OBI:0000471 ;
    BFO:0000050 <http://www.sanger.ac.uk/science/data/mouse-genomes-project>,
        IMPC-pipe:MGP_001 ;
    BFO:0000051 STATO:0000076,
        IMPC-proc:MGP_XRY_001 ;
    SEPIO:0000017 <http://www.sanger.ac.uk/> ;
    SEPIO:0000114 <https://www.mousephenotype.org/impress/OntologyInfo?action=list&procID=MGP_XRY_001#IMPC_XRY_008_001> .

<http://www.sanger.ac.uk/> a foaf:organization ;
    rdfs:label "WTSI" .

<http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
    rdfs:label "MGP" .

<https://www.mousephenotype.org/impress/OntologyInfo?action=list&procID=MGP_XRY_001#IMPC_XRY_008_001> a owl:NamedIndividual ;
    rdfs:label "Number of ribs right (X-ray)" .

IMPC-pipe:MGP_001 a owl:NamedIndividual ;
    rdfs:label "MGP Select Pipeline" .

IMPC-proc:MGP_XRY_001 a owl:NamedIndividual ;
    rdfs:label "X-ray" .
"""

        # dbg
        LOG.info(
            "Reference graph: %s",
            impc.graph.serialize(format="turtle").decode("utf-8")
        )
        self.assertTrue(
            self.test_util.test_graph_equality(triples, impc.graph))
예제 #16
0
    def test_provenance_model(self):
        """
        Functional test for _add_study_provenance()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        self.assertTrue(len(list(impc.graph)) == 0)

        (phenotyping_center, colony) = self.test_set_1[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = self.test_set_1[12:19]
        (statistical_method, resource_name) = self.test_set_1[26:28]

        impc._add_study_provenance(
            phenotyping_center, colony,
            project_fullname,
            pipeline_name, pipeline_stable_id,
            procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name,
            statistical_method, resource_name, 0)

        # dbg
        logger.info(
            "Provenance graph as turtle:\n%s\n",
            impc.graph.serialize(format="turtle").decode("utf-8")
        )

        triples = """
<https://monarchinitiative.org/.well-known/genid/bdd05a8ca155ddaf415e> a OBI:0000471 ;
  BFO:0000051 OBO:STATO_0000076,
      <https://www.mousephenotype.org/impress/protocol/175/15> ;
  BFO:0000050  IMPRESS-procedure:15 ,
      <http://www.sanger.ac.uk/science/data/mouse-genomes-project> ;
  SEPIO:0000114 <https://www.mousephenotype.org/impress/parameterontologies/1867/91> ;
  SEPIO:0000017 <http://www.sanger.ac.uk/>  .

<https://monarchinitiative.org/.well-known/genid/b0b26361b8687b5ad9ef> a owl:NamedIndividual ;
    rdfs:label "MEFW" .

<http://www.sanger.ac.uk/> a foaf:organization ;
    rdfs:label "WTSI" .

<http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
    rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" .

<https://www.mousephenotype.org/impress/parameterontologies/1867/91> a owl:NamedIndividual ;
    rdfs:label "Number of ribs right (X-ray)" .

IMPRESS-procedure:15 a owl:NamedIndividual ;
    rdfs:label "MGP Select Pipeline" .

<https://www.mousephenotype.org/impress/protocol/175/15> a owl:NamedIndividual ;
    rdfs:label "X-ray" .
"""

        # dbg
        logger.debug(
            "Reference graph: %s", impc.graph.serialize(format="turtle").decode("utf-8")
        )
        self.assertTrue(
            self.test_util.test_graph_equality(triples, impc.graph))
예제 #17
0
 def setUp(self):
     """Build the IMPC source in test-only mode and call _setDirToSource()."""
     source = IMPC('rdf_graph', True)
     self.source = source
     source.settestonly(True)
     self._setDirToSource()
예제 #18
0
    def test_random_data_set(self):
        """
        Download dataset using fetch(), then take a row of data and
        run through evidence and provenance functions to test the output

        Line of data is hardcoded, but theoretically should work on any line.
        The test fails explicitly when the downloaded file has fewer rows
        than the requested line, rather than continuing with another row.
        """
        line_to_test = 1129
        # init impc (make this a function?)
        impc = IMPC('rdf_graph', True)
        impress_map = json.loads(
            impc.fetch_from_url(impc.map_files['impress_map']).read().decode('utf-8'))
        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

        # fetch file
        impc.fetch(True)
        file_path = '/'.join((impc.rawdir, impc.files['all']['file']))

        # Pick the 1-based row number line_to_test out of the gzipped CSV
        test_row = None
        with gzip.open(file_path, 'rt') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='"')
            for line_number, row in enumerate(filereader, start=1):
                if line_number == line_to_test:
                    test_row = row
                    break

        if test_row is None:
            self.fail(
                "%s has fewer than %d rows" % (file_path, line_to_test))
        self.test_set_1 = test_row

        # Some DRY violation with the above tests
        phenotyping_center, colony = test_row[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = test_row[12:19]
        statistical_method, resource_name = test_row[26:28]

        p_value, percentage_change, effect_size = test_row[23:26]

        impc._add_evidence(self.assoc_curie, self.eco_id, impc_map, p_value,
                           percentage_change, effect_size, self.study_curie)

        impc._add_study_provenance(
            impc_map, impress_map, phenotyping_center, colony,
            project_fullname, pipeline_name, pipeline_stable_id,
            procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name,
            statistical_method, resource_name)

        # Note that this doesn't test much since we're dealing with
        # multiple part_of  and has_part links to individuals
        # which results in ambiguity = hard to test
        sparql_query = """
                      SELECT *
                      WHERE {
                          ?assoc OBO:SEPIO_0000007 ?evidenceline .
                          ?evidenceline a OBO:ECO_0000015 ;
                              OBO:SEPIO_0000085 <https://monarchinitiative.org/.well-known/genid/study> .

                          ?study a OBO:OBI_0000471 ;
                              OBO:SEPIO_0000114 ?param ;
                              OBO:SEPIO_0000017 ?agent .
                      }
                      """

        sparql_output = impc.graph.query(sparql_query)
        # Test that query passes and returns one row
        self.assertEqual(len(list(sparql_output)), 1)
예제 #19
0
 def setUp(self):
     """Build the IMPC source in test-only mode and call _setDirToSource()."""
     source = IMPC('rdf_graph', True)  # Skolem Yes
     self.source = source
     source.settestonly(True)
     self._setDirToSource()
예제 #20
0
    def test_provenance_model(self):
        """
        Functional test for _add_study_provenance()

        Feeds columns of self.test_set_1, together with the parsed impc_map
        and the downloaded impress_map, to _add_study_provenance() and
        expects the SPARQL query below to select one specific study node.
        """
        impc = IMPC('rdf_graph', False)
        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])
        impress_response = impc.fetch_from_url(impc.map_files['impress_map'])
        impress_map = json.loads(impress_response.read().decode('utf-8'))

        phenotyping_center, colony = self.test_set_1[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = self.test_set_1[12:19]
        statistical_method, resource_name = self.test_set_1[26:28]

        impc._add_study_provenance(
            impc_map, impress_map,
            phenotyping_center, colony, project_fullname,
            pipeline_name, pipeline_stable_id,
            procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name,
            statistical_method, resource_name)

        sparql_query = """
                      SELECT ?study
                      WHERE {
                          <https://www.mousephenotype.org/impress/procedures/15> a owl:NamedIndividual ;
                              rdfs:label "MGP Select Pipeline" .

                          <https://www.mousephenotype.org/impress/protocol/175/15> a owl:NamedIndividual ;
                              rdfs:label "X-ray" .

                          <http://www.sanger.ac.uk/> a foaf:organization ;
                              rdfs:label "WTSI" .

                          <http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
                              rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" .

                          <https://www.mousephenotype.org/impress/parameterontologies/1867/91> a owl:NamedIndividual ;
                              rdfs:label "Number of ribs right (X-ray)" .

                          ?study a OBO:OBI_0000471 ;
                              OBO:BFO_0000051 OBO:STATO_0000076 ;
                              OBO:BFO_0000050 <https://www.mousephenotype.org/impress/procedures/15> ;
                              OBO:BFO_0000051 <https://www.mousephenotype.org/impress/protocol/175/15> ;
                              OBO:SEPIO_0000114 <https://www.mousephenotype.org/impress/parameterontologies/1867/91> ;
                              OBO:BFO_0000050 <http://www.sanger.ac.uk/science/data/mouse-genomes-project> ;
                              OBO:SEPIO_0000017 <http://www.sanger.ac.uk/> .

                          ?colony a owl:NamedIndividual ;
                              rdfs:label "MEFW" .
                      }
                      """

        query_rows = list(impc.graph.query(sparql_query))

        # The hard-coded blank node id breaks as soon as the approach for
        # making blank node iris changes; checking only the number of
        # result rows might be the better test.
        expected_output = [(BNode('bbdd05a8ca155dda'),)]

        self.assertEqual(query_rows, expected_output)
예제 #21
0
파일: test_impc.py 프로젝트: d3borah/dipper
 def setUp(self):
     """Build a default IMPC source, enable test-only and no-bnodes, then call _setDirToSource()."""
     source = IMPC()
     self.source = source
     source.settestonly(True)
     source.setnobnodes(True)
     self._setDirToSource()
예제 #22
0
    def test_random_data_set(self):
        """
        Download dataset using fetch(), then take a row of data and
        run through evidence and provenance functions to test the output

        Line of data is hardcoded, but theoretically should work on any line
        """
        line_to_test = 1129
        count = 0
        impc = IMPC('rdf_graph', False)   # Not Skolem
        self.test_set_N = []
        # fetch file
        # impc.fetch(True)
        file_path = '/'.join((impc.rawdir, impc.files['all']['file']))
        with gzip.open(file_path, 'rt') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
            for row in filereader:
                count += 1
                if count < line_to_test:
                    continue
                elif count == line_to_test:
                    self.test_set_N = row
                elif count > line_to_test:
                    LOG.info("stopped at line:\t%s\n", count)
                    break

        # Some DRY violation with the above tests
        (phenotyping_center, colony) = self.test_set_N[2:4]
        (project_name,project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = self.test_set_N[11:19]
        (statistical_method, resource_name) = self.test_set_N[26:28]

        (p_value, percentage_change, effect_size) = self.test_set_N[23:26]

        # adding evidence
        impc._add_evidence(
            self.assoc_curie, self.eco_id, p_value, percentage_change, effect_size,
            self.study_curie)

        # adding  study
        impc._add_study_provenance(
            phenotyping_center, colony, project_name,
            pipeline_name,
            pipeline_stable_id,
            procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name,
            statistical_method, resource_name, line_to_test)

        # Note that this doesn't test much since we're dealing with
        # multiple part_of  and has_part links to individuals
        # which results in ambiguity = hard to test

        # dbg
        LOG.info(
            "Row %i graph as ntriples:\n%s\n",
            line_to_test, impc.graph.serialize(format="ntriples").decode("utf-8")
        )

        sparql_query = """
SELECT *
WHERE {
    ?assoc SEPIO:0000007 ?evidenceline .
    ?evidenceline a ECO:0000015 ;
        SEPIO:0000085 _:study .

    ?study a OBI:0000471 ;
        SEPIO:0000114 ?param ;
        SEPIO:0000017 ?agent .
}
"""

        sparql_output = impc.graph.query(sparql_query)
        LOG.info(
            "Test that query for row %i passes and returns one row", int(line_to_test))

        # print("Sparql Output: %s\n", list(sparql_output) )
        # it is an array with one list with five vars in it

        self.assertEqual(len(list(sparql_output)), 1)
예제 #23
0
    def test_random_data_set(self):
        """
        Download dataset using fetch(), then take a row of data and
        run through evidence and provenance functions to test the output

        Line of data is hardcoded, but theoretically should work on any line
        """
        line_to_test = 1129
        count = 0
        impc = IMPC('rdf_graph', False)   # Not Skolem
        self.test_set_N = []
        # fetch file
        # impc.fetch(True)
        file_path = '/'.join((impc.rawdir, impc.files['all']['file']))
        with gzip.open(file_path, 'rt') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
            for row in filereader:
                count += 1
                if count < line_to_test:
                    continue
                elif count == line_to_test:
                    self.test_set_N = row
                elif count > line_to_test:
                    logger.info("stopped at line:\t%s\n", count)
                    break

        # Some DRY violation with the above tests
        (phenotyping_center, colony) = self.test_set_N[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = self.test_set_N[12:19]
        (statistical_method, resource_name) = self.test_set_N[26:28]

        (p_value, percentage_change, effect_size) = self.test_set_N[23:26]

        # adding evidence
        impc._add_evidence(
            self.assoc_curie, self.eco_id, p_value, percentage_change, effect_size,
            self.study_curie)

        # adding  study
        impc._add_study_provenance(
            phenotyping_center, colony, project_fullname,
            pipeline_name,
            pipeline_stable_id,
            procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name,
            statistical_method, resource_name, line_to_test)

        # Note that this doesn't test much since we're dealing with
        # multiple part_of  and has_part links to individuals
        # which results in ambiguity = hard to test

        # dbg
        logger.info(
            "Row %i graph as ntriples:\n%s\n", line_to_test, impc.graph.serialize(
                format="ntriples").decode("utf-8")
        )

        sparql_query = """
SELECT *
WHERE {
    ?assoc SEPIO:0000007 ?evidenceline .
    ?evidenceline a ECO:0000015 ;
        SEPIO:0000085 _:study .

    ?study a OBI:0000471 ;
        SEPIO:0000114 ?param ;
        SEPIO:0000017 ?agent .
}
"""

        sparql_output = impc.graph.query(sparql_query)
        logger.info("Test that query for row %i passes and returns one row", int(line_to_test))

        # print("Sparql Output: %s\n", list(sparql_output) )
        # it is an array with one list with five vars in it

        self.assertEqual(len(list(sparql_output)), 1)
예제 #24
0
    def test_provenance_model(self):
        """
        Functional test for _add_study_provenance()

        Feeds the fields of ``self.test_set_1`` plus the two mapping
        tables into ``_add_study_provenance`` and checks which study
        node the SPARQL query below selects from the resulting graph.
        """
        impc = IMPC('rdf_graph', False)
        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])
        impress_response = impc.fetch_from_url(impc.map_files['impress_map'])
        impress_map = json.loads(impress_response.read().decode('utf-8'))

        # Pull the needed columns out of the fixture row by position.
        row = self.test_set_1
        phenotyping_center, colony = row[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = row[12:19]
        statistical_method, resource_name = row[26:28]

        impc._add_study_provenance(
            impc_map, impress_map, phenotyping_center, colony,
            project_fullname, pipeline_name, pipeline_stable_id,
            procedure_stable_id, procedure_name, parameter_stable_id,
            parameter_name, statistical_method, resource_name)

        sparql_query = """
                      SELECT ?study
                      WHERE {
                          <https://www.mousephenotype.org/impress/procedures/15> a owl:NamedIndividual ;
                              rdfs:label "MGP Select Pipeline" .

                          <https://www.mousephenotype.org/impress/protocol/175/15> a owl:NamedIndividual ;
                              rdfs:label "X-ray" .

                          <http://www.sanger.ac.uk/> a foaf:organization ;
                              rdfs:label "WTSI" .

                          <http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
                              rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" .

                          <https://www.mousephenotype.org/impress/parameterontologies/1867/91> a owl:NamedIndividual ;
                              rdfs:label "Number of ribs right (X-ray)" .

                          ?study a OBO:OBI_0000471 ;
                              OBO:BFO_0000051 OBO:STATO_0000076 ;
                              OBO:BFO_0000050 <https://www.mousephenotype.org/impress/procedures/15> ;
                              OBO:BFO_0000051 <https://www.mousephenotype.org/impress/protocol/175/15> ;
                              OBO:SEPIO_0000114 <https://www.mousephenotype.org/impress/parameterontologies/1867/91> ;
                              OBO:BFO_0000050 <http://www.sanger.ac.uk/science/data/mouse-genomes-project> ;
                              OBO:SEPIO_0000017 <http://www.sanger.ac.uk/> .

                          ?colony a owl:NamedIndividual ;
                              rdfs:label "MEFW" .
                      }
                      """

        query_result = impc.graph.query(sparql_query)

        # The expected blank node id is hardcoded, so this breaks if the
        # approach for making blank node IRIs changes; checking only the
        # length of the output might be a more robust alternative.
        study = BNode('bbdd05a8ca155dda')

        self.assertEqual(list(query_result), [(study, )])