예제 #1
0
    def test_evidence_model(self):
        """
        Functional test for _add_evidence()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)  # Reset graph
        # Test graph is empty
        self.assertTrue(len(list(impc.graph)) == 0)

        (p_value, percentage_change, effect_size) = self.test_set_1[23:26]

        impc._add_evidence(
            self.assoc_curie, self.eco_id, p_value, percentage_change, effect_size,
            self.study_curie)

        triples = """
:MONARCH_test_association SEPIO:0000007 <https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> .

<https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> a ECO:0000015 ;
    SEPIO:0000084 <https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888>,
        <https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> ;
    SEPIO:0000085 <https://monarchinitiative.org/.well-known/genid/study> .

<https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> a OBI:0000175 ;
    RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
    STATO:0000129 1.637023e-10 .

<https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888> a STATO:0000085 ;
    RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
    STATO:0000129 "8.885439E-007" .
        """

        self.assertTrue(self.test_util.test_graph_equality(
            triples, impc.graph))
예제 #2
0
    def test_evidence_model(self):
        """
        Functional test for _add_evidence()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)  # Reset graph
        # Test graph is empty
        self.assertTrue(len(list(impc.graph)) == 0)

        (p_value, percentage_change, effect_size) = self.test_set_1[23:26]

        impc._add_evidence(
            self.assoc_curie, self.eco_id, p_value, percentage_change, effect_size,
            self.study_curie)

        triples = """
:MONARCH_test_association SEPIO:0000007 <https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> .

<https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> a ECO:0000015 ;
    SEPIO:0000084 <https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888>,
        <https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> ;
    SEPIO:0000085 <https://monarchinitiative.org/.well-known/genid/study> .

<https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> a OBI:0000175 ;
    RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
    STATO:0000129 1.637023e-10 .

<https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888> a STATO:0000085 ;
    RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
    STATO:0000129 "8.885439E-007" .
        """

        self.assertTrue(self.test_util.test_graph_equality(
            triples, impc.graph))
예제 #3
0
    def test_evidence_model(self):
        """
        Functional test for _add_evidence()

        Adds evidence built from columns 23-25 of the fixture row, then runs
        a SPARQL query over the graph and expects the test association IRI
        as the only result row.
        """
        impc = IMPC('rdf_graph', False)
        mapping = impc.open_and_parse_yaml(impc.map_files['impc_map'])

        p_value, percentage_change, effect_size = self.test_set_1[23:26]

        impc._add_evidence(
            self.assoc_curie, self.eco_id, mapping,
            p_value, percentage_change, effect_size,
            self.study_curie)

        # Pattern the graph must match after the evidence has been added
        sparql_query = """
                      SELECT ?assoc
                      WHERE {
                            ?assoc OBO:SEPIO_0000007 ?evidenceline .
                            ?evidenceline a OBO:ECO_0000015 ;
                                OBO:SEPIO_0000084 ?measure1 ;
                                OBO:SEPIO_0000084 ?measure2 ;
                                OBO:SEPIO_0000085 _:study  .

                            ?measure1 a OBO:OBI_0000175 ;
                                OBO:RO_0002353 _:study ;
                                OBO:STATO_0000129 1.637023e-10 .

                            ?measure2 a OBO:STATO_0000085 ;
                                OBO:RO_0002353 _:study ;
                                OBO:STATO_0000129 "8.885439E-007" .

                      }
                      """
        query_result = impc.graph.query(sparql_query)
        expected_rows = [(self.assoc_iri,)]

        self.assertEqual(list(query_result), expected_rows)
예제 #4
0
    def test_evidence_model(self):
        """
        Functional test for _add_evidence()

        Resets the IMPC graph, loads the impc_map YAML, feeds columns 23-25
        of the fixture row (p_value, percentage_change, effect_size) into
        _add_evidence() and compares the resulting graph with the expected
        triples.
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)  # Reset graph
        # Test graph is empty; assertEqual reports the actual triple count
        # on failure instead of a bare "False is not true"
        self.assertEqual(len(list(impc.graph)), 0)

        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

        (p_value, percentage_change, effect_size) = self.test_set_1[23:26]

        impc._add_evidence(self.assoc_curie, self.eco_id, impc_map, p_value,
                           percentage_change, effect_size, self.study_curie)

        # Expected graph content, handed to the project's graph-equality helper
        triples = """
    :MONARCH_test_association SEPIO:0000007 <https://monarchinitiative.org/.well-known/genid/b097a98087df7a99> .
    
    <https://monarchinitiative.org/.well-known/genid/b097a98087df7a99> a ECO:0000015 ;
        SEPIO:0000084 <https://monarchinitiative.org/.well-known/genid/b89ee584330837c9>,
            <https://monarchinitiative.org/.well-known/genid/bc0eeccdea27a1d8> ;
        SEPIO:0000085 <https://monarchinitiative.org/.well-known/genid/study> .

    <https://monarchinitiative.org/.well-known/genid/bc0eeccdea27a1d8> a OBI:0000175 ;
        RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
        STATO:0000129 1.637023e-10 .

    <https://monarchinitiative.org/.well-known/genid/b89ee584330837c9> a STATO:0000085 ;
        RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
        STATO:0000129 "8.885439E-007" .
        """

        self.assertTrue(self.test_util.test_graph_equality(
            triples, impc.graph))
예제 #5
0
    def test_evidence_model(self):
        """
        Functional test for _add_evidence()

        Adds evidence built from columns 23-25 of the fixture row, then runs
        a SPARQL query over the graph and expects the test association IRI
        as the only result row.
        """
        impc = IMPC('rdf_graph', False)
        mapping = impc.open_and_parse_yaml(impc.map_files['impc_map'])

        p_value, percentage_change, effect_size = self.test_set_1[23:26]

        impc._add_evidence(
            self.assoc_curie, self.eco_id, mapping,
            p_value, percentage_change, effect_size,
            self.study_curie)

        # Pattern the graph must match after the evidence has been added
        sparql_query = """
                      SELECT ?assoc
                      WHERE {
                            ?assoc OBO:SEPIO_0000007 ?evidenceline .
                            ?evidenceline a OBO:ECO_0000015 ;
                                OBO:SEPIO_0000084 ?measure1 ;
                                OBO:SEPIO_0000084 ?measure2 ;
                                OBO:SEPIO_0000085 _:study  .

                            ?measure1 a OBO:OBI_0000175 ;
                                OBO:RO_0002353 _:study ;
                                OBO:STATO_0000129 1.637023e-10 .

                            ?measure2 a OBO:STATO_0000085 ;
                                OBO:RO_0002353 _:study ;
                                OBO:STATO_0000129 "8.885439E-007" .

                      }
                      """
        query_result = impc.graph.query(sparql_query)
        expected_rows = [(self.assoc_iri,)]

        self.assertEqual(list(query_result), expected_rows)
예제 #6
0
    def test_random_data_set(self):
        """
        Download dataset using fetch(), then take a row of data and
        run through evidence and provenance functions to test the output

        Line of data is hardcoded, but theoretically should work on any line
        """
        line_to_test = 1129
        count = 0
        impc = IMPC('rdf_graph', False)   # Not Skolem
        self.test_set_N = []
        # fetch file
        # impc.fetch(True)
        file_path = '/'.join((impc.rawdir, impc.files['all']['file']))
        with gzip.open(file_path, 'rt') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
            for row in filereader:
                count += 1
                if count < line_to_test:
                    continue
                elif count == line_to_test:
                    self.test_set_N = row
                elif count > line_to_test:
                    LOG.info("stopped at line:\t%s\n", count)
                    break

        # Some DRY violation with the above tests
        (phenotyping_center, colony) = self.test_set_N[2:4]
        (project_name,project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = self.test_set_N[11:19]
        (statistical_method, resource_name) = self.test_set_N[26:28]

        (p_value, percentage_change, effect_size) = self.test_set_N[23:26]

        # adding evidence
        impc._add_evidence(
            self.assoc_curie, self.eco_id, p_value, percentage_change, effect_size,
            self.study_curie)

        # adding  study
        impc._add_study_provenance(
            phenotyping_center, colony, project_name,
            pipeline_name,
            pipeline_stable_id,
            procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name,
            statistical_method, resource_name, line_to_test)

        # Note that this doesn't test much since we're dealing with
        # multiple part_of  and has_part links to individuals
        # which results in ambiguity = hard to test

        # dbg
        LOG.info(
            "Row %i graph as ntriples:\n%s\n",
            line_to_test, impc.graph.serialize(format="ntriples").decode("utf-8")
        )

        sparql_query = """
SELECT *
WHERE {
    ?assoc SEPIO:0000007 ?evidenceline .
    ?evidenceline a ECO:0000015 ;
        SEPIO:0000085 _:study .

    ?study a OBI:0000471 ;
        SEPIO:0000114 ?param ;
        SEPIO:0000017 ?agent .
}
"""

        sparql_output = impc.graph.query(sparql_query)
        LOG.info(
            "Test that query for row %i passes and returns one row", int(line_to_test))

        # print("Sparql Output: %s\n", list(sparql_output) )
        # it is an array with one list with five vars in it

        self.assertEqual(len(list(sparql_output)), 1)
예제 #7
0
    def test_random_data_set(self):
        """
        Download dataset using fetch(), then take a row of data and
        run through evidence and provenance functions to test the output

        Line of data is hardcoded, but theoretically should work on any line
        """
        line_to_test = 1129
        # init impc (make this a function?)
        impc = IMPC('rdf_graph', True)
        impress_map = json.loads(
            impc.fetch_from_url(impc.map_files['impress_map']).read().decode('utf-8'))
        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

        # fetch file
        impc.fetch(True)
        file_path = '/'.join((impc.rawdir, impc.files['all']['file']))
        test_row = None
        with gzip.open(file_path, 'rt') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
            # start=1 so that count is the 1-based line number
            for count, row in enumerate(filereader, start=1):
                if count == line_to_test:
                    test_row = row
                    break

        # Without this check a file shorter than line_to_test would silently
        # run the test against its last row (or hit a NameError when empty)
        self.assertIsNotNone(
            test_row,
            "line %i not found in %s" % (line_to_test, file_path))
        self.test_set_1 = test_row

        # Some DRY violation with the above tests
        (phenotyping_center, colony) = test_row[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = test_row[12:19]
        (statistical_method, resource_name) = test_row[26:28]

        (p_value, percentage_change, effect_size) = self.test_set_1[23:26]

        impc._add_evidence(self.assoc_curie, self.eco_id, impc_map, p_value,
                           percentage_change, effect_size, self.study_curie)

        impc._add_study_provenance(
            impc_map, impress_map, phenotyping_center, colony,
            project_fullname, pipeline_name, pipeline_stable_id,
            procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name,
            statistical_method, resource_name)

        # Note that this doesn't test much since we're dealing with
        # multiple part_of  and has_part links to individuals
        # which results in ambiguity = hard to test
        sparql_query = """
                      SELECT *
                      WHERE {
                          ?assoc OBO:SEPIO_0000007 ?evidenceline .
                          ?evidenceline a OBO:ECO_0000015 ;
                              OBO:SEPIO_0000085 <https://monarchinitiative.org/.well-known/genid/study> .

                          ?study a OBO:OBI_0000471 ;
                              OBO:SEPIO_0000114 ?param ;
                              OBO:SEPIO_0000017 ?agent .
                      }
                      """

        sparql_output = impc.graph.query(sparql_query)
        # Test that query passes and returns one row
        self.assertEqual(len(list(sparql_output)), 1)
예제 #8
0
    def test_random_data_set(self):
        """
        Download dataset using fetch(), then take a row of data and
        run through evidence and provenance functions to test the output

        Line of data is hardcoded, but theoretically should work on any line
        """
        line_to_test = 1129
        count = 0
        impc = IMPC('rdf_graph', False)   # Not Skolem
        self.test_set_N = []
        # fetch file
        # impc.fetch(True)
        file_path = '/'.join((impc.rawdir, impc.files['all']['file']))
        with gzip.open(file_path, 'rt') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
            for row in filereader:
                count += 1
                if count < line_to_test:
                    continue
                elif count == line_to_test:
                    self.test_set_N = row
                elif count > line_to_test:
                    logger.info("stopped at line:\t%s\n", count)
                    break

        # Some DRY violation with the above tests
        (phenotyping_center, colony) = self.test_set_N[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = self.test_set_N[12:19]
        (statistical_method, resource_name) = self.test_set_N[26:28]

        (p_value, percentage_change, effect_size) = self.test_set_N[23:26]

        # adding evidence
        impc._add_evidence(
            self.assoc_curie, self.eco_id, p_value, percentage_change, effect_size,
            self.study_curie)

        # adding  study
        impc._add_study_provenance(
            phenotyping_center, colony, project_fullname,
            pipeline_name,
            pipeline_stable_id,
            procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name,
            statistical_method, resource_name, line_to_test)

        # Note that this doesn't test much since we're dealing with
        # multiple part_of  and has_part links to individuals
        # which results in ambiguity = hard to test

        # dbg
        logger.info(
            "Row %i graph as ntriples:\n%s\n", line_to_test, impc.graph.serialize(
                format="ntriples").decode("utf-8")
        )

        sparql_query = """
SELECT *
WHERE {
    ?assoc SEPIO:0000007 ?evidenceline .
    ?evidenceline a ECO:0000015 ;
        SEPIO:0000085 _:study .

    ?study a OBI:0000471 ;
        SEPIO:0000114 ?param ;
        SEPIO:0000017 ?agent .
}
"""

        sparql_output = impc.graph.query(sparql_query)
        logger.info("Test that query for row %i passes and returns one row", int(line_to_test))

        # print("Sparql Output: %s\n", list(sparql_output) )
        # it is an array with one list with five vars in it

        self.assertEqual(len(list(sparql_output)), 1)
예제 #9
0
    def test_random_data_set(self):
        """
        Download dataset using fetch(), then take a row of data and
        run through evidence and provenance functions to test the output

        Line of data is hardcoded, but theoretically should work on any line
        """
        line_to_test = 1129
        # init impc (make this a function?)
        impc = IMPC('rdf_graph', True)
        impress_map = json.loads(
            impc.fetch_from_url(
                impc.map_files['impress_map']).read().decode('utf-8'))
        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

        # fetch file
        impc.fetch(True)
        file_path = '/'.join((impc.rawdir, impc.files['all']['file']))
        test_row = None
        with gzip.open(file_path, 'rt') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
            # start=1 so that count is the 1-based line number
            for count, row in enumerate(filereader, start=1):
                if count == line_to_test:
                    test_row = row
                    break

        # Without this check a file shorter than line_to_test would silently
        # run the test against its last row (or hit a NameError when empty)
        self.assertIsNotNone(
            test_row,
            "line %i not found in %s" % (line_to_test, file_path))
        self.test_set_1 = test_row

        # Some DRY violation with the above tests
        (phenotyping_center, colony) = test_row[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = test_row[12:19]
        (statistical_method, resource_name) = test_row[26:28]

        (p_value, percentage_change, effect_size) = self.test_set_1[23:26]

        impc._add_evidence(self.assoc_curie, self.eco_id, impc_map, p_value,
                           percentage_change, effect_size, self.study_curie)

        impc._add_study_provenance(impc_map, impress_map, phenotyping_center,
                                   colony, project_fullname, pipeline_name,
                                   pipeline_stable_id, procedure_stable_id,
                                   procedure_name, parameter_stable_id,
                                   parameter_name, statistical_method,
                                   resource_name)

        # Note that this doesn't test much since we're dealing with
        # multiple part_of  and has_part links to individuals
        # which results in ambiguity = hard to test
        sparql_query = """
                      SELECT *
                      WHERE {
                          ?assoc OBO:SEPIO_0000007 ?evidenceline .
                          ?evidenceline a OBO:ECO_0000015 ;
                              OBO:SEPIO_0000085 <https://monarchinitiative.org/.well-known/genid/study> .

                          ?study a OBO:OBI_0000471 ;
                              OBO:SEPIO_0000114 ?param ;
                              OBO:SEPIO_0000017 ?agent .
                      }
                      """

        sparql_output = impc.graph.query(sparql_query)
        # Test that query passes and returns one row
        self.assertEqual(len(list(sparql_output)), 1)