Exemple #1
0
    def test_parse(self):
        """
        Parse every ClinVar RCV fixture and compare it with its reference.

        For each entry in RCVS the gzipped XML under XML_PATH is run
        through clinvar_parse() (driven by a mocked command line), the
        resulting n-triples file is loaded into an RDFGraph, a dot
        rendering is written under DOT_PATH, and the graph is compared
        with the turtle text read from TTL_PATH.
        """
        for rcv in RCVS:
            output_nt = rcv + '.nt'
            input_xml = rcv + '.xml.gz'
            reference_ttl = TTL_PATH + rcv + '.ttl'
            with self.subTest(rcv=rcv):

                mock_args = [
                    "test_clinvar.py", "--inputdir", XML_PATH, "--filename",
                    input_xml, "--mapfile", MAP_FILE, "--destination", NT_PATH,
                    "--output", output_nt
                ]

                # Scope the patch to the parser call: a bare .start() is
                # never stopped and leaves sys.argv replaced for every
                # later subtest and test in the process.
                with patch('sys.argv', mock_args):
                    clinvar_parse()

                query_graph = RDFGraph()
                query_graph.bind_all_namespaces()
                query_graph.parse(NT_PATH + output_nt, format='nt')

                # Read the reference verbatim; joining readlines() with
                # "\n" would double every line break in the turtle text.
                with open(reference_ttl, 'r') as ref_fh:
                    ref_graph = ref_fh.read()

                # debug: note this logs the graph produced by the parser
                LOG.debug(
                    "Reference graph: %s",
                    query_graph.serialize(format="turtle").decode("utf-8"))

                # Convert output from ClinVar parse to dot
                # (the dot file can be rendered to an image afterwards)
                dot_file_path = DOT_PATH + rcv + ".dot"
                with open(dot_file_path, 'w') as dot_file:
                    rdf2dot(query_graph, dot_file)

                self.assertTrue(
                    TestUtils.test_graph_equality(ref_graph, query_graph))
Exemple #2
0
class EvidenceTestCase(unittest.TestCase):
    """Exercise MGI._process_evidence_view() against a bundled test file."""

    def setUp(self):
        """
        Create an MGI source that reads from the test resources.

        _process_evidence_view locates the evidence file through
        self.rawdir, so the default directory is overridden to point at
        resources/mgi next to this module.  The file in that directory
        must carry the name the method expects (evidence_view).
        """
        resource_dir = os.path.join(os.path.dirname(__file__),
                                    'resources/mgi')
        self.test_util = TestUtils()
        self.mgi = MGI('rdf_graph', True)
        self.mgi.rawdir = resource_dir
        # Pre-seed the annotation id used by the fixture row
        self.mgi.idhash['annot']['6901981'] = ':association'

    def tearDown(self):
        """Drop the MGI source created in setUp."""
        self.mgi = None

    def test_sex_specificity_model(self):
        """The evidence view yields the expected sex-specificity triples."""
        # Start from a fresh graph
        self.mgi.graph = RDFGraph(True)
        self.mgi._process_evidence_view(limit=None)

        serialized = self.mgi.graph.serialize(format="turtle").decode("utf-8")
        logger.debug("Reference graph: %s", serialized)

        expected_triples = """
        :association RO:0002558 ECO:0000006 ;
            dc:source J:74619 ;
            :has_sex_specificity PATO:0000384 .

        J:74619 a IAO:0000310 .
        """
        graphs_match = self.test_util.test_graph_equality(
            expected_triples, self.mgi.graph)
        self.assertTrue(graphs_match)
Exemple #3
0
class TestMyChemParser(unittest.TestCase):
    """Run MyChem.parse() on a single record loaded from TESTDATA."""

    def setUp(self):
        """
        Build a MyChem source pre-loaded with the first test record.

        Loading the JSON fixture and appending it to drugbank_targets and
        drugcentral_interactors stands in for source.fetch().
        """
        self.test_util = TestUtils()
        self.source = MyChem('rdf_graph', True)

        # Replaces source.fetch(); the context manager guarantees the
        # handle is closed even when json.load() raises
        with open(TESTDATA, 'r') as data_fh:
            self.test_data = json.load(data_fh)
        self.source.drugbank_targets.append(self.test_data[0])
        self.source.drugcentral_interactors.append(self.test_data[0])

    def tearDown(self):
        """Drop the source created in setUp."""
        self.source = None

    def test_parse(self):
        """parse() turns the fixture record into the expected triples."""
        self.source.graph = RDFGraph(True)  # Reset graph
        # assertEqual reports the actual triple count on failure
        self.assertEqual(len(list(self.source.graph)), 0)

        self.source.parse()

        triples = """
        UNII:46U771ERWK RO:0002606 SNOMED:386761002 ;
            rdfs:subClassOf CHEBI:23367 .

        SNOMED:386761002 rdfs:label "Local anesthesia" ;
            rdfs:subClassOf DOID:4 .
        """

        # dbg: logs the graph produced by parse()
        logger.debug(
            "Reference graph: %s",
            self.source.graph.serialize(format="turtle").decode("utf-8"))
        self.assertTrue(
            self.test_util.test_graph_equality(triples, self.source.graph))
    def test_parse(self):
        """
        Run self.gwascatalog.parse() once per entry in VARIANTS, write a
        dot file for each, and compare the graph with its reference.

        This is less of a unit test and more a way of viewing the output
        of an entire run on a single variant; the dot files can be
        converted to images using scripts/dot-to-svg.sh
        """
        for variant in VARIANTS:
            with self.subTest(variant_id=variant):
                # Fresh source for every variant
                self.tearDownAndSetUp()
                source = self.gwascatalog
                source.rawdir = RAW_PATH + '/' + variant
                source.parse()

                with open(DOT_PATH + variant + ".dot", 'w') as dot_file:
                    rdf2dot(source.graph, dot_file)

                # debug
                turtle_text = source.graph.serialize(
                    format="turtle").decode("utf-8")
                LOG.debug("Reference graph: %s", turtle_text)

                reference_ttl = TTL_PATH + variant + '.ttl'
                graphs_match = TestUtils.test_graph_equality(
                    reference_ttl, source.graph)
                self.assertTrue(graphs_match)
Exemple #5
0
class SGDTestCase(unittest.TestCase):
    """Check the association SGD builds from one phenotype record."""

    def setUp(self):
        """Prepare the comparison helper and a single phenotype record."""
        self.test_util = TestUtils()
        self.test_set_1 = {
            'Allele': 'atp6-L183R (L183R)',
            'Chemical': 'glycerol',
            'Condition': 'elevated temperature (35 deg C)|nonfermentable carbon source',
            'Details': 'similar results obtained with atp6-L247R, and atp6-W136R, all '
                       'corresponding to human NARP syndrome mutants',
            'Experiment Type': 'classical genetics',
            'Feature Name': 'Q0085',
            'Feature Type': 'ORF',
            'Gene Name': 'ATP6',
            'Mutant Type': 'reduction of function',
            'Phenotype': 'respiratory growth: decreased rate',
            'Reference': 'PMID: 21715656|SGD_REF: S000145858',
            'Reporter': ' ',
            'SGDID': 'S000007268',
            'Strain Background': 'Other',
        }

    def tearDown(self):
        """Nothing to clean up."""

    def testSGDParser(self):
        """make_association() emits exactly the expected triples."""
        sgd = SGD('rdf_graph', True)
        sgd.graph = RDFGraph(True)
        sgd.make_association(self.test_set_1)

        # The description literal is produced by the source itself and
        # substituted into the expected turtle below
        description = sgd._make_description(self.test_set_1)

        template = """
        :MONARCH_95158d413dd73476 a OBAN:association ;
            OBO:RO_0002558 OBO:APO_0000020 ;
            dc:description "{0}";
            dc:source PMID:21715656 ;
            OBAN:association_has_object MONARCH:OBO_APO_0000309OBO_APO_0000245 ;
            OBAN:association_has_predicate OBO:RO_0002200 ;
            OBAN:association_has_subject SGD:S000007268 .
            
        SGD:S000007268 rdfs:label "ATP6" ;
        RO:0002200 MONARCH:OBO_APO_0000309OBO_APO_0000245 .

        APO:0000020 rdfs:label "classical genetics" .

        PMID:21715656 a OBO:IAO_0000311 ;
        owl:sameAs SGD_REF:S000145858 .

        MONARCH:OBO_APO_0000309OBO_APO_0000245 rdfs:label "respiratory growth:decreased rate" ;
        rdfs:subClassOf UPHENO:0001001 .

        """
        triples = template.format(description)

        # test exact contents of graph
        graphs_match = self.test_util.test_graph_equality(triples, sgd.graph)
        self.assertTrue(graphs_match)
Exemple #6
0
class RGDTestCase(unittest.TestCase):
    """Check the association RGD builds from one parsed annotation."""

    def setUp(self):
        """Prepare the comparison helper and a single annotation record."""
        self.test_util = TestUtils()

        evidence = {
            'has_supporting_reference': ['RGD:1581841', 'PMID:12799311'],
            'type': 'IED',
            'with_support_from': [],
        }
        subject = {
            'fullname': 'endothelin receptor type A',
            'id': 'RGD:2535',
            'label': 'Ednra',
            'synonyms': [],
            'taxon': {'id': 'NCBITaxon:10116'},
            'type': 'gene',
        }
        self.test_set_1 = {
            'aspect': 'N',
            'date': '2006-10-26',
            'evidence': evidence,
            'negated': False,
            'object': {'id': 'MP:0003340', 'taxon': 'NCBITaxon:10116'},
            'provided_by': 'RGD',
            'qualifiers': [],
            'relation': {'id': None},
            'source_line': 'RGD\t2535\tEdnra\t\tMP:0003340\tRGD:1581841|PMID:12799311\t'
                           'IED\t\tN\tendothelin receptor type A\t\tgene\ttaxon:10116\t'
                           '20061026\tRGD\t\t\n',
            'subject': subject,
            'subject_extensions': [{'filler': '\n', 'property': 'isoform'}],
        }

    def tearDown(self):
        """Nothing to clean up."""

    def testRGDParser(self):
        """make_association() emits exactly the expected triples."""
        rgd = RGD('rdf_graph', True)
        rgd.graph = RDFGraph(True)

        # The graph must start out empty
        self.assertTrue(len(list(rgd.graph)) == 0)

        rgd.make_association(record=self.test_set_1)
        triples = """
    :MONARCH_b4650e8c3d865f11a1a5 a OBAN:association ;
        RO:0002558 ECO:0005611 ;
        dc:source RGDRef:1581841 ;
        OBAN:association_has_object OBO:MP_0003340 ;
        OBAN:association_has_predicate OBO:RO_0002200 ;
        OBAN:association_has_subject RGD:2535 ;
        pav:createdOn "2006-10-26" .
    
    RGD:2535 OBO:RO_0002200 MP:0003340 .
        RGDRef:1581841 a IAO:0000311 ;
        owl:sameAs PMID:12799311 .
        """
        # dbg
        serialized = rgd.graph.serialize(format="turtle").decode("utf-8")
        logger.debug("Reference graph: %s", serialized)

        graphs_match = self.test_util.test_graph_equality(triples, rgd.graph)
        self.assertTrue(graphs_match)
Exemple #7
0
class SGDTestCase(unittest.TestCase):
    """Check the association SGD builds from one phenotype record."""

    def setUp(self):
        """Prepare the comparison helper and a single phenotype record."""
        self.test_util = TestUtils()

        details = ('similar results obtained with atp6-L247R, and atp6-W136R, all '
                   'corresponding to human NARP syndrome mutants')
        self.test_set_1 = {
            'Allele': 'atp6-L183R (L183R)',
            'Chemical': 'glycerol',
            'Condition': 'elevated temperature (35 deg C)|nonfermentable carbon source',
            'Details': details,
            'Experiment Type': 'classical genetics',
            'Feature Name': 'Q0085',
            'Feature Type': 'ORF',
            'Gene Name': 'ATP6',
            'Mutant Type': 'reduction of function',
            'Phenotype': 'respiratory growth: decreased rate',
            'Reference': 'PMID: 21715656|SGD_REF: S000145858',
            'Reporter': ' ',
            'SGDID': 'S000007268',
            'Strain Background': 'Other',
        }

    def tearDown(self):
        """Nothing to clean up."""

    def testSGDParser(self):
        """make_association() emits exactly the expected triples."""
        sgd = SGD('rdf_graph', True)
        sgd.graph = RDFGraph(True)
        sgd.make_association(self.test_set_1)

        # The description literal is produced by the source itself and
        # substituted into the expected turtle below
        description = sgd._make_description(self.test_set_1)

        template = """
        :MONARCH_ba748c98c0f167739128 a OBAN:association ;
            OBO:RO_0002558 OBO:APO_0000020 ;
            dc:description "{0}";
            dc:source PMID:21715656 ;
            OBAN:association_has_object MONARCH:APO_0000309APO_0000245 ;
            OBAN:association_has_predicate OBO:RO_0002200 ;
            OBAN:association_has_subject SGD:S000007268 .
            
        SGD:S000007268 rdfs:label "ATP6" ;
        RO:0002200 MONARCH:APO_0000309APO_0000245 .

        APO:0000020 rdfs:label "classical genetics" .

        PMID:21715656 a OBO:IAO_0000311 ;
        owl:sameAs SGD_REF:S000145858 .

        MONARCH:APO_0000309APO_0000245 rdfs:label "respiratory growth:decreased rate" ;
        rdfs:subClassOf UPHENO:0001001 .

        """
        triples = template.format(description)

        # test exact contents of graph
        graphs_match = self.test_util.test_graph_equality(triples, sgd.graph)
        self.assertTrue(graphs_match)
Exemple #8
0
class CTDTestCase(unittest.TestCase):
    """Check the triples CTD emits for one 'therapeutic' interaction row."""

    def setUp(self):
        """Create a CTD source with an empty graph and one input row."""
        self.test_util = TestUtils()
        self.source = CTD('rdf_graph', True)
        self.source.graph = RDFGraph(True)
        self.test_row = [
            'Nicotine', 'D009538', '',
            'TOBACCO ADDICTION, SUSCEPTIBILITY TO', 'OMIM:188890',
            'therapeutic', '', '', '',
            '12345|56789',
        ]

    def tearDown(self):
        """Drop the source created in setUp."""
        self.source = None

    def test_therapeutic_relationship(self):
        """_process_interactions() emits exactly the expected triples."""
        graph = self.source.graph
        # test that graph is empty
        self.assertTrue(len(list(graph)) == 0)

        self.source._process_interactions(self.test_row)

        triples = """
            :MONARCH_b6c289df47cb72653f79 a OBAN:association ;
                RO:0002558 ECO:0000033 ;
                dcterms:source PMID:12345, PMID:56789 ;
                OBAN:association_has_object OMIM:188890 ;
                OBAN:association_has_predicate RO:0002606 ;
                OBAN:association_has_subject MESH:D009538 .

            MESH:D009538 a owl:Class ;
                rdfs:label "Nicotine" ;
                biolink:category biolink:ChemicalSubstance ;
                RO:0002606 OMIM:188890 .

            PMID:12345 a IAO:0000013 .

            PMID:56789 a IAO:0000013 .

            OMIM:188890 a owl:Class ;
                biolink:category biolink:DiseaseOrPhenotypicFeature .
        """
        # test exact contents of graph
        graphs_match = self.test_util.test_graph_equality(
            triples, self.source.graph)
        self.assertTrue(graphs_match)
Exemple #9
0
class CTDTestCase(unittest.TestCase):
    """Check the triples CTD emits for one 'therapeutic' interaction row."""

    def setUp(self):
        """Create a CTD source with an empty graph and one input row."""
        self.test_util = TestUtils()
        self.source = CTD('rdf_graph', True)
        self.source.graph = RDFGraph(True)
        self.test_row = [
            'Nicotine', 'D009538', '',
            'TOBACCO ADDICTION, SUSCEPTIBILITY TO', 'OMIM:188890',
            'therapeutic', '', '', '',
            '12345|56789',
        ]

    def tearDown(self):
        """Drop the source created in setUp."""
        self.source = None

    def test_therapeutic_relationship(self):
        """_process_interactions() emits exactly the expected triples."""
        graph = self.source.graph
        # test that graph is empty
        self.assertTrue(len(list(graph)) == 0)

        self.source._process_interactions(self.test_row)

        triples = """
            :MONARCH_b6c289df47cb72653f79 a OBAN:association ;
                RO:0002558 ECO:0000033 ;
                dc:source PMID:12345, PMID:56789 ;
                OBAN:association_has_object OMIM:188890 ;
                OBAN:association_has_predicate RO:0002606 ;
                OBAN:association_has_subject MESH:D009538 .
            
            MESH:D009538 a owl:Class ;
                rdfs:label "Nicotine" ;
                RO:0002606 OMIM:188890 .
                
            PMID:12345 a IAO:0000013 .
            PMID:56789 a IAO:0000013 .
            
            OMIM:188890 a owl:Class .
        """
        # test exact contents of graph
        graphs_match = self.test_util.test_graph_equality(
            triples, self.source.graph)
        self.assertTrue(graphs_match)
Exemple #10
0
class ReactomeTestCase(unittest.TestCase):
    """Check the pathway association Reactome builds from one row."""

    def setUp(self):
        """Prepare one Ensembl-to-pathway row and a minimal ECO lookup."""
        self.test_util = TestUtils()
        self.test_set_1 = (
            'ENSBTAP00000013354',
            'R-BTA-3000480',
            'http://www.reactome.org/PathwayBrowser/#/R-BTA-3000480',
            'Scavenging by Class A Receptors',
            'IEA',
            'Bos taurus',
        )
        # Evidence code -> ECO curie, in place of a parsed mapping file
        self.gaf_eco = {"IEA": "ECO:0000501"}

    def tearDown(self):
        """Nothing to clean up."""

    def testEnsemblReactomeParser(self):
        """
        _add_component_pathway_association() emits exactly the expected
        triples for the row prepared in setUp.
        """
        reactome = Reactome('rdf_graph', True)
        reactome.graph = RDFGraph(True)
        self.assertTrue(len(list(reactome.graph)) == 0)

        # The pathway IRI and species name are not needed for this call
        gene, pathway_id, _iri, pathway_label, go_ecode, _species = \
            self.test_set_1
        gene_curie = 'ENSEMBL:' + gene
        pathway_curie = 'REACT:' + pathway_id
        eco_curie = self.gaf_eco[go_ecode]
        reactome._add_component_pathway_association(
            gene_curie, pathway_curie, pathway_label, eco_curie)

        triples = """
        ENSEMBL:ENSBTAP00000013354 RO:0002331 REACT:R-BTA-3000480 .

        :MONARCH_b582c188b7ec20016206 a OBAN:association ;
            OBO:RO_0002558 ECO:0000501 ;
            OBAN:association_has_object REACT:R-BTA-3000480 ;
            OBAN:association_has_predicate RO:0002331 ;
            OBAN:association_has_subject ENSEMBL:ENSBTAP00000013354 .

        REACT:R-BTA-3000480 a owl:Class ;
            rdfs:label "Scavenging by Class A Receptors" ;
            rdfs:subClassOf GO:0009987,
                PW:0000001 .
        """
        graphs_match = self.test_util.test_graph_equality(
            triples, reactome.graph)
        self.assertTrue(graphs_match)
Exemple #11
0
    def test_gene_xref(self):
        """
        Run FlyBase._process_gene_xref() once per entry in ALLELES and
        compare the graph with that allele's gene_xref.ttl reference.
        """
        for allele in ALLELES:
            with self.subTest(allele_id=allele):
                # Fresh source for every allele
                self.tearDownAndSetUp()
                source = self.flybase
                source.rawdir = RAW_PATH + '/' + allele
                source._process_gene_xref(limit=None)

                turtle_text = source.graph.serialize(
                    format="turtle").decode("utf-8")
                LOG.debug("Reference graph: %s", turtle_text)

                reference_ttl = TTL_PATH + allele + '/' + 'gene_xref.ttl'
                graphs_match = TestUtils.test_graph_equality(
                    reference_ttl, source.graph)
                self.assertTrue(graphs_match)
Exemple #12
0
class ReactomeTestCase(unittest.TestCase):
    """Check the pathway association Reactome builds from one row."""

    def setUp(self):
        """Prepare the comparison helper and one Ensembl-to-pathway row."""
        self.test_util = TestUtils()
        self.test_set_1 = (
            'ENSBTAP00000013354',
            'R-BTA-3000480',
            'http://www.reactome.org/PathwayBrowser/#/R-BTA-3000480',
            'Scavenging by Class A Receptors',
            'IEA',
            'Bos taurus',
        )

    def tearDown(self):
        """Nothing to clean up."""

    def testEnsemblReactomeParser(self):
        """
        _add_component_pathway_association() emits exactly the expected
        triples for the row prepared in setUp.
        """
        reactome = Reactome('rdf_graph', True)
        reactome.graph = RDFGraph(True)
        self.assertTrue(len(list(reactome.graph)) == 0)

        eco_map = Reactome.get_eco_map(Reactome.map_files['eco_map'])
        # The pathway IRI and species name are not needed for this call
        gene, pathway_id, _iri, pathway_label, go_ecode, _species = \
            self.test_set_1
        reactome._add_component_pathway_association(
            eco_map,
            gene, 'ENSEMBL',
            pathway_id, 'REACT',
            pathway_label,
            go_ecode)

        triples = """
        ENSEMBL:ENSBTAP00000013354 RO:0002331 REACT:R-BTA-3000480 .
        
        :MONARCH_b582c188b7ec20016206 a OBAN:association ;
            OBO:RO_0002558 ECO:0000501 ;
            OBAN:association_has_object REACT:R-BTA-3000480 ;
            OBAN:association_has_predicate RO:0002331 ;
            OBAN:association_has_subject ENSEMBL:ENSBTAP00000013354 .

        REACT:R-BTA-3000480 a owl:Class ;
            rdfs:label "Scavenging by Class A Receptors" ;
            rdfs:subClassOf GO:0009987,
                PW:0000001 .
        """
        graphs_match = self.test_util.test_graph_equality(
            triples, reactome.graph)
        self.assertTrue(graphs_match)
Exemple #13
0
class TestMyChemParser(unittest.TestCase):
    """Run MyChem.parse() on a single record loaded from TESTDATA."""

    def setUp(self):
        """
        Build a MyChem source pre-loaded with the first test record.

        Loading the JSON fixture and appending it to drugbank_targets and
        drugcentral_interactors stands in for source.fetch().
        """
        self.test_util = TestUtils()
        self.source = MyChem('rdf_graph', True)

        # Replaces source.fetch(); the context manager guarantees the
        # handle is closed even when json.load() raises
        with open(TESTDATA, 'r') as data_fh:
            self.test_data = json.load(data_fh)
        self.source.drugbank_targets.append(self.test_data[0])
        self.source.drugcentral_interactors.append(self.test_data[0])

    def tearDown(self):
        """Drop the source created in setUp."""
        self.source = None

    def test_parse(self):
        """parse() turns the fixture record into the expected triples."""
        self.source.graph = RDFGraph(True)  # Reset graph
        # assertEqual reports the actual triple count on failure
        self.assertEqual(len(list(self.source.graph)), 0)

        self.source.parse()

        triples = """
        UNII:46U771ERWK RO:0002606 SNOMED:386761002 ;
            rdfs:subClassOf CHEBI:23367 .

        SNOMED:386761002 rdfs:label "Local anesthesia" ;
            rdfs:subClassOf DOID:4 .
        """

        # dbg: logs the graph produced by parse()
        logger.debug(
            "Reference graph: %s",
            self.source.graph.serialize(format="turtle").decode("utf-8"))
        self.assertTrue(self.test_util.test_graph_equality(
            triples, self.source.graph))
Exemple #14
0
class EvidenceTestCase(unittest.TestCase):
    """Exercise MGI._process_evidence_view() against a bundled test file."""

    def setUp(self):
        """
        Create an MGI source that reads from the test resources.

        _process_evidence_view locates the evidence file through
        self.rawdir, so the default directory is overridden to point at
        resources/mgi next to this module.  The file in that directory
        must carry the name the method expects (evidence_view).
        """
        here = os.path.dirname(__file__)
        self.test_util = TestUtils()
        self.mgi = MGI('rdf_graph', True)
        self.mgi.rawdir = os.path.join(here, 'resources/mgi')
        # Pre-seed the annotation id used by the fixture row
        self.mgi.idhash['annot']['6901981'] = ':association'

    def tearDown(self):
        """Drop the MGI source created in setUp."""
        self.mgi = None

    def test_sex_specificity_model(self):
        """The evidence view yields the expected sex-specificity triples."""
        # Start from a fresh graph
        self.mgi.graph = RDFGraph(True)
        self.mgi._process_evidence_view(limit=None)

        serialized = self.mgi.graph.serialize(format="turtle").decode("utf-8")
        logger.debug("Reference graph: %s", serialized)

        expected_triples = """
        :association RO:0002558 ECO:0000006 ;
            dc:source J:74619 ;
            :has_sex_specificity PATO:0000384 .

        J:74619 a IAO:0000310 .
        """
        graphs_match = self.test_util.test_graph_equality(
            expected_triples, self.mgi.graph)
        self.assertTrue(graphs_match)
Exemple #15
0
class StringTestFakeData(unittest.TestCase):
    """Feed hand-made protein-link rows through StringDB."""

    def setUp(self):
        """Prepare two one-row data sets, column names and a protein map."""
        self.test_util = TestUtils()

        # Test set with two proteins from same species
        self.test_set_1 = [[
            '9606.ENSP00000000233', '9606.ENSP00000003084',
            0, 0, 0, 0, 300, 0, 150, 800,
        ]]

        # Test set with deprecated protein id
        self.test_set_2 = [[
            '9606.ENSP00000000233', '9606.ENSP00000006101',
            0, 0, 0, 0, 300, 0, 150, 800,
        ]]

        self.columns = [
            'protein1',
            'protein2',
            'neighborhood',
            'fusion',
            'cooccurence',
            'coexpression',
            'experimental',
            'database',
            'textmining',
            'combined_score',
        ]

        # Protein -> gene map for taxon 9606, as returned by Ensembl
        self.protein_list = Ensembl(
            'rdf_graph', True).fetch_protein_gene_map(9606)

    def tearDown(self):
        """Nothing to clean up."""

    def testFakeDataSet1(self):
        """Two mapped proteins produce a single interaction triple."""
        string_db = StringDB('rdf_graph', True)
        string_db.graph = RDFGraph(True)
        self.assertEqual(len(string_db.graph), 0)

        prot_map = Ensembl('rdf_graph', True).fetch_protein_gene_map(9606)
        # Turn every mapped gene id into an ENSEMBL curie, in place
        for protein_id in prot_map:
            prot_map[protein_id] = "ENSEMBL:{}".format(prot_map[protein_id])

        print("Finished fetching ENSP IDs, fetched {} proteins".format(
            len(prot_map)))

        dataframe = pd.DataFrame(data=self.test_set_1, columns=self.columns)
        string_db._process_protein_links(dataframe, prot_map, 9606)

        triples = """
            ENSEMBL:ENSG00000001626 RO:0002434 ENSEMBL:ENSG00000004059 .
        """

        graphs_match = self.test_util.test_graph_equality(
            triples, string_db.graph)
        self.assertTrue(graphs_match)

    def testFakeDataSet2(self):
        """
        The data set contains a deprecated protein ID that is expected
        to be filtered out by ensembl biomart, so processing it must
        leave the graph empty.
        """
        string_db = StringDB('rdf_graph', True)
        string_db.graph = RDFGraph()
        self.assertEqual(len(string_db.graph), 0)

        dataframe = pd.DataFrame(data=self.test_set_2, columns=self.columns)
        string_db._process_protein_links(dataframe, self.protein_list, 9606)

        self.assertEqual(len(string_db.graph), 0)
Exemple #16
0
class GeneVariantDiseaseTest(unittest.TestCase):
    """Run Orphanet._process_diseasegene() on small XML fixtures."""

    def setUp(self):
        """Create an Orphanet source that reads from resources/orphanet."""
        resource_dir = os.path.join(os.path.dirname(__file__),
                                    'resources/orphanet')
        self.test_util = TestUtils()
        self.orphanet = Orphanet('rdf_graph', True)
        self.orphanet.rawdir = resource_dir

    def tearDown(self):
        """Drop the source created in setUp."""
        self.orphanet = None

    def test_germline_variant_to_disease(self):
        """orph-germline.xml yields the expected association triples."""
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-germline.xml'

        self.orphanet._process_diseasegene(limit=None)
        serialized = self.orphanet.graph.serialize(
            format="turtle").decode("utf-8")
        LOG.debug("Reference graph: %s", serialized)
        expected_triples = """
MONARCH:ba2ac5d2153c70e2bb98 a OBAN:association ;
    RO:0002558 ECO:0000322 ;
    OBAN:association_has_object ORPHA:938475 ;
    OBAN:association_has_predicate RO:0004013 ;
    OBAN:association_has_subject HGNC:30497 .

ENSEMBL:ENSG00000166813 a owl:Class .

HGNC:30497 a owl:Class ;
    RO:0004013 ORPHA:938475 ;
    oboInOwl:hasExactSynonym "KAS1" ;
    owl:equivalentClass ENSEMBL:ENSG00000166813,
       ORPHA:268061 .

ORPHA:268061 a owl:Class .

ORPHA:938475 a owl:Class ;
    rdfs:label "too much unit testing disorder" .
    
ENSEMBL:ENSG00000166813 biolink:category biolink:Gene .
ECO:0000322 biolink:category biolink:EvidenceType .
HGNC:30497 biolink:category biolink:Genotype .
HGNC:30497 biolink:category biolink:Gene .
ORPHA:268061 biolink:category biolink:Gene .
ORPHA:938475 biolink:category biolink:Disease .

MONARCH:ba2ac5d2153c70e2bb98 biolink:category biolink:Association .
        """
        graphs_match = self.test_util.test_graph_equality(
            expected_triples, self.orphanet.graph)
        self.assertTrue(graphs_match)

    def test_germline_lof_variant_to_disease(self):
        """orph-germline-lof.xml yields the expected association triples."""
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-germline-lof.xml'

        self.orphanet._process_diseasegene(limit=None)
        serialized = self.orphanet.graph.serialize(
            format="turtle").decode("utf-8")
        LOG.debug("Reference graph: %s", serialized)
        expected_triples = """
MONARCH:b9ad1b0c562ad4db3f1e a OBAN:association ;
    RO:0002558 ECO:0000322 ;
    OBAN:association_has_object ORPHA:938475 ;
    OBAN:association_has_predicate RO:0004012 ;
    OBAN:association_has_subject ORPHA:268061 .

ORPHA:268061 RO:0004012 ORPHA:938475 ;
    oboInOwl:hasExactSynonym "KAS1" .

ORPHA:938475 a owl:Class ;
    rdfs:label "too much unit testing disorder" .
    
ECO:0000322 biolink:category biolink:EvidenceType .
ORPHA:268061 biolink:category biolink:Gene .
ORPHA:268061 biolink:category biolink:Genotype .
ORPHA:938475 biolink:category biolink:Disease .
    
MONARCH:b9ad1b0c562ad4db3f1e biolink:category biolink:Association .
        """
        graphs_match = self.test_util.test_graph_equality(
            expected_triples, self.orphanet.graph)
        self.assertTrue(graphs_match)

    def test_gene_to_disease(self):
        """orph-no-variant.xml yields the expected association triples."""
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-no-variant.xml'

        self.orphanet._process_diseasegene(limit=None)
        serialized = self.orphanet.graph.serialize(
            format="turtle").decode("utf-8")
        LOG.debug("Reference graph: %s", serialized)
        expected_triples = """
MONARCH:bdbeb077e365ddedda20 a OBAN:association ;
    RO:0002558 ECO:0000322 ;
    OBAN:association_has_object ORPHA:938475 ;
    OBAN:association_has_predicate RO:0004015 ;
    OBAN:association_has_subject ORPHA:268061 .

ORPHA:268061 RO:0004015 ORPHA:938475 ;
    oboInOwl:hasExactSynonym "KAS1" .

ORPHA:938475 a owl:Class ;
    rdfs:label "too much unit testing disorder" .
    
ECO:0000322 biolink:category biolink:EvidenceType .
ORPHA:268061 biolink:category biolink:Gene .
ORPHA:268061 biolink:category biolink:Genotype .
ORPHA:938475 biolink:category biolink:Disease .

MONARCH:bdbeb077e365ddedda20 biolink:category biolink:Association .
        """
        graphs_match = self.test_util.test_graph_equality(
            expected_triples, self.orphanet.graph)
        self.assertTrue(graphs_match)

    def test_unmapped_disease_assoc_type(self):
        """
        Test that a gene disease type that we have
        not mapped in translationtable/orphanet.yaml
        raises a KeyError
        """
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-no-mapping.xml'
        with self.assertRaises(KeyError):
            self.orphanet._process_diseasegene(limit=None)
Exemple #17
0
class GeneVariantDiseaseTest(unittest.TestCase):
    """Run Orphanet._process_diseasegene() on small XML fixtures."""

    def setUp(self):
        """Create an Orphanet source that reads from resources/orphanet."""
        self.test_util = TestUtils()
        self.orphanet = Orphanet('rdf_graph', True)
        # Override so tests don't break when we update terms
        # Note there is no such file ./resources/test_terms.yaml
        # self.globaltt = self.orphanet.open_and_parse_yaml(
        #    os.path.join(os.path.dirname(__file__), './resources/test_terms.yaml'))
        self.orphanet.rawdir = os.path.join(
            os.path.dirname(__file__), 'resources/orphanet')

    def tearDown(self):
        """Drop the source created in setUp."""
        self.orphanet = None
        return

    def test_germline_variant_to_disease(self):
        """orph-germline.xml yields the expected variant/disease triples."""
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-germline.xml'

        self.orphanet._process_diseasegene(limit=None)
        LOG.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle").decode("utf-8")
        )
        expected_triples = """
MONARCH:b40e89f44906ccededb6 a OBAN:association ;
    RO:0002558 ECO:0000322 ;
    OBAN:association_has_object ORPHA:938475 ;
    OBAN:association_has_predicate RO:0003303 ;
    OBAN:association_has_subject <https://monarchinitiative.org/.well-known/genid/bc50c3aece4f4f161d4d> .

ENSEMBL:ENSG00000166813 a owl:Class .

HGNC:30497 a owl:Class .

HGNC:30497 a owl:Class ;
    rdfs:label "KS1" ;
    oboInOwl:hasExactSynonym "KAS1" ;
    rdfs:subClassOf OBO:SO_0001217 ;
    owl:equivalentClass ENSEMBL:ENSG00000166813,
        ORPHA:268061 .

<https://monarchinitiative.org/.well-known/genid/bc50c3aece4f4f161d4d> a GENO:0000002 ;
    rdfs:label "germline variant of KS1" ;
    GENO:0000418 HGNC:30497;
    RO:0003303 ORPHA:938475 ;
    :MONARCH_anonymous true ;
    :has_cell_origin GENO:0000900 .

ORPHA:938475 a owl:Class ;
    rdfs:label "too much unit testing disorder" .
        """
        self.assertTrue(self.test_util.test_graph_equality(
            expected_triples, self.orphanet.graph))
        return

    def test_germline_lof_variant_to_disease(self):
        """orph-germline-lof.xml yields the expected LoF variant triples."""
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-germline-lof.xml'

        self.orphanet._process_diseasegene(limit=None)
        # Logged at debug level, like the sibling tests, so a normal run
        # is not flooded with the serialized graph
        LOG.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle").decode("utf-8")
        )
        # The first statement needs the "a" (rdf:type) predicate; without
        # it the expected turtle is not syntactically valid.
        # NOTE(review): the association id below is identical to the one
        # in test_germline_variant_to_disease - confirm it is intended.
        expected_triples = """
MONARCH:b40e89f44906ccededb6 a OBAN:association ;
    RO:0002558 ECO:0000322 ;
    OBAN:association_has_object ORPHA:938475 ;
    OBAN:association_has_predicate RO:0003303 ;
    OBAN:association_has_subject <https://monarchinitiative.org/.well-known/genid/ba0884fb61004110> .

HGNC:30497 a owl:Class ;
    rdfs:label "KS1" ;
    oboInOwl:hasExactSynonym "KAS1" ;
    rdfs:subClassOf SO:0001217 .

<https://monarchinitiative.org/.well-known/genid/ba0884fb61004110> a GENO:0000002 ;
    rdfs:label "germline loss of function variant of KS1" ;
    GENO:0000418 HGNC:30497 ;
    RO:0003303 ORPHA:938475 ;
    :MONARCH_anonymous true ;
    :has_cell_origin GENO:0000900 ;
    :has_functional_consequence SO:0002054 .

ORPHA:938475 a owl:Class ;
    rdfs:label "too much unit testing disorder" .
        """
        self.assertTrue(
            self.test_util.test_graph_equality(expected_triples, self.orphanet.graph))
        return

    def test_gene_to_disease(self):
        """orph-no-variant.xml yields the expected gene/disease triples."""
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-no-variant.xml'

        self.orphanet._process_diseasegene(limit=None)
        LOG.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle").decode("utf-8")
        )
        expected_triples = """
MONARCH:bd8eebdc522f33aca860 a OBAN:association ;
    RO:0002558 ECO:0000322 ;
    OBAN:association_has_object ORPHA:938475 ;
    OBAN:association_has_predicate RO:0003304 ;
    OBAN:association_has_subject HGNC:30497 .

HGNC:30497 a owl:Class ;
    rdfs:label "KS1" ;
    RO:0003304 ORPHA:938475 ;
    oboInOwl:hasExactSynonym "KAS1" ;
    rdfs:subClassOf SO:0001217 .

ORPHA:938475 a owl:Class ;
    rdfs:label "too much unit testing disorder" .
        """
        self.assertTrue(self.test_util.test_graph_equality(
            expected_triples, self.orphanet.graph))
        return

    def test_unmapped_disease_assoc_type(self):
        """
        Test that a gene disease type that we have
        not mapped in translationtable/orphanet.yaml
        raises a ValueError
        """
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-no-mapping.xml'
        self.assertRaises(
            ValueError, lambda: self.orphanet._process_diseasegene(limit=None))
        return
Exemple #18
0
class EvidenceProvenanceTestCase(unittest.TestCase):

    def setUp(self):
        """
        Build the fixtures shared by the tests in this class:
        a TestUtils helper, the association curie and its IRI,
        the ECO evidence code, one sample data row (test_set_1),
        and the hardcoded study / evidence blank node ids
        """
        self.test_util = TestUtils()
        self.assoc_curie = 'MONARCH:test_association'
        self.eco_id = 'ECO:0000015'

        # Sample data row; the tests slice it by position
        self.test_set_1 = (
            'MGI:1920145', 'Setd5', 'WTSI', 'MEFW', 'male',
            'heterozygote', 'MGI:4432631', 'Setd5<tm1a(EUCOMM)Wtsi>',
            'targeted mutation 1a, Wellcome Trust Sanger Institute',
            'MGI:2159965', 'C57BL/6N', 'MGP',
            'Wellcome Trust Sanger Institute Mouse Genetics Project',
            'MGP Select Pipeline', 'MGP_001', 'MGP_XRY_001', 'X-ray',
            'IMPC_XRY_008_001', 'Number of ribs right', 'MP:0005390',
            'skeleton phenotype', 'MP:0000480', 'increased rib number',
            '1.637023E-010', '', '8.885439E-007',
            'Wilcoxon rank sum test with continuity correction', 'IMPC')

        # Test curies that would otherwise be generated inside
        # _add_evidence() and _add_study_provenance();
        # these blank nodes are hardcoded as NOT Skolemized
        self.study_curie = "_:study"
        self.evidence_curie = "_:evidence"

        # IRI of the association, for testing sparql output
        prefix_map = curie_map.get()
        resolver = CurieUtil(prefix_map)
        self.assoc_iri = URIRef(resolver.get_uri(self.assoc_curie))

    def test_evidence_model(self):
        """
        Functional test for _add_evidence():
        add the evidence of the sample row to an empty graph and
        compare the result with the expected triples
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)  # start from an empty graph
        triple_count = len(list(impc.graph))
        self.assertTrue(triple_count == 0)

        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

        row = self.test_set_1
        p_value, percentage_change, effect_size = row[23:26]

        impc._add_evidence(
            self.assoc_curie, self.eco_id, impc_map,
            p_value, percentage_change, effect_size,
            self.study_curie)

        expected_turtle = """
    :MONARCH_test_association SEPIO:0000007 <https://monarchinitiative.org/.well-known/genid/b097a98087df7a99> .
    
    <https://monarchinitiative.org/.well-known/genid/b097a98087df7a99> a ECO:0000015 ;
        SEPIO:0000084 <https://monarchinitiative.org/.well-known/genid/b89ee584330837c9>,
            <https://monarchinitiative.org/.well-known/genid/bc0eeccdea27a1d8> ;
        SEPIO:0000085 <https://monarchinitiative.org/.well-known/genid/study> .

    <https://monarchinitiative.org/.well-known/genid/bc0eeccdea27a1d8> a OBI:0000175 ;
        RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
        STATO:0000129 1.637023e-10 .

    <https://monarchinitiative.org/.well-known/genid/b89ee584330837c9> a STATO:0000085 ;
        RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
        STATO:0000129 "8.885439E-007" .
        """

        graphs_match = self.test_util.test_graph_equality(
            expected_turtle, impc.graph)
        self.assertTrue(graphs_match)

    def test_provenance_model(self):
        """
        Functional test for _add_study_provenance()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        self.assertTrue(len(list(impc.graph)) == 0)

        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])
        impress_map = json.loads(
            impc.fetch_from_url(
                impc.map_files['impress_map']).read().decode('utf-8'))

        (phenotyping_center, colony) = self.test_set_1[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = self.test_set_1[12:19]
        (statistical_method, resource_name) = self.test_set_1[26:28]

        impc._add_study_provenance(
            impc_map, impress_map, phenotyping_center, colony,
            project_fullname, pipeline_name, pipeline_stable_id,
            procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name,
            statistical_method, resource_name)

        triples = """
    <https://monarchinitiative.org/.well-known/genid/bbdd05a8ca155dda> a OBI:0000471 ;
      BFO:0000051 OBO:STATO_0000076,
          <https://www.mousephenotype.org/impress/protocol/175/15> ;
      BFO:0000050  IMPRESS-procedure:15 ,
          <http://www.sanger.ac.uk/science/data/mouse-genomes-project> ;
      SEPIO:0000114 <https://www.mousephenotype.org/impress/parameterontologies/1867/91> ;
      SEPIO:0000017 <http://www.sanger.ac.uk/>  .
      
    <https://monarchinitiative.org/.well-known/genid/bc0b26361b8687b5> a owl:NamedIndividual ;
        rdfs:label "MEFW" .

    <http://www.sanger.ac.uk/> a foaf:organization ;
        rdfs:label "WTSI" .

    <http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
        rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" .

    <https://www.mousephenotype.org/impress/parameterontologies/1867/91> a owl:NamedIndividual ;
        rdfs:label "Number of ribs right (X-ray)" .

    IMPRESS-procedure:15 a owl:NamedIndividual ;
        rdfs:label "MGP Select Pipeline" .

    <https://www.mousephenotype.org/impress/protocol/175/15> a owl:NamedIndividual ;
        rdfs:label "X-ray" .
"""
        # dbg
        logger.debug("Reference graph: %s",
                     impc.graph.serialize(format="turtle")
                               .decode("utf-8")
        )

        self.assertTrue(self.test_util.test_graph_equality(
            triples, impc.graph))

    def test_assertion_model(self):
        """
        Functional test for _add_assertion_provenance():
        add the assertion provenance of the test association to an
        empty graph and compare the result with the expected triples
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)  # start from an empty graph
        triple_count = len(list(impc.graph))
        self.assertTrue(triple_count == 0)

        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

        impc._add_assertion_provenance(
            self.assoc_curie, self.evidence_curie, impc_map)

        expected_turtle = """
    MONARCH:test_association SEPIO:0000015 <https://monarchinitiative.org/.well-known/genid/bcb2c00a5c2f9c43> .
    <https://monarchinitiative.org/.well-known/genid/bcb2c00a5c2f9c43> a SEPIO:0000001 ;
        SEPIO:0000018 <http://www.mousephenotype.org/> ;
        SEPIO:0000111 <https://monarchinitiative.org/.well-known/genid/evidence>  .

    <http://www.mousephenotype.org/> a foaf:organization ;
        rdfs:label "International Mouse Phenotyping Consortium" .

        """
        # Log the graph that was actually produced, to ease debugging
        turtle_dump = impc.graph.serialize(format="turtle").decode("utf-8")
        logger.debug("Reference graph: %s", turtle_dump)

        graphs_match = self.test_util.test_graph_equality(
            expected_turtle, impc.graph)
        self.assertTrue(graphs_match)

    def test_random_data_set(self):
        """
        Download dataset using fetch(), then take a row of data and
        run through evidence and provenance functions to test the output

        Line of data is hardcoded, but theoretically should work on any line
        """
        line_to_test = 1129
        count = 0
        impc = IMPC('rdf_graph', False)   # Not Skolem
        impress_map = json.loads(
            impc.fetch_from_url(
                impc.map_files['impress_map']).read().decode('utf-8'))
        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

        # fetch file
        impc.fetch(True)
        file_path = '/'.join((impc.rawdir, impc.files['all']['file']))
        with gzip.open(file_path, 'rt') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
            for row in filereader:
                count += 1
                if count == line_to_test:
                    self.test_set_1 = row
                    break

        # Some DRY violation with the above tests
        (phenotyping_center, colony) = row[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = row[12:19]
        (statistical_method, resource_name) = row[26:28]

        (p_value, percentage_change, effect_size) = self.test_set_1[23:26]

        impc._add_evidence(self.assoc_curie, self.eco_id, impc_map, p_value,
                           percentage_change, effect_size, self.study_curie)

        impc._add_study_provenance(
            impc_map, impress_map, phenotyping_center, colony,
            project_fullname, pipeline_name, pipeline_stable_id,
            procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name,
            statistical_method, resource_name)

        # Note that this doesn't test much since we're dealing with
        # multiple part_of  and has_part links to individuals
        # which results in ambiguity = hard to test
        sparql_query = """
SELECT *
WHERE {
    ?assoc SEPIO:0000007 ?evidenceline .
    ?evidenceline a ECO:0000015 ;
        SEPIO:0000085 _:study .

    ?study a OBI:0000471 ;
        SEPIO:0000114 ?param ;
        SEPIO:0000017 ?agent .
}
"""
        sparql_output = impc.graph.query(sparql_query)
        # Test that query passes and returns one row
        self.assertEqual(len(list(sparql_output)), 1)

    def tearDown(self):
        """
        Nothing to clean up after a test
        """
Exemple #19
0
class TestGwasHaplotypeModel(unittest.TestCase):
    """
    Test the modelling of a  SNP to trait association
    from sample GWAS catalog data
    """
    def setUp(self):
        """
        Create the test helper, a GWASCatalog source with an empty
        graph, and one sample record whose fields hold four
        ';' separated SNPs
        """
        self.test_util = TestUtils()

        catalog = GWASCatalog('rdf_graph', True)
        self.source = catalog
        catalog.graph = RDFGraph(True)  # start from an empty graph

        sample_record = {
            'snp_label': 'rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?',
            'chrom_num': '9;9;9;9',
            'chrom_pos': '36998996;37002118;37000690;36997420',
            'context':
            'intron_variant; intron_variant; intron_variant; intron_variant',
            'allele_freq': 'NR',
            'trait': 'Intelligence',
            'trait_uri': 'http://www.ebi.ac.uk/efo/EFO_0004337',
            'pvalue': '0.00000004',
            'merged': '0',
            'snp_id_current': '',
            'mapped_gene': 'PAX5; PAX5; PAX5; PAX5',
            'snp_gene_nums': '',
            'upstream_gene_num': '107986179',
            'downstream_gene_num': '107986180',
            'init_sample_desc':
            '656 European ancestry individuals from ADHD families',
            'replicated_sample_desc': 'NA',
            'platform': 'Illumina [795637]',
            'pubmed': '22449649',
        }
        self.test_data = sample_record

    def tearDown(self):
        # Drop the reference to the GWASCatalog source created in setUp
        self.source = None

    def test_snp_model(self):
        """
        Test output model of _process_haplotype()
        self._process_haplotype(
            variant_curie, strongest_snp_risk_allele,
            chrom_num, chrom_pos, context,
            risk_allele_frequency, mapped_gene, so_ontology)
        """
        self.assertTrue(len(list(self.source.graph)) == 0)
        variant_curie, variant_type = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])

        so_ontology = RDFGraph()
        LOG.info("Loading SO ontology in separate rdf graph")
        so_ontology.parse(self.source.files['so']['url'], format='xml')
        so_ontology.bind_all_namespaces()
        LOG.info("Finished loading SO ontology")

        self.source._process_haplotype(
            variant_curie, self.test_data['snp_label'],
            self.test_data['chrom_num'], self.test_data['chrom_pos'],
            self.test_data['context'], self.test_data['allele_freq'],
            self.test_data['mapped_gene'], so_ontology)

        triples = """
:haplotype_bb627b1f64039b0f751a a SO:0001024 ;
    rdfs:label "rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?" ;
    GENO:0000382 dbSNP:rs1329573,
        dbSNP:rs3758171,
        dbSNP:rs3824344,
        dbSNP:rs7020413 ;
    GENO:0000418 HGNC:8619 ;
    RO:0002162 NCBITaxon:9606 .

dbSNP:rs1329573 a SO:0000694,
        SO:0001627 ;
    rdfs:label "rs1329573-?" ;
    faldo:location <https://monarchinitiative.org/.well-known/genid/b3fad5df82cdfb283329> ;
    GENO:0000418 HGNC:8619 ;
    RO:0002162 NCBITaxon:9606 .

dbSNP:rs3758171 a SO:0000694,
        SO:0001627 ;
    rdfs:label "rs3758171-?" ;
    faldo:location <https://monarchinitiative.org/.well-known/genid/b25a2da36647bdd71be3> ;
    GENO:0000418 HGNC:8619 ;
    RO:0002162 NCBITaxon:9606 .

dbSNP:rs3824344 a SO:0000694,
        OBO:SO_0001627 ;
    rdfs:label "rs3824344-?" ;
    faldo:location <https://monarchinitiative.org/.well-known/genid/b096a3e94e32fe23374a> ;
    GENO:0000418 HGNC:8619 ;
    RO:0002162 NCBITaxon:9606 .

dbSNP:rs7020413 a SO:0000694,
        SO:0001627 ;
    rdfs:label "rs7020413-?" ;
    faldo:location <https://monarchinitiative.org/.well-known/genid/bbb252d9b6cd02e9880a> ;
    GENO:0000418 HGNC:8619 ;
    RO:0002162 NCBITaxon:9606 .

<https://monarchinitiative.org/.well-known/genid/b25a2da36647bdd71be3> a faldo:Region ;
    rdfs:label "GRCh38chr9-36997420-36997420-Region";
    faldo:begin <https://monarchinitiative.org/.well-known/genid/b21985847fe0774084eb> ;
    faldo:end <https://monarchinitiative.org/.well-known/genid/b21985847fe0774084eb> .

<https://monarchinitiative.org/.well-known/genid/b3fad5df82cdfb283329> a faldo:Region ;
    rdfs:label "GRCh38chr9-36998996-36998996-Region";
    faldo:begin <https://monarchinitiative.org/.well-known/genid/b55051762f8d5a3dbeb5> ;
    faldo:end <https://monarchinitiative.org/.well-known/genid/b55051762f8d5a3dbeb5> .

<https://monarchinitiative.org/.well-known/genid/b096a3e94e32fe23374a> a faldo:Region ;
    rdfs:label "GRCh38chr9-37000690-37000690-Region";
    faldo:begin <https://monarchinitiative.org/.well-known/genid/b5d61dbc7958a979d046> ;
    faldo:end <https://monarchinitiative.org/.well-known/genid/b5d61dbc7958a979d046> .

<https://monarchinitiative.org/.well-known/genid/bbb252d9b6cd02e9880a> a faldo:Region ;
    rdfs:label "GRCh38chr9-37002118-37002118-Region";
    faldo:begin <https://monarchinitiative.org/.well-known/genid/bb870c3d7606a3e0fc3c> ;
    faldo:end <https://monarchinitiative.org/.well-known/genid/bb870c3d7606a3e0fc3c> .

<https://monarchinitiative.org/.well-known/genid/b21985847fe0774084eb> a faldo:Position ;
    rdfs:label "GRCh38chr9-36997420";
    faldo:position 36997420 ;
    faldo:reference OBO:CHR_GRCh38chr9 .

<https://monarchinitiative.org/.well-known/genid/b55051762f8d5a3dbeb5> a faldo:Position ;
    rdfs:label "GRCh38chr9-36998996";
    faldo:position 36998996 ;
    faldo:reference CHR:GRCh38chr9 .

<https://monarchinitiative.org/.well-known/genid/b5d61dbc7958a979d046> a faldo:Position ;
    rdfs:label "GRCh38chr9-37000690";
    faldo:position 37000690 ;
    faldo:reference CHR:GRCh38chr9 .

<https://monarchinitiative.org/.well-known/genid/bb870c3d7606a3e0fc3c> a faldo:Position ;
    rdfs:label "GRCh38chr9-37002118";
    faldo:position 37002118 ;
    faldo:reference CHR:GRCh38chr9 .
        """

        # dbg
        LOG.debug("Reference graph: %s",
                  self.source.graph.serialize(format="turtle").decode("utf-8"))

        self.assertTrue(
            self.test_util.test_graph_equality(triples, self.source.graph))
Exemple #20
0
class TestGwasSNPModel(unittest.TestCase):
    """
    Test the modelling of a  SNP to trait association
    from sample GWAS catalog data
    """

    def setUp(self):
        """
        Create the test helper, a GWASCatalog source with an empty
        graph that has all namespaces bound, and one sample record
        for the SNP rs1491921
        """
        self.test_util = TestUtils()

        catalog = GWASCatalog('rdf_graph', True)
        self.source = catalog
        catalog.graph = RDFGraph(True)  # start from an empty graph
        catalog.graph.bind_all_namespaces()

        sample_record = {
            'snp_label': 'rs1491921-C',
            'chrom_num': '5',
            'chrom_pos': '21259029',
            'context': 'intergenic_variant',
            'allele_freq': '0.013',
            'trait': 'Diisocyanate-induced asthma',
            'trait_uri': 'http://www.ebi.ac.uk/efo/EFO_0006995, http://www.ebi.ac.uk/efo/EFO_0003949',
            'pvalue': '0.0000007',
            'merged': '0',
            'snp_id_current': '1491921',
            'mapped_gene': 'LOC102723561 - GUSBP1',
            'snp_gene_nums': '',
            'upstream_gene_num': '107986179',
            'downstream_gene_num': '107986180',
            'init_sample_desc': '74 European ancestry cases, 824 European ancestry controls',
            'replicated_sample_desc': 'NA',
            'platform': 'Illumina [1556551]',
            'pubmed': '25918132',
        }
        self.test_data = sample_record

    def tearDown(self):
        """
        Drop the references to the source and to the EFO ontology
        """
        self.source = self.efo_ontology = None

    def test_snp_type_resolution(self):
        """
        Unit test for _get_curie_and_type_from_id():
        the label rs1491921-C must resolve to
        the curie dbSNP:rs1491921 and the type snp
        """
        triple_count = len(list(self.source.graph))
        self.assertTrue(triple_count == 0)

        resolved = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])
        curie, kind = resolved

        self.assertEqual(curie, "dbSNP:rs1491921")
        self.assertEqual(kind, 'snp')

    def test_snp_model(self):
        """
        Test output model of _add_snp_to_graph()
        """
        self.assertTrue(len(list(self.source.graph)) == 0)
        variant_curie, variant_type = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])

        self.source._add_snp_to_graph(
            variant_curie, self.test_data['snp_label'], self.test_data['chrom_num'],
            self.test_data['chrom_pos'], self.test_data['context'],
            self.test_data['allele_freq'])

        triples = """
    dbSNP:rs1491921 a OBO:SO_0000694, OBO:SO_0001628 ;
        rdfs:label "rs1491921-C" ;
        faldo:location  <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029-21259029-Region> ;
        OBO:RO_0002162 OBO:NCBITaxon_9606 ;
        dc:description "0.013 [risk allele frequency]" .

    <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029-21259029-Region> a faldo:Region ;
        faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> ;
        faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> .

    <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> a faldo:Position ;
        faldo:position 21259029 ;
        faldo:reference OBO:CHR_GRCh38chr5 .
"""
        # To debug
        # print(self.source.graph.serialize(format="turtle").decode("utf-8"))
        # self.assertTrue(False)

        # dbg
        # LOG.debug(
        #    "Reference graph: %s",
        #   self.source.graph.serialize(format="turtle").decode("utf-8"))

        self.assertTrue(self.test_util.test_graph_equality(triples, self.source.graph))

    def test_snp_gene_relation(self):
        """
        Functional test for _add_snp_gene_relation():
        the sample SNP must be linked to its upstream and
        downstream genes by the expected triples
        """
        source = self.source
        record = self.test_data

        triple_count = len(list(source.graph))
        self.assertTrue(triple_count == 0)

        # Only the curie is needed here, the resolved type is ignored
        variant_curie, _variant_type = source._get_curie_and_type_from_id(
            record['snp_label'])

        source._add_snp_gene_relation(
            variant_curie,
            record['snp_gene_nums'],
            record['upstream_gene_num'],
            record['downstream_gene_num'])

        expected_turtle = """
        dbSNP:rs1491921 OBO:RO_0002528 NCBIGene:107986180 ;
            OBO:RO_0002529 NCBIGene:107986179 .
        """
        graphs_match = self.test_util.test_graph_equality(
            expected_turtle, source.graph)
        self.assertTrue(graphs_match)

    def test_deprecated_snp(self):
        """
        Functional test for _add_deprecated_snp():
        with a made up current id and the merged flag set, the sample
        SNP must be modelled by the expected deprecation triples
        """
        source = self.source
        record = self.test_data

        triple_count = len(list(source.graph))
        self.assertTrue(triple_count == 0)

        # Made up values, the sample record itself is not a merged SNP
        merged = '1'
        snp_id_current = '12345'

        # Only the curie is needed here, the resolved type is ignored
        variant_curie, _variant_type = source._get_curie_and_type_from_id(
            record['snp_label'])

        source._add_deprecated_snp(
            variant_curie, snp_id_current, merged,
            record['chrom_num'], record['chrom_pos'])

        expected_turtle = """
        dbSNP:rs1491921 a owl:NamedIndividual ;
            OBO:IAO_0100001 dbSNP:rs12345 ;
            owl:deprecated true .

        dbSNP:rs12345 MONARCH:cliqueLeader true .
        """
        graphs_match = self.test_util.test_graph_equality(
            expected_turtle, source.graph)
        self.assertTrue(graphs_match)

    def test_snp_trait_association(self):
        """
        Functional test for _add_variant_trait_association():
        load the EFO ontology into its own graph, associate the
        sample SNP with its traits and compare the source graph
        with the expected triples
        """
        source = self.source
        record = self.test_data

        triple_count = len(list(source.graph))
        self.assertTrue(triple_count == 0)

        efo_ontology = RDFGraph()
        LOG.info("Loading EFO ontology in separate rdf graph")
        efo_ontology.parse(source.files['efo']['url'], format='xml')
        efo_ontology.bind_all_namespaces()
        LOG.info("Finished loading EFO ontology")

        # Only the curie is needed here, the resolved type is ignored
        variant_curie, _variant_type = source._get_curie_and_type_from_id(
            record['snp_label'])

        description = source._make_description(
            record['trait'],
            record['init_sample_desc'],
            record['replicated_sample_desc'],
            record['platform'],
            record['pvalue'])

        source._add_variant_trait_association(
            variant_curie, record['trait_uri'], efo_ontology,
            record['pubmed'], description)

        # {0} is filled in with the generated description below
        expected_template = """


    MONARCH:bffc7a930c08cc8fe931 a OBAN:association ;
        dc:description "{0}" ;
        OBO:RO_0002558 OBO:ECO_0000213 ;
        dc:source PMID:25918132 ;
        OBAN:association_has_object EFO:0003949 ;
        OBAN:association_has_predicate RO:0003304 ;
        OBAN:association_has_subject dbSNP:rs1491921 .

    MONARCH:bff9b97458d67ed7f517 a OBAN:association ;
        dc:description "{0}" ;
        OBO:RO_0002558 OBO:ECO_0000213 ;
        dc:source PMID:25918132 ;
        OBAN:association_has_object EFO:0006995 ;
        OBAN:association_has_predicate RO:0003304 ;
        OBAN:association_has_subject dbSNP:rs1491921 .

    EFO:0003949 a owl:Class ;
        rdfs:label "eye color"^^xsd:string ;
        rdfs:subClassOf UPHENO:0001001 .

    dbSNP:rs1491921 RO:0003304 EFO:0003949,
            EFO:0006995 .

    PMID:25918132 a OBO:IAO_0000013 .
        """
        expected_turtle = expected_template.format(description)

        # To debug a failure, serialize source.graph as turtle and
        # compare it by eye with the expected triples above
        graphs_match = self.test_util.test_graph_equality(
            expected_turtle, source.graph)
        self.assertTrue(graphs_match)
Exemple #21
0
class TestGwasHaplotypeModel(unittest.TestCase):
    """
    Test the modelling of a  SNP to trait association
    from sample GWAS catalog data
    """

    def setUp(self):
        """
        Create the test helper, a GWASCatalog source with an empty
        graph, and one sample record whose fields hold four
        ';' separated SNPs
        """
        self.test_util = TestUtils()

        catalog = GWASCatalog('rdf_graph', True)
        self.source = catalog
        catalog.graph = RDFGraph(True)  # start from an empty graph

        sample_record = {
            'snp_label': 'rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?',
            'chrom_num': '9;9;9;9',
            'chrom_pos': '36998996;37002118;37000690;36997420',
            'context': 'intron_variant; intron_variant; intron_variant; intron_variant',
            'allele_freq': 'NR',
            'trait': 'Intelligence',
            'trait_uri': 'http://www.ebi.ac.uk/efo/EFO_0004337',
            'pvalue': '0.00000004',
            'merged': '0',
            'snp_id_current': '',
            'mapped_gene': 'PAX5; PAX5; PAX5; PAX5',
            'snp_gene_nums': '',
            'upstream_gene_num': '107986179',
            'downstream_gene_num': '107986180',
            'init_sample_desc': '656 European ancestry individuals from ADHD families',
            'replicated_sample_desc': 'NA',
            'platform': 'Illumina [795637]',
            'pubmed': '22449649',
        }
        self.test_data = sample_record

    def tearDown(self):
        # Drop the reference to the GWASCatalog source created in setUp
        self.source = None

    def test_snp_model(self):
        """
        Test output model of _process_haplotype()
        self._process_haplotype(
            variant_curie, strongest_snp_risk_allele,
            chrom_num, chrom_pos, context,
            risk_allele_frequency, mapped_gene, so_ontology)
        """
        self.assertTrue(len(list(self.source.graph)) == 0)
        variant_curie, variant_type = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])

        so_ontology = RDFGraph()
        LOG.info("Loading SO ontology in separate rdf graph")
        so_ontology.parse(self.source.files['so']['url'], format='xml')
        so_ontology.bind_all_namespaces()
        LOG.info("Finished loading SO ontology")

        self.source._process_haplotype(
            variant_curie, self.test_data['snp_label'], self.test_data['chrom_num'],
            self.test_data['chrom_pos'], self.test_data['context'],
            self.test_data['allele_freq'], self.test_data['mapped_gene'], so_ontology)

        triples = """
:haplotype_bb627b1f64039b0f751a a OBO:GENO_0000871 ;
    rdfs:label "rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?" ;
    OBO:GENO_0000382 dbSNP:rs1329573,
        dbSNP:rs3758171,
        dbSNP:rs3824344,
        dbSNP:rs7020413 ;
    OBO:SO_0001627 HGNC:8619 ;
    OBO:RO_0002162 OBO:NCBITaxon_9606 .

dbSNP:rs1329573 a OBO:SO_0000694,
        SO:0001627 ;
    rdfs:label "rs1329573-?" ;
    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996-36998996-Region> ;
    OBO:SO_0001627 HGNC:8619 ;
    OBO:RO_0002162 OBO:NCBITaxon_9606 .

dbSNP:rs3758171 a OBO:SO_0000694,
        OBO:SO_0001627 ;
    rdfs:label "rs3758171-?" ;
    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420-36997420-Region> ;
    OBO:SO_0001627 HGNC:8619 ;
    OBO:RO_0002162 OBO:NCBITaxon_9606 .

dbSNP:rs3824344 a OBO:SO_0000694,
        OBO:SO_0001627 ;
    rdfs:label "rs3824344-?" ;
    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690-37000690-Region> ;
    OBO:SO_0001627 HGNC:8619 ;
    OBO:RO_0002162 OBO:NCBITaxon_9606 .

dbSNP:rs7020413 a OBO:SO_0000694,
        OBO:SO_0001627 ;
    rdfs:label "rs7020413-?" ;
    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118-37002118-Region> ;
    OBO:SO_0001627 HGNC:8619 ;
    OBO:RO_0002162 OBO:NCBITaxon_9606 .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420-36997420-Region> a faldo:Region ;
    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> ;
    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996-36998996-Region> a faldo:Region ;
    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> ;
    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690-37000690-Region> a faldo:Region ;
    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> ;
    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118-37002118-Region> a faldo:Region ;
    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> ;
    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> a faldo:Position ;
    faldo:position 36997420 ;
    faldo:reference OBO:CHR_GRCh38chr9 .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> a faldo:Position ;
    faldo:position 36998996 ;
    faldo:reference OBO:CHR_GRCh38chr9 .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> a faldo:Position ;
    faldo:position 37000690 ;
    faldo:reference OBO:CHR_GRCh38chr9 .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> a faldo:Position ;
    faldo:position 37002118 ;
    faldo:reference OBO:CHR_GRCh38chr9 .
        """

        # dbg
        # LOG.debug(
        #    "Reference graph: %s",
        #   self.source.graph.serialize(format="turtle").decode("utf-8"))

        #  Does not seem to acknowlage these constant triples 
        self.assertTrue(self.test_util.test_graph_equality(triples, self.source.graph))
Exemple #22
0
class UDPTestCase(unittest.TestCase):
    """
    Test UDP parser
    """

    def setUp(self):
        """
        Create the TestUtils helper used for graph comparisons
        """
        self.test_util = TestUtils()

    def tearDown(self):
        """
        Nothing to clean up after a test
        """

    def test_dbsnp_indel_resolution(self):
        """
        Unit test for _get_rs_id() on an indel:
        one rsid may list several insertion sequences, for example
        15  51766637  374313651  in-del  -/A/AA/AAA/AAAA/CAAA/TAAA
        and a variant carrying one of them must resolve to that rsid
        """
        udp = UDP('rdf_graph', True)
        rs_map = udp._parse_rs_map_file(udp.map_files['dbsnp_map'])

        indel = {
            'build': 'hg19',
            'chromosome': 'chr15',
            'reference_allele': '-',
            'variant_allele': 'AAAA',
            'position': '51766637',
        }
        resolved_rsid = udp._get_rs_id(indel, rs_map, 'indel')

        self.assertEqual(resolved_rsid, '374313651')

    def test_dbsnp_snp_mapping(self):
        """
        Unit test for _get_rs_id() on a snp:
        a snp that is present in the dbsnp map
        must resolve to its rsid
        """
        udp = UDP('rdf_graph', True)
        rs_map = udp._parse_rs_map_file(udp.map_files['dbsnp_map'])

        snp = {
            'build': 'hg19',
            'chromosome': 'chr15',
            'reference_allele': 'A',
            'variant_allele': 'C',
            'position': '54624219',
        }
        resolved_rsid = udp._get_rs_id(snp, rs_map, 'snp')

        self.assertEqual(resolved_rsid, '755532609')

    def test_patient_phenotype_model(self):
        """
        Functional test for _parse_patient_phenotypes():
        feed two mocked tab separated lines to the parser and
        compare the resulting graph with the expected triples
        """
        udp = UDP('rdf_graph', True)
        udp.graph = RDFGraph(True)  # start from an empty graph
        triple_count = len(list(udp.graph))
        self.assertTrue(triple_count == 0)

        # Mocked file content: patient, phenotype, yes/no flag
        phenotype_lines = [
            'patient_1\tHP:000001\tyes',
            'patient_1\tHP:000002\tno',
        ]
        fake_handle = MagicMock()
        fake_handle.__iter__.return_value = iter(phenotype_lines)
        fake_open = mock_open(mock=fake_handle)

        udp._parse_patient_phenotypes(fake_open)

        expected_turtle = """
        :patient_1 a foaf:Person ;
            rdfs:label "patient_1" ;
            RO:0002200 DOID:4,
              HP:000001 .
        """

        graphs_match = self.test_util.test_graph_equality(
            expected_turtle, udp.graph)
        self.assertTrue(graphs_match)

    def test_variant_model(self):
        """
        Functional test for _parse_patient_variants():
        feed one mocked tab separated line to the parser and
        compare the resulting graph with the expected triples
        """
        udp = UDP('rdf_graph', True)
        udp.graph = RDFGraph(True)  # start from an empty graph
        triple_count = len(list(udp.graph))
        self.assertTrue(triple_count == 0)

        # The 23 columns of one variant line, in file order
        fields = [
            'patient_1', 'family_1', '1', 'HG19', '155230432', 'G', 'A',
            'Maternal', 'Biallelic', 'Non-synonymous;DOWNSTREAM', 'CLK2',
            '', '', '', '', '', '', '',
            'Compound heterozygous', 'Heterozygous', '', '0.002747253', '',
        ]
        variant_lines = ["\t".join(fields)]

        fake_handle = MagicMock()
        fake_handle.__iter__.return_value = iter(variant_lines)
        fake_open = mock_open(mock=fake_handle)

        udp._parse_patient_variants(fake_open)

        expected_turtle = """
        :patient_1 GENO:0000222 <https://monarchinitiative.org/.well-known/genid/ba5f377fc8c95d4a6d7a> .

        <https://monarchinitiative.org/.well-known/genid/b41e8da0787b45e24c4f> a SO:0001059 ;
            rdfs:label "hg19chr1(CLK2):g.155230432G>A" ;
            GENO:0000418 HGNC:2069 ;
            RO:0002162 NCBITaxon:9606 ;
            owl:sameAs dbSNP:rs11557757 .

        <https://monarchinitiative.org/.well-known/genid/ba5f377fc8c95d4a6d7a> a GENO:0000000 ;
            rdfs:label "patient_1 genotype" ;
            GENO:0000382 <https://monarchinitiative.org/.well-known/genid/b41e8da0787b45e24c4f> .
        """

        graphs_match = self.test_util.test_graph_equality(
            expected_turtle, udp.graph)
        self.assertTrue(graphs_match)
Exemple #23
0
class EvidenceProvenanceTestCase(unittest.TestCase):
    """
    Functional tests for the evidence, study-provenance and
    assertion-provenance triples written by the IMPC ingest.
    """

    def setUp(self):
        """Prepare one fixed result row and the curies shared by the tests."""
        self.test_util = TestUtils()
        self.assoc_curie = 'MONARCH:test_association'
        self.eco_id = 'ECO:0000015'

        # Headers:
        # 01 marker_accession_id,
        # 02 marker_symbol,
        # 03 phenotyping_center,
        # 04 colony_raw,
        # 05 sex,
        # 06 zygosity,
        # 07 allele_accession_id,
        # 08 allele_symbol,
        # 09 allele_name,
        # 10 strain_accession_id,
        # 11 strain_name,
        # 12 project_name,
        # 13 project_fullname,
        # 14 pipeline_name,
        # 15 pipeline_stable_id,
        # 16 procedure_stable_id,
        # 17 procedure_name,
        # 18 parameter_stable_id,
        # 19 parameter_name,
        # 20 top_level_mp_term_id,
        # 21 top_level_mp_term_name,
        # 22 mp_term_id,
        # 23 mp_term_name,
        # 24 p_value,
        # 25 percentage_change,
        # 26 effect_size,
        # 27 statistical_method,
        # 28 resource_name

        self.test_set_1 = (
            'MGI:1920145',              # 01
            'Setd5',                    # 02
            'WTSI',                     # 03
            'MEFW',                     # 04
            'male',                     # 05
            'heterozygote',             # 06
            'MGI:4432631',              # 07
            'Setd5<tm1a(EUCOMM)Wtsi>',  # 08
            'targeted mutation 1a, Wellcome Trust Sanger Institute',    # 09
            'MGI:2159965',              # 10
            'C57BL/6N',                 # 11
            'MGP',                      # 12
            'Wellcome Trust Sanger Institute Mouse Genetics Project',   # 13
            'MGP Select Pipeline',      # 14
            'MGP_001',                  # 15
            'MGP_XRY_001',              # 16
            'X-ray',                    # 17
            'IMPC_XRY_008_001',         # 18
            'Number of ribs right',     # 19
            'MP:0005390',               # 20
            'skeleton phenotype',       # 21
            'MP:0000480',               # 22
            'increased rib number',     # 23
            '1.637023E-010',            # 24
            '',                         # 25
            '8.885439E-007',            # 26
            'Wilcoxon rank sum test with continuity correction',    # 27
            'IMPC'            # 28
        )

        # Generate test curies, these are otherwise generated
        # within _add_evidence() and _add_study_provenance()
        # these blank nodes are hardcoded as NOT Skolemized  ...
        self.study_curie = "_:study"
        self.evidence_curie = "_:evidence"

        # IRIs for testing sparql output
        curie_dict = curie_map.get()
        curie_util = CurieUtil(curie_dict)
        self.assoc_iri = URIRef(curie_util.get_uri(self.assoc_curie))

    def test_evidence_model(self):
        """
        Functional test for _add_evidence()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)  # Reset graph
        # Test graph is empty; assertEqual reports the count on failure
        self.assertEqual(len(list(impc.graph)), 0)

        # columns 24-26 of the header list above
        (p_value, percentage_change, effect_size) = self.test_set_1[23:26]

        impc._add_evidence(
            self.assoc_curie, self.eco_id, p_value, percentage_change, effect_size,
            self.study_curie)

        triples = """
:MONARCH_test_association SEPIO:0000007 <https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> .

<https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> a ECO:0000015 ;
    SEPIO:0000084 <https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888>,
        <https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> ;
    SEPIO:0000085 <https://monarchinitiative.org/.well-known/genid/study> .

<https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> a OBI:0000175 ;
    RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
    STATO:0000129 1.637023e-10 .

<https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888> a STATO:0000085 ;
    RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
    STATO:0000129 "8.885439E-007" .
        """

        self.assertTrue(self.test_util.test_graph_equality(
            triples, impc.graph))

    def test_provenance_model(self):
        """
        Functional test for _add_study_provenance()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        self.assertEqual(len(list(impc.graph)), 0)

        (phenotyping_center,
         colony) = self.test_set_1[2:4]
        # column 13 (project_fullname) is part of the slice but is not
        # passed to _add_study_provenance(), hence the underscore name
        (project_name,
         _project_fullname,
         pipeline_name,
         pipeline_stable_id,
         procedure_stable_id,
         procedure_name,
         parameter_stable_id,
         parameter_name) = self.test_set_1[11:19]
        (statistical_method, resource_name) = self.test_set_1[26:28]

        impc._add_study_provenance(
            phenotyping_center, colony,
            project_name,
            pipeline_name, pipeline_stable_id,
            procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name,
            statistical_method, resource_name)

        # dbg
        LOG.info(
            "Provenance graph as turtle:\n%s\n",
            impc.graph.serialize(format="turtle").decode("utf-8")
        )

        triples = """
<https://monarchinitiative.org/.well-known/genid/b0b26361b8687b5ad9ef> a owl:NamedIndividual ;
    rdfs:label "MEFW" .

<https://monarchinitiative.org/.well-known/genid/b6f14f763c8d0629360e> a OBI:0000471 ;
    BFO:0000050 <http://www.sanger.ac.uk/science/data/mouse-genomes-project>,
        IMPC-pipe:MGP_001 ;
    BFO:0000051 STATO:0000076,
        IMPC-proc:MGP_XRY_001 ;
    SEPIO:0000017 <http://www.sanger.ac.uk/> ;
    SEPIO:0000114 <https://www.mousephenotype.org/impress/OntologyInfo?action=list&procID=MGP_XRY_001#IMPC_XRY_008_001> .

<http://www.sanger.ac.uk/> a foaf:organization ;
    rdfs:label "WTSI" .

<http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
    rdfs:label "MGP" .

<https://www.mousephenotype.org/impress/OntologyInfo?action=list&procID=MGP_XRY_001#IMPC_XRY_008_001> a owl:NamedIndividual ;
    rdfs:label "Number of ribs right (X-ray)" .

IMPC-pipe:MGP_001 a owl:NamedIndividual ;
    rdfs:label "MGP Select Pipeline" .

IMPC-proc:MGP_XRY_001 a owl:NamedIndividual ;
    rdfs:label "X-ray" .
"""

        # dbg
        LOG.info(
            "Reference graph: %s",
            impc.graph.serialize(format="turtle").decode("utf-8")
        )
        self.assertTrue(
            self.test_util.test_graph_equality(triples, impc.graph))

    def test_assertion_model(self):
        """
        Functional test for _add_assertion_provenance()
        """

        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        self.assertEqual(len(list(impc.graph)), 0)

        impc._add_assertion_provenance(self.assoc_curie, self.evidence_curie)

        triples = """
    MONARCH:test_association SEPIO:0000015 <https://monarchinitiative.org/.well-known/genid/bf92df374a884963e805> .
    <https://monarchinitiative.org/.well-known/genid/bf92df374a884963e805> a SEPIO:0000001 ;
        SEPIO:0000018 <https://www.mousephenotype.org/> ;
        SEPIO:0000111 <https://monarchinitiative.org/.well-known/genid/evidence>  .

    <https://www.mousephenotype.org/> a foaf:organization ;
        rdfs:label "International Mouse Phenotyping Consortium" .
        """
        # dbg
        LOG.info(
            "Assertion graph:\n %s\n", impc.graph.serialize(
                format="turtle").decode("utf-8")
        )

        self.assertTrue(self.test_util.test_graph_equality(triples, impc.graph))

    @unittest.skip("Timeouts on travis")
    def test_random_data_set(self):
        """
        Take one row of the gzipped dataset found in impc.rawdir and
        run it through the evidence and provenance functions to test
        the output.

        Line of data is hardcoded, but theoretically should work on any line
        """
        line_to_test = 1129
        impc = IMPC('rdf_graph', False)   # Not Skolem
        self.test_set_N = []
        # impc.fetch(True)  # download is disabled; the file is read as-is
        file_path = '/'.join((impc.rawdir, impc.files['all']['file']))
        with gzip.open(file_path, 'rt') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='"')
            # rows are counted from 1; reading stops one row past the target
            for count, row in enumerate(filereader, start=1):
                if count == line_to_test:
                    self.test_set_N = row
                elif count > line_to_test:
                    LOG.info("stopped at line:\t%s\n", count)
                    break

        # fail with a readable message instead of an unpacking ValueError
        # when the file has fewer rows than expected
        self.assertTrue(
            self.test_set_N,
            "input file has fewer than %i rows" % line_to_test)

        # Some DRY violation with the above tests
        (phenotyping_center, colony) = self.test_set_N[2:4]
        (project_name, _project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = self.test_set_N[11:19]
        (statistical_method, resource_name) = self.test_set_N[26:28]

        (p_value, percentage_change, effect_size) = self.test_set_N[23:26]

        # adding evidence
        impc._add_evidence(
            self.assoc_curie, self.eco_id, p_value, percentage_change, effect_size,
            self.study_curie)

        # adding  study
        # NOTE(review): this call passes one more positional argument
        # (line_to_test) than test_provenance_model does; confirm it still
        # matches the _add_study_provenance() signature before un-skipping.
        impc._add_study_provenance(
            phenotyping_center, colony, project_name,
            pipeline_name,
            pipeline_stable_id,
            procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name,
            statistical_method, resource_name, line_to_test)

        # Note that this doesn't test much since we're dealing with
        # multiple part_of  and has_part links to individuals
        # which results in ambiguity = hard to test

        # dbg
        LOG.info(
            "Row %i graph as ntriples:\n%s\n",
            line_to_test, impc.graph.serialize(format="ntriples").decode("utf-8")
        )

        sparql_query = """
SELECT *
WHERE {
    ?assoc SEPIO:0000007 ?evidenceline .
    ?evidenceline a ECO:0000015 ;
        SEPIO:0000085 _:study .

    ?study a OBI:0000471 ;
        SEPIO:0000114 ?param ;
        SEPIO:0000017 ?agent .
}
"""

        sparql_output = impc.graph.query(sparql_query)
        LOG.info(
            "Test that query for row %i passes and returns one row", line_to_test)

        # print("Sparql Output: %s\n", list(sparql_output) )
        # it is an array with one list with five vars in it

        self.assertEqual(len(list(sparql_output)), 1)

    def tearDown(self):
        """Nothing to clean up; every test builds its own IMPC instance."""
        return
Exemple #24
0
class EvidenceProvenanceTestCase(unittest.TestCase):
    """
    Functional tests for the evidence, study-provenance and
    assertion-provenance triples written by the IMPC ingest.
    """

    def setUp(self):
        """Prepare one fixed result row and the curies shared by the tests."""
        self.test_util = TestUtils()
        self.assoc_curie = 'MONARCH:test_association'
        self.eco_id = 'ECO:0000015'

        # Headers:
        # 01 marker_accession_id,
        # 02 marker_symbol,
        # 03 phenotyping_center,
        # 04 colony_raw,
        # 05 sex,
        # 06 zygosity,
        # 07 allele_accession_id,
        # 08 allele_symbol,
        # 09 allele_name,
        # 10 strain_accession_id,
        # 11 strain_name,
        # 12 project_name,
        # 13 project_fullname,
        # 14 pipeline_name,
        # 15 pipeline_stable_id,
        # 16 procedure_stable_id,
        # 17 procedure_name,
        # 18 parameter_stable_id,
        # 19 parameter_name,
        # 20 top_level_mp_term_id,
        # 21 top_level_mp_term_name,
        # 22 mp_term_id,
        # 23 mp_term_name,
        # 24 p_value,
        # 25 percentage_change,
        # 26 effect_size,
        # 27 statistical_method,
        # 28 resource_name

        self.test_set_1 = (
            'MGI:1920145',              # 01
            'Setd5',                    # 02
            'WTSI',                     # 03
            'MEFW',                     # 04
            'male',                     # 05
            'heterozygote',             # 06
            'MGI:4432631',              # 07
            'Setd5<tm1a(EUCOMM)Wtsi>',  # 08
            'targeted mutation 1a, Wellcome Trust Sanger Institute',    # 09
            'MGI:2159965',              # 10
            'C57BL/6N',                 # 11
            'MGP',                      # 12
            'Wellcome Trust Sanger Institute Mouse Genetics Project',   # 13
            'MGP Select Pipeline',      # 14
            'MGP_001',                  # 15
            'MGP_XRY_001',              # 16
            'X-ray',                    # 17
            'IMPC_XRY_008_001',         # 18
            'Number of ribs right',     # 19
            'MP:0005390',               # 20
            'skeleton phenotype',       # 21
            'MP:0000480',               # 22
            'increased rib number',     # 23
            '1.637023E-010',            # 24
            '',                         # 25
            '8.885439E-007',            # 26
            'Wilcoxon rank sum test with continuity correction',    # 27
            'IMPC'            # 28
        )

        # Generate test curies, these are otherwise generated
        # within _add_evidence() and _add_study_provenance()
        # these blank nodes are hardcoded as NOT Skolemized  ...
        self.study_curie = "_:study"
        self.evidence_curie = "_:evidence"

        # IRIs for testing sparql output
        curie_dict = curie_map.get()
        curie_util = CurieUtil(curie_dict)
        self.assoc_iri = URIRef(curie_util.get_uri(self.assoc_curie))

    def test_evidence_model(self):
        """
        Functional test for _add_evidence()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)  # Reset graph
        # Test graph is empty; assertEqual reports the count on failure
        self.assertEqual(len(list(impc.graph)), 0)

        # columns 24-26 of the header list above
        (p_value, percentage_change, effect_size) = self.test_set_1[23:26]

        impc._add_evidence(
            self.assoc_curie, self.eco_id, p_value, percentage_change, effect_size,
            self.study_curie)

        triples = """
:MONARCH_test_association SEPIO:0000007 <https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> .

<https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> a ECO:0000015 ;
    SEPIO:0000084 <https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888>,
        <https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> ;
    SEPIO:0000085 <https://monarchinitiative.org/.well-known/genid/study> .

<https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> a OBI:0000175 ;
    RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
    STATO:0000129 1.637023e-10 .

<https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888> a STATO:0000085 ;
    RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
    STATO:0000129 "8.885439E-007" .
        """

        self.assertTrue(self.test_util.test_graph_equality(
            triples, impc.graph))

    def test_provenance_model(self):
        """
        Functional test for _add_study_provenance()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        self.assertEqual(len(list(impc.graph)), 0)

        (phenotyping_center, colony) = self.test_set_1[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = self.test_set_1[12:19]
        (statistical_method, resource_name) = self.test_set_1[26:28]

        impc._add_study_provenance(
            phenotyping_center, colony,
            project_fullname,
            pipeline_name, pipeline_stable_id,
            procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name,
            statistical_method, resource_name, 0)

        # dbg
        logger.info(
            "Provenance graph as turtle:\n%s\n",
            impc.graph.serialize(format="turtle").decode("utf-8")
        )

        triples = """
<https://monarchinitiative.org/.well-known/genid/bdd05a8ca155ddaf415e> a OBI:0000471 ;
  BFO:0000051 OBO:STATO_0000076,
      <https://www.mousephenotype.org/impress/protocol/175/15> ;
  BFO:0000050  IMPRESS-procedure:15 ,
      <http://www.sanger.ac.uk/science/data/mouse-genomes-project> ;
  SEPIO:0000114 <https://www.mousephenotype.org/impress/parameterontologies/1867/91> ;
  SEPIO:0000017 <http://www.sanger.ac.uk/>  .

<https://monarchinitiative.org/.well-known/genid/b0b26361b8687b5ad9ef> a owl:NamedIndividual ;
    rdfs:label "MEFW" .

<http://www.sanger.ac.uk/> a foaf:organization ;
    rdfs:label "WTSI" .

<http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
    rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" .

<https://www.mousephenotype.org/impress/parameterontologies/1867/91> a owl:NamedIndividual ;
    rdfs:label "Number of ribs right (X-ray)" .

IMPRESS-procedure:15 a owl:NamedIndividual ;
    rdfs:label "MGP Select Pipeline" .

<https://www.mousephenotype.org/impress/protocol/175/15> a owl:NamedIndividual ;
    rdfs:label "X-ray" .
"""

        # dbg
        logger.debug(
            "Reference graph: %s", impc.graph.serialize(format="turtle").decode("utf-8")
        )
        self.assertTrue(
            self.test_util.test_graph_equality(triples, impc.graph))

    def test_assertion_model(self):
        """
        Functional test for _add_assertion_provenance()
        """

        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        self.assertEqual(len(list(impc.graph)), 0)

        impc._add_assertion_provenance(self.assoc_curie, self.evidence_curie)

        triples = """
    MONARCH:test_association SEPIO:0000015 <https://monarchinitiative.org/.well-known/genid/bf92df374a884963e805> .
    <https://monarchinitiative.org/.well-known/genid/bf92df374a884963e805> a SEPIO:0000001 ;
        SEPIO:0000018 <https://www.mousephenotype.org/> ;
        SEPIO:0000111 <https://monarchinitiative.org/.well-known/genid/evidence>  .

    <https://www.mousephenotype.org/> a foaf:organization ;
        rdfs:label "International Mouse Phenotyping Consortium" .

        """
        # dbg
        logger.info(
            "Assertion graph:\n %s\n", impc.graph.serialize(
                format="turtle").decode("utf-8")
        )

        self.assertTrue(self.test_util.test_graph_equality(triples, impc.graph))

    def test_random_data_set(self):
        """
        Take one row of the gzipped dataset found in impc.rawdir and
        run it through the evidence and provenance functions to test
        the output.

        Line of data is hardcoded, but theoretically should work on any line
        """
        line_to_test = 1129
        impc = IMPC('rdf_graph', False)   # Not Skolem
        self.test_set_N = []
        # impc.fetch(True)  # download is disabled; the file is read as-is
        file_path = '/'.join((impc.rawdir, impc.files['all']['file']))
        with gzip.open(file_path, 'rt') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='"')
            # rows are counted from 1; reading stops one row past the target
            for count, row in enumerate(filereader, start=1):
                if count == line_to_test:
                    self.test_set_N = row
                elif count > line_to_test:
                    logger.info("stopped at line:\t%s\n", count)
                    break

        # fail with a readable message instead of an unpacking ValueError
        # when the file has fewer rows than expected
        self.assertTrue(
            self.test_set_N,
            "input file has fewer than %i rows" % line_to_test)

        # Some DRY violation with the above tests
        (phenotyping_center, colony) = self.test_set_N[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = self.test_set_N[12:19]
        (statistical_method, resource_name) = self.test_set_N[26:28]

        (p_value, percentage_change, effect_size) = self.test_set_N[23:26]

        # adding evidence
        impc._add_evidence(
            self.assoc_curie, self.eco_id, p_value, percentage_change, effect_size,
            self.study_curie)

        # adding  study
        impc._add_study_provenance(
            phenotyping_center, colony, project_fullname,
            pipeline_name,
            pipeline_stable_id,
            procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name,
            statistical_method, resource_name, line_to_test)

        # Note that this doesn't test much since we're dealing with
        # multiple part_of  and has_part links to individuals
        # which results in ambiguity = hard to test

        # dbg
        logger.info(
            "Row %i graph as ntriples:\n%s\n", line_to_test, impc.graph.serialize(
                format="ntriples").decode("utf-8")
        )

        sparql_query = """
SELECT *
WHERE {
    ?assoc SEPIO:0000007 ?evidenceline .
    ?evidenceline a ECO:0000015 ;
        SEPIO:0000085 _:study .

    ?study a OBI:0000471 ;
        SEPIO:0000114 ?param ;
        SEPIO:0000017 ?agent .
}
"""

        sparql_output = impc.graph.query(sparql_query)
        logger.info("Test that query for row %i passes and returns one row", line_to_test)

        # print("Sparql Output: %s\n", list(sparql_output) )
        # it is an array with one list with five vars in it

        self.assertEqual(len(list(sparql_output)), 1)

    def tearDown(self):
        """Nothing to clean up; every test builds its own IMPC instance."""
        return
Exemple #25
0
class StringTestFakeData(unittest.TestCase):
    """
    Checks the triples StringDB writes for small, hand-made
    protein-link tables.
    """

    def setUp(self):
        """Build two one-row link tables and fetch the 9606 protein map."""
        self.test_util = TestUtils()

        # the eight numeric columns are identical in both fake rows
        link_scores = [0, 0, 0, 0, 300, 0, 150, 800]

        # Test set with two proteins from same species
        self.test_set_1 = [
            ['9606.ENSP00000000233', '9606.ENSP00000003084'] + link_scores]

        # Test set with deprecated protein id
        self.test_set_2 = [
            ['9606.ENSP00000000233', '9606.ENSP00000006101'] + link_scores]

        self.columns = [
            'protein1', 'protein2', 'neighborhood', 'fusion', 'cooccurence',
            'coexpression', 'experimental', 'database', 'textmining', 'combined_score']

        self.protein_list = Ensembl(
            'rdf_graph', True).fetch_protein_gene_map('9606')

    def tearDown(self):
        """No per-test cleanup is required."""
        return None

    def testFakeDataSet1(self):
        """
        Two proteins of the same species: expect a single
        gene-to-gene interaction triple.
        """
        string_db = StringDB('rdf_graph', True)
        string_db.graph = RDFGraph(True)
        self.assertEqual(len(string_db.graph), 0)

        prot_map = Ensembl('rdf_graph', True).fetch_protein_gene_map('9606')
        # prefix every mapped gene id in place so it becomes an ENSEMBL curie
        for gene_ids in prot_map.values():
            for position, gene_id in enumerate(gene_ids):
                gene_ids[position] = "ENSEMBL:{}".format(gene_id)

        protein_total = len(prot_map)
        print(
            "Finished fetching ENSP IDs, fetched {} proteins"
            .format(protein_total))

        link_frame = pd.DataFrame(data=self.test_set_1, columns=self.columns)
        string_db._process_protein_links(link_frame, prot_map, '9606')

        # g1 <interacts with> g2
        triples = """
ENSEMBL:ENSG00000001626 RO:0002434 ENSEMBL:ENSG00000004059 .
        """

        graphs_match = self.test_util.test_graph_equality(
            triples, string_db.graph)
        self.assertTrue(graphs_match)

    def testFakeDataSet2(self):
        """
        Dataset contains a deprecated protein ID
        that we expect is filtered out by ensembl biomart
        We test that this returns an empty graph
        :return:
        """
        string_db = StringDB('rdf_graph', True)
        string_db.graph = RDFGraph()
        self.assertEqual(len(string_db.graph), 0)

        link_frame = pd.DataFrame(data=self.test_set_2, columns=self.columns)
        string_db._process_protein_links(
            link_frame, self.protein_list, '9606')

        # nothing may have been added for the deprecated protein
        self.assertEqual(len(string_db.graph), 0)
Exemple #26
0
class GeneVariantDiseaseTest(unittest.TestCase):
    """
    Functional tests for Orphanet._process_diseasegene() run against the
    small XML fixtures in resources/orphanet.
    """

    def setUp(self):
        """Create an Orphanet source whose rawdir points at the fixtures."""
        self.test_util = TestUtils()
        self.orphanet = Orphanet('rdf_graph', True)
        # Override so tests don't break when we update terms
        # NOTE(review): the parsed terms are stored on the test case, not on
        # self.orphanet -- confirm whether self.orphanet.globaltt was meant.
        self.globaltt = self.orphanet.open_and_parse_yaml(
            os.path.join(os.path.dirname(__file__),
                         './resources/test_terms.yaml'))
        self.orphanet.rawdir = os.path.join(os.path.dirname(__file__),
                                            'resources/orphanet')

    def tearDown(self):
        """Drop the reference to the Orphanet source."""
        self.orphanet = None

    def _assert_disease_gene_graph(self, fixture_name, expected_triples):
        """
        Parse one disease-gene fixture into a fresh graph and assert the
        graph equals the expected turtle.

        :param fixture_name: file name looked up in self.orphanet.rawdir
        :param expected_triples: turtle text of the expected graph
        """
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = fixture_name

        self.orphanet._process_diseasegene(limit=None)
        # dump what was produced, labelled as in the other tests
        logger.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle").decode("utf-8"))
        self.assertTrue(
            self.test_util.test_graph_equality(expected_triples,
                                               self.orphanet.graph))

    def test_germline_variant_to_disease(self):
        """Graph produced for the orph-germline.xml fixture."""
        expected_triples = """
MONARCH:b2cd4dfacc21d0e28c39 a OBAN:association ;
    RO:0002558 ECO:0000322 ;
    OBAN:association_has_object Orphanet:938475 ;
    OBAN:association_has_predicate RO:0003303 ;
    OBAN:association_has_subject <https://monarchinitiative.org/.well-known/genid/b56f798350412a34> .

ENSEMBL:ENSG00000166813 a owl:Class .

HGNC:30497 a owl:Class .

Orphanet:268061 a owl:Class ;
    rdfs:label "KS1" ;
    dc:description "kinesin family member 7" ;
    oboInOwl:hasExactSynonym "KAS1" ;
    rdfs:subClassOf OBO:SO_0001217 ;
    owl:equivalentClass ENSEMBL:ENSG00000166813,
        HGNC:30497 .

<https://monarchinitiative.org/.well-known/genid/b56f798350412a34> a GENO:0000002 ;
    rdfs:label "germline variant of KS1" ;
    GENO:0000418 Orphanet:268061 ;
    RO:0003303 Orphanet:938475 ;
    :MONARCH_anonymous true ;
    :has_cell_origin GENO:0000900 .

Orphanet:938475 a owl:Class ;
    rdfs:label "too much unit testing disorder" .
        """
        self._assert_disease_gene_graph('orph-germline.xml', expected_triples)

    def test_germline_lof_variant_to_disease(self):
        """Graph produced for the orph-germline-lof.xml fixture."""
        # the association line needs the "a" predicate, as in the sibling
        # tests; without it the expected text is not valid Turtle
        expected_triples = """
MONARCH:b53dada0eb229a75e705 a OBAN:association ;
    RO:0002558 ECO:0000322 ;
    OBAN:association_has_object Orphanet:938475 ;
    OBAN:association_has_predicate RO:0003303 ;
    OBAN:association_has_subject <https://monarchinitiative.org/.well-known/genid/ba0884fb61004110> .

Orphanet:268061 a owl:Class ;
    rdfs:label "KS1" ;
    dc:description "kinesin family member 7" ;
    oboInOwl:hasExactSynonym "KAS1" ;
    rdfs:subClassOf SO:0001217 .

<https://monarchinitiative.org/.well-known/genid/ba0884fb61004110> a GENO:0000002 ;
    rdfs:label "germline loss of function variant of KS1" ;
    GENO:0000418 Orphanet:268061 ;
    RO:0003303 Orphanet:938475 ;
    :MONARCH_anonymous true ;
    :has_cell_origin GENO:0000900 ;
    :has_functional_consequence SO:0002054 .

Orphanet:938475 a owl:Class ;
    rdfs:label "too much unit testing disorder" .
        """
        self._assert_disease_gene_graph(
            'orph-germline-lof.xml', expected_triples)

    def test_gene_to_disease(self):
        """Graph produced for the orph-no-variant.xml fixture."""
        expected_triples = """
MONARCH:b64684a0ea6ae59fdb09 a OBAN:association ;
    RO:0002558 ECO:0000322 ;
    OBAN:association_has_object Orphanet:938475 ;
    OBAN:association_has_predicate RO:0003304 ;
    OBAN:association_has_subject Orphanet:268061 .

Orphanet:268061 a owl:Class ;
    rdfs:label "KS1" ;
    RO:0003304 Orphanet:938475 ;
    dc:description "kinesin family member 7" ;
    oboInOwl:hasExactSynonym "KAS1" ;
    rdfs:subClassOf SO:0001217 .

Orphanet:938475 a owl:Class ;
    rdfs:label "too much unit testing disorder" .
        """
        self._assert_disease_gene_graph(
            'orph-no-variant.xml', expected_triples)

    def test_unmapped_disease_assoc_type(self):
        """
        Test that a gene disease type that we have
        not mapped in translationtable/orphanet.yaml
        raises a ValueError
        """
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-no-mapping.xml'
        with self.assertRaises(ValueError):
            self.orphanet._process_diseasegene(limit=None)
Exemple #27
0
class RGDTestCase(unittest.TestCase):
    """
    Feeds one hand-written association record to RGD.make_association()
    and compares the resulting graph with the expected turtle.
    """

    def setUp(self):
        """Create the single association record used by the parser test."""
        self.test_util = TestUtils()

        evidence = {
            'has_supporting_reference': ['RGD:1581841', 'PMID:12799311'],
            'type': 'IED',
            'with_support_from': [],
        }
        subject = {
            'fullname': 'endothelin receptor type A',
            'id': 'RGD:2535',
            'label': 'Ednra',
            'synonyms': [],
            'taxon': {'id': 'NCBITaxon:10116'},
            'type': 'gene',
        }
        # the raw line the record is described as coming from
        source_line = (
            'RGD\t2535\tEdnra\t\tMP:0003340\tRGD:1581841|PMID:12799311\t'
            'IED\t\tN\tendothelin receptor type A\t\tgene\ttaxon:10116\t'
            '20061026\tRGD\t\t\n')

        self.test_set_1 = {
            'aspect': 'N',
            'date': '2006-10-26',
            'evidence': evidence,
            'negated': False,
            'object': {'id': 'MP:0003340', 'taxon': 'NCBITaxon:10116'},
            'provided_by': 'RGD',
            'qualifiers': [],
            'relation': {'id': None},
            'source_line': source_line,
            'subject': subject,
            'subject_extensions': [{'filler': '\n', 'property': 'isoform'}],
        }

    def tearDown(self):
        """No per-test cleanup is required."""
        return None

    def testRGDParser(self):
        """make_association() on the record from setUp yields the expected graph."""
        rgd = RGD('rdf_graph', True)
        rgd.graph = RDFGraph(True)

        # the freshly assigned graph must not hold any triples yet
        triple_count = len(list(rgd.graph))
        self.assertTrue(triple_count == 0)

        rgd.make_association(record=self.test_set_1)
        triples = """
    :MONARCH_b4650e8c3d865f11a1a5 a OBAN:association ;
        RO:0002558 ECO:0005611 ;
        dcterms:source RGDRef:1581841 ;
        OBAN:association_has_object OBO:MP_0003340 ;
        OBAN:association_has_predicate OBO:RO_0002200 ;
        OBAN:association_has_subject RGD:2535 ;
        pav:createdOn "2006-10-26" .

    RGD:2535 OBO:RO_0002200 MP:0003340 .
        RGDRef:1581841 a IAO:0000311 ;
        owl:sameAs PMID:12799311 .
        """
        # dbg
        produced_turtle = rgd.graph.serialize(format="turtle").decode("utf-8")
        logger.debug("Reference graph: %s", produced_turtle)

        graphs_match = self.test_util.test_graph_equality(triples, rgd.graph)
        self.assertTrue(graphs_match)
Exemple #28
0
class StringTestFakeData(unittest.TestCase):
    """
    Run StringDB._process_protein_links against small hand-built
    protein-link tables and check the triples that end up in the graph.
    """

    def setUp(self):
        """
        Prepare the fake link rows, their column names and the
        protein -> gene map fetched from Ensembl for taxon 9606.
        """
        self.test_util = TestUtils()
        # Test set with two proteins from same species
        self.test_set_1 = [[
            '9606.ENSP00000000233', '9606.ENSP00000003084',
            0, 0, 0, 0, 300, 0, 150, 800]]

        # Test set with deprecated protein id
        self.test_set_2 = [[
            '9606.ENSP00000000233', '9606.ENSP00000006101',
            0, 0, 0, 0, 300, 0, 150, 800]]

        self.columns = [
            'protein1', 'protein2', 'neighborhood', 'fusion', 'cooccurence',
            'coexpression', 'experimental', 'database', 'textmining', 'combined_score']

        ensembl = Ensembl('rdf_graph', True)
        self.protein_list = ensembl.fetch_protein_gene_map('9606')

    def tearDown(self):
        """No-op teardown hook."""
        return

    def testFakeDataSet1(self):
        """
        Two proteins from the same species should produce one
        interaction triple plus a type triple for each gene.
        """
        string_db = StringDB('rdf_graph', True)
        string_db.graph = RDFGraph(True)
        self.assertEqual(len(string_db.graph), 0)

        # setUp already fetched this exact map, so reuse it rather than
        # querying Ensembl a second time.  Each gene id is wrapped in a
        # one-element list of ENSEMBL-prefixed curies; a new dict is built
        # so self.protein_list itself is left untouched.
        prot_map = {
            protein_id: ['ENSEMBL:' + gene_id]
            for protein_id, gene_id in self.protein_list.items()
        }

        print("Finished fetching ENSP IDs, fetched {} proteins".format(len(prot_map)))

        dataframe = pd.DataFrame(data=self.test_set_1, columns=self.columns)

        string_db._process_protein_links(dataframe, prot_map, '9606')

        # g1 <interacts with> g2
        triples = """
ENSEMBL:ENSG00000001626 RO:0002434 ENSEMBL:ENSG00000004059 .
ENSEMBL:ENSG00000001626 rdf:type SO:0000704 .
ENSEMBL:ENSG00000004059 rdf:type SO:0000704 .
        """

        self.assertTrue(self.test_util.test_graph_equality(triples, string_db.graph))

    def testFakeDataSet2(self):
        """
        Dataset contains a deprecated protein ID
        that we expect is filtered out by ensembl biomart
        We test that this returns an empty graph
        :return:
        """
        string_db = StringDB('rdf_graph', True)
        string_db.graph = RDFGraph()
        self.assertEqual(len(string_db.graph), 0)

        dataframe = pd.DataFrame(data=self.test_set_2, columns=self.columns)
        string_db._process_protein_links(dataframe, self.protein_list, '9606')
        self.assertEqual(len(string_db.graph), 0)