Esempio n. 1
0
    def testFakeDataSet1(self):
        string_db = StringDB('rdf_graph', True)
        string_db.graph = RDFGraph(True)
        self.assertEqual(len(string_db.graph), 0)

        ensembl = Ensembl('rdf_graph', True)
        prot_map = ensembl.fetch_protein_gene_map('9606')

        [prot_map.update({k: ['ENSEMBL:' + prot_map[k]]}) for k in prot_map.keys()]

        print("Finished fetching ENSP IDs, fetched {} proteins".format(len(prot_map)))

        # just looking
        # for key in prot_map:
        #    if string_db.graph.curie_regexp.match(prot_map[key]) is None:
        #        print("INVALID curie for %s from %s", prot_map[key], key)

        dataframe = pd.DataFrame(data=self.test_set_1, columns=self.columns)

        string_db._process_protein_links(dataframe, prot_map, '9606')

        # g1 <interacts with> g2
        triples = """
ENSEMBL:ENSG00000001626 RO:0002434 ENSEMBL:ENSG00000004059 .
        """

        self.assertTrue(self.test_util.test_graph_equality(triples, string_db.graph))
Esempio n. 2
0
    def testFakeDataSet2(self):
        """
        test_set_2 contains a deprecated protein ID
        that we expect to be filtered out by ensembl biomart.
        Processing it must leave the graph empty.
        :return:
        """
        source = StringDB('rdf_graph', True)
        source.graph = RDFGraph()
        # sanity check: nothing in the graph before processing
        self.assertEqual(len(source.graph), 0)

        links = pd.DataFrame(data=self.test_set_2, columns=self.columns)
        source._process_protein_links(links, self.protein_list, '9606')

        # still nothing in the graph after processing
        triple_count = len(source.graph)
        self.assertEqual(triple_count, 0)
Esempio n. 3
0
    def testFakeDataSet2(self):
        """
        The data set holds a deprecated protein ID
        that we expect to be filtered out by ensembl biomart.
        The graph is expected to be empty both before
        and after the links are processed.
        :return:
        """
        ingest = StringDB('rdf_graph', True)
        ingest.graph = RDFGraph()
        self.assertEqual(len(ingest.graph), 0)

        frame = pd.DataFrame(data=self.test_set_2, columns=self.columns)
        ingest._process_protein_links(frame, self.protein_list, 9606)

        size_after = len(ingest.graph)
        self.assertEqual(size_after, 0)
Esempio n. 4
0
 def testFakeDataSet2(self):
     """
     The data set holds a deprecated protein ID
     that we expect to be filtered out by ensembl biomart.
     The resulting graph should hold exactly 3 triples:
     MonarchData:string.ttl a owl:Ontology ;
     owl:versionIRI <https://archive.monarchinitiative.org/.../string.ttl> ;
     owl:versionInfo "some version"
     :return:
     """
     ingest = StringDB('rdf_graph', True)
     frame = pd.DataFrame(data=self.test_set_2, columns=self.columns)
     ingest._process_protein_links(frame, self.protein_list, 9606)

     # only the three triples listed in the docstring are expected
     triple_count = len(ingest.graph)
     self.assertEqual(triple_count, 3)
Esempio n. 5
0
    def testFakeDataSet1(self):
        """
        Process test_set_1 with a protein map whose values carry the
        'ENSEMBL:' prefix and compare the graph to one expected triple.
        :return:
        """
        source = StringDB('rdf_graph', True)
        source.graph = RDFGraph(True)
        # the freshly assigned graph must be empty
        self.assertEqual(len(source.graph), 0)

        mapper = Ensembl('rdf_graph', True)
        prot_map = mapper.fetch_protein_gene_map(9606)
        # values are rewritten in place; the set of keys does not change
        for protein_id in prot_map:
            prot_map[protein_id] = "ENSEMBL:{}".format(prot_map[protein_id])

        message = "Finished fetching ENSP IDs, fetched {} proteins"
        print(message.format(len(prot_map)))

        links = pd.DataFrame(data=self.test_set_1, columns=self.columns)
        source._process_protein_links(links, prot_map, 9606)

        expected_triples = """
            ENSEMBL:ENSG00000001626 RO:0002434 ENSEMBL:ENSG00000004059 .
        """
        graphs_match = self.test_util.test_graph_equality(
            expected_triples, source.graph)
        self.assertTrue(graphs_match)
Esempio n. 6
0
    def testFakeDataSet1(self):
        """
        Run test_set_1 through _process_protein_links and check that
        the graph equals the single expected triple.

        The protein map is fetched from Ensembl and each of its values
        gets the 'ENSEMBL:' prefix before it is used.
        :return:
        """
        ingest = StringDB('rdf_graph', True)
        ingest.graph = RDFGraph(True)
        self.assertEqual(len(ingest.graph), 0)

        prot_map = Ensembl('rdf_graph', True).fetch_protein_gene_map(9606)
        # overwrite every value with its prefixed form; keys stay as they are
        for ensp in prot_map:
            gene_id = prot_map[ensp]
            prot_map[ensp] = "ENSEMBL:{}".format(gene_id)

        fetched = len(prot_map)
        print("Finished fetching ENSP IDs, fetched {} proteins".format(fetched))

        frame = pd.DataFrame(data=self.test_set_1, columns=self.columns)
        ingest._process_protein_links(frame, prot_map, 9606)

        wanted = """
            ENSEMBL:ENSG00000001626 RO:0002434 ENSEMBL:ENSG00000004059 .
        """

        is_equal = self.test_util.test_graph_equality(wanted, ingest.graph)
        self.assertTrue(is_equal)
Esempio n. 7
0
    def testFakeDataSet1(self):
        """
        Process test_set_1 and query the graph with SPARQL for every
        subject linked to ENSEMBL:ENSG00000004059 through RO:0002434.

        The only expected result row is the node the graph returns
        for ENSEMBL:ENSG00000001626.
        :return:
        """
        source = StringDB('rdf_graph', True)
        source.graph.bind_all_namespaces()

        prot_map = Ensembl('rdf_graph', True).fetch_protein_gene_map(9606)
        # values are rewritten in place; the set of keys does not change
        for protein_id in prot_map:
            prot_map[protein_id] = "ENSEMBL:{}".format(prot_map[protein_id])

        message = "Finished fetching ENSP IDs, fetched {} proteins"
        print(message.format(len(prot_map)))

        links = pd.DataFrame(data=self.test_set_1, columns=self.columns)
        source._process_protein_links(links, prot_map, 9606)

        query_text = """
                      SELECT ?prot
                      WHERE {
                          ?prot RO:0002434 ENSEMBL:ENSG00000004059 .
                      }
                      """
        rows = list(source.graph.query(query_text))

        # each result row is a 1-tuple holding the bound ?prot value
        subject_node = source.graph._getNode("ENSEMBL:ENSG00000001626")
        wanted = [(URIRef(subject_node),)]
        self.assertEqual(rows, wanted)