def homologene_uniprot_dict(self, source):
    """
    Builds orthology translation table as dict from UniProt to UniProt,
    obtained from NCBI HomoloGene data. Uses RefSeq and Entrez IDs for
    translation.

    For every UniProt ID in the source proteome, the ID is mapped to
    Entrez and RefSeq protein IDs, those are translated to the target
    organism through the HomoloGene tables, and the resulting target
    IDs are mapped back to UniProt.

    :param source: Source organism; it is passed through
        ``self.get_source`` and the value returned is used as the key
        of the resulting table.

    Nothing is returned. The table is stored in ``self.homo[source]``,
    a dict whose keys are source UniProt IDs and whose values are
    sorted lists of target UniProt IDs. A list may be empty, and it is
    not deduplicated after the final UniProt-to-UniProt mapping.
    """

    source = self.get_source(source)

    # NOTE(review): the attribute name is assumed to be `homo`; confirm
    # it matches the attribute initialised elsewhere in the class.
    self.homo[source] = {}

    hge = dataio.homologene_dict(source, self.target, 'entrez')
    hgr = dataio.homologene_dict(source, self.target, 'refseq')

    self.load_proteome(source, self.only_swissprot)

    for u in self._proteomes[(source, self.only_swissprot)]:

        source_e = self.mapper.map_name(u, 'uniprot', 'entrez', source)
        source_r = self.mapper.map_name(u, 'uniprot', 'refseqp', source)

        target_u = set()
        target_r = set()
        target_e = set()

        # source organism IDs -> target organism IDs by HomoloGene
        for e in source_e:
            if e in hge:
                target_e.update(hge[e])

        for r in source_r:
            if r in hgr:
                target_r.update(hgr[r])

        # target organism Entrez and RefSeq IDs -> target UniProt IDs
        for e in target_e:
            target_u.update(
                self.mapper.map_name(e, 'entrez', 'uniprot', self.target)
            )

        # Each RefSeq ID itself has to be translated here; using the
        # loop variable of the Entrez loop would ignore the RefSeq
        # orthologs altogether.
        for r in target_r:
            target_u.update(
                self.mapper.map_name(r, 'refseqp', 'uniprot', self.target)
            )

        # Final UniProt -> UniProt pass over the collected IDs
        # (presumably to normalise them, e.g. secondary to primary
        # accessions -- verify against the mapper).
        target_u = itertools.chain.from_iterable(
            self.mapper.map_name(tu, 'uniprot', 'uniprot', self.target)
            for tu in target_u
        )

        self.homo[source][u] = sorted(target_u)
def test_homologene_dict(self):
    """
    Checks that the gene symbol level HomoloGene table built for the
    organism pair 9606 -> 10090 (presumably NCBI Taxonomy IDs of human
    and mouse) lists `Stard10` among the entries for `STARD10`.
    """

    symbol_table = dataio.homologene_dict(9606, 10090, 'GeneSymbol')
    stard10_orthologs = symbol_table['STARD10']

    assert 'Stard10' in stard10_orthologs