def build_ensembl_transcripts_by_genomic_location_index(self, ensembl_transcript_index_fname, output_filename, protocol="file"):
    """Create an index from genomic location bins to transcripts.

    Reads every transcript from a transcript index created in
    build_ensembl_transcript_index and stores it in the output index under
    the key "<contig>_<bin>", where <bin> is the value returned by
    region2bin(start, end) for that transcript.  Each value in the output
    index is the list of transcripts that share the same key.

    :param ensembl_transcript_index_fname: location of the existing
        transcript index; appended to ``protocol + "://"`` to open it.
    :param output_filename: location of the index to write; appended to
        ``protocol + "://"`` to open it.
    :param protocol: Shove protocol prefix ("file" by default).
    """
    logger = logging.getLogger(__name__)
    transcript_db = Shove(protocol + "://" + ensembl_transcript_index_fname)
    try:
        output_db = Shove(protocol + "://" + output_filename, optimize=False)
        try:
            # Count from 1 so the progress message reports the number of
            # transcripts actually processed (not the zero-based position).
            for num_processed, tx_id in enumerate(transcript_db.keys(), 1):
                tx = transcript_db[tx_id]
                start = tx.get_start()
                end = tx.get_end()
                genomic_location_bin = region2bin(start, end)
                key = tx.get_contig() + "_" + str(genomic_location_bin)

                # First transcript seen for this key starts a new list.
                try:
                    transcripts_in_bin = output_db[key]
                except KeyError:
                    transcripts_in_bin = []

                transcripts_in_bin.append(tx)
                # Write the updated list back to the store under the same key.
                output_db[key] = transcripts_in_bin

                if num_processed % 10000 == 0:
                    logger.info("Genomic position index added " + str(num_processed) + " transcripts so far.")
        finally:
            # Close the stores even when indexing fails part-way through.
            output_db.close()
    finally:
        transcript_db.close()
def test_region2bin(self):
    """Simple test that the region2bin works for genomic position indexing."""
    # Footprint for PIK3CA transcript chr3:178,866,311-178,952,497 uc003fjk.3
    guess = region2bin(178866311, 178952497)

    # assertEqual (rather than assertTrue on a comparison) reports both the
    # actual and the expected bin when the check fails.
    self.assertEqual(guess, 243)
def build_ensembl_transcripts_by_genomic_location_index(
        self, ensembl_transcript_index_fname, output_filename,
        protocol="file"):
    """Create an index from genomic location bins to transcripts.

    Reads every transcript from a transcript index created in
    build_ensembl_transcript_index and stores it in the output index under
    the key "<contig>_<bin>", where <bin> is the value returned by
    region2bin(start, end) for that transcript.  Each value in the output
    index is the list of transcripts that share the same key.

    :param ensembl_transcript_index_fname: location of the existing
        transcript index; appended to ``protocol + "://"`` to open it.
    :param output_filename: location of the index to write; appended to
        ``protocol + "://"`` to open it.
    :param protocol: Shove protocol prefix ("file" by default).
    """
    logger = logging.getLogger(__name__)
    transcript_db = Shove(protocol + "://" + ensembl_transcript_index_fname)
    try:
        output_db = Shove(protocol + "://" + output_filename, optimize=False)
        try:
            # Count from 1 so the progress message reports the number of
            # transcripts actually processed (not the zero-based position).
            for num_processed, tx_id in enumerate(transcript_db.keys(), 1):
                tx = transcript_db[tx_id]
                start = tx.get_start()
                end = tx.get_end()
                genomic_location_bin = region2bin(start, end)
                key = tx.get_contig() + "_" + str(genomic_location_bin)

                # First transcript seen for this key starts a new list.
                try:
                    transcripts_in_bin = output_db[key]
                except KeyError:
                    transcripts_in_bin = []

                transcripts_in_bin.append(tx)
                # Write the updated list back to the store under the same key.
                output_db[key] = transcripts_in_bin

                if num_processed % 10000 == 0:
                    logger.info(
                        "Genomic position index added " +
                        str(num_processed) + " transcripts so far.")
        finally:
            # Close the stores even when indexing fails part-way through.
            output_db.close()
    finally:
        transcript_db.close()