def build_ensembl_transcripts_by_genomic_location_index(self, ensembl_transcript_index_fname, output_filename, protocol="file"):
    """Create a genomic-location-bin -> transcripts index from a transcript index.

    Every transcript stored in the index created in
    build_ensembl_transcript_index is read and appended to a list kept in
    the output index under the key ``"<contig>_<bin>"``, where ``<bin>`` is
    ``region2bin(start, end)`` for that transcript.  If the output index
    already holds a list for a key, the transcript is appended to it.

    :param ensembl_transcript_index_fname: location of the existing
        transcript index; appended to ``protocol + "://"`` to open it.
    :param output_filename: location of the index to write; opened the
        same way, with ``optimize=False``.
    :param protocol: URI scheme used for both stores (default ``"file"``).

    Both stores are closed even when indexing fails part-way through.
    """
    logger = logging.getLogger(__name__)

    transcript_db = Shove(protocol + "://" + ensembl_transcript_index_fname)
    try:
        output_db = Shove(protocol + "://" + output_filename, optimize=False)
        try:
            # Count from 1 so the progress message reports how many
            # transcripts have actually been added.
            for num_added, tx_id in enumerate(transcript_db.keys(), 1):
                tx = transcript_db[tx_id]
                genomic_location_bin = region2bin(tx.get_start(), tx.get_end())
                key = tx.get_contig() + "_" + str(genomic_location_bin)

                try:
                    bin_transcripts = output_db[key]
                except KeyError:
                    # First transcript seen for this contig/bin.
                    bin_transcripts = []

                bin_transcripts.append(tx)
                # Re-assign so the store persists the updated list.
                output_db[key] = bin_transcripts

                if num_added % 10000 == 0:
                    logger.info("Genomic position index added %d transcripts so far.", num_added)
        finally:
            output_db.close()
    finally:
        transcript_db.close()
Exemple #2
0
    def test_region2bin(self):
        """Check that region2bin returns the expected bin for a known region.

        The expected value, 243, is the bin for the PIK3CA transcript
        footprint used below.
        """

        # Footprint for PIK3CA transcript chr3:178,866,311-178,952,497  uc003fjk.3
        guess = region2bin(178866311, 178952497)

        # assertEqual reports both values on failure, unlike assertTrue(a == b).
        self.assertEqual(guess, 243)
    def test_region2bin(self):
        """Check that region2bin returns the expected bin for a known region.

        The expected value, 243, is the bin for the PIK3CA transcript
        footprint used below.
        """

        # Footprint for PIK3CA transcript chr3:178,866,311-178,952,497  uc003fjk.3
        guess = region2bin(178866311, 178952497)

        # assertEqual reports both values on failure, unlike assertTrue(a == b).
        self.assertEqual(guess, 243)
    def build_ensembl_transcripts_by_genomic_location_index(
            self,
            ensembl_transcript_index_fname,
            output_filename,
            protocol="file"):
        """Create a genomic-location-bin -> transcripts index from a transcript index.

        Every transcript stored in the index created in
        build_ensembl_transcript_index is read and appended to a list kept
        in the output index under the key ``"<contig>_<bin>"``, where
        ``<bin>`` is ``region2bin(start, end)`` for that transcript.  If the
        output index already holds a list for a key, the transcript is
        appended to it.

        :param ensembl_transcript_index_fname: location of the existing
            transcript index; appended to ``protocol + "://"`` to open it.
        :param output_filename: location of the index to write; opened the
            same way, with ``optimize=False``.
        :param protocol: URI scheme used for both stores (default
            ``"file"``).

        Both stores are closed even when indexing fails part-way through.
        """
        logger = logging.getLogger(__name__)

        transcript_db = Shove(protocol + "://" +
                              ensembl_transcript_index_fname)
        try:
            output_db = Shove(protocol + "://" + output_filename,
                              optimize=False)
            try:
                # Count from 1 so the progress message reports how many
                # transcripts have actually been added.
                for num_added, tx_id in enumerate(transcript_db.keys(), 1):
                    tx = transcript_db[tx_id]
                    genomic_location_bin = region2bin(tx.get_start(),
                                                      tx.get_end())
                    key = tx.get_contig() + "_" + str(genomic_location_bin)

                    try:
                        bin_transcripts = output_db[key]
                    except KeyError:
                        # First transcript seen for this contig/bin.
                        bin_transcripts = []

                    bin_transcripts.append(tx)
                    # Re-assign so the store persists the updated list.
                    output_db[key] = bin_transcripts

                    if num_added % 10000 == 0:
                        logger.info(
                            "Genomic position index added %d transcripts so far.",
                            num_added)
            finally:
                output_db.close()
        finally:
            transcript_db.close()