def __init__(self,  src_file, title='ENSEMBL', version='', tx_mode=TranscriptProvider.TX_MODE_CANONICAL, protocol="file", is_thread_safe=False, tx_filter="dummy", custom_canonical_txs=None):
        """Open the transcript index stores and configure transcript selection.

        Args:
            src_file: Base path of the datasource.  The three index stores are
                located by appending ".transcript.idx",
                ".transcript_by_gene.idx" and ".transcript_by_gp_bin.idx".
            title: Datasource title; forwarded to the parent constructor and
                used in log messages.
            version: Datasource version; forwarded to the parent constructor
                and used in log messages.
            tx_mode: Default transcript mode, handed to ``set_tx_mode``.
            protocol: Scheme used to build each store URI
                (``<protocol>://<index file>``).
            is_thread_safe: When true the "memory" cache is used; otherwise
                the faster "simple" cache is used.
            tx_filter: Name handed to ``TranscriptFilterFactory.create_instance``.
            custom_canonical_txs: Optional collection of custom canonical
                transcripts; stored as an empty list when falsy.
        """
        super(EnsemblTranscriptDatasource, self).__init__(src_file=src_file, title=title, version=version)

        logger = logging.getLogger(__name__)

        # Seconds before a cache entry should be cleared out
        timeout = 1000
        max_entries = 25000

        # By specifying "memory" for the cache, this is thread safe.  Otherwise, use "simple"
        cache_protocol = "memory"
        if not is_thread_safe:
            logger.info("%s %s is being set up in faster, NOT thread-safe mode (for annotation).  ", title, version)
            cache_protocol = "simple"

        def _open_index(suffix):
            # All three indices share the store protocol, cache type and cache limits.
            index_fname = src_file + suffix
            return shove.Shove(protocol + '://%s' % index_fname, cache_protocol + "://", timeout=timeout, max_entries=max_entries)

        # Contains a key of transcript id and value of a Transcript class, with sequence data where possible.
        self.transcript_db = _open_index(".transcript.idx")
        # Presumably gene -> transcripts and genomic-position bin -> transcripts
        # (going by the index file names) -- confirm against the index builder.
        self.gene_db = _open_index(".transcript_by_gene.idx")
        self.gp_bin_db = _open_index(".transcript_by_gp_bin.idx")

        # NOTE(review): the result is discarded.  The call is kept in case
        # construction relies on it touching the store -- confirm before removing.
        self.gp_bin_db.keys()

        logger.info("%s %s is being set up with default tx-mode: %s.  ", title, version, tx_mode)
        self.set_tx_mode(tx_mode)

        logger.info("%s %s is being set up with %s filtering.  ", title, version, tx_filter)
        self._tx_filter = TranscriptFilterFactory.create_instance(tx_filter)

        self._hgvs_xformer = HgvsChangeTransformer()

        # Store a list of the custom canonical transcripts
        self._custom_canonical_txs = custom_canonical_txs or []
    def __init__(self,  src_file, title='ENSEMBL', version='', tx_mode=TranscriptProvider.TX_MODE_CANONICAL, protocol="file", is_thread_safe=False, tx_filter="dummy"):
        """Open the transcript index stores and configure transcript selection.

        Args:
            src_file: Base path of the datasource.  The three index stores are
                located by appending ".transcript.idx",
                ".transcript_by_gene.idx" and ".transcript_by_gp_bin.idx".
            title: Datasource title; forwarded to the parent constructor and
                used in log messages.
            version: Datasource version; forwarded to the parent constructor
                and used in log messages.
            tx_mode: Default transcript mode, handed to ``set_tx_mode``.
            protocol: Scheme used to build each store URI
                (``<protocol>://<index file>``).
            is_thread_safe: When true the "memory" cache is used; otherwise
                the faster "simple" cache is used.
            tx_filter: Name handed to ``TranscriptFilterFactory.create_instance``.
        """
        super(EnsemblTranscriptDatasource, self).__init__(src_file=src_file, title=title, version=version)

        log = logging.getLogger(__name__)

        # By specifying "memory" for the cache, this is thread safe.  Otherwise, use "simple"
        cache_protocol = "memory"
        if not is_thread_safe:
            log.info("%s %s is being set up in faster, NOT thread-safe mode (for annotation).  ", title, version)
            cache_protocol = "simple"

        # Cache settings shared by every store.  The timeout is the number of
        # seconds before a cache entry should be cleared out.
        cache_uri = cache_protocol + "://"
        cache_limits = dict(timeout=1000, max_entries=25000)

        def _open_store(suffix):
            # Build "<protocol>://<src_file><suffix>" and open it with the shared cache settings.
            index_fname = src_file + suffix
            return shove.Shove(protocol + '://%s' % index_fname, cache_uri, **cache_limits)

        # Contains a key of transcript id and value of a Transcript class, with sequence data where possible.
        self.transcript_db = _open_store(".transcript.idx")
        # Presumably gene -> transcripts and genomic-position bin -> transcripts
        # (going by the index file names) -- confirm against the index builder.
        self.gene_db = _open_store(".transcript_by_gene.idx")
        self.gp_bin_db = _open_store(".transcript_by_gp_bin.idx")

        # NOTE(review): the result is discarded.  The call is kept in case
        # construction relies on it touching the store -- confirm before removing.
        self.gp_bin_db.keys()

        log.info("%s %s is being set up with default tx-mode: %s.  ", title, version, tx_mode)
        self.set_tx_mode(tx_mode)

        log.info("%s %s is being set up with %s filtering.  ", title, version, tx_filter)
        self._tx_filter = TranscriptFilterFactory.create_instance(tx_filter)

        self._hgvs_xformer = HgvsChangeTransformer()
    def test_basic_tag_filtering(self):
        """Test several cases for the BasicTagTranscriptFilter.

        The "basic" filter must keep transcript ENST00000215832.6 as loaded
        from the test datasource, and must drop it once its 'tag' attribute
        has been removed.
        """
        tx_filter = TranscriptFilterFactory.create_instance("basic")

        ensembl_ds = TestUtils._create_test_gencode_v19_ds("out/basic_tag_filter_ensembl_ds")
        tx_dict = ensembl_ds.getTranscriptDict()

        tx = tx_dict["ENST00000215832.6"]
        # assertEqual (rather than assertTrue on a comparison) reports the
        # actual number of surviving transcripts when the check fails.
        self.assertEqual(len(tx_filter.filter([tx])), 1)

        # Strip the tag in place.  The second assertion relies on
        # get_other_attributes() handing back the transcript's own dict
        # rather than a copy -- presumably so; confirm in Transcript.
        attrib_dict = tx.get_other_attributes()
        attrib_dict.pop('tag', None)

        self.assertEqual(len(tx_filter.filter([tx])), 0)
Example #4
0
    def test_basic_tag_filtering(self):
        """Test several cases for the BasicTagTranscriptFilter.

        Case 1: transcript ENST00000215832.6, taken unchanged from the test
        datasource, passes the "basic" filter.
        Case 2: the same transcript with its 'tag' attribute removed is
        filtered out.
        """
        tx_filter = TranscriptFilterFactory.create_instance("basic")

        ensembl_ds = TestUtils._create_test_gencode_v19_ds(
            "out/basic_tag_filter_ensembl_ds")
        tx = ensembl_ds.getTranscriptDict()["ENST00000215832.6"]

        # Case 1.  assertEqual shows the actual count on failure, which
        # assertTrue(len(...) == 1) would hide.
        kept = tx_filter.filter([tx])
        self.assertEqual(len(kept), 1)

        # Case 2.  The pop mutates whatever get_other_attributes() returns;
        # the test assumes that is the transcript's live attribute dict --
        # TODO confirm in Transcript.
        tx.get_other_attributes().pop('tag', None)

        kept = tx_filter.filter([tx])
        self.assertEqual(len(kept), 0)