def __init__(self, src_file, title='ENSEMBL', version='',
             tx_mode=TranscriptProvider.TX_MODE_CANONICAL, protocol="file",
             is_thread_safe=False, tx_filter="dummy", custom_canonical_txs=None):
    """Open the three Shove-backed transcript indexes and configure this datasource.

    :param src_file: base path; each index file name is this value plus a fixed suffix
        (".transcript.idx", ".transcript_by_gene.idx", ".transcript_by_gp_bin.idx").
    :param title: datasource title, forwarded to the parent initializer and used in log messages.
    :param version: datasource version, forwarded to the parent initializer and used in log messages.
    :param tx_mode: transcript mode handed to ``self.set_tx_mode``.
    :param protocol: URI scheme used for the Shove stores (e.g. "file").
    :param is_thread_safe: when true the "memory" cache is used; otherwise the "simple" cache.
    :param tx_filter: filter name handed to ``TranscriptFilterFactory.create_instance``.
    :param custom_canonical_txs: custom canonical transcripts to remember; a falsy value
        (including None) is stored as an empty list.
    """
    super(EnsemblTranscriptDatasource, self).__init__(src_file=src_file, title=title, version=version)

    log = logging.getLogger(__name__)

    # By specifying "memory" for the cache, this is thread safe.  Otherwise, use "simple".
    if is_thread_safe:
        cache_protocol = "memory"
    else:
        log.info("%s %s is being set up in faster, NOT thread-safe mode (for annotation). " % (title, version))
        cache_protocol = "simple"
    cache_uri = cache_protocol + "://"

    # timeout: seconds before a cache entry should be cleared out.
    cache_settings = {"timeout": 1000, "max_entries": 25000}

    def _open_index(suffix):
        # One Shove store per index file; all three share the same cache configuration.
        index_path = src_file + suffix
        return shove.Shove(protocol + "://" + index_path, cache_uri, **cache_settings)

    # Contains a key of transcript id and value of a Transcript class, with sequence data where possible.
    self.transcript_db = _open_index(".transcript.idx")
    self.gene_db = _open_index(".transcript_by_gene.idx")
    self.gp_bin_db = _open_index(".transcript_by_gp_bin.idx")

    # NOTE(review): the key listing is requested and discarded, exactly as before;
    # presumably this forces the bin index to be read up front -- confirm before removing.
    self.gp_bin_db.keys()

    log.info("%s %s is being set up with default tx-mode: %s. " % (title, version, tx_mode))
    self.set_tx_mode(tx_mode)

    log.info("%s %s is being set up with %s filtering. " % (title, version, tx_filter))
    self._tx_filter = TranscriptFilterFactory.create_instance(tx_filter)

    self._hgvs_xformer = HgvsChangeTransformer()

    # Store a list of the custom canonical transcripts
    self._custom_canonical_txs = custom_canonical_txs or []
def __init__(self, src_file, title='ENSEMBL', version='',
             tx_mode=TranscriptProvider.TX_MODE_CANONICAL, protocol="file",
             is_thread_safe=False, tx_filter="dummy"):
    """Open the three Shove-backed transcript indexes and configure this datasource.

    :param src_file: base path; each index file name is this value plus a fixed suffix
        (".transcript.idx", ".transcript_by_gene.idx", ".transcript_by_gp_bin.idx").
    :param title: datasource title, forwarded to the parent initializer and used in log messages.
    :param version: datasource version, forwarded to the parent initializer and used in log messages.
    :param tx_mode: transcript mode handed to ``self.set_tx_mode``.
    :param protocol: URI scheme used for the Shove stores (e.g. "file").
    :param is_thread_safe: when true the "memory" cache is used; otherwise the "simple" cache.
    :param tx_filter: filter name handed to ``TranscriptFilterFactory.create_instance``.
    """
    super(EnsemblTranscriptDatasource, self).__init__(src_file=src_file, title=title, version=version)

    logger = logging.getLogger(__name__)

    # Seconds before a cache entry should be cleared out
    entry_timeout = 1000
    entry_limit = 25000

    # By specifying "memory" for the cache, this is thread safe.  Otherwise, use "simple".
    cache_protocol = "memory"
    if not is_thread_safe:
        logger.info("%s %s is being set up in faster, NOT thread-safe mode (for annotation). " % (title, version))
        cache_protocol = "simple"

    # (attribute name, index file suffix), opened in this order.
    # transcript_db contains a key of transcript id and value of a Transcript class,
    # with sequence data where possible.
    index_layout = (
        ("transcript_db", ".transcript.idx"),
        ("gene_db", ".transcript_by_gene.idx"),
        ("gp_bin_db", ".transcript_by_gp_bin.idx"),
    )
    for attr_name, suffix in index_layout:
        store_uri = protocol + "://" + (src_file + suffix)
        store = shove.Shove(store_uri, cache_protocol + "://",
                            timeout=entry_timeout, max_entries=entry_limit)
        setattr(self, attr_name, store)

    # NOTE(review): the key listing is requested and discarded, exactly as before;
    # presumably this forces the bin index to be read up front -- confirm before removing.
    self.gp_bin_db.keys()

    logger.info("%s %s is being set up with default tx-mode: %s. " % (title, version, tx_mode))
    self.set_tx_mode(tx_mode)

    logger.info("%s %s is being set up with %s filtering. " % (title, version, tx_filter))
    self._tx_filter = TranscriptFilterFactory.create_instance(tx_filter)

    self._hgvs_xformer = HgvsChangeTransformer()
def test_basic_tag_filtering(self):
    """Check that the "basic" filter keeps ENST00000215832.6 as loaded and drops it once 'tag' is removed."""
    tx_filter = TranscriptFilterFactory.create_instance("basic")
    ensembl_ds = TestUtils._create_test_gencode_v19_ds("out/basic_tag_filter_ensembl_ds")
    tx_dict = ensembl_ds.getTranscriptDict()
    tx = tx_dict["ENST00000215832.6"]

    # As loaded from the test datasource, the transcript is expected to pass the filter.
    # assertEqual (rather than assertTrue on a comparison) reports the actual count on failure.
    self.assertEqual(len(tx_filter.filter([tx])), 1)

    # Remove the 'tag' attribute in place; the next assertion only holds if
    # get_other_attributes() hands back the transcript's own mapping rather than a copy.
    attrib_dict = tx.get_other_attributes()
    attrib_dict.pop('tag', None)

    # Without a 'tag' attribute the transcript is expected to be filtered out.
    self.assertEqual(len(tx_filter.filter([tx])), 0)
def test_basic_tag_filtering(self):
    """Check that the "basic" filter keeps ENST00000215832.6 as loaded and drops it once 'tag' is removed."""
    tx_filter = TranscriptFilterFactory.create_instance("basic")
    ensembl_ds = TestUtils._create_test_gencode_v19_ds(
        "out/basic_tag_filter_ensembl_ds")
    tx_dict = ensembl_ds.getTranscriptDict()
    tx = tx_dict["ENST00000215832.6"]

    # As loaded from the test datasource, the transcript is expected to pass the filter.
    # assertEqual (rather than assertTrue on a comparison) reports the actual count on failure.
    kept_before = tx_filter.filter([tx])
    self.assertEqual(len(kept_before), 1)

    # Remove the 'tag' attribute in place; the next assertion only holds if
    # get_other_attributes() hands back the transcript's own mapping rather than a copy.
    attrib_dict = tx.get_other_attributes()
    attrib_dict.pop('tag', None)

    # Without a 'tag' attribute the transcript is expected to be filtered out.
    kept_after = tx_filter.filter([tx])
    self.assertEqual(len(kept_after), 0)