def test_appris_selects_transcript(self):
    """Annotate the chr2:201722365-201722366 AC>- variant and check the chosen transcript.

    The expected transcript ID is ground truth tied to the datasource contents;
    the assertion message suggests revisiting it if the lookup returns None.
    """
    mutation = MutationData(
        chr="2",
        start="201722365",
        end="201722366",
        ref_allele="AC",
        alt_allele="-",
        build="hg19",
    )
    datasource = TestUtils.createTranscriptProviderDatasource(self.config)
    annotated = datasource.annotate_mutation(mutation)

    # Resolve the transcript named by the annotation back to a transcript object.
    chosen_tx = datasource.get_transcript(annotated['annotation_transcript'])
    self.assertTrue(
        chosen_tx is not None,
        "Transcript was None when it should have been found. Does the ground truth transcript above need to be updated?",
    )
    self.assertEqual(chosen_tx._transcript_id, 'ENST00000321356.4')
def test_appris_selects_transcript(self):
    """Annotate the chr2:201722365-201722366 AC>- variant and check the chosen transcript.

    The mutation is built through MutationDataFactory.default_create. The
    expected transcript ID is ground truth tied to the datasource contents;
    the assertion message suggests revisiting it if the lookup returns None.
    """
    mutation = MutationDataFactory.default_create(
        chr="2",
        start="201722365",
        end="201722366",
        ref_allele="AC",
        alt_allele="-",
        build="hg19",
    )
    datasource = TestUtils.createTranscriptProviderDatasource(self.config)
    annotated = datasource.annotate_mutation(mutation)

    # Resolve the transcript named by the annotation back to a transcript object.
    chosen_tx = datasource.get_transcript(annotated['annotation_transcript'])
    self.assertTrue(
        chosen_tx is not None,
        "Transcript was None when it should have been found. Does the ground truth transcript above need to be updated?",
    )
    self.assertEqual(chosen_tx._transcript_id, 'ENST00000321356.4')
def _get_chosen_tx_and_transcript_ds(self, chrom, loc):
    """Choose a transcript at a single position with the datasource in canonical mode.

    Builds a transcript datasource from the unit-test config, switches it to
    TranscriptProvider.TX_MODE_CANONICAL, looks up the transcripts at
    chrom:loc and lets the datasource choose one of them, passing
    VariantClassification.VT_SNP as the variant type.

    :param chrom: chromosome, passed as ``chr`` to the position lookup
    :param loc: position; converted with ``str()`` and used as both start and end
    :return: tuple of (chosen transcript, transcript datasource)
    """
    ds = TestUtils.createTranscriptProviderDatasource(TestUtils.createUnitTestConfig())
    ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)

    pos = str(loc)
    candidates = ds.get_transcripts_by_pos(chr=chrom, start=pos, end=pos)

    # The two empty strings are presumably the ref/alt alleles -- confirm
    # against the _choose_transcript signature.
    picked = ds._choose_transcript(
        candidates,
        ds.get_tx_mode(),
        VariantClassification.VT_SNP,
        "",
        "",
        pos,
        pos,
    )
    return picked, ds
def test_retrieve_transcripts_from_region(self):
    """Test that we can retrieve a large number of transcripts.  Requires a full gencode datasource.

    Queries chr1:1-100000000 and expects more than 4000 transcripts spanning
    more than 1500 distinct values of ``tx.get_gene()``.
    """
    config = TestUtils.createUnitTestConfig()
    transcript_ds = TestUtils.createTranscriptProviderDatasource(config)
    filtered_txs = transcript_ds.get_transcripts_by_pos(chr="1", start="1", end="100000000")

    # assertGreater reports the actual count on failure, whereas
    # assertTrue(len(...) > N) only reports "False is not true".
    self.assertGreater(len(filtered_txs), 4000)

    gene_set = {tx.get_gene() for tx in filtered_txs}
    self.assertGreater(len(gene_set), 1500)
def test_3_prime_flank_annotation_negative_strand(self):
    """A chr5:1253255 A>T variant must get the variant classification "3'Flank".

    Per the test name, the relevant transcript is expected to lie on the
    negative strand; that is a property of the datasource, not checked here.
    """
    mutation = MutationDataFactory.default_create(
        chr="5",
        start="1253255",
        end="1253255",
        ref_allele="A",
        alt_allele="T",
        build="hg19",
    )
    datasource = TestUtils.createTranscriptProviderDatasource(self.config)
    annotated = datasource.annotate_mutation(mutation)

    classification = annotated['variant_classification']
    self.assertEqual(classification, "3'Flank")
def test_not_5_prime_flank_annotation_positive_strand(self):
    """A chr3:180625088 C>A variant must get the variant classification "IGR".

    Per the test name, this guards against the position being reported as a
    5' flank of a positive-strand transcript.
    """
    mutation = MutationDataFactory.default_create(
        chr="3",
        start="180625088",
        end="180625088",
        ref_allele="C",
        alt_allele="A",
        build="hg19",
    )
    datasource = TestUtils.createTranscriptProviderDatasource(self.config)
    annotated = datasource.annotate_mutation(mutation)

    classification = annotated['variant_classification']
    self.assertEqual(classification, "IGR")
def test_appris_ccds_tag(self):
    """Annotate the chr1:200818757 C>A variant and check the chosen transcript.

    The expected transcript ID is ground truth tied to the datasource contents;
    the assertion message suggests revisiting it if the lookup returns None.
    """
    mutation = MutationDataFactory.default_create(
        chr="1",
        start="200818757",
        end="200818757",
        ref_allele="C",
        alt_allele="A",
        build="hg19",
    )
    datasource = TestUtils.createTranscriptProviderDatasource(self.config)
    annotated = datasource.annotate_mutation(mutation)

    # Resolve the transcript named by the annotation back to a transcript object.
    chosen_tx = datasource.get_transcript(annotated['annotation_transcript'])
    self.assertTrue(
        chosen_tx is not None,
        "Transcript was None when it should have been found. Does the ground truth transcript above need to be updated?",
    )
    self.assertEqual(chosen_tx._transcript_id, 'ENST00000358823.2')
def test_protein_position_off_by_one(self, chrom, start, end, ref, alt, gt_prot_change):
    """Check the protein_change annotation of one variant against ground truth.

    The datasource is given a custom canonical transcript list read from
    testdata/tx_exact_uniprot_matches.txt plus one extra transcript.
    The extra arguments are presumably supplied by a data-driven decorator
    outside this view -- confirm against the enclosing class.

    :param chrom: chromosome assigned to the mutation's ``chr``
    :param start: start position assigned to the mutation
    :param end: end position assigned to the mutation
    :param ref: reference allele assigned to the mutation
    :param alt: alternate allele assigned to the mutation
    :param gt_prot_change: expected value of the 'protein_change' annotation
    """
    config = TestUtils.createUnitTestConfig()
    transcript_ds = TestUtils.createTranscriptProviderDatasource(config)

    # open() in a with-block replaces the Python-2-only file() builtin and
    # guarantees the handle is closed even if reading raises.
    with open("testdata/tx_exact_uniprot_matches.txt", 'r') as cc_txs_fp:
        # Keep only the text before the last "." on each line.
        cc_txs = [tx.rsplit(".", 1)[0] for tx in cc_txs_fp]
    cc_txs.append("ENST00000338368")  # Add a transcript that is not exactly the same, but close
    transcript_ds.set_custom_canonical_txs(cc_txs)

    m = MutationData()
    m.chr = chrom
    m.start = start
    m.end = end
    m.ref_allele = ref
    m.alt_allele = alt

    m2 = transcript_ds.annotate_mutation(m)
    self.assertEqual(m2['protein_change'], gt_prot_change)
def test_protein_position_off_by_one(self, chrom, start, end, ref, alt, gt_prot_change):
    """Check the protein_change annotation of one variant against ground truth.

    The datasource is given a custom canonical transcript list read from
    testdata/tx_exact_uniprot_matches.txt plus one extra transcript.
    The extra arguments are presumably supplied by a data-driven decorator
    outside this view -- confirm against the enclosing class.

    :param chrom: chromosome assigned to the mutation's ``chr``
    :param start: start position assigned to the mutation
    :param end: end position assigned to the mutation
    :param ref: reference allele assigned to the mutation
    :param alt: alternate allele assigned to the mutation
    :param gt_prot_change: expected value of the 'protein_change' annotation
    """
    config = TestUtils.createUnitTestConfig()
    transcript_ds = TestUtils.createTranscriptProviderDatasource(config)

    # open() in a with-block replaces the Python-2-only file() builtin and
    # guarantees the handle is closed even if reading raises.
    with open("testdata/tx_exact_uniprot_matches.txt", 'r') as cc_txs_fp:
        # Keep only the text before the last "." on each line.
        cc_txs = [tx.rsplit(".", 1)[0] for tx in cc_txs_fp]
    cc_txs.append("ENST00000338368")  # Add a transcript that is not exactly the same, but close
    transcript_ds.set_custom_canonical_txs(cc_txs)

    m = MutationDataFactory.default_create()
    m.chr = chrom
    m.start = start
    m.end = end
    m.ref_allele = ref
    m.alt_allele = alt

    m2 = transcript_ds.annotate_mutation(m)
    self.assertEqual(m2['protein_change'], gt_prot_change)
def test_3_prime_flank_annotation_negative_strand(self):
    """Expect "3'Flank" as the variant classification for chr5:1253255 A>T.

    The negative-strand aspect comes from the test name only; it depends on
    the datasource contents and is not asserted directly.
    """
    variant = MutationDataFactory.default_create(
        chr="5",
        start="1253255",
        end="1253255",
        ref_allele="A",
        alt_allele="T",
        build="hg19",
    )
    tx_datasource = TestUtils.createTranscriptProviderDatasource(self.config)
    result = tx_datasource.annotate_mutation(variant)

    observed = result['variant_classification']
    self.assertEqual(observed, "3'Flank")
def test_not_5_prime_flank_annotation_positive_strand(self):
    """Expect "IGR" as the variant classification for chr3:180625088 C>A.

    Going by the test name, the point is that this position must not be
    reported as a 5' flank of a positive-strand transcript.
    """
    variant = MutationDataFactory.default_create(
        chr="3",
        start="180625088",
        end="180625088",
        ref_allele="C",
        alt_allele="A",
        build="hg19",
    )
    tx_datasource = TestUtils.createTranscriptProviderDatasource(self.config)
    result = tx_datasource.annotate_mutation(variant)

    observed = result['variant_classification']
    self.assertEqual(observed, "IGR")