def test_hashcode_changes_when_tx_mode_changes(self):
    """Test that a call to set_tx_mode will change the md5 hash for the datasource.

    The same sequence is checked twice, each time after seeding the
    datasource through set_hashcode with a different value: the hash must
    differ between canonical and best-effect mode, and switching back to
    canonical must restore the hash seen before the switch.
    """
    ds = TestUtils._create_test_gencode_v19_ds(
        "out/test_hashcode_changes_when_tx_mode_changes_")
    ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)

    # First round: seed with "dummy".
    dummy_seed = "dummy"
    ds.set_hashcode(dummy_seed)
    initial_hash = ds.get_hashcode()
    # The stored hash must not simply be the seed that was passed in.
    self.assertNotEqual(initial_hash, dummy_seed)

    ds.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)
    be_hash = ds.get_hashcode()
    self.assertNotEqual(initial_hash, be_hash)

    ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
    restored_hash = ds.get_hashcode()
    self.assertEqual(restored_hash, initial_hash)

    # Second round: make sure new_dummy changes the hash.
    new_dummy_seed = "new_dummy"
    ds.set_hashcode(new_dummy_seed)
    initial_hash2 = ds.get_hashcode()
    self.assertNotEqual(initial_hash2, initial_hash)

    ds.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)
    be_hash2 = ds.get_hashcode()
    self.assertNotEqual(initial_hash2, be_hash2)
    # A different seed must also give a different best-effect hash.
    self.assertNotEqual(be_hash, be_hash2)

    ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
    restored_hash2 = ds.get_hashcode()
    self.assertEqual(restored_hash2, initial_hash2)
def test_canonical_tx_list(self):
    """Verify that a custom canonical transcript list changes the selected transcript.

    With ENST00000544786 in the custom list the mutation is expected to be
    annotated on that transcript as an intron variant; after the list is
    emptied the same mutation is expected to be a missense on some other
    transcript.
    """
    datasource = TestUtils._create_test_gencode_v19_ds(
        "out/test_canonical_tx_list_")
    custom_tx_id = "ENST00000544786"

    mut = MutationDataFactory.default_create()
    mut.chr = "22"
    mut.start = "22142650"
    mut.end = "22142650"
    mut.ref_allele = "T"
    mut.alt_allele = "A"

    datasource.set_custom_canonical_txs([custom_tx_id])
    datasource.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)

    # NOTE: tx list overrides best effect
    annotated = datasource.annotate_mutation(mut)
    self.assertTrue(
        annotated['annotation_transcript'].startswith(custom_tx_id))
    self.assertTrue(
        annotated['variant_classification'] == VariantClassification.INTRON)

    # Clearing the custom list: the custom transcript is no longer chosen.
    datasource.set_custom_canonical_txs([])
    annotated = datasource.annotate_mutation(mut)
    self.assertTrue(
        annotated['variant_classification'] == VariantClassification.MISSENSE)
    self.assertFalse(
        annotated['annotation_transcript'].startswith(custom_tx_id))
def test_check_for_missing_appris_tag(self):
    """Check that the correct value is returned for a site with no appris tag.

    The first transcript returned for the query ("16", 61556, 61556,
    padding=100) is expected to rank as
    TranscriptProviderUtils.NO_APPRIS_VALUE.
    """
    ds = TestUtils._create_test_gencode_v19_ds("out/appris_no_tag")
    txs = ds.get_overlapping_transcripts("16", 61556, 61556, padding=100)
    # Guard the txs[0] access below with a readable failure.
    self.assertGreater(len(txs), 0)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(ds._get_appris_rank(txs[0]),
                     TranscriptProviderUtils.NO_APPRIS_VALUE)
def test_small_positive_strand_transcript_change(self):
    """Annotate one SNP at each of two loci and check the rendered transcript change."""
    datasource = TestUtils._create_test_gencode_v19_ds(
        "out/small_positive_strand_")

    # Now for a negative strand
    mut = MutationDataFactory.default_create()
    mut.chr = "22"
    mut.start = "22221730"
    mut.end = "22221730"
    mut.ref_allele = "T"
    mut.alt_allele = "G"
    annotated = datasource.annotate_mutation(mut)
    observed = annotated['transcript_change']
    self.assertTrue(observed == "c.1A>C",
                    "Incorrect transcript change: " + observed)

    # positive strand
    mut = MutationDataFactory.default_create()
    mut.chr = "3"
    mut.start = "178916614"
    mut.end = "178916614"
    mut.ref_allele = "G"
    mut.alt_allele = "T"
    annotated = datasource.annotate_mutation(mut)
    observed = annotated['transcript_change']
    self.assertTrue(observed == "c.1G>T",
                    "Incorrect transcript change: " + observed)
def test_hashcode_changes_when_tx_mode_changes(self):
    """Test that a call to set_tx_mode will change the md5 hash for the datasource.

    The same sequence is checked twice, each time after seeding the
    datasource through set_hashcode with a different value: the hash must
    differ between canonical and best-effect mode, and switching back to
    canonical must restore the hash seen before the switch.
    """
    ds = TestUtils._create_test_gencode_v19_ds(
        "out/test_hashcode_changes_when_tx_mode_changes_")
    ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)

    # First round: seed with "dummy".
    dummy_seed = "dummy"
    ds.set_hashcode(dummy_seed)
    initial_hash = ds.get_hashcode()
    # The stored hash must not simply be the seed that was passed in.
    self.assertNotEqual(initial_hash, dummy_seed)

    ds.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)
    be_hash = ds.get_hashcode()
    self.assertNotEqual(initial_hash, be_hash)

    ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
    restored_hash = ds.get_hashcode()
    self.assertEqual(restored_hash, initial_hash)

    # Second round: make sure new_dummy changes the hash.
    new_dummy_seed = "new_dummy"
    ds.set_hashcode(new_dummy_seed)
    initial_hash2 = ds.get_hashcode()
    self.assertNotEqual(initial_hash2, initial_hash)

    ds.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)
    be_hash2 = ds.get_hashcode()
    self.assertNotEqual(initial_hash2, be_hash2)
    # A different seed must also give a different best-effect hash.
    self.assertNotEqual(be_hash, be_hash2)

    ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
    restored_hash2 = ds.get_hashcode()
    self.assertEqual(restored_hash2, initial_hash2)
def test_check_for_appris_tag(self):
    """Test that a transcript with an appris tag returns the right rank.

    The query ("22", 22222050, 22222050, padding=100) is expected to yield
    exactly one transcript, whose appris rank is expected to be 0.
    """
    ds = TestUtils._create_test_gencode_v19_ds("out/appris_tag")
    txs = ds.get_overlapping_transcripts("22", 22222050, 22222050, padding=100)
    self.assertEqual(len(txs), 1)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(ds._get_appris_rank(txs[0]), 0)
def test_overlapping_gene_5flank(self):
    """Test that we can collect an overlapping gene on its 5' Flank.

    With padding=100 the position 22222050 on "22" is expected to hit
    exactly one transcript (ENST00000398822.3); position 22224920, queried
    without padding, is expected to hit none.
    """
    ds = TestUtils._create_test_gencode_v19_ds("out/overlapping_genes_flank")

    txs = ds.get_overlapping_transcripts("22", 22222050, 22222050, padding=100)
    # assertEqual reports the actual values on failure, unlike assertTrue(a == b).
    self.assertEqual(len(txs), 1)
    self.assertEqual(txs[0].get_transcript_id(), "ENST00000398822.3")

    txs = ds.get_overlapping_transcripts("22", 22224920, 22224920)
    self.assertEqual(len(txs), 0)
def test_overlapping_gene_5flank(self):
    """Test that we can collect an overlapping gene on its 5' Flank.

    With padding=100 the position 22222050 on "22" is expected to hit
    exactly one transcript (ENST00000398822.3); position 22224920, queried
    without padding, is expected to hit none.
    """
    ds = TestUtils._create_test_gencode_v19_ds("out/overlapping_genes_flank")

    txs = ds.get_overlapping_transcripts("22", 22222050, 22222050, padding=100)
    # assertEqual reports the actual values on failure, unlike assertTrue(a == b).
    self.assertEqual(len(txs), 1)
    self.assertEqual(txs[0].get_transcript_id(), "ENST00000398822.3")

    txs = ds.get_overlapping_transcripts("22", 22224920, 22224920)
    self.assertEqual(len(txs), 0)
def test_basic_tag_filtering(self):
    """Exercise the "basic" transcript filter on one transcript, with and without its tag.

    The transcript is expected to pass the filter as loaded, and to be
    filtered out once the 'tag' entry is removed from its other attributes.
    """
    basic_filter = TranscriptFilterFactory.create_instance("basic")
    datasource = TestUtils._create_test_gencode_v19_ds(
        "out/basic_tag_filter_ensembl_ds")
    transcript = datasource.getTranscriptDict()["ENST00000215832.6"]

    kept = basic_filter.filter([transcript])
    self.assertTrue(len(kept) == 1)

    # Drop the 'tag' attribute in place (no error if it is absent).
    other_attributes = transcript.get_other_attributes()
    other_attributes.pop('tag', None)

    kept = basic_filter.filter([transcript])
    self.assertTrue(len(kept) == 0)
def test_basic_tag_filtering(self):
    """Exercise the "basic" transcript filter on one transcript, with and without its tag.

    The transcript is expected to pass the filter as loaded, and to be
    filtered out once the 'tag' entry is removed from its other attributes.
    """
    basic_filter = TranscriptFilterFactory.create_instance("basic")
    datasource = TestUtils._create_test_gencode_v19_ds(
        "out/basic_tag_filter_ensembl_ds")
    transcript = datasource.getTranscriptDict()["ENST00000215832.6"]

    kept = basic_filter.filter([transcript])
    self.assertTrue(len(kept) == 1)

    # Drop the 'tag' attribute in place (no error if it is absent).
    other_attributes = transcript.get_other_attributes()
    other_attributes.pop('tag', None)

    kept = basic_filter.filter([transcript])
    self.assertTrue(len(kept) == 0)
def test_hgvs_annotations_simple_SNP(self):
    """Test that HGVS annotations appear (incl. protein change) in a mutation.

    If they do, we believe that the Transcript objects are populated
    properly.
    """
    ds = TestUtils._create_test_gencode_v19_ds("out/test_hgvs_annotations_SNP_")

    # Now for a negative strand
    # Built through the factory, as the other tests in this file do.
    m = MutationDataFactory.default_create()
    m.chr = "22"
    m.start = "22221730"
    m.end = "22221730"
    m.ref_allele = "T"
    m.alt_allele = "G"
    m.build = "hg19"
    m2 = ds.annotate_mutation(m)

    self.assertEqual(m2.get('HGVS_genomic_change', None),
                     'chr22.hg19:g.22221730T>G')
    self.assertEqual(m2.get('HGVS_coding_DNA_change', None),
                     'ENST00000215832.6:c.1A>C')
    self.assertEqual(m2.get('HGVS_protein_change', None),
                     'ENSP00000215832:p.Met1Leu')
def test_hgvs_annotations_IGR(self):
    """Test that the HGVS annotations appear for IGR.

    The genomic change is expected to be populated, while the coding DNA
    and protein changes are expected to be empty strings.
    """
    ds = TestUtils._create_test_gencode_v19_ds("out/test_hgvs_annotations_IGR_")

    # Built through the factory, as the other tests in this file do.
    m = MutationDataFactory.default_create()
    m.createAnnotation('variant_type', 'SNP')
    m.createAnnotation('build', 'hg19')
    m.createAnnotation('variant_classification', 'IGR')
    m.createAnnotation('chr', '15')
    m.createAnnotation('start', 30938316)
    m.createAnnotation('end', 30938316)
    m.createAnnotation('ref_allele', 'G')
    m.createAnnotation('alt_allele', 'A')
    m2 = ds.annotate_mutation(m)

    self.assertEqual(m2.get('HGVS_genomic_change', None),
                     'chr15.hg19:g.30938316G>A')
    self.assertEqual(m2.get('HGVS_coding_DNA_change', None), '')
    self.assertEqual(m2.get('HGVS_protein_change', None), '')
def test_retrieve_transcript_by_gene(self):
    """Fetch the transcripts of MAPK1 by gene name and sanity-check the result.

    More than two transcripts are expected, two specific versioned
    transcript IDs must be among them, and every returned transcript must
    report MAPK1 as its gene.
    """
    gene = "MAPK1"
    datasource = TestUtils._create_test_gencode_v19_ds(
        "out/test_retrieve_transcript_by_gene_")
    transcripts = datasource.retrieve_transcripts_by_gene(gene)
    self.assertTrue(len(transcripts) > 2)

    found_ids = []
    for transcript in transcripts:
        found_ids.append(transcript.get_transcript_id())

    self.assertTrue(
        "ENST00000398822.3" in found_ids,
        "ENST00000398822.3 not in gene %s -- is the version number correct?" % gene)
    self.assertTrue(
        "ENST00000215832.6" in found_ids,
        "ENST00000215832.6 not in gene %s -- is the version number correct?" % gene)

    for transcript in transcripts:
        self.assertTrue(transcript.get_gene() == gene)
def test_retrieve_transcript_by_gene(self):
    """Fetch the transcripts of MAPK1 by gene name and sanity-check the result.

    More than two transcripts are expected, two specific versioned
    transcript IDs must be among them, and every returned transcript must
    report MAPK1 as its gene.
    """
    gene = "MAPK1"
    datasource = TestUtils._create_test_gencode_v19_ds(
        "out/test_retrieve_transcript_by_gene_")
    transcripts = datasource.retrieve_transcripts_by_gene(gene)
    self.assertTrue(len(transcripts) > 2)

    found_ids = []
    for transcript in transcripts:
        found_ids.append(transcript.get_transcript_id())

    self.assertTrue(
        "ENST00000398822.3" in found_ids,
        "ENST00000398822.3 not in gene %s -- is the version number correct?" % gene)
    self.assertTrue(
        "ENST00000215832.6" in found_ids,
        "ENST00000215832.6 not in gene %s -- is the version number correct?" % gene)

    for transcript in transcripts:
        self.assertTrue(transcript.get_gene() == gene)
def test_hgvs_annotations_IGR(self):
    """Check the HGVS fields produced for a mutation classified as IGR.

    The genomic change is expected to be populated, while the coding DNA
    and protein changes are expected to be empty strings.
    """
    datasource = TestUtils._create_test_gencode_v19_ds(
        "out/test_hgvs_annotations_IGR_")

    mut = MutationDataFactory.default_create()
    mut.createAnnotation('variant_type', 'SNP')
    mut.createAnnotation('build', 'hg19')
    mut.createAnnotation('variant_classification', 'IGR')
    mut.createAnnotation('chr', '15')
    mut.createAnnotation('start', 30938316)
    mut.createAnnotation('end', 30938316)
    mut.createAnnotation('ref_allele', 'G')
    mut.createAnnotation('alt_allele', 'A')

    annotated = datasource.annotate_mutation(mut)

    genomic_change = annotated.get('HGVS_genomic_change', None)
    self.assertEqual(genomic_change, 'chr15.hg19:g.30938316G>A')
    coding_change = annotated.get('HGVS_coding_DNA_change', None)
    self.assertEqual(coding_change, '')
    protein_change = annotated.get('HGVS_protein_change', None)
    self.assertEqual(protein_change, '')
def test_hgvs_annotations_simple_SNP(self):
    """Check the HGVS genomic, coding DNA and protein changes for a single SNP.

    Seeing all three (incl. the protein change) is taken as evidence that
    the Transcript objects are populated properly.
    """
    datasource = TestUtils._create_test_gencode_v19_ds(
        "out/test_hgvs_annotations_SNP_")

    # Now for a negative strand
    mut = MutationDataFactory.default_create()
    mut.chr = "22"
    mut.start = "22221730"
    mut.end = "22221730"
    mut.ref_allele = "T"
    mut.alt_allele = "G"
    mut.build = "hg19"

    annotated = datasource.annotate_mutation(mut)

    genomic_change = annotated.get('HGVS_genomic_change', None)
    self.assertEqual(genomic_change, 'chr22.hg19:g.22221730T>G')
    coding_change = annotated.get('HGVS_coding_DNA_change', None)
    self.assertEqual(coding_change, 'ENST00000215832.6:c.1A>C')
    protein_change = annotated.get('HGVS_protein_change', None)
    self.assertEqual(protein_change, 'ENSP00000215832:p.Met1Leu')
def test_no_mapping_file(self):
    """Test that we can still create (from scratch) and instantiate a EnsemblDatasource
    when no protein mapping is specified (i.e. limited HGVS support).

    The genomic and coding DNA changes are expected to be unaffected; the
    protein change is expected to carry the 'unknown_prot_seq_id' prefix in
    place of a protein ID.
    """
    ds = TestUtils._create_test_gencode_v19_ds(
        "out/test_hgvs_annotations_no_mapping_file_",
        protein_id_mapping_file=None)

    # Now for a negative strand
    # Built through the factory, as the other tests in this file do.
    m = MutationDataFactory.default_create()
    m.chr = "22"
    m.start = "22221730"
    m.end = "22221730"
    m.ref_allele = "T"
    m.alt_allele = "G"
    m.build = "hg19"
    m2 = ds.annotate_mutation(m)

    self.assertEqual(m2.get('HGVS_genomic_change', None),
                     'chr22.hg19:g.22221730T>G')
    self.assertEqual(m2.get('HGVS_coding_DNA_change', None),
                     'ENST00000215832.6:c.1A>C')
    self.assertEqual(m2.get('HGVS_protein_change', None),
                     'unknown_prot_seq_id:p.Met1Leu')
def test_no_mapping_file(self):
    """Test that we can still create (from scratch) and instantiate a EnsemblDatasource
    when no protein mapping is specified (i.e. limited HGVS support).

    The genomic and coding DNA changes are expected to be unaffected; the
    protein change is expected to carry the 'unknown_prot_seq_id' prefix in
    place of a protein ID.
    """
    ds = TestUtils._create_test_gencode_v19_ds(
        "out/test_hgvs_annotations_no_mapping_file_",
        protein_id_mapping_file=None)

    # Now for a negative strand
    m = MutationDataFactory.default_create()
    m.chr = "22"
    m.start = "22221730"
    m.end = "22221730"
    m.ref_allele = "T"
    m.alt_allele = "G"
    m.build = "hg19"
    m2 = ds.annotate_mutation(m)

    self.assertEqual(m2.get('HGVS_genomic_change', None),
                     'chr22.hg19:g.22221730T>G')
    self.assertEqual(m2.get('HGVS_coding_DNA_change', None),
                     'ENST00000215832.6:c.1A>C')
    self.assertEqual(m2.get('HGVS_protein_change', None),
                     'unknown_prot_seq_id:p.Met1Leu')
def test_canonical_tx_list_empty(self):
    """Check that leaving the custom canonical list unset, or setting it empty, changes nothing.

    In both cases the mutation is expected not to be annotated on
    ENST00000544786 and not to be classified as an intron variant.
    """
    datasource = TestUtils._create_test_gencode_v19_ds(
        "out/test_canonical_tx_list_empty_")
    custom_tx_id = "ENST00000544786"

    mut = MutationDataFactory.default_create()
    mut.chr = "22"
    mut.start = "22142650"
    mut.end = "22142650"
    mut.ref_allele = "T"
    mut.alt_allele = "A"

    # No custom list has been set at all.
    annotated = datasource.annotate_mutation(mut)
    self.assertFalse(
        annotated['annotation_transcript'].startswith(custom_tx_id))
    self.assertFalse(
        annotated['variant_classification'] == VariantClassification.INTRON)

    # Explicitly empty custom list.
    datasource.set_custom_canonical_txs([])
    annotated = datasource.annotate_mutation(mut)
    self.assertTrue(
        annotated['variant_classification'] == VariantClassification.MISSENSE)
    self.assertFalse(
        annotated['annotation_transcript'].startswith(custom_tx_id))
def test_canonical_tx_list_empty(self):
    """Test that not specifying the canonical list will do nothing.

    Both with no custom list set and with an explicitly empty one, the
    mutation is expected not to be annotated on ENST00000544786 and not to
    be classified as an intron variant.
    """
    # Use a path of this test's own rather than the one that
    # test_canonical_tx_list passes to the same helper.
    # NOTE(review): presumably the argument is an output/index location --
    # confirm against TestUtils.
    ds = TestUtils._create_test_gencode_v19_ds("out/test_canonical_tx_list_empty_")

    # Built through the factory, as the other tests in this file do.
    m = MutationDataFactory.default_create()
    m.chr = "22"
    m.start = "22142650"
    m.end = "22142650"
    m.ref_allele = "T"
    m.alt_allele = "A"

    # No custom list has been set at all.
    m2 = ds.annotate_mutation(m)
    self.assertFalse(m2['annotation_transcript'].startswith("ENST00000544786"))
    self.assertNotEqual(m2['variant_classification'], VariantClassification.INTRON)

    # Explicitly empty custom list.
    ds.set_custom_canonical_txs([])
    m2 = ds.annotate_mutation(m)
    self.assertEqual(m2['variant_classification'], VariantClassification.MISSENSE)
    self.assertFalse(m2['annotation_transcript'].startswith("ENST00000544786"))
def test_canonical_tx_list(self):
    """Verify that a custom canonical transcript list changes the selected transcript.

    With ENST00000544786 in the custom list the mutation is expected to be
    annotated on that transcript as an intron variant; after the list is
    emptied the same mutation is expected to be a missense on some other
    transcript.
    """
    datasource = TestUtils._create_test_gencode_v19_ds(
        "out/test_canonical_tx_list_")
    custom_tx_id = "ENST00000544786"

    mut = MutationDataFactory.default_create()
    mut.chr = "22"
    mut.start = "22142650"
    mut.end = "22142650"
    mut.ref_allele = "T"
    mut.alt_allele = "A"

    datasource.set_custom_canonical_txs([custom_tx_id])
    datasource.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)

    # NOTE: tx list overrides best effect
    annotated = datasource.annotate_mutation(mut)
    self.assertTrue(
        annotated['annotation_transcript'].startswith(custom_tx_id))
    self.assertTrue(
        annotated['variant_classification'] == VariantClassification.INTRON)

    # Clearing the custom list: the custom transcript is no longer chosen.
    datasource.set_custom_canonical_txs([])
    annotated = datasource.annotate_mutation(mut)
    self.assertTrue(
        annotated['variant_classification'] == VariantClassification.MISSENSE)
    self.assertFalse(
        annotated['annotation_transcript'].startswith(custom_tx_id))
def test_small_positive_strand_transcript_change(self):
    """Annotate one SNP at each of two loci and check the rendered transcript change."""
    datasource = TestUtils._create_test_gencode_v19_ds(
        "out/small_positive_strand_")

    # Now for a negative strand
    mut = MutationDataFactory.default_create()
    mut.chr = "22"
    mut.start = "22221730"
    mut.end = "22221730"
    mut.ref_allele = "T"
    mut.alt_allele = "G"
    annotated = datasource.annotate_mutation(mut)
    observed = annotated['transcript_change']
    self.assertTrue(observed == "c.1A>C",
                    "Incorrect transcript change: " + observed)

    # positive strand
    mut = MutationDataFactory.default_create()
    mut.chr = "3"
    mut.start = "178916614"
    mut.end = "178916614"
    mut.ref_allele = "G"
    mut.alt_allele = "T"
    annotated = datasource.annotate_mutation(mut)
    observed = annotated['transcript_change']
    self.assertTrue(observed == "c.1G>T",
                    "Incorrect transcript change: " + observed)
def test_check_for_appris_tag(self):
    """Test that a transcript with an appris tag returns the right rank.

    The query ("22", 22222050, 22222050, padding=100) is expected to yield
    exactly one transcript, whose appris rank is expected to be 0.
    """
    ds = TestUtils._create_test_gencode_v19_ds("out/appris_tag")
    txs = ds.get_overlapping_transcripts("22", 22222050, 22222050, padding=100)
    self.assertEqual(len(txs), 1)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(ds._get_appris_rank(txs[0]), 0)
def test_overlapping_gene(self):
    """Check that MAPK1 is among the genes returned for ("22", 22115000, 22120000)."""
    datasource = TestUtils._create_test_gencode_v19_ds("out/overlapping_genes_")
    found_genes = datasource.get_overlapping_genes("22", 22115000, 22120000)

    # Expected genes absent from the result; this must come out empty.
    expected_genes = {"MAPK1"}
    missing_genes = expected_genes - found_genes
    self.assertTrue(len(missing_genes) == 0)
def test_overlapping_multiple_genes(self):
    """Check that MAPK1 and YPEL1 are both returned for ("22", 22080000, 22120000)."""
    datasource = TestUtils._create_test_gencode_v19_ds(
        "out/overlapping_genes_multiple_")
    found_genes = datasource.get_overlapping_genes("22", 22080000, 22120000)

    # Expected genes absent from the result; this must come out empty.
    expected_genes = {"MAPK1", "YPEL1"}
    missing_genes = expected_genes - found_genes
    self.assertTrue(len(missing_genes) == 0)
def test_check_for_missing_appris_tag(self):
    """Check that the correct value is returned for a site with no appris tag.

    The first transcript returned for the query ("16", 61556, 61556,
    padding=100) is expected to rank as
    TranscriptProviderUtils.NO_APPRIS_VALUE.
    """
    ds = TestUtils._create_test_gencode_v19_ds("out/appris_no_tag")
    txs = ds.get_overlapping_transcripts("16", 61556, 61556, padding=100)
    # Guard the txs[0] access below with a readable failure.
    self.assertGreater(len(txs), 0)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(ds._get_appris_rank(txs[0]),
                     TranscriptProviderUtils.NO_APPRIS_VALUE)
def test_overlapping_multiple_genes(self):
    """Check that MAPK1 and YPEL1 are both returned for ("22", 22080000, 22120000)."""
    datasource = TestUtils._create_test_gencode_v19_ds(
        "out/overlapping_genes_multiple_")
    found_genes = datasource.get_overlapping_genes("22", 22080000, 22120000)

    # Expected genes absent from the result; this must come out empty.
    expected_genes = {"MAPK1", "YPEL1"}
    missing_genes = expected_genes - found_genes
    self.assertTrue(len(missing_genes) == 0)
def test_overlapping_gene(self):
    """Check that MAPK1 is among the genes returned for ("22", 22115000, 22120000)."""
    datasource = TestUtils._create_test_gencode_v19_ds("out/overlapping_genes_")
    found_genes = datasource.get_overlapping_genes("22", 22115000, 22120000)

    # Expected genes absent from the result; this must come out empty.
    expected_genes = {"MAPK1"}
    missing_genes = expected_genes - found_genes
    self.assertTrue(len(missing_genes) == 0)