def test_hashcode_changes_when_tx_mode_changes(self):
        """Test that a call to set_tx_mode will change the md5 hash for the datasource.

        The assertions below check three things: the hashcode differs from
        the raw seed, it differs between tx-modes, and switching back to a
        previously used tx-mode restores the hashcode seen for that mode.
        The whole sequence is then repeated with a second seed.
        """
        ds = TestUtils._create_test_gencode_v19_ds("out/test_hashcode_changes_when_tx_mode_changes_")
        ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
        dummy_seed = "dummy"
        ds.set_hashcode(dummy_seed)

        # The stored hashcode must not simply echo the seed back.
        initial_hash = ds.get_hashcode()
        self.assertNotEqual(initial_hash, dummy_seed)

        ds.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)
        be_hash = ds.get_hashcode()
        self.assertNotEqual(initial_hash, be_hash)

        # Returning to the original tx-mode must restore the original hash.
        ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
        test_hash = ds.get_hashcode()
        self.assertEqual(test_hash, initial_hash)

        new_dummy_seed = "new_dummy"
        ds.set_hashcode(new_dummy_seed)

        # Make sure new_dummy changes the hash.
        initial_hash2 = ds.get_hashcode()
        self.assertNotEqual(initial_hash2, initial_hash)

        ds.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)
        be_hash2 = ds.get_hashcode()
        self.assertNotEqual(initial_hash2, be_hash2)
        # Same tx-mode but a different seed must still give a different hash.
        self.assertNotEqual(be_hash, be_hash2)

        ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
        test_hash = ds.get_hashcode()
        self.assertEqual(test_hash, initial_hash2)
Esempio n. 2
0
    def test_canonical_tx_list(self):
        """Test that specifying the canonical list will actually change the transcript selected.

        A custom canonical transcript is expected to be chosen even in
        best-effect mode; after the list is cleared, that transcript should
        no longer be the one selected.
        """
        ds = TestUtils._create_test_gencode_v19_ds(
            "out/test_canonical_tx_list_")
        m = MutationDataFactory.default_create()
        m.chr = "22"
        m.start = "22142650"
        m.end = "22142650"
        m.ref_allele = "T"
        m.alt_allele = "A"
        ds.set_custom_canonical_txs(["ENST00000544786"])
        ds.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)

        # NOTE: tx list overrides best effect
        m2 = ds.annotate_mutation(m)
        self.assertTrue(
            m2['annotation_transcript'].startswith("ENST00000544786"))
        self.assertEqual(
            m2['variant_classification'], VariantClassification.INTRON)

        # With the custom list cleared, the custom transcript is no longer chosen.
        ds.set_custom_canonical_txs([])
        m2 = ds.annotate_mutation(m)
        self.assertEqual(
            m2['variant_classification'], VariantClassification.MISSENSE)
        self.assertFalse(
            m2['annotation_transcript'].startswith("ENST00000544786"))
Esempio n. 3
0
 def test_check_for_missing_appris_tag(self):
     """Check that the correct value is returned for a site with no appris tag.

     The first overlapping transcript at the queried site is expected to
     rank as TranscriptProviderUtils.NO_APPRIS_VALUE.
     """
     ds = TestUtils._create_test_gencode_v19_ds("out/appris_no_tag", )
     txs = ds.get_overlapping_transcripts("16", 61556, 61556, padding=100)
     # Guard against an empty result so the txs[0] lookup below cannot raise IndexError.
     self.assertGreater(len(txs), 0)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(ds._get_appris_rank(txs[0]),
                      TranscriptProviderUtils.NO_APPRIS_VALUE)
Esempio n. 4
0
    def test_small_positive_strand_transcript_change(self):
        """Test one location on a transcript and make sure that the transcript change rendered properly.

        Two SNPs are annotated, one commented as negative strand and one as
        positive strand, and the 'transcript_change' annotation is compared
        against the expected string for each.
        """
        ds = TestUtils._create_test_gencode_v19_ds(
            "out/small_positive_strand_")

        # Now for a negative strand
        m = MutationDataFactory.default_create()
        m.chr = "22"
        m.start = "22221730"
        m.end = "22221730"
        m.ref_allele = "T"
        m.alt_allele = "G"
        m2 = ds.annotate_mutation(m)
        self.assertEqual(
            m2['transcript_change'], "c.1A>C",
            "Incorrect transcript change: " + m2['transcript_change'])

        # positive strand
        m = MutationDataFactory.default_create()
        m.chr = "3"
        m.start = "178916614"
        m.end = "178916614"
        m.ref_allele = "G"
        m.alt_allele = "T"
        m2 = ds.annotate_mutation(m)
        self.assertEqual(
            m2['transcript_change'], "c.1G>T",
            "Incorrect transcript change: " + m2['transcript_change'])
    def test_hashcode_changes_when_tx_mode_changes(self):
        """Test that a call to set_tx_mode will change the md5 hash for the datasource.

        Checks that the hashcode differs from the raw seed, differs between
        tx-modes, and is restored when the tx-mode is switched back; the
        sequence is then repeated with a second seed.
        """
        ds = TestUtils._create_test_gencode_v19_ds("out/test_hashcode_changes_when_tx_mode_changes_")
        ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
        dummy_seed = "dummy"
        ds.set_hashcode(dummy_seed)

        # The stored hashcode must not simply echo the seed back.
        initial_hash = ds.get_hashcode()
        self.assertNotEqual(initial_hash, dummy_seed)

        ds.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)
        be_hash = ds.get_hashcode()
        self.assertNotEqual(initial_hash, be_hash)

        # Returning to the original tx-mode must restore the original hash.
        ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
        test_hash = ds.get_hashcode()
        self.assertEqual(test_hash, initial_hash)

        new_dummy_seed = "new_dummy"
        ds.set_hashcode(new_dummy_seed)

        # Make sure new_dummy changes the hash.
        initial_hash2 = ds.get_hashcode()
        self.assertNotEqual(initial_hash2, initial_hash)

        ds.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)
        be_hash2 = ds.get_hashcode()
        self.assertNotEqual(initial_hash2, be_hash2)
        # Same tx-mode but a different seed must still give a different hash.
        self.assertNotEqual(be_hash, be_hash2)

        ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
        test_hash = ds.get_hashcode()
        self.assertEqual(test_hash, initial_hash2)
Esempio n. 6
0
 def test_check_for_appris_tag(self):
     """Test that a transcript with an appris tag returns the right rank.

     Exactly one transcript is expected at the queried site, and its
     appris rank is expected to be 0.
     """
     ds = TestUtils._create_test_gencode_v19_ds("out/appris_tag", )
     txs = ds.get_overlapping_transcripts("22",
                                          22222050,
                                          22222050,
                                          padding=100)
     self.assertEqual(len(txs), 1)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(ds._get_appris_rank(txs[0]), 0)
    def test_overlapping_gene_5flank(self):
        """Test that we can collect an overlapping gene on its 5' Flank.

        The first query uses 100 bases of padding and must return exactly
        one transcript; the second query (no padding argument) must return none.
        """
        ds = TestUtils._create_test_gencode_v19_ds("out/overlapping_genes_flank")
        txs = ds.get_overlapping_transcripts("22", 22222050, 22222050, padding=100)
        self.assertEqual(len(txs), 1)
        self.assertEqual(txs[0].get_transcript_id(), "ENST00000398822.3")

        # No padding argument is passed here, and no transcript is expected.
        txs = ds.get_overlapping_transcripts("22", 22224920, 22224920)
        self.assertEqual(len(txs), 0)
    def test_overlapping_gene_5flank(self):
        """Test that we can collect an overlapping gene on its 5' Flank.

        A padded query is expected to return exactly one transcript, while a
        query at a second position without a padding argument is expected to
        return nothing.
        """
        ds = TestUtils._create_test_gencode_v19_ds("out/overlapping_genes_flank")
        txs = ds.get_overlapping_transcripts("22", 22222050, 22222050, padding=100)
        self.assertEqual(len(txs), 1)
        self.assertEqual(txs[0].get_transcript_id(), "ENST00000398822.3")

        # No padding argument is passed here, and no transcript is expected.
        txs = ds.get_overlapping_transcripts("22", 22224920, 22224920)
        self.assertEqual(len(txs), 0)
    def test_basic_tag_filtering(self):
        """Test several cases for the BasicTagTranscriptFilter.

        A transcript is expected to pass the "basic" filter as loaded, and
        to be rejected once its 'tag' attribute has been removed.
        """
        tx_filter = TranscriptFilterFactory.create_instance("basic")

        ensembl_ds = TestUtils._create_test_gencode_v19_ds("out/basic_tag_filter_ensembl_ds")
        tx_dict = ensembl_ds.getTranscriptDict()

        tx = tx_dict["ENST00000215832.6"]
        self.assertEqual(len(tx_filter.filter([tx])), 1)

        # Drop the 'tag' attribute in place.
        # NOTE(review): this relies on get_other_attributes() returning the
        # transcript's live dict rather than a copy -- confirm.
        attrib_dict = tx.get_other_attributes()
        attrib_dict.pop('tag', None)

        self.assertEqual(len(tx_filter.filter([tx])), 0)
Esempio n. 10
0
    def test_basic_tag_filtering(self):
        """Test several cases for the BasicTagTranscriptFilter.

        The transcript must pass the "basic" filter while it still has its
        'tag' attribute and must be filtered out after that attribute is
        removed.
        """
        tx_filter = TranscriptFilterFactory.create_instance("basic")

        ensembl_ds = TestUtils._create_test_gencode_v19_ds(
            "out/basic_tag_filter_ensembl_ds")
        tx_dict = ensembl_ds.getTranscriptDict()

        tx = tx_dict["ENST00000215832.6"]
        self.assertEqual(len(tx_filter.filter([tx])), 1)

        # Drop the 'tag' attribute in place.
        # NOTE(review): this relies on get_other_attributes() returning the
        # transcript's live dict rather than a copy -- confirm.
        attrib_dict = tx.get_other_attributes()
        attrib_dict.pop('tag', None)

        self.assertEqual(len(tx_filter.filter([tx])), 0)
    def test_hgvs_annotations_simple_SNP(self):
        """Check that annotating a SNP yields the HGVS genomic, coding DNA and protein change annotations.

        A populated protein change is taken as evidence that the Transcript
        objects were populated properly.
        """
        datasource = TestUtils._create_test_gencode_v19_ds("out/test_hgvs_annotations_SNP_")

        # Negative-strand SNP.
        mutation = MutationData()
        mutation.chr = "22"
        mutation.start = "22221730"
        mutation.end = "22221730"
        mutation.ref_allele = "T"
        mutation.alt_allele = "G"
        mutation.build = "hg19"
        annotated = datasource.annotate_mutation(mutation)

        # (annotation name, expected value), checked in this order.
        expected_annotations = (
            ('HGVS_genomic_change', 'chr22.hg19:g.22221730T>G'),
            ('HGVS_coding_DNA_change', 'ENST00000215832.6:c.1A>C'),
            ('HGVS_protein_change', 'ENSP00000215832:p.Met1Leu'),
        )
        for annotation_name, expected_value in expected_annotations:
            self.assertEqual(annotated.get(annotation_name, None), expected_value)
 def test_hgvs_annotations_IGR(self):
     """Check the HGVS annotations produced for a mutation classified as IGR.

     The genomic change must be rendered, while the coding DNA and protein
     changes are expected to be empty strings.
     """
     datasource = TestUtils._create_test_gencode_v19_ds("out/test_hgvs_annotations_IGR_")
     mutation = MutationData()

     # Input annotations, created in this order.
     input_annotations = (
         ('variant_type', 'SNP'),
         ('build', 'hg19'),
         ('variant_classification', 'IGR'),
         ('chr', '15'),
         ('start', 30938316),
         ('end', 30938316),
         ('ref_allele', 'G'),
         ('alt_allele', 'A'),
     )
     for annotation_name, annotation_value in input_annotations:
         mutation.createAnnotation(annotation_name, annotation_value)

     annotated = datasource.annotate_mutation(mutation)

     expected_annotations = (
         ('HGVS_genomic_change', 'chr15.hg19:g.30938316G>A'),
         ('HGVS_coding_DNA_change', ''),
         ('HGVS_protein_change', ''),
     )
     for annotation_name, expected_value in expected_annotations:
         self.assertEqual(annotated.get(annotation_name, None), expected_value)
    def test_retrieve_transcript_by_gene(self):
        """Simple test of retrieve_transcript_by_gene.

        More than two transcripts are expected for the gene, two specific
        versioned transcript IDs must be among them, and every returned
        transcript must report the queried gene.
        """
        gene = "MAPK1"

        ds = TestUtils._create_test_gencode_v19_ds("out/test_retrieve_transcript_by_gene_")
        txs = ds.retrieve_transcripts_by_gene(gene)

        self.assertGreater(len(txs), 2)

        tx_ids = [tx.get_transcript_id() for tx in txs]

        self.assertIn("ENST00000398822.3", tx_ids, "ENST00000398822.3 not in gene %s -- is the version number correct?" % gene)
        self.assertIn("ENST00000215832.6", tx_ids, "ENST00000215832.6 not in gene %s -- is the version number correct?" % gene)

        for tx in txs:
            self.assertEqual(tx.get_gene(), gene)
    def test_retrieve_transcript_by_gene(self):
        """Simple test of retrieve_transcript_by_gene.

        Expects more than two transcripts for the gene, including two
        specific versioned transcript IDs, all of which report the queried
        gene name.
        """
        gene = "MAPK1"

        ds = TestUtils._create_test_gencode_v19_ds("out/test_retrieve_transcript_by_gene_")
        txs = ds.retrieve_transcripts_by_gene(gene)

        self.assertGreater(len(txs), 2)

        tx_ids = [tx.get_transcript_id() for tx in txs]

        self.assertIn("ENST00000398822.3", tx_ids, "ENST00000398822.3 not in gene %s -- is the version number correct?" % gene)
        self.assertIn("ENST00000215832.6", tx_ids, "ENST00000215832.6 not in gene %s -- is the version number correct?" % gene)

        for tx in txs:
            self.assertEqual(tx.get_gene(), gene)
 def test_hgvs_annotations_IGR(self):
     """Check the HGVS annotations produced for a mutation classified as IGR.

     The genomic change must be rendered, while the coding DNA and protein
     changes are expected to be empty strings.
     """
     datasource = TestUtils._create_test_gencode_v19_ds("out/test_hgvs_annotations_IGR_")
     mutation = MutationDataFactory.default_create()

     # Input annotations, created in this order.
     input_annotations = (
         ('variant_type', 'SNP'),
         ('build', 'hg19'),
         ('variant_classification', 'IGR'),
         ('chr', '15'),
         ('start', 30938316),
         ('end', 30938316),
         ('ref_allele', 'G'),
         ('alt_allele', 'A'),
     )
     for annotation_name, annotation_value in input_annotations:
         mutation.createAnnotation(annotation_name, annotation_value)

     annotated = datasource.annotate_mutation(mutation)

     expected_annotations = (
         ('HGVS_genomic_change', 'chr15.hg19:g.30938316G>A'),
         ('HGVS_coding_DNA_change', ''),
         ('HGVS_protein_change', ''),
     )
     for annotation_name, expected_value in expected_annotations:
         self.assertEqual(annotated.get(annotation_name, None), expected_value)
    def test_hgvs_annotations_simple_SNP(self):
        """Check that annotating a SNP yields the HGVS genomic, coding DNA and protein change annotations.

        A populated protein change is taken as evidence that the Transcript
        objects were populated properly.
        """
        datasource = TestUtils._create_test_gencode_v19_ds("out/test_hgvs_annotations_SNP_")

        # Negative-strand SNP.
        mutation = MutationDataFactory.default_create()
        mutation.chr = "22"
        mutation.start = "22221730"
        mutation.end = "22221730"
        mutation.ref_allele = "T"
        mutation.alt_allele = "G"
        mutation.build = "hg19"
        annotated = datasource.annotate_mutation(mutation)

        # (annotation name, expected value), checked in this order.
        expected_annotations = (
            ('HGVS_genomic_change', 'chr22.hg19:g.22221730T>G'),
            ('HGVS_coding_DNA_change', 'ENST00000215832.6:c.1A>C'),
            ('HGVS_protein_change', 'ENSP00000215832:p.Met1Leu'),
        )
        for annotation_name, expected_value in expected_annotations:
            self.assertEqual(annotated.get(annotation_name, None), expected_value)
    def test_no_mapping_file(self):
        """Test that we can still create (from scratch) and instantiate a EnsemblDatasource when no protein mapping is specified (i.e. limited HGVS support).

        The datasource is built with protein_id_mapping_file=None. The
        genomic and coding DNA changes are still expected in full, while the
        protein change is expected to carry the 'unknown_prot_seq_id' prefix.
        """
        ds = TestUtils._create_test_gencode_v19_ds("out/test_hgvs_annotations_no_mapping_file_", protein_id_mapping_file=None)

        # Now for a negative strand
        m = MutationData()
        m.chr = "22"
        m.start = "22221730"
        m.end = "22221730"
        m.ref_allele = "T"
        m.alt_allele = "G"
        m.build = "hg19"
        m2 = ds.annotate_mutation(m)
        self.assertEqual(m2.get('HGVS_genomic_change', None), 'chr22.hg19:g.22221730T>G')
        self.assertEqual(m2.get('HGVS_coding_DNA_change', None), 'ENST00000215832.6:c.1A>C')
        self.assertEqual(m2.get('HGVS_protein_change', None), 'unknown_prot_seq_id:p.Met1Leu')
    def test_no_mapping_file(self):
        """Test that we can still create (from scratch) and instantiate a EnsemblDatasource when no protein mapping is specified (i.e. limited HGVS support).

        The datasource is built with protein_id_mapping_file=None. The
        genomic and coding DNA changes are still expected in full, while the
        protein change is expected to carry the 'unknown_prot_seq_id' prefix.
        """
        ds = TestUtils._create_test_gencode_v19_ds("out/test_hgvs_annotations_no_mapping_file_", protein_id_mapping_file=None)

        # Now for a negative strand
        m = MutationDataFactory.default_create()
        m.chr = "22"
        m.start = "22221730"
        m.end = "22221730"
        m.ref_allele = "T"
        m.alt_allele = "G"
        m.build = "hg19"
        m2 = ds.annotate_mutation(m)
        self.assertEqual(m2.get('HGVS_genomic_change', None), 'chr22.hg19:g.22221730T>G')
        self.assertEqual(m2.get('HGVS_coding_DNA_change', None), 'ENST00000215832.6:c.1A>C')
        self.assertEqual(m2.get('HGVS_protein_change', None), 'unknown_prot_seq_id:p.Met1Leu')
    def test_canonical_tx_list_empty(self):
        """Test that not specifying the canonical list will do nothing.

        The mutation is annotated once with no custom list configured and
        once after explicitly setting an empty list; in both cases the
        ENST00000544786 transcript must not be the one selected.
        """
        ds = TestUtils._create_test_gencode_v19_ds("out/test_canonical_tx_list_empty_")
        m = MutationDataFactory.default_create()
        m.chr = "22"
        m.start = "22142650"
        m.end = "22142650"
        m.ref_allele = "T"
        m.alt_allele = "A"

        # No custom canonical list has been set at this point.
        m2 = ds.annotate_mutation(m)
        self.assertFalse(m2['annotation_transcript'].startswith("ENST00000544786"))
        self.assertNotEqual(m2['variant_classification'], VariantClassification.INTRON)

        # An explicitly empty list must behave the same way.
        ds.set_custom_canonical_txs([])
        m2 = ds.annotate_mutation(m)
        self.assertEqual(m2['variant_classification'], VariantClassification.MISSENSE)
        self.assertFalse(m2['annotation_transcript'].startswith("ENST00000544786"))
    def test_canonical_tx_list_empty(self):
        """Test that not specifying the canonical list will do nothing.

        The mutation is annotated once with no custom list configured and
        once after explicitly setting an empty list; in both cases the
        ENST00000544786 transcript must not be the one selected.
        """
        # Use an output prefix specific to this test instead of the one
        # test_canonical_tx_list uses.
        # NOTE(review): presumably the prefix names generated datasource
        # files, so sharing it lets the two tests interfere -- confirm.
        ds = TestUtils._create_test_gencode_v19_ds("out/test_canonical_tx_list_empty_")
        m = MutationData()
        m.chr = "22"
        m.start = "22142650"
        m.end = "22142650"
        m.ref_allele = "T"
        m.alt_allele = "A"

        # No custom canonical list has been set at this point.
        m2 = ds.annotate_mutation(m)
        self.assertFalse(m2['annotation_transcript'].startswith("ENST00000544786"))
        self.assertNotEqual(m2['variant_classification'], VariantClassification.INTRON)

        # An explicitly empty list must behave the same way.
        ds.set_custom_canonical_txs([])
        m2 = ds.annotate_mutation(m)
        self.assertEqual(m2['variant_classification'], VariantClassification.MISSENSE)
        self.assertFalse(m2['annotation_transcript'].startswith("ENST00000544786"))
    def test_canonical_tx_list(self):
        """Test that specifying the canonical list will actually change the transcript selected.

        A custom canonical transcript is expected to be chosen even in
        best-effect mode; after the list is cleared, that transcript should
        no longer be the one selected.
        """
        ds = TestUtils._create_test_gencode_v19_ds("out/test_canonical_tx_list_")
        m = MutationDataFactory.default_create()
        m.chr = "22"
        m.start = "22142650"
        m.end = "22142650"
        m.ref_allele = "T"
        m.alt_allele = "A"
        ds.set_custom_canonical_txs(["ENST00000544786"])
        ds.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)

        # NOTE: tx list overrides best effect
        m2 = ds.annotate_mutation(m)
        self.assertTrue(m2['annotation_transcript'].startswith("ENST00000544786"))
        self.assertEqual(m2['variant_classification'], VariantClassification.INTRON)

        # With the custom list cleared, the custom transcript is no longer chosen.
        ds.set_custom_canonical_txs([])
        m2 = ds.annotate_mutation(m)
        self.assertEqual(m2['variant_classification'], VariantClassification.MISSENSE)
        self.assertFalse(m2['annotation_transcript'].startswith("ENST00000544786"))
    def test_small_positive_strand_transcript_change(self):
        """Test one location on a transcript and make sure that the transcript change rendered properly.

        Two SNPs are annotated, one commented as negative strand and one as
        positive strand, and the 'transcript_change' annotation is compared
        against the expected string for each.
        """
        ds = TestUtils._create_test_gencode_v19_ds("out/small_positive_strand_")

        # Now for a negative strand
        m = MutationDataFactory.default_create()
        m.chr = "22"
        m.start = "22221730"
        m.end = "22221730"
        m.ref_allele = "T"
        m.alt_allele = "G"
        m2 = ds.annotate_mutation(m)
        self.assertEqual(m2['transcript_change'], "c.1A>C", "Incorrect transcript change: " + m2['transcript_change'])

        # positive strand
        m = MutationDataFactory.default_create()
        m.chr = "3"
        m.start = "178916614"
        m.end = "178916614"
        m.ref_allele = "G"
        m.alt_allele = "T"
        m2 = ds.annotate_mutation(m)
        self.assertEqual(m2['transcript_change'], "c.1G>T", "Incorrect transcript change: " + m2['transcript_change'])
 def test_check_for_appris_tag(self):
     """Test that a transcript with an appris tag returns the right rank.

     Exactly one transcript is expected at the queried site, and its
     appris rank is expected to be 0.
     """
     ds = TestUtils._create_test_gencode_v19_ds("out/appris_tag",)
     txs = ds.get_overlapping_transcripts("22", 22222050, 22222050, padding=100)
     self.assertEqual(len(txs), 1)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(ds._get_appris_rank(txs[0]), 0)
 def test_overlapping_gene(self):
     """Test that we can collect an overlapping gene.

     The result may contain additional genes; only the presence of MAPK1
     is required.
     """
     ds = TestUtils._create_test_gencode_v19_ds("out/overlapping_genes_")
     genes = ds.get_overlapping_genes("22", 22115000, 22120000)
     # Set difference leaves whichever expected genes were not returned.
     missing = {"MAPK1"} - genes
     self.assertFalse(missing, "Expected gene(s) not found: %s" % ", ".join(sorted(missing)))
 def test_overlapping_multiple_genes(self):
     """Test that we can collect multiple overlapping genes.

     The result may contain additional genes; only the presence of MAPK1
     and YPEL1 is required.
     """
     ds = TestUtils._create_test_gencode_v19_ds("out/overlapping_genes_multiple_")
     genes = ds.get_overlapping_genes("22", 22080000, 22120000)
     # Set difference leaves whichever expected genes were not returned.
     missing = {"MAPK1", "YPEL1"} - genes
     self.assertFalse(missing, "Expected gene(s) not found: %s" % ", ".join(sorted(missing)))
 def test_check_for_missing_appris_tag(self):
     """Check that the correct value is returned for a site with no appris tag.

     The first overlapping transcript at the queried site is expected to
     rank as TranscriptProviderUtils.NO_APPRIS_VALUE.
     """
     ds = TestUtils._create_test_gencode_v19_ds("out/appris_no_tag",)
     txs = ds.get_overlapping_transcripts("16", 61556, 61556, padding=100)
     # Guard against an empty result so the txs[0] lookup below cannot raise IndexError.
     self.assertGreater(len(txs), 0)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(ds._get_appris_rank(txs[0]), TranscriptProviderUtils.NO_APPRIS_VALUE)
 def test_overlapping_multiple_genes(self):
     """Verify that a region query reports every one of the expected genes."""
     datasource = TestUtils._create_test_gencode_v19_ds("out/overlapping_genes_multiple_")
     found_genes = datasource.get_overlapping_genes("22", 22080000, 22120000)
     expected_genes = {"MAPK1", "YPEL1"}
     # Anything left after the set difference was expected but not returned.
     missing_genes = expected_genes - found_genes
     self.assertTrue(not missing_genes)
 def test_overlapping_gene(self):
     """Verify that a region query reports the expected gene."""
     datasource = TestUtils._create_test_gencode_v19_ds("out/overlapping_genes_")
     found_genes = datasource.get_overlapping_genes("22", 22115000, 22120000)
     expected_genes = {"MAPK1"}
     # Anything left after the set difference was expected but not returned.
     missing_genes = expected_genes - found_genes
     self.assertTrue(not missing_genes)