def _get_chosen_tx_and_transcript_ds(self, chrom, loc):
     """Choose a transcript at a single position using canonical tx mode.

     A transcript datasource is built from the unit-test config and switched
     to TX_MODE_CANONICAL.  The transcripts found at ``chrom:loc`` are then
     handed to the datasource's ``_choose_transcript`` with the SNP variant
     type.

     :param chrom: contig name passed to the datasource as ``chr``
     :param loc: position; converted with ``str`` and used as both start and end
     :return: tuple of (chosen transcript, transcript datasource)
     """
     transcript_ds = TestUtils.createTranscriptProviderDatasource(TestUtils.createUnitTestConfig())
     transcript_ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
     pos = str(loc)
     candidate_txs = transcript_ds.get_transcripts_by_pos(chr=chrom, start=pos, end=pos)
     tx_mode = transcript_ds.get_tx_mode()
     chosen_tx = transcript_ds._choose_transcript(candidate_txs, tx_mode, VariantClassification.VT_SNP,
                                                  "", "", pos, pos)
     return chosen_tx, transcript_ds
 def _get_chosen_tx_and_transcript_ds(self, chrom, loc):
     """Return the transcript chosen at ``chrom:loc`` together with the datasource used.

     The datasource comes from the unit-test config and is put into
     TX_MODE_CANONICAL before the lookup.  ``loc`` is stringified and serves
     as both the start and the end of the queried interval.

     :param chrom: contig name passed to the datasource as ``chr``
     :param loc: single position to query
     :return: tuple of (chosen transcript, transcript datasource)
     """
     unit_test_config = TestUtils.createUnitTestConfig()
     datasource = TestUtils.createTranscriptProviderDatasource(unit_test_config)
     datasource.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
     position = str(loc)
     txs_at_position = datasource.get_transcripts_by_pos(chr=chrom, start=position, end=position)
     selected = datasource._choose_transcript(txs_at_position, datasource.get_tx_mode(),
                                              VariantClassification.VT_SNP, "", "", position, position)
     return selected, datasource
    def test_retrieve_transcripts_from_region(self):
        """Retrieve transcripts over a large region.  Requires a full gencode datasource.

        Queries chr 1 from position 1 to 100000000 and expects more than 4000
        transcripts covering more than 1500 distinct genes.
        """
        transcript_ds = TestUtils.createTranscriptProviderDatasource(TestUtils.createUnitTestConfig())
        region_txs = transcript_ds.get_transcripts_by_pos(chr="1", start="1", end="100000000")

        num_txs = len(region_txs)
        self.assertTrue(num_txs > 4000)

        genes = set()
        for tx in region_txs:
            genes.add(tx.get_gene())
        self.assertTrue(len(genes) > 1500)
    def test_retrieve_transcripts_from_region(self):
        """Fetch every transcript in chr1:1-100000000.  Requires a full gencode datasource.

        Passes when more than 4000 transcripts from more than 1500 distinct
        genes come back.
        """
        unit_test_config = TestUtils.createUnitTestConfig()
        datasource = TestUtils.createTranscriptProviderDatasource(unit_test_config)
        txs_in_region = datasource.get_transcripts_by_pos(chr="1", start="1", end="100000000")

        self.assertTrue(len(txs_in_region) > 4000)
        distinct_genes = set(tx.get_gene() for tx in txs_in_region)
        self.assertTrue(len(distinct_genes) > 1500)
Example #5
0
    def test_small_positive_strand_transcript_change(self):
        """Annotate one SNP per strand and check the rendered transcript change."""
        ds = TestUtils._create_test_gencode_v19_ds("out/small_positive_strand_")

        # Negative strand case: chr22 T>G is expected to render as c.1A>C.
        neg_mut = MutationDataFactory.default_create()
        neg_mut.chr = "22"
        neg_mut.start = "22221730"
        neg_mut.end = "22221730"
        neg_mut.ref_allele = "T"
        neg_mut.alt_allele = "G"
        neg_annotated = ds.annotate_mutation(neg_mut)
        neg_change = neg_annotated['transcript_change']
        self.assertTrue(neg_change == "c.1A>C",
                        "Incorrect transcript change: " + neg_annotated['transcript_change'])

        # Positive strand case: chr3 G>T is expected to render as c.1G>T.
        pos_mut = MutationDataFactory.default_create()
        pos_mut.chr = "3"
        pos_mut.start = "178916614"
        pos_mut.end = "178916614"
        pos_mut.ref_allele = "G"
        pos_mut.alt_allele = "T"
        pos_annotated = ds.annotate_mutation(pos_mut)
        pos_change = pos_annotated['transcript_change']
        self.assertTrue(pos_change == "c.1G>T",
                        "Incorrect transcript change: " + pos_annotated['transcript_change'])
Example #6
0
 def test_check_for_missing_appris_tag(self):
     """Check that the correct value is returned for a site with no appris tag.

     Looks up the transcripts overlapping chr16:61556 (padding of 100) and
     expects the appris rank of the first one to be
     TranscriptProviderUtils.NO_APPRIS_VALUE.
     """
     ds = TestUtils._create_test_gencode_v19_ds("out/appris_no_tag")
     txs = ds.get_overlapping_transcripts("16", 61556, 61556, padding=100)
     self.assertTrue(len(txs) > 0)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(ds._get_appris_rank(txs[0]),
                      TranscriptProviderUtils.NO_APPRIS_VALUE)
Example #7
0
    def test_canonical_tx_list(self):
        """Check that a custom canonical transcript list changes which transcript gets selected."""
        ds = TestUtils._create_test_gencode_v19_ds("out/test_canonical_tx_list_")
        mut = MutationDataFactory.default_create()
        mut.chr = "22"
        mut.start = "22142650"
        mut.end = "22142650"
        mut.ref_allele = "T"
        mut.alt_allele = "A"
        ds.set_custom_canonical_txs(["ENST00000544786"])
        ds.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)

        # NOTE: the custom tx list takes precedence over best effect mode.
        with_list = ds.annotate_mutation(mut)
        picked_with_list = with_list['annotation_transcript']
        self.assertTrue(picked_with_list.startswith("ENST00000544786"))
        self.assertTrue(with_list['variant_classification'] == VariantClassification.INTRON)

        # After clearing the list a different transcript, with a missense call, is expected.
        ds.set_custom_canonical_txs([])
        without_list = ds.annotate_mutation(mut)
        self.assertTrue(without_list['variant_classification'] == VariantClassification.MISSENSE)
        picked_without_list = without_list['annotation_transcript']
        self.assertFalse(picked_without_list.startswith("ENST00000544786"))
 def test_appris_selects_transcript(self):
     """Annotate a chr2 variant (ref AC, alt -) and check which transcript was selected."""
     variant = MutationData(chr="2",
                            start="201722365",
                            end="201722366",
                            ref_allele="AC",
                            alt_allele="-",
                            build="hg19")
     transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
     annotated = transcript_ds.annotate_mutation(variant)
     selected_tx = transcript_ds.get_transcript(annotated['annotation_transcript'])
     self.assertTrue(
         selected_tx is not None,
         "Transcript was None when it should have been found.  Does the ground truth transcript above need to be updated?")
     self.assertEqual(selected_tx._transcript_id, 'ENST00000321356.4')
    def test_hashcode_changes_when_tx_mode_changes(self):
        """Test that set_tx_mode changes the datasource hashcode.

        For two different seeds passed to set_hashcode, the hash must differ
        between canonical and best-effect mode, and switching back to
        canonical must restore the earlier hash.  Hashes are also compared
        across the two seeds.
        """
        ds = TestUtils._create_test_gencode_v19_ds("out/test_hashcode_changes_when_tx_mode_changes_")

        # --- first seed ---
        ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
        seed_one = "dummy"
        ds.set_hashcode(seed_one)
        canonical_hash_one = ds.get_hashcode()
        self.assertTrue(canonical_hash_one != seed_one)

        ds.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)
        best_effect_hash_one = ds.get_hashcode()
        self.assertTrue(canonical_hash_one != best_effect_hash_one)

        ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
        restored_hash = ds.get_hashcode()
        self.assertTrue(restored_hash == canonical_hash_one)

        # --- second seed: the new seed itself must change the hash ---
        seed_two = "new_dummy"
        ds.set_hashcode(seed_two)
        canonical_hash_two = ds.get_hashcode()
        self.assertTrue(canonical_hash_two != canonical_hash_one)

        ds.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)
        best_effect_hash_two = ds.get_hashcode()
        self.assertTrue(canonical_hash_two != best_effect_hash_two)
        self.assertTrue(best_effect_hash_one != best_effect_hash_two)

        ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
        restored_hash = ds.get_hashcode()
        self.assertTrue(restored_hash == canonical_hash_two)
    def test_hashcode_changes_when_tx_mode_changes(self):
        """Check that the datasource hashcode follows the tx mode.

        Runs the same canonical -> best effect -> canonical round trip under
        two seeds ("dummy", then "new_dummy") and compares the hashes seen
        along the way.
        """
        ds = TestUtils._create_test_gencode_v19_ds("out/test_hashcode_changes_when_tx_mode_changes_")
        ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
        old_seed = "dummy"
        ds.set_hashcode(old_seed)

        # The stored hash is not just the seed handed in.
        hash_canonical_old = ds.get_hashcode()
        self.assertTrue(hash_canonical_old != old_seed)

        ds.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)
        hash_best_effect_old = ds.get_hashcode()
        self.assertTrue(hash_canonical_old != hash_best_effect_old)

        # Going back to canonical restores the first hash.
        ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
        self.assertTrue(ds.get_hashcode() == hash_canonical_old)

        new_seed = "new_dummy"
        ds.set_hashcode(new_seed)

        # Make sure the new seed changes the hash.
        hash_canonical_new = ds.get_hashcode()
        self.assertTrue(hash_canonical_new != hash_canonical_old)

        ds.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)
        hash_best_effect_new = ds.get_hashcode()
        self.assertTrue(hash_canonical_new != hash_best_effect_new)
        self.assertTrue(hash_best_effect_old != hash_best_effect_new)

        ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
        self.assertTrue(ds.get_hashcode() == hash_canonical_new)
 def test_appris_selects_transcript(self):
     """Annotate a chr2 variant (ref AC, alt -) and check the id of the selected transcript."""
     variant = MutationDataFactory.default_create(chr="2",
                                                  start="201722365",
                                                  end="201722366",
                                                  ref_allele="AC",
                                                  alt_allele="-",
                                                  build="hg19")
     transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
     annotated = transcript_ds.annotate_mutation(variant)
     selected_tx = transcript_ds.get_transcript(annotated['annotation_transcript'])
     self.assertTrue(
         selected_tx is not None,
         "Transcript was None when it should have been found.  Does the ground truth transcript above need to be updated?")
     self.assertEqual(selected_tx._transcript_id, 'ENST00000321356.4')
    def test_overlapping_gene_5flank(self):
        """Test that a transcript is collected on its 5' flank when padding is given.

        chr22:22222050 with a padding of 100 must yield exactly one
        transcript; chr22:22224920 without padding must yield none.
        """
        ds = TestUtils._create_test_gencode_ds("out/overlapping_genes_flank")

        padded_txs = ds.get_overlapping_transcripts("22", 22222050, 22222050, padding=100)
        self.assertTrue(len(padded_txs) == 1)
        only_tx = padded_txs[0]
        self.assertTrue(only_tx.get_transcript_id() == "ENST00000398822.3")

        unpadded_txs = ds.get_overlapping_transcripts("22", 22224920, 22224920)
        self.assertTrue(len(unpadded_txs) == 0)
Example #13
0
 def test_check_for_appris_tag(self):
     """Test that a transcript with an appris tag returns the right rank.

     Exactly one transcript is expected to overlap chr22:22222050 (padding of
     100), and its appris rank is expected to be 0.
     """
     ds = TestUtils._create_test_gencode_v19_ds("out/appris_tag")
     txs = ds.get_overlapping_transcripts("22", 22222050, 22222050, padding=100)
     # assertEqual reports the actual count on failure, unlike assertTrue(len(txs) == 1).
     self.assertEqual(len(txs), 1)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(ds._get_appris_rank(txs[0]), 0)
    def test_overlapping_gene_5flank(self):
        """Test collecting an overlapping transcript on its 5' flank.

        With a padding of 100, chr22:22222050 must return one transcript with
        id ENST00000398822.3.  Without padding, chr22:22224920 must return
        nothing.
        """
        ds = TestUtils._create_test_gencode_v19_ds("out/overlapping_genes_flank")

        flank_hits = ds.get_overlapping_transcripts("22", 22222050, 22222050, padding=100)
        self.assertTrue(len(flank_hits) == 1)
        hit_id = flank_hits[0].get_transcript_id()
        self.assertTrue(hit_id == "ENST00000398822.3")

        no_hits = ds.get_overlapping_transcripts("22", 22224920, 22224920)
        self.assertTrue(len(no_hits) == 0)
    def test_protein_position_off_by_one(self, chrom, start, end, ref, alt, gt_prot_change):
        """Annotate one variant with a custom canonical tx list and check its protein change.

        The custom canonical list is read from
        testdata/tx_exact_uniprot_matches.txt (each line cut at its last "."),
        plus one extra transcript id.

        :param chrom: value assigned to the mutation's ``chr``
        :param start: value assigned to the mutation's ``start``
        :param end: value assigned to the mutation's ``end``
        :param ref: value assigned to the mutation's ``ref_allele``
        :param alt: value assigned to the mutation's ``alt_allele``
        :param gt_prot_change: expected value of the ``protein_change`` annotation
        """
        config = TestUtils.createUnitTestConfig()
        transcript_ds = TestUtils.createTranscriptProviderDatasource(config)
        # open() in a with-block replaces the Python-2-only file() builtin and
        # guarantees the handle is closed even if reading raises.
        with open("testdata/tx_exact_uniprot_matches.txt", 'r') as cc_txs_fp:
            cc_txs = [tx.rsplit(".", 1)[0] for tx in cc_txs_fp]
        cc_txs.append("ENST00000338368") # Add a transcript that is not exactly the same, but close
        transcript_ds.set_custom_canonical_txs(cc_txs)
        m = MutationData()
        m.chr = chrom
        m.start = start
        m.end = end
        m.ref_allele = ref
        m.alt_allele = alt

        m2 = transcript_ds.annotate_mutation(m)

        self.assertEqual(m2['protein_change'], gt_prot_change)
    def test_protein_position_off_by_one(self, chrom, start, end, ref, alt, gt_prot_change):
        """Annotate one variant with a custom canonical tx list and check its protein change.

        The custom canonical list is read from
        testdata/tx_exact_uniprot_matches.txt (each line cut at its last "."),
        plus one extra transcript id.

        :param chrom: value assigned to the mutation's ``chr``
        :param start: value assigned to the mutation's ``start``
        :param end: value assigned to the mutation's ``end``
        :param ref: value assigned to the mutation's ``ref_allele``
        :param alt: value assigned to the mutation's ``alt_allele``
        :param gt_prot_change: expected value of the ``protein_change`` annotation
        """
        config = TestUtils.createUnitTestConfig()
        transcript_ds = TestUtils.createTranscriptProviderDatasource(config)
        # open() in a with-block replaces the Python-2-only file() builtin and
        # guarantees the handle is closed even if reading raises.
        with open("testdata/tx_exact_uniprot_matches.txt", 'r') as cc_txs_fp:
            cc_txs = [tx.rsplit(".", 1)[0] for tx in cc_txs_fp]
        cc_txs.append("ENST00000338368") # Add a transcript that is not exactly the same, but close
        transcript_ds.set_custom_canonical_txs(cc_txs)
        m = MutationDataFactory.default_create()
        m.chr = chrom
        m.start = start
        m.end = end
        m.ref_allele = ref
        m.alt_allele = alt

        m2 = transcript_ds.annotate_mutation(m)

        self.assertEqual(m2['protein_change'], gt_prot_change)
Example #17
0
 def test_not_5_prime_flank_annotation_positive_strand(self):
     """Annotate a chr3 C>A SNP at 180625088 and expect the classification "IGR"."""
     snp = MutationDataFactory.default_create(chr="3", start="180625088", end="180625088",
                                              ref_allele="C", alt_allele="A", build="hg19")
     transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
     annotated = transcript_ds.annotate_mutation(snp)
     classification = annotated['variant_classification']
     self.assertEqual(classification, "IGR")
Example #18
0
 def test_3_prime_flank_annotation_negative_strand(self):
     """Annotate a chr5 A>T SNP at 1253255 and expect the classification "3'Flank"."""
     snp = MutationDataFactory.default_create(chr="5", start="1253255", end="1253255",
                                              ref_allele="A", alt_allele="T", build="hg19")
     transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
     annotated = transcript_ds.annotate_mutation(snp)
     classification = annotated['variant_classification']
     self.assertEqual(classification, "3'Flank")
Example #19
0
 def test_single_sample_onp_combiner(self):
     """Test that an ONP-combined TCGA maf can be created without crashing.

     Runs a MAFLITE -> TCGAMAF annotation with INFER_ONPS switched on; the
     test makes no assertions and passes when nothing raises.
     """
     maflite_in = 'testdata/maflite/onp.singlesample.maf.txt'
     maf_out = 'out/testSingleSampleOnpCombiner.maf'
     db_dir = TestUtils.createUnitTestConfig().get('DEFAULT', "dbDir")
     onp_opts = {OptionConstants.INFER_ONPS: True}
     run_spec = RunSpecificationFactory.create_run_spec("MAFLITE", "TCGAMAF", maflite_in, maf_out,
                                                        datasourceDir=db_dir, other_opts=onp_opts)
     annotator = Annotator()
     annotator.initialize(run_spec)
     annotator.annotate()
Example #20
0
 def test_single_sample_onp_combiner(self):
     """Test that an ONP-combined TCGA maf can be created without crashing.

     Runs a MAFLITE -> TCGAMAF annotation with INFER_ONPS switched on; the
     test makes no assertions and passes when nothing raises.
     """
     maflite_in = 'testdata/maflite/onp.singlesample.maf.txt'
     maf_out = 'out/testSingleSampleOnpCombiner.maf'
     db_dir = TestUtils.createUnitTestConfig().get('DEFAULT', "dbDir")
     onp_opts = {OptionConstants.INFER_ONPS: True}
     run_spec = RunSpecificationFactory.create_run_spec("MAFLITE", "TCGAMAF", maflite_in, maf_out,
                                                        datasource_dir=db_dir, other_opts=onp_opts)
     annotator = Annotator()
     annotator.initialize(run_spec)
     annotator.annotate()
    def test_basic_tag_filtering(self):
        """Test the "basic" transcript filter on one transcript, with and without its 'tag' attribute."""
        tx_filter = TranscriptFilterFactory.create_instance("basic")
        ensembl_ds = TestUtils._create_test_gencode_v19_ds("out/basic_tag_filter_ensembl_ds")
        tx = ensembl_ds.getTranscriptDict()["ENST00000215832.6"]

        # As loaded, the transcript is expected to pass the filter.
        kept = tx_filter.filter([tx])
        self.assertTrue(len(kept) == 1)

        # Once 'tag' is removed from its other attributes, it is expected to be filtered out.
        tx.get_other_attributes().pop('tag', None)
        kept = tx_filter.filter([tx])
        self.assertTrue(len(kept) == 0)
Example #22
0
    def test_basic_tag_filtering(self):
        """Run the "basic" transcript filter before and after stripping a transcript's 'tag' attribute."""
        basic_filter = TranscriptFilterFactory.create_instance("basic")

        ds = TestUtils._create_test_gencode_v19_ds("out/basic_tag_filter_ensembl_ds")
        transcripts_by_id = ds.getTranscriptDict()
        target_tx = transcripts_by_id["ENST00000215832.6"]

        num_kept_with_tag = len(basic_filter.filter([target_tx]))
        self.assertTrue(num_kept_with_tag == 1)

        other_attributes = target_tx.get_other_attributes()
        other_attributes.pop('tag', None)

        num_kept_without_tag = len(basic_filter.filter([target_tx]))
        self.assertTrue(num_kept_without_tag == 0)
 def test_hgvs_annotations_IGR(self):
     """Test that the HGVS annotations appear for an IGR mutation.

     The genomic change must be rendered; the coding DNA and protein changes
     must be empty strings.
     """
     ds = TestUtils._create_test_gencode_ds("out/test_hgvs_annotations_IGR_")
     m = MutationData()
     input_annotations = [
         ('variant_type', 'SNP'),
         ('build', 'hg19'),
         ('variant_classification', 'IGR'),
         ('chr', '15'),
         ('start', 30938316),
         ('end', 30938316),
         ('ref_allele', 'G'),
         ('alt_allele', 'A'),
     ]
     for name, value in input_annotations:
         m.createAnnotation(name, value)
     m2 = ds.annotate_mutation(m)
     expected_hgvs = [
         ('HGVS_genomic_change', 'chr15.hg19:g.30938316G>A'),
         ('HGVS_coding_DNA_change', ''),
         ('HGVS_protein_change', ''),
     ]
     for name, expected in expected_hgvs:
         self.assertEqual(m2.get(name, None), expected)
    def test_hgvs_annotations_simple_SNP(self):
        """Test that HGVS annotations (incl. protein change) appear for a simple SNP.

        Seeing them is taken as evidence that the Transcript objects are
        populated properly.
        """
        ds = TestUtils._create_test_gencode_ds("out/test_hgvs_annotations_")

        # Negative strand case
        snp = MutationData()
        snp.chr = "22"
        snp.start = "22221730"
        snp.end = "22221730"
        snp.ref_allele = "T"
        snp.alt_allele = "G"
        snp.build = "hg19"
        annotated = ds.annotate_mutation(snp)

        genomic_change = annotated.get('HGVS_genomic_change', None)
        self.assertEqual(genomic_change, 'chr22.hg19:g.22221730T>G')
        coding_change = annotated.get('HGVS_coding_DNA_change', None)
        self.assertEqual(coding_change, 'ENST00000215832.6:c.1A>C')
        protein_change = annotated.get('HGVS_protein_change', None)
        self.assertEqual(protein_change, 'ENSP00000215832:p.Met1Leu')
    def test_retrieve_transcript_by_gene(self):
        """Simple test of retrieve_transcripts_by_gene using MAPK1.

        Expects more than two transcripts, two specific transcript ids among
        them, and every returned transcript to report the queried gene.
        """
        gene = "MAPK1"
        ds = TestUtils._create_test_gencode_v19_ds("out/test_retrieve_transcript_by_gene_")

        gene_txs = ds.retrieve_transcripts_by_gene(gene)
        self.assertTrue(len(gene_txs) > 2)

        found_ids = []
        for gene_tx in gene_txs:
            found_ids.append(gene_tx.get_transcript_id())

        self.assertTrue("ENST00000398822.3" in found_ids,
                        "ENST00000398822.3 not in gene %s -- is the version number correct?" % gene)
        self.assertTrue("ENST00000215832.6" in found_ids,
                        "ENST00000215832.6 not in gene %s -- is the version number correct?" % gene)

        for gene_tx in gene_txs:
            self.assertTrue(gene_tx.get_gene() == gene)
 def test_hgvs_annotations_IGR(self):
     """Test that the HGVS annotations appear for an IGR mutation.

     The genomic change must be rendered; the coding DNA and protein changes
     must be empty strings.
     """
     ds = TestUtils._create_test_gencode_v19_ds("out/test_hgvs_annotations_IGR_")
     m = MutationData()
     input_annotations = (
         ('variant_type', 'SNP'),
         ('build', 'hg19'),
         ('variant_classification', 'IGR'),
         ('chr', '15'),
         ('start', 30938316),
         ('end', 30938316),
         ('ref_allele', 'G'),
         ('alt_allele', 'A'),
     )
     for name, value in input_annotations:
         m.createAnnotation(name, value)
     annotated = ds.annotate_mutation(m)

     genomic_change = annotated.get('HGVS_genomic_change', None)
     self.assertEqual(genomic_change, 'chr15.hg19:g.30938316G>A')
     coding_change = annotated.get('HGVS_coding_DNA_change', None)
     self.assertEqual(coding_change, '')
     protein_change = annotated.get('HGVS_protein_change', None)
     self.assertEqual(protein_change, '')
Example #27
0
 def test_appris_ccds_tag(self):
     """Annotate a chr1 C>A SNP at 200818757 and check the id of the selected transcript."""
     snp = MutationDataFactory.default_create(chr="1", start="200818757", end="200818757",
                                              ref_allele="C", alt_allele="A", build="hg19")
     transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
     annotated = transcript_ds.annotate_mutation(snp)
     selected_tx = transcript_ds.get_transcript(annotated['annotation_transcript'])
     self.assertTrue(
         selected_tx is not None,
         "Transcript was None when it should have been found.  Does the ground truth transcript above need to be updated?")
     self.assertEqual(selected_tx._transcript_id, 'ENST00000358823.2')
    def test_retrieve_transcript_by_gene(self):
        """Look up the transcripts of MAPK1 via retrieve_transcripts_by_gene.

        More than two transcripts must come back, including two known ids,
        and each one must report MAPK1 as its gene.
        """
        gene = "MAPK1"

        datasource = TestUtils._create_test_gencode_v19_ds("out/test_retrieve_transcript_by_gene_")
        mapk1_txs = datasource.retrieve_transcripts_by_gene(gene)
        num_txs = len(mapk1_txs)
        self.assertTrue(num_txs > 2)

        ids_seen = [t.get_transcript_id() for t in mapk1_txs]
        has_first = "ENST00000398822.3" in ids_seen
        self.assertTrue(has_first, "ENST00000398822.3 not in gene %s -- is the version number correct?" % gene)
        has_second = "ENST00000215832.6" in ids_seen
        self.assertTrue(has_second, "ENST00000215832.6 not in gene %s -- is the version number correct?" % gene)

        for t in mapk1_txs:
            self.assertTrue(t.get_gene() == gene)
    def test_hgvs_annotations_simple_SNP(self):
        """Test that HGVS annotations (incl. protein change) appear for a simple SNP.

        Seeing them is taken as evidence that the Transcript objects are
        populated properly.
        """
        ds = TestUtils._create_test_gencode_v19_ds("out/test_hgvs_annotations_SNP_")

        # Negative strand case
        snp = MutationData()
        snp.chr = "22"
        snp.start = "22221730"
        snp.end = "22221730"
        snp.ref_allele = "T"
        snp.alt_allele = "G"
        snp.build = "hg19"
        annotated = ds.annotate_mutation(snp)

        expected_hgvs = [
            ('HGVS_genomic_change', 'chr22.hg19:g.22221730T>G'),
            ('HGVS_coding_DNA_change', 'ENST00000215832.6:c.1A>C'),
            ('HGVS_protein_change', 'ENSP00000215832:p.Met1Leu'),
        ]
        for name, expected in expected_hgvs:
            self.assertEqual(annotated.get(name, None), expected)
    def test_no_mapping_file(self):
        """Test that we can still create (from scratch) and instantiate a EnsemblDatasource when no protein mapping is specified (i.e. limited HGVS support)

        The datasource is created with protein_id_mapping_file=None.  The
        genomic and coding DNA changes are expected as usual, while the
        protein change is expected to carry the 'unknown_prot_seq_id' prefix.
        """
        # The stray second docstring literal (copied from the simple-SNP test) was removed:
        # only the first string in a function body is its docstring.
        ds = TestUtils._create_test_gencode_v19_ds("out/test_hgvs_annotations_no_mapping_file_", protein_id_mapping_file=None)

        # Now for a negative strand
        m = MutationData()
        m.chr = "22"
        m.start = "22221730"
        m.end = "22221730"
        m.ref_allele = "T"
        m.alt_allele = "G"
        m.build = "hg19"
        m2 = ds.annotate_mutation(m)
        self.assertEqual(m2.get('HGVS_genomic_change', None), 'chr22.hg19:g.22221730T>G')
        self.assertEqual(m2.get('HGVS_coding_DNA_change', None), 'ENST00000215832.6:c.1A>C')
        self.assertEqual(m2.get('HGVS_protein_change', None), 'unknown_prot_seq_id:p.Met1Leu')
    def test_no_mapping_file(self):
        """Test that we can still create (from scratch) and instantiate a EnsemblDatasource when no protein mapping is specified (i.e. limited HGVS support)

        The datasource is created with protein_id_mapping_file=None.  The
        genomic and coding DNA changes are expected as usual, while the
        protein change is expected to carry the 'unknown_prot_seq_id' prefix.
        """
        # The stray second docstring literal (copied from the simple-SNP test) was removed:
        # only the first string in a function body is its docstring.
        ds = TestUtils._create_test_gencode_ds("out/test_hgvs_annotations_no_mapping_", protein_id_mapping_file=None)

        # Now for a negative strand
        m = MutationData()
        m.chr = "22"
        m.start = "22221730"
        m.end = "22221730"
        m.ref_allele = "T"
        m.alt_allele = "G"
        m.build = "hg19"
        m2 = ds.annotate_mutation(m)
        self.assertEqual(m2.get('HGVS_genomic_change', None), 'chr22.hg19:g.22221730T>G')
        self.assertEqual(m2.get('HGVS_coding_DNA_change', None), 'ENST00000215832.6:c.1A>C')
        self.assertEqual(m2.get('HGVS_protein_change', None), 'unknown_prot_seq_id:p.Met1Leu')
    def test_canonical_tx_list_empty(self):
        """Test that not specifying the canonical list will do nothing.

        The same mutation is annotated twice: once with no custom canonical
        list set at all, and once after explicitly setting an empty list.
        Neither run may select ENST00000544786.
        """
        # Use an output prefix of its own: "out/test_canonical_tx_list_" is also used by
        # test_canonical_tx_list, so the two tests would share the same output files.
        ds = TestUtils._create_test_gencode_v19_ds("out/test_canonical_tx_list_empty_")
        m = MutationData()
        m.chr = "22"
        m.start = "22142650"
        m.end = "22142650"
        m.ref_allele = "T"
        m.alt_allele = "A"

        m2 = ds.annotate_mutation(m)
        self.assertFalse(m2['annotation_transcript'].startswith("ENST00000544786"))
        self.assertFalse(m2['variant_classification'] == VariantClassification.INTRON)

        ds.set_custom_canonical_txs([])
        m2 = ds.annotate_mutation(m)
        self.assertTrue(m2['variant_classification'] == VariantClassification.MISSENSE)
        self.assertFalse(m2['annotation_transcript'].startswith("ENST00000544786"))
    def test_canonical_tx_list_empty(self):
        """Test that leaving the custom canonical list unset, or setting it to empty, does nothing.

        Neither annotation run may select ENST00000544786.
        """
        ds = TestUtils._create_test_gencode_v19_ds("out/test_canonical_tx_list_empty_")
        mut = MutationDataFactory.default_create()
        mut.chr = "22"
        mut.start = "22142650"
        mut.end = "22142650"
        mut.ref_allele = "T"
        mut.alt_allele = "A"

        # No custom canonical list has been set at this point.
        unset_result = ds.annotate_mutation(mut)
        unset_tx = unset_result['annotation_transcript']
        self.assertFalse(unset_tx.startswith("ENST00000544786"))
        self.assertFalse(unset_result['variant_classification'] == VariantClassification.INTRON)

        # An explicitly empty list.
        ds.set_custom_canonical_txs([])
        empty_result = ds.annotate_mutation(mut)
        self.assertTrue(empty_result['variant_classification'] == VariantClassification.MISSENSE)
        empty_tx = empty_result['annotation_transcript']
        self.assertFalse(empty_tx.startswith("ENST00000544786"))
Example #34
0
 def test_onp_combiner_snp_then_multiallelic(self):
     """Test that reading a SNP followed by a multiallelic site from a VCF does not crash.

     Runs a VCF -> TCGAMAF annotation with ONP inference and number-annotation
     collapsing enabled; there are no assertions, so the test passes when
     nothing raises.
     """
     vcf_in = 'testdata/vcf/infer_onp_fail_snp_then_multiallelic.vcf'
     maf_out = 'out/testSNPThenMultiallelic.maf.annotated'
     db_dir = TestUtils.createUnitTestConfig().get('DEFAULT', "dbDir")
     extra_opts = {
         OptionConstants.INFER_ONPS: True,
         OptionConstants.COLLAPSE_NUMBER_ANNOTATIONS: True,
     }
     run_spec = RunSpecificationFactory.create_run_spec("VCF", "TCGAMAF", vcf_in, maf_out,
                                                        datasource_dir=db_dir,
                                                        is_skip_no_alts=True,
                                                        other_opts=extra_opts)
     annotator = Annotator()
     annotator.initialize(run_spec)
     annotator.annotate()
    def test_canonical_tx_list(self):
        """Test that a custom canonical list actually changes the selected transcript."""
        datasource = TestUtils._create_test_gencode_v19_ds("out/test_canonical_tx_list_")
        snp = MutationDataFactory.default_create()
        snp.chr = "22"
        snp.start = "22142650"
        snp.end = "22142650"
        snp.ref_allele = "T"
        snp.alt_allele = "A"
        datasource.set_custom_canonical_txs(["ENST00000544786"])
        datasource.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)

        # NOTE: the tx list wins over best effect mode.
        listed = datasource.annotate_mutation(snp)
        listed_tx_id = listed['annotation_transcript']
        self.assertTrue(listed_tx_id.startswith("ENST00000544786"))
        listed_class = listed['variant_classification']
        self.assertTrue(listed_class == VariantClassification.INTRON)

        datasource.set_custom_canonical_txs([])
        unlisted = datasource.annotate_mutation(snp)
        unlisted_class = unlisted['variant_classification']
        self.assertTrue(unlisted_class == VariantClassification.MISSENSE)
        unlisted_tx_id = unlisted['annotation_transcript']
        self.assertFalse(unlisted_tx_id.startswith("ENST00000544786"))
    def test_small_positive_strand_transcript_change(self):
        """Test one location per strand and make sure the transcript change renders properly."""
        ds = TestUtils._create_test_gencode_ds("out/small_positive_strand_")

        # Negative strand: chr22 T>G
        neg_mut = MutationData()
        neg_mut.chr = "22"
        neg_mut.start = "22221730"
        neg_mut.end = "22221730"
        neg_mut.ref_allele = "T"
        neg_mut.alt_allele = "G"
        neg_result = ds.annotate_mutation(neg_mut)
        neg_ok = neg_result['transcript_change'] == "c.1A>C"
        self.assertTrue(neg_ok, "Incorrect transcript change: " + neg_result['transcript_change'])

        # Positive strand: chr3 G>T
        pos_mut = MutationData()
        pos_mut.chr = "3"
        pos_mut.start = "178916614"
        pos_mut.end = "178916614"
        pos_mut.ref_allele = "G"
        pos_mut.alt_allele = "T"
        pos_result = ds.annotate_mutation(pos_mut)
        pos_ok = pos_result['transcript_change'] == "c.1G>T"
        self.assertTrue(pos_ok, "Incorrect transcript change: " + pos_result['transcript_change'])
 def setUp(self):
     """Create the unit-test config once per test and keep it on ``self.config``."""
     unit_test_config = TestUtils.createUnitTestConfig()
     self.config = unit_test_config
Example #38
0
import unittest
import os
from oncotator.input.OnpQueue import OnpQueue

from oncotator.utils.RunSpecificationFactory import RunSpecificationFactory
from test.TestUtils import TestUtils
from oncotator.input.MafliteInputMutationCreator import MafliteInputMutationCreator
from oncotator.input.OnpCombiner import OnpCombiner
from oncotator.MutationData import MutationData
from oncotator.Annotator import Annotator
from oncotator.utils.OptionConstants import OptionConstants

__author__ = 'louisb'

TestUtils.setupLogging(__file__, __name__)


class OnpCombinerTest(unittest.TestCase):

    def test_output_order(self):
        """Test that indels are not output out of order.

        The same list is passed as both arguments of the ordered-combiner
        helper.
        """
        first = (1, 1, 1, "C", "G", "hg19")
        second = (1, 2, 2, "-", "T", "hg19")
        third = (1, 3, 3, "A", "G", "hg19")
        records = [first, second, third]
        self._onp_ordered_combiner_test(records, records)

    def test_output_order_chrom_boundry(self):
        """Test that indels are not output out of order"""
        inputs = [(1, 1, 1, "C", "G", "hg19"),
                    (2, 2, 2, "A", "G", "hg19"),
 def test_check_for_appris_tag(self):
     """Test that a transcript with an appris tag returns the right rank.

     Exactly one transcript is expected to overlap chr22:22222050 (padding of
     100), and its appris rank is expected to be 0.
     """
     ds = TestUtils._create_test_gencode_v19_ds("out/appris_tag")
     txs = ds.get_overlapping_transcripts("22", 22222050, 22222050, padding=100)
     # assertEqual reports the actual count on failure, unlike assertTrue(len(txs) == 1).
     self.assertEqual(len(txs), 1)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(ds._get_appris_rank(txs[0]), 0)
 def setUp(self):
     """Store a fresh unit-test config on ``self.config`` before each test."""
     unit_test_config = TestUtils.createUnitTestConfig()
     self.config = unit_test_config
import unittest
from oncotator.DuplicateAnnotationException import DuplicateAnnotationException
from oncotator.MutationData import MutationData
from oncotator.MutationDataFactory import MutationDataFactory
from test.TestUtils import TestUtils

TestUtils.setupLogging(__file__, __name__)


class MutationFactoryTest(unittest.TestCase):

    _multiprocess_can_split_ = True

    def test_annotation_overwriting_on(self):
        """Test that a factory created with allow_overwriting=True yields a mutation that can be re-annotated.

        The same annotation name is written twice; no exception may be
        thrown and the second value must be the one read back.
        """
        factory = MutationDataFactory(allow_overwriting=True)
        mutation = factory.create()

        for value in ("123", "456"):
            mutation.createAnnotation("blah", value)
            self.assertTrue(mutation['blah'] == value)

    def test_annotation_overwriting_off(self):
        """Test that the factory can produce a mutation that does not allow overwriting.  Make sure DuplicateAnnotationException is thrown."""
        # Build the mutation through a factory created with allow_overwriting=False.
        mdf = MutationDataFactory(allow_overwriting=False)
        mut = mdf.create()

        # The first annotation under a new name is created and read back.
        mut.createAnnotation("blah", "123")
        self.assertTrue(mut['blah'] == "123")
        # NOTE(review): the visible part of this test stops here, before any check that a second
        # createAnnotation("blah", ...) raises DuplicateAnnotationException -- confirm the rest exists.
 def test_not_5_prime_flank_annotation_positive_strand(self):
     """Annotate a chr3 C>A SNP at 180625088 and expect the classification "IGR"."""
     snp = MutationDataFactory.default_create(chr="3",
                                              start="180625088",
                                              end="180625088",
                                              ref_allele="C",
                                              alt_allele="A",
                                              build="hg19")
     datasource = TestUtils.createTranscriptProviderDatasource(self.config)
     annotated = datasource.annotate_mutation(snp)
     self.assertEqual(annotated['variant_classification'], "IGR")
 def test_check_for_missing_appris_tag(self):
     """Check that the correct value is returned for a site with no appris tag.

     Looks up the transcripts overlapping chr16:61556 (padding of 100) and
     expects the appris rank of the first one to be
     TranscriptProviderUtils.NO_APPRIS_VALUE.
     """
     ds = TestUtils._create_test_gencode_v19_ds("out/appris_no_tag")
     txs = ds.get_overlapping_transcripts("16", 61556, 61556, padding=100)
     self.assertTrue(len(txs) > 0)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(ds._get_appris_rank(txs[0]), TranscriptProviderUtils.NO_APPRIS_VALUE)
 def test_3_prime_flank_annotation_negative_strand(self):
     """Annotate a chr5 A>T SNP at 1253255 and expect the classification "3'Flank"."""
     snp = MutationDataFactory.default_create(chr="5",
                                              start="1253255",
                                              end="1253255",
                                              ref_allele="A",
                                              alt_allele="T",
                                              build="hg19")
     datasource = TestUtils.createTranscriptProviderDatasource(self.config)
     annotated = datasource.annotate_mutation(snp)
     self.assertEqual(annotated['variant_classification'], "3'Flank")
 def test_overlapping_multiple_genes(self):
     """Test that multiple overlapping genes can be collected.

     Both MAPK1 and YPEL1 must be among the genes overlapping
     chr22:22080000-22120000.
     """
     ds = TestUtils._create_test_gencode_ds("out/overlapping_genes_multiple_")
     genes = ds.get_overlapping_genes("22", 22080000, 22120000)
     expected_genes = set(["MAPK1", "YPEL1"])
     missing_genes = expected_genes - genes
     self.assertTrue(len(missing_genes) == 0)
 def test_overlapping_gene(self):
     """Test that an overlapping gene can be collected.

     MAPK1 must be among the genes overlapping chr22:22115000-22120000.
     """
     ds = TestUtils._create_test_gencode_ds("out/overlapping_genes_")
     genes = ds.get_overlapping_genes("22", 22115000, 22120000)
     expected_genes = set(["MAPK1"])
     missing_genes = expected_genes - genes
     self.assertTrue(len(missing_genes) == 0)
 def test_overlapping_multiple_genes(self):
     """Test that multiple overlapping genes can be collected.

     Both MAPK1 and YPEL1 must be among the genes overlapping
     chr22:22080000-22120000.
     """
     ds = TestUtils._create_test_gencode_v19_ds("out/overlapping_genes_multiple_")
     overlapping = ds.get_overlapping_genes("22", 22080000, 22120000)
     wanted = {"MAPK1", "YPEL1"}
     not_found = wanted - overlapping
     self.assertTrue(len(not_found) == 0)
 def test_overlapping_gene(self):
     """Test that an overlapping gene can be collected.

     MAPK1 must be among the genes overlapping chr22:22115000-22120000.
     """
     ds = TestUtils._create_test_gencode_v19_ds("out/overlapping_genes_")
     overlapping = ds.get_overlapping_genes("22", 22115000, 22120000)
     wanted = {"MAPK1"}
     not_found = wanted - overlapping
     self.assertTrue(len(not_found) == 0)