Ejemplo n.º 1
0
    def test_effect_tx_mode(self):
        """Annotate one chr2 SNV under two transcript modes and compare the classifications.

        The same position is annotated first in best-effect mode (expected
        Missense_Mutation) and then in canonical mode (expected Intron).
        """
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)

        canonical_msg = "Canonical no longer is Intron.  This test is no longer valid.  This failure can come up when changing the GAF datasource."
        # (transcript mode, expected variant classification, failure message)
        cases = [
            (TranscriptProvider.TX_MODE_BEST_EFFECT, "Missense_Mutation", None),
            # Canonical mutation was Intron
            (TranscriptProvider.TX_MODE_CANONICAL, "Intron", canonical_msg),
        ]
        for tx_mode, expected_vc, failure_msg in cases:
            gafDatasource.set_tx_mode(tx_mode)

            m = MutationDataFactory.default_create()
            m.chr = '2'
            m.start = '219137340'
            m.end = '219137340'
            m.ref_allele = 'G'
            m.alt_allele = 'T'
            m = gafDatasource.annotate_mutation(m)

            # assertEqual reports both operands on failure, unlike assertTrue(a == b).
            self.assertEqual(m['gene'], "PNKD")
            self.assertEqual(m['variant_classification'], expected_vc, failure_msg)
Ejemplo n.º 2
0
    def testRealWorld(self):
        """Test that the full COSMIC datasource can retrieve entries by both gp and gpp."""
        gafDS = TestUtils.createTranscriptProviderDatasource(self.config)
        cosmicDS = TestUtils.createCosmicDatasource(self.config)

        # These values are not taken from a real world scenario, but are cooked for this test.

        m = MutationDataFactory.default_create()
        m.chr = '1'
        m.start = '12941796'
        m.end = '12941796'
        m.ref_allele = "G"
        m.alt_allele = "T"
        m = gafDS.annotate_mutation(m)
        m = cosmicDS.annotate_mutation(m)

        # assertEqual shows the actual count on failure, unlike assertTrue(a == b).
        self.assertEqual(m['COSMIC_n_overlapping_mutations'], '0')

        # chr1, start 150483621, end 150483621
        m = MutationDataFactory.default_create()
        m.chr = '1'
        m.start = '150483621'
        m.end = '150483621'
        m.ref_allele = "G"
        m.alt_allele = "T"
        m = gafDS.annotate_mutation(m)
        m = cosmicDS.annotate_mutation(m)
        # NOTE(review): nothing is asserted about this second mutation; as written
        # it only verifies that annotation completes without raising.
    def test_continuous_exons_in_segments(self):
        """Test that all exons are accounted when annotating adjacent segments that skip an exon. """
        # Sample segment records this test was derived from:
        # SPECC1L 10+      22 24734447 SPECC1L 10+ 41783674 TEF     1- 1215.0 -0.04975556624325125 hg19 CESC.TCGA.BI.A0VR.Tumor.SM.1RACM
        # SPECC1L 8-       22 16282318 POTEH   2-  24730543 SPECC1L 8- 433.0  -0.00781166374668759 hg19 CESC.TCGA.BI.A0VR.Tumor.SM.1RACM
        # SPECC1L-ADORA2A  22 24734447 SPECC1L 10+ 41783674 TEF     1- 1215.0 -0.04975556624325125 hg19 CESC.TCGA.BI.A0VR.Tumor.SM.1RACM

        seg1 = MutationDataFactory.default_create()
        seg1.chr = "22"
        seg1.start = "24734447"  # Just past exon 9 (0-based)
        seg1.end = "41783674"

        seg2 = MutationDataFactory.default_create()
        seg2.chr = "22"
        seg2.start = "16282318"
        seg2.end = "24730543"  # Just past exon 8 (0-based)

        # 'ENST00000314328.9' for GENCODE v19
        chosen_tx, transcript_ds = self._get_chosen_tx_and_transcript_ds(seg1.chr, seg1.start)

        # assertEqual prints the actual tuple on failure, unlike assertTrue(a == b).
        result_tuple = transcript_ds._determine_exons_affected_by_start(seg1.start, chosen_tx)
        self.assertEqual(result_tuple, (10, '+'))

        result_tuple = transcript_ds._determine_exons_affected_by_end(seg2.end, chosen_tx)
        self.assertEqual(result_tuple, (8, '-'))
Ejemplo n.º 4
0
    def test_simple_collapse(self):
        """Run ColumnCollapser.update_mutation on two mutations with "|"-separated values.

        The expected collapsed values are the ones asserted below; the
        'donotcollapse' annotation is expected to come back unchanged.
        """
        first = MutationDataFactory.default_create(chr="1", start="10000", end="10000")
        for name, raw in (('ALT_F2R1', "34|36"),
                          ('i_t_Foxog', ".509|.511"),
                          ('i_tumor_f', ".200|.210"),
                          ('hamilcar', "0|0"),
                          ('donotcollapse', "1|45")):
            first.createAnnotation(name, raw)

        second = MutationDataFactory.default_create(chr="1", start="10000", end="10000")
        for name, raw in (('ALT_F2R1', "36|38"),
                          ('i_t_Foxog', ".500|.510"),
                          ('i_tumor_f', ".100|.110"),
                          ('hamilcar', "0.01|0"),
                          ('barca', "0.02|0"),
                          ('donotcollapse', "100|4500")):
            second.createAnnotation(name, raw)

        collapser = ColumnCollapser()

        collapser.update_mutation(first)
        self.assertEqual(first['ALT_F2R1'], "34")
        # Numeric annotations are compared as floats so string formatting does not matter.
        for name, wanted in (('i_t_Foxog', ".510"),
                             ('i_tumor_f', ".205"),
                             ('hamilcar', "0")):
            self.assertEqual(float(first[name]), float(wanted))
        self.assertEqual(first['donotcollapse'], "1|45")

        collapser.update_mutation(second)
        self.assertEqual(second['ALT_F2R1'], "36")
        for name, wanted in (('i_t_Foxog', ".505"),
                             ('i_tumor_f', ".105"),
                             ('hamilcar', "0.005"),
                             ('barca', "0.01")):
            self.assertEqual(float(second[name]), float(wanted))
        self.assertEqual(second['donotcollapse'], "100|4500")
Ejemplo n.º 5
0
 def _simple_annotate(self, is_skip_no_alts):
     """Annotate two chr1 mutations with no datasources and count what comes back.

     Both mutations share the same coordinates and alleles; only the first
     one carries an "alt_allele_seen" annotation (set to "False").

     :param is_skip_no_alts: forwarded to RunSpecification.initialize.
     :return: number of mutations yielded by Annotator.annotate_mutations.
     """
     spec = RunSpecification()
     spec.initialize(None, None, datasources=[], is_skip_no_alts=is_skip_no_alts)

     # Initialize the annotator with the runspec
     annotator = Annotator()
     annotator.initialize(spec)

     mutations = []
     # None means "do not create the alt_allele_seen annotation" for that mutation.
     for alt_seen in ("False", None):
         mut = MutationDataFactory.default_create()
         mut.chr = "1"
         mut.start = "12941796"
         mut.end = "12941796"
         mut.alt_allele = "G"
         mut.ref_allele = "T"
         if alt_seen is not None:
             mut.createAnnotation("alt_allele_seen", alt_seen)
         mutations.append(mut)

     annotated = annotator.annotate_mutations(mutations)
     return sum(1 for _ in annotated)
Ejemplo n.º 6
0
    def test_effect_tx_mode(self):
        """Annotate one chr2 SNV under two transcript modes and compare the classifications.

        The same position is annotated first in best-effect mode (expected
        Missense_Mutation) and then in canonical mode (expected Intron).
        """
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)

        canonical_msg = "Canonical no longer is Intron.  This test is no longer valid.  This failure can come up when changing the GAF datasource."
        # (transcript mode, expected variant classification, failure message)
        cases = [
            (TranscriptProvider.TX_MODE_BEST_EFFECT, "Missense_Mutation", None),
            # Canonical mutation was Intron
            (TranscriptProvider.TX_MODE_CANONICAL, "Intron", canonical_msg),
        ]
        for tx_mode, expected_vc, failure_msg in cases:
            gafDatasource.set_tx_mode(tx_mode)

            m = MutationDataFactory.default_create()
            m.chr = '2'
            m.start = '219137340'
            m.end = '219137340'
            m.ref_allele = 'G'
            m.alt_allele = 'T'
            m = gafDatasource.annotate_mutation(m)

            # assertEqual reports both operands on failure, unlike assertTrue(a == b).
            self.assertEqual(m['gene'], "PNKD")
            self.assertEqual(m['variant_classification'], expected_vc,
                             failure_msg)
Ejemplo n.º 7
0
    def testMulticoreAnnotate(self):
        """Test a (too) simple annotating exercise from GAF on 2 cores"""
        gafDatasource = TestUtils.createGafDatasourceProxy(self.config)

        # Test pickling.  The context manager closes the handle even if dump()
        # raises; binary mode is valid for every pickle protocol.
        with open('out/testGAFPickle.pkl', 'wb') as pickle_fp:
            dump(gafDatasource, pickle_fp)

        # Two adjacent chr3 SNVs that differ only in position.
        muts = []
        for position in ('178866811', '178866812'):
            mut = MutationDataFactory.default_create()
            mut.chr = '3'
            mut.start = position
            mut.end = position
            mut.ref_allele = "A"
            mut.alt_allele = "C"
            mut.build = "hg19"
            muts.append(mut)

        p = LoggingPool(processes=2)
        try:
            result = p.map(annotate_mutation_global,
                           [(gafDatasource, mut) for mut in muts])
        finally:
            # Always shut the pool down so a failing map() does not leak it.
            p.close()
            p.join()

        for r in result:
            self.assertTrue("transcript_id" in r.keys())
            self.assertTrue("gene" in r.keys())
            self.assertEqual(r["gene"], "PIK3CA")
        self.assertNotEqual(result[0].start, result[1].start)
Ejemplo n.º 8
0
    def test_mutation_combiner(self):
        """Check the attributes and annotations of two adjacent mutations after combining them."""
        first = MutationDataFactory.default_create(
            chr=1, start=100, end=100, ref_allele="G", alt_allele="A")
        first.createAnnotation("SomeValue", "value1", "INPUT", "STRING",
                               "a value")

        second = MutationDataFactory.default_create(
            chr=1, start=101, end=101, ref_allele="C", alt_allele="T")
        second.createAnnotation("SomeValue", "value2", tags=["IT"])
        second.createAnnotation("AnotherValue", "5")

        factory = MutationDataFactory()
        combined = OnpQueue._combine_mutations([first, second], factory)

        # Expected outcome: one mutation spanning 100-101 with concatenated
        # alleles and the differing "SomeValue" entries joined by "|".
        wanted = MutationDataFactory.default_create(
            chr=1, start=100, end=101, ref_allele="GC", alt_allele="AT")
        wanted.createAnnotation("SomeValue", "value1|value2", "INPUT",
                                "STRING", "a value", tags=["IT"])
        wanted.createAnnotation("AnotherValue", "5")

        self.assertTrue(combined.attributesEqual(wanted))
        self.assertEqual(combined, wanted)
Ejemplo n.º 9
0
    def test_annotation_copy_collision(self):
        """Test that annotation copy will use the behavior of the mutation in case of collision due to suffix"""
        m1 = MutationDataFactory.default_create(chr="1",
                                                start="10000",
                                                end="10000")
        m1.createAnnotation('ALT_F2R1', "30|36", annotationSource="TEST")
        m1.createAnnotation('ALT_F2R1_full',
                            "going_to_be_overwritten",
                            annotationSource="TEST")

        # Default mutation: copying into the already-existing "_full" annotation
        # is expected to raise.
        cc = ColumnCollapser()
        try:
            cc.update_mutation(m1, copy_old_suffix="_full")
        except DuplicateAnnotationException:
            pass
        else:
            self.fail("Did not see duplicate annotation exception")

        # Mutation created with allow_overwriting=True: the collision is
        # expected to overwrite the "_full" annotation instead of raising.
        m1 = MutationDataFactory.default_create(chr="1",
                                                start="10000",
                                                end="10000",
                                                allow_overwriting=True)
        m1.createAnnotation('ALT_F2R1', "30|36", annotationSource="TEST")
        m1.createAnnotation('ALT_F2R1_full',
                            "going_to_be_overwritten",
                            annotationSource="TEST")
        cc = ColumnCollapser()
        cc.update_mutation(m1, copy_old_suffix="_full")
        self.assertEqual(m1['ALT_F2R1_full'], "30|36")
        self.assertEqual(m1['ALT_F2R1'], "30")
Ejemplo n.º 10
0
    def test_continuous_exons_in_segments(self):
        """Test that all exons are accounted when annotating adjacent segments that skip an exon. """
        # Sample segment records this test was derived from:
        # SPECC1L 10+      22 24734447 SPECC1L 10+ 41783674 TEF     1- 1215.0 -0.04975556624325125 hg19 CESC.TCGA.BI.A0VR.Tumor.SM.1RACM
        # SPECC1L 8-       22 16282318 POTEH   2-  24730543 SPECC1L 8- 433.0  -0.00781166374668759 hg19 CESC.TCGA.BI.A0VR.Tumor.SM.1RACM
        # SPECC1L-ADORA2A  22 24734447 SPECC1L 10+ 41783674 TEF     1- 1215.0 -0.04975556624325125 hg19 CESC.TCGA.BI.A0VR.Tumor.SM.1RACM

        seg1 = MutationDataFactory.default_create()
        seg1.chr = "22"
        seg1.start = "24734447"  # Just past exon 9 (0-based)
        seg1.end = "41783674"

        seg2 = MutationDataFactory.default_create()
        seg2.chr = "22"
        seg2.start = "16282318"
        seg2.end = "24730543"  # Just past exon 8 (0-based)

        # 'ENST00000314328.9' for GENCODE v19
        chosen_tx, transcript_ds = self._get_chosen_tx_and_transcript_ds(
            seg1.chr, seg1.start)

        # assertEqual prints the actual tuple on failure, unlike assertTrue(a == b).
        result_tuple = transcript_ds._determine_exons_affected_by_start(
            seg1.start, chosen_tx)
        self.assertEqual(result_tuple, (10, '+'))

        result_tuple = transcript_ds._determine_exons_affected_by_end(
            seg2.end, chosen_tx)
        self.assertEqual(result_tuple, (8, '-'))
Ejemplo n.º 11
0
 def _simple_annotate(self, is_skip_no_alts):
     """Annotate two chr1 mutations with no datasources and count what comes back.

     Both mutations share the same coordinates and alleles; only the first
     one carries an "alt_allele_seen" annotation (set to "False").

     :param is_skip_no_alts: forwarded to RunSpecification.initialize.
     :return: number of mutations yielded by Annotator.annotate_mutations.
     """
     spec = RunSpecification()
     spec.initialize(None, None, datasources=[], is_skip_no_alts=is_skip_no_alts)

     # Initialize the annotator with the runspec
     annotator = Annotator()
     annotator.initialize(spec)

     mutations = []
     # None means "do not create the alt_allele_seen annotation" for that mutation.
     for alt_seen in ("False", None):
         mut = MutationDataFactory.default_create()
         mut.chr = "1"
         mut.start = "12941796"
         mut.end = "12941796"
         mut.alt_allele = "G"
         mut.ref_allele = "T"
         if alt_seen is not None:
             mut.createAnnotation("alt_allele_seen", alt_seen)
         mutations.append(mut)

     annotated = annotator.annotate_mutations(mutations)
     return sum(1 for _ in annotated)
Ejemplo n.º 12
0
    def test_small_positive_strand_transcript_change(self):
        """Check that the transcript change renders properly for two single-base substitutions."""
        ds = TestUtils._create_test_gencode_v19_ds(
            "out/small_positive_strand_")

        # (chromosome, position, ref allele, alt allele, expected transcript change)
        cases = [
            # Negative strand: the expected change uses the complemented bases.
            ("22", "22221730", "T", "G", "c.1A>C"),
            # Positive strand
            ("3", "178916614", "G", "T", "c.1G>T"),
        ]
        for chrom, position, ref_allele, alt_allele, expected in cases:
            m = MutationDataFactory.default_create()
            m.chr = chrom
            m.start = position
            m.end = position
            m.ref_allele = ref_allele
            m.alt_allele = alt_allele
            m2 = ds.annotate_mutation(m)
            self.assertEqual(
                m2['transcript_change'], expected,
                "Incorrect transcript change: " + m2['transcript_change'])
Ejemplo n.º 13
0
    def test_mutation_combiner_ordering(self):
        """Check that combined annotation values keep the order of the input mutations."""
        first = MutationDataFactory.default_create(
            chr=1, start=100, end=100, ref_allele="G", alt_allele="A")
        first.createAnnotation("SomeDepth", "2")
        first.createAnnotation("AnotherDepth", "1")

        second = MutationDataFactory.default_create(
            chr=1, start=101, end=101, ref_allele="C", alt_allele="T")
        second.createAnnotation("SomeDepth", "1")
        second.createAnnotation("AnotherDepth", "2")

        factory = MutationDataFactory()
        combined = OnpQueue._combine_mutations([first, second], factory)

        # Expected values are "<first>|<second>" for each annotation.
        wanted = MutationDataFactory.default_create(
            chr=1, start=100, end=101, ref_allele="GC", alt_allele="AT")
        wanted.createAnnotation("SomeDepth", "2|1")
        wanted.createAnnotation("AnotherDepth", "1|2")

        self.assertTrue(combined.attributesEqual(wanted))
        self.assertEqual(combined, wanted)
Ejemplo n.º 14
0
    def test_mutation_combiner_identical_annotation(self):
        """Check that an annotation with the same value on every input is not repeated with "|"."""
        first = MutationDataFactory.default_create(
            chr=1, start=100, end=100, ref_allele="G", alt_allele="A")
        first.createAnnotation("SampleName", "John Doe")

        second = MutationDataFactory.default_create(
            chr=1, start=101, end=101, ref_allele="C", alt_allele="T")
        second.createAnnotation("SampleName", "John Doe")

        factory = MutationDataFactory()
        combined = OnpQueue._combine_mutations([first, second], factory)

        # The shared value is expected once, not as "John Doe|John Doe".
        wanted = MutationDataFactory.default_create(
            chr=1, start=100, end=101, ref_allele="GC", alt_allele="AT")
        wanted.createAnnotation("SampleName", "John Doe")

        self.assertTrue(combined.attributesEqual(wanted))
        self.assertEqual(combined, wanted)
Ejemplo n.º 15
0
    def test_denovo(self):
        """GAF de novo test.

        Annotates three chr22 mutations that share a start position and an
        empty ref allele, and checks the variant classification of each.
        """
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)

        # (end position, alt allele, expected variant classification)
        cases = [
            (22221737, 'CAT', 'De_novo_Start_OutOfFrame'),
            (22221740, 'AACATAA', 'De_novo_Start_OutOfFrame'),
            (22221739, 'ACATAA', 'De_novo_Start_InFrame'),
        ]
        for end, alt_allele, expected in cases:
            m = MutationDataFactory.default_create()
            m.start = str(22221735)
            m.end = str(end)
            m.chr = "22"
            m.ref_allele = ''
            m.alt_allele = alt_allele
            m = gafDatasource.annotate_mutation(m)
            # assertEqual shows the actual classification on failure.
            self.assertEqual(m['variant_classification'], expected)
Ejemplo n.º 16
0
    def test_denovo(self):
        """GAF de novo test.

        Annotates three chr22 mutations that share a start position and an
        empty ref allele, and checks the variant classification of each.
        """
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)

        # (end position, alt allele, expected variant classification)
        cases = [
            (22221737, 'CAT', 'De_novo_Start_OutOfFrame'),
            (22221740, 'AACATAA', 'De_novo_Start_OutOfFrame'),
            (22221739, 'ACATAA', 'De_novo_Start_InFrame'),
        ]
        for end, alt_allele, expected in cases:
            m = MutationDataFactory.default_create()
            m.start = str(22221735)
            m.end = str(end)
            m.chr = "22"
            m.ref_allele = ''
            m.alt_allele = alt_allele
            m = gafDatasource.annotate_mutation(m)
            # assertEqual shows the actual classification on failure.
            self.assertEqual(m['variant_classification'], expected)
Ejemplo n.º 17
0
    def testMulticoreAnnotate(self):
        """Test a (too) simple annotating exercise from GAF on 2 cores"""
        gafDatasource = TestUtils.createGafDatasourceProxy(self.config)

        # Test pickling.  The context manager closes the handle even if dump()
        # raises; binary mode is valid for every pickle protocol.
        with open('out/testGAFPickle.pkl', 'wb') as pickle_fp:
            dump(gafDatasource, pickle_fp)

        # Two adjacent chr3 SNVs that differ only in position.
        muts = []
        for position in ('178866811', '178866812'):
            mut = MutationDataFactory.default_create()
            mut.chr = '3'
            mut.start = position
            mut.end = position
            mut.ref_allele = "A"
            mut.alt_allele = "C"
            mut.build = "hg19"
            muts.append(mut)

        p = LoggingPool(processes=2)
        try:
            result = p.map(annotate_mutation_global,
                           [(gafDatasource, mut) for mut in muts])
        finally:
            # Always shut the pool down so a failing map() does not leak it.
            p.close()
            p.join()

        for r in result:
            self.assertTrue("transcript_id" in r.keys())
            self.assertTrue("gene" in r.keys())
            self.assertEqual(r["gene"], "PIK3CA")
        self.assertNotEqual(result[0].start, result[1].start)
Ejemplo n.º 18
0
    def testRetrievePrecedingBasesForInsertions(self):
        """Check MutUtils.retrievePrecedingBasesForInsertions for two insertions on ref "GTC".

        For each case the values returned by the helper are written back onto
        the mutation, and the updated start, end and alleles are then checked.
        """
        chrom = "1"
        start = 1234567
        end = 1234567  # incorrect, but doesn't matter for the purposes of testing
        ref_allele = "GTC"
        build = "19"

        # (input alt allele, expected updated alt allele)
        cases = [("GTCT", "T"), ("GTCTT", "TT")]
        for alt_allele, expected_alt in cases:
            mut = MutationDataFactory.default_create(chrom, start, end,
                                                     ref_allele, alt_allele,
                                                     build)
            preceding_bases, updated_alt_allele, updated_start, updated_end = \
                MutUtils.retrievePrecedingBasesForInsertions(mut)
            mut.ref_allele = "-"
            mut.alt_allele = updated_alt_allele
            mut.start = updated_start
            mut.end = updated_end
            mut.createAnnotation(
                annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                annotationValue=preceding_bases)

            self.assertTrue("_preceding_bases" in mut,
                            "_preceding_bases is missing in the mutation data.")
            # The failure messages now quote the same values that are asserted.
            self.assertEqual(mut.start, 1234569,
                             "Mut start should be 1234569 but was %s." % mut.start)
            self.assertEqual(mut.end, 1234570,
                             "Mut end should be 1234570 but was %s." % mut.end)
            self.assertEqual(mut.ref_allele, "-",
                             "Ref allele should be - but was %s." % mut.ref_allele)
            self.assertEqual(mut.alt_allele, expected_alt,
                             "Alt allele should be %s but was %s." % (expected_alt, mut.alt_allele))
Ejemplo n.º 19
0
    def test_mutation_combiner(self):
        """Check the attributes and annotations of two adjacent mutations after combining them."""
        first = MutationDataFactory.default_create(
            chr=1, start=100, end=100, ref_allele="G", alt_allele="A")
        first.createAnnotation("SomeValue", "value1", "INPUT", "STRING", "a value")

        second = MutationDataFactory.default_create(
            chr=1, start=101, end=101, ref_allele="C", alt_allele="T")
        second.createAnnotation("SomeValue", "value2", tags=["IT"])
        second.createAnnotation("AnotherValue", "5")

        factory = MutationDataFactory()
        combined = OnpQueue._combine_mutations([first, second], factory)

        # Expected outcome: one mutation spanning 100-101 with concatenated
        # alleles and the differing "SomeValue" entries joined by "|".
        wanted = MutationDataFactory.default_create(
            chr=1, start=100, end=101, ref_allele="GC", alt_allele="AT")
        wanted.createAnnotation("SomeValue", "value1|value2", "INPUT", "STRING", "a value", tags=["IT"])
        wanted.createAnnotation("AnotherValue", "5")

        self.assertTrue(combined.attributesEqual(wanted))
        self.assertEqual(combined, wanted)
Ejemplo n.º 20
0
    def test_mutation_combiner_identical_annotation(self):
        """Check that an annotation with the same value on every input is not repeated with "|"."""
        first = MutationDataFactory.default_create(
            chr=1, start=100, end=100, ref_allele="G", alt_allele="A")
        first.createAnnotation("SampleName", "John Doe")

        second = MutationDataFactory.default_create(
            chr=1, start=101, end=101, ref_allele="C", alt_allele="T")
        second.createAnnotation("SampleName", "John Doe")

        factory = MutationDataFactory()
        combined = OnpQueue._combine_mutations([first, second], factory)

        # The shared value is expected once, not as "John Doe|John Doe".
        wanted = MutationDataFactory.default_create(
            chr=1, start=100, end=101, ref_allele="GC", alt_allele="AT")
        wanted.createAnnotation("SampleName", "John Doe")

        self.assertTrue(combined.attributesEqual(wanted))
        self.assertEqual(combined, wanted)
Ejemplo n.º 21
0
    def test_canonical_tx_list(self):
        """Test that specifying the canonical list will actually change the transcript selected. """
        ds = TestUtils._create_test_gencode_v19_ds(
            "out/test_canonical_tx_list_")
        m = MutationDataFactory.default_create()
        m.chr = "22"
        m.start = "22142650"
        m.end = "22142650"
        m.ref_allele = "T"
        m.alt_allele = "A"
        ds.set_custom_canonical_txs(["ENST00000544786"])
        ds.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)

        # NOTE: tx list overrides best effect
        m2 = ds.annotate_mutation(m)
        self.assertTrue(
            m2['annotation_transcript'].startswith("ENST00000544786"))
        # assertEqual shows the actual classification on failure.
        self.assertEqual(m2['variant_classification'],
                         VariantClassification.INTRON)

        # With the custom list cleared, a different transcript is expected.
        ds.set_custom_canonical_txs([])
        m2 = ds.annotate_mutation(m)
        self.assertEqual(m2['variant_classification'],
                         VariantClassification.MISSENSE)
        self.assertFalse(
            m2['annotation_transcript'].startswith("ENST00000544786"))
    def testdbNSFPNoRefAltAnnotationWithExactMatch(self):
        """Annotate a chr1 position that has no ref/alt set and compare three dbNSFP annotations."""
        self.logger.info("Initializing dbNSFP")
        ds_dir = os.path.join("testdata", "dbNSFP_chr1_chr3_100vars_exact_no_ref_alt_ds", "hg19")
        config_path = os.path.join(ds_dir, "dbNSFP_chr1_chr3_100vars_exact_no_ref_alt_ds.config")
        dbnsfp_ds = DatasourceFactory.createDatasource(config_path, ds_dir)

        mut = MutationDataFactory.default_create()
        mut.chr = "1"
        mut.start = "35140"
        mut.end = "35140"

        annotated = dbnsfp_ds.annotate_mutation(mut)

        # (annotation name, expected value)
        for name, value in (("dbNSFP_codonpos", "1|1|1"),
                            ("dbNSFP_refcodon", "TAA|TAA|TAA"),
                            ("dbNSFP_cds_strand", "-|-|-")):
            observed = annotated.getAnnotation(name)
            wanted = Annotation(value=value, datasourceName="dbNSFP", dataType="String",
                                description="", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT], number=None)
            self.assertTrue(observed.isEqual(wanted), "Annotations do not match.")
Ejemplo n.º 23
0
    def testAnnotateListOfMutations(self):
        """Test that we can initialize an Annotator, without an input or output and then feed mutations,
        one at a time... using a runspec"""

        # Locate the datasource directory and create a runspec
        dbDir = self.config.get("DEFAULT", "dbDir")
        ds = DatasourceFactory.createDatasources(dbDir)
        runSpec = RunSpecification()
        runSpec.initialize(None, None, datasources=ds)

        # Initialize the annotator with the runspec
        annotator = Annotator()
        annotator.initialize(runSpec)

        m = MutationDataFactory.default_create()
        m.chr = "1"
        m.start = "12941796"
        m.end = "12941796"
        m.alt_allele = "G"
        m.ref_allele = "T"

        muts = [m]

        muts = annotator.annotate_mutations(muts)
        # The builtin next() works on Python 2.6+ and Python 3, whereas the
        # .next() method only exists on Python 2 iterators.
        m2 = next(muts)
        self.assertTrue(m2.get("gene", None) is not None)
    def testESPCoverageAnnotationWithSNPAvgMatch(self):
        """Annotate a chrX position with the small ESP coverage datasource and compare three annotations."""
        self.logger.info("Initializing ESP6500SI-V2 Coverage")
        ds_dir = os.path.join("testdata", "small_esp_coverage_avg_ds", "hg19")
        config_path = os.path.join(ds_dir, "small_esp_coverage_avg_ds.config")
        esp_ds = DatasourceFactory.createDatasource(config_path, ds_dir)

        snp = MutationDataFactory.default_create()
        snp.chr = "X"
        snp.start = "100075334"
        snp.end = "100075334"

        annotated = esp_ds.annotate_mutation(snp)

        # (annotation name, expected value, expected data type)
        for name, value, data_type in (("ESP_AvgAAsampleReadDepth", "75.0", "Float"),
                                       ("ESP_TotalAAsamplesCovered", "692.0", "Float"),
                                       ("ESP_Chromosome", "X", "String")):
            observed = annotated.getAnnotation(name)
            wanted = Annotation(value=value, datasourceName="ESP", dataType=data_type,
                                description="", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT], number=None)
            self.assertTrue(observed.isEqual(wanted), "Annotations do not match.")
 def test_appris_selects_transcript(self):
     """Annotate a chr2 two-base deletion and check which transcript was selected."""
     deletion = MutationDataFactory.default_create(
         chr="2", start="201722365", end="201722366",
         ref_allele="AC", alt_allele="-", build="hg19")
     transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
     annotated = transcript_ds.annotate_mutation(deletion)

     # Look the chosen transcript up again by the ID recorded on the mutation.
     chosen_tx_id = annotated['annotation_transcript']
     tx = transcript_ds.get_transcript(chosen_tx_id)
     self.assertTrue(
         tx is not None,
         "Transcript was None when it should have been found.  Does the ground truth transcript above need to be updated?")
     self.assertEqual(tx._transcript_id, 'ENST00000321356.4')
Ejemplo n.º 26
0
    def testExtentOutOfRangeError(self):
        """If a window extends beyond the beginning or end of a file, the ref_context is truncated.

        Whatever is left of the window is used for gc_content as well.
        """
        reference_ds = ReferenceDatasource('testdata/reference_ds',
                                           windowSizeRef=6,
                                           windowSizeGCContent=5)
        mut = MutationDataFactory.default_create()
        mut.chr = "22"
        mut.start = "4"
        mut.end = "4"

        # "CCCAAGCTAAACCCAGGCCAC"
        expected_context = "CCCAAGCTAA"
        expected_gc = float(5) / float(9)

        annotated = reference_ds.annotate_mutation(mut)

        observed_context = annotated['ref_context']
        self.assertTrue(observed_context == expected_context,
                        "ref_context was not populated properly: " + str(observed_context))

        # gc_content is rounded to 3 decimal places
        observed_gc = annotated['gc_content']
        self.assertTrue(abs(float(observed_gc) - expected_gc) < .001,
                        "gc_content was not populated properly: " + str(observed_gc))
    def testESPCoverageAnnotationWithMissingAnnotationValuesIndelAvgMatch(
            self):
        """Check the ESP read-depth annotation for a multi-base range.

        Annotates chrX:100075350-100075356 with the
        ``small_esp_coverage_avg_ds`` test datasource and compares
        ``ESP_AvgSampleReadDepth`` with the expected ``Annotation``.
        """
        self.logger.info("Initializing ESP6500SI-V2 Coverage")
        ds_dir = os.path.join("testdata", "small_esp_coverage_avg_ds", "hg19")
        ds_config = os.path.join(ds_dir, "small_esp_coverage_avg_ds.config")
        esp_ds = DatasourceFactory.createDatasource(ds_config, ds_dir)

        mut = MutationDataFactory.default_create()
        mut.chr = "X"
        mut.start = "100075350"
        mut.end = "100075356"

        annotated = esp_ds.annotate_mutation(mut)
        observed = annotated.getAnnotation("ESP_AvgSampleReadDepth")
        expected = Annotation(value="91.25", datasourceName="ESP", dataType="Float", description="",
                              tags=[TagConstants.INFO, TagConstants.NOT_SPLIT], number=None)
        self.assertTrue(observed.isEqual(expected), "Annotations do not match.")
Ejemplo n.º 28
0
    def testMixedAnnotation(self):
        """Test that the COSMIC datasource can retrieve entries by both gp and gpp."""
        tabix_dir = "testdata/small_cosmic_with_gp_and_gpp/"
        genomic_file = tabix_dir + "small_cosmic_trimmed_for_sorting.txt.tbi.gz"
        protein_file = tabix_dir + "small_cosmic_trimmed_for_sorting.txt.tbi.byAA.sorted.tsv.gz"
        cosmic = Cosmic(src_file=genomic_file, title="Cosmic", version="test", gpp_tabix_file=protein_file)

        # These values are not taken from a real world scenario, but are cooked for this test.
        # Line 9 should get picked up genomic coords
        # Lines 7,8 should get picked up by the protein position
        mut = MutationDataFactory.default_create()
        mut.createAnnotation("gene", "A2M")
        mut.createAnnotation("transcript_protein_position_start", "1300")
        mut.createAnnotation("transcript_protein_position_end", "1400")
        mut.chr = '12'
        mut.start = '9227220'
        mut.end = '9227230'
        mut = cosmic.annotate_mutation(mut)

        self.assertTrue(mut['COSMIC_n_overlapping_mutations'] == '3')

        found_at = mut['COSMIC_overlapping_mutation_AAs'].find('1229')
        self.assertTrue(found_at != -1, "Could not find the entry specified by genomic coords.")

        primary_sites = mut['COSMIC_overlapping_primary_sites']
        self.assertTrue(primary_sites == "lung(3)",
                        "Did not have the correct primary sites annotation (lung(3)): " + primary_sites)
Ejemplo n.º 29
0
    def testSpliceSiteWithinNBases(self):
        """Test that a silent mutation is changed to splice site w/in 10 bases of a splice site """
        # chr21:10,998,326-10,998,346
        # 10,998,336 is a splice site.  (Junction between 10998335 and 336)
        # AGTTCTCCTT C TGGAAAAAAG
        refs = 'AGTTCTCCTTCTGGAAAAAAG'
        alts = 'TCAGACTGAAAATACCCCCCT'
        window_start = 10998326
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
        vcs = []
        # One SNP per base of the 21-base window, each using the ref/alt base at the same offset.
        for offset, (ref_base, alt_base) in enumerate(zip(refs, alts)):
            m = MutationDataFactory.default_create()
            m.start = str(window_start + offset)
            m.end = str(window_start + offset)
            m.chr = "21"
            m.ref_allele = ref_base
            m.alt_allele = alt_base

            m = gafDatasource.annotate_mutation(m)

            vc = m['variant_classification']
            self.assertTrue(vc != 'Silent', 'Silent mutation found when it should be a splice site.')

            vcs.append(vc)
            # Parenthesised single-argument form prints identically on Python 2 and 3.
            print(vc + "  " + m.start)

        self.assertTrue(all(tmp == "Splice_Site" for tmp in vcs[8:12]), "Not all vcs within 2 bases were splice site: " + str(vcs[8:12]))
        self.assertTrue(all(tmp != "Splice_Site" for tmp in vcs[0:8]), "No splice sites should be seen: " + str(vcs[0:8]))
        # NOTE(review): vcs has 21 entries, so this slice leaves index 20 unchecked -- confirm whether
        # that is intentional before widening it to vcs[12:].
        self.assertTrue(all(tmp != "Splice_Site" for tmp in vcs[12:20]), "No splice sites should be seen: " + str(vcs[12:20]))
Ejemplo n.º 30
0
 def test_not_updating_annotation_source(self):
     """Test that do not have to update annotation source if columns are collapsed"""
     mut = MutationDataFactory.default_create(chr="1", start="10000", end="10000")
     mut.createAnnotation('ALT_F2R1', "|36", annotationSource="TEST")

     # No new annotation source is supplied, so the original one is expected to remain.
     collapser = ColumnCollapser()
     collapser.update_mutation(mut)

     source_after = mut.getAnnotation("ALT_F2R1").getDatasource()
     self.assertEqual(source_after, "TEST")
Ejemplo n.º 31
0
    def testFlank(self):
        """Test that we can see a Flank mutation.

        NOTE(review): this test asserts nothing; it only annotates each
        position in the window and prints the variant classification.
        """
        # NOTE(review): the original comment here gave chr1:28,233,780-28,233,805 with a junction at
        # chr1:28,233,793 & 94, which does not match startWindow below -- confirm the intended region.

        refs = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alts = "TGGGCTCAGGCTCTCTGAAAAGAAAA"
        vcs = []
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
        startWindow = 11042200
        for s in range(startWindow, startWindow+len(refs)):
            m = MutationDataFactory.default_create()
            m.start = str(s)
            m.end = str(s)
            m.chr="1"
            m.ref_allele = refs[s-startWindow]
            m.alt_allele = alts[s-startWindow]

            m = gafDatasource.annotate_mutation(m)

            vc = m['variant_classification']
            vcs.append(vc)

            # Parenthesised single-argument form prints identically on Python 2 and 3.
            print(vc + "  " + m.start)
Ejemplo n.º 32
0
    def test_validation_correction_valid(self):
        """ Test that the validation allele fields are determined automatically when not specified by the user for a valid mutation.
        """
        mut = MutationDataFactory.default_create()
        mut.chr = "3"
        mut.start = "178948145"
        mut.end = "178948145"
        mut.alt_allele = "A"
        mut.ref_allele = "G"
        mut['validation_status'] = "Valid"
        mut['Match_Norm_Validation_Allele1'] = ""
        mut['Match_Norm_Validation_Allele2'] = ""
        mut['Tumor_Validation_Allele1'] = ""
        mut['Tumor_Validation_Allele2'] = ""
        mut['Mutation_Status'] = "Somatic"

        maf_path = os.path.join("out", "test_validation_correction2.maf.tsv")
        maf_config = os.path.join("configs", "tcgaMAF2.4_output.config")

        renderer = TcgaMafOutputRenderer(maf_path, configFile=maf_config)
        renderer.renderMutations(iter([mut]))

        # Pairs of output columns that must hold the same value, with the failure message for each.
        equal_columns = [
            ('Match_Norm_Validation_Allele1', 'Match_Norm_Validation_Allele2',
             "Matched norm alleles did not match."),
            ('Tumor_Validation_Allele1', 'Reference_Allele',
             "Tumor validation allele 1 did not match reference for a valid validation result."),
            ('Tumor_Validation_Allele2', 'Tumor_Seq_Allele2',
             "Tumor validation allele 2 did not match Tumor_Seq_Allele2 for a valid validation result."),
            ('Match_Norm_Validation_Allele1', 'Tumor_Validation_Allele1',
             "Tumor allele 1 did not match normal alleles for a valid validation result."),
        ]

        for row in GenericTsvReader(maf_path):
            for left_column, right_column, failure_message in equal_columns:
                self.assertTrue(row[left_column] == row[right_column], failure_message)
            self.assertTrue(
                row['Match_Norm_Validation_Allele1'] == row['Reference_Allele'],
                "Norm validation alleles did not match reference (norm, reference): (%s, %s)"
                % (row['Match_Norm_Validation_Allele1'], row['Reference_Allele']))
            self.assertTrue(
                "G" == row['Reference_Allele'],
                "Reference allele should have been G, but was " + row['Reference_Allele'])
            self.assertTrue(
                "A" == row['Tumor_Seq_Allele2'],
                "Alt allele should have been A, but was " + row['Tumor_Seq_Allele2'])
Ejemplo n.º 33
0
 def testAddTag(self):
     ''' Test adding a tag to an annotation '''
     mut = MutationDataFactory.default_create()
     mut.createAnnotation("fake1", "1")
     mut.addTagToAnnotation("fake1", "fakeTag")

     # The tag should now be listed on the annotation it was added to.
     tags = mut.getAnnotation("fake1").getTags()
     self.assertTrue("fakeTag" in tags, "Tag was not added properly.")
Ejemplo n.º 34
0
    def testAnnotateListOfMutations(self):
        """Test that we can initialize an Annotator, without an input or output and then feed mutations,
        one at a time... using a runspec"""

        # Locate the datasource directory and create a runspec
        dbDir = self.config.get("DEFAULT", "dbDir")
        ds = DatasourceFactory.createDatasources(dbDir)
        runSpec = RunSpecification()
        runSpec.initialize(None, None, datasources=ds)

        # Initialize the annotator with the runspec
        annotator = Annotator()
        annotator.initialize(runSpec)

        m = MutationDataFactory.default_create()
        m.chr = "1"
        m.start = "12941796"
        m.end = "12941796"
        m.alt_allele = "G"
        m.ref_allele = "T"

        muts = [m]

        muts = annotator.annotate_mutations(muts)
        # Builtin next() instead of the Python-2-only .next() method; it works on 2.6+ and 3.
        m2 = next(muts)
        self.assertTrue(m2.get("gene", None) is not None)
Ejemplo n.º 35
0
    def testFlank(self):
        """Test that we can see a Flank mutation.

        NOTE(review): this test asserts nothing; it only annotates each
        position in the window and prints the variant classification.
        """
        # NOTE(review): the original comment here gave chr1:28,233,780-28,233,805 with a junction at
        # chr1:28,233,793 & 94, which does not match startWindow below -- confirm the intended region.

        refs = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alts = "TGGGCTCAGGCTCTCTGAAAAGAAAA"
        vcs = []
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)
        startWindow = 11042200
        for s in range(startWindow, startWindow + len(refs)):
            m = MutationDataFactory.default_create()
            m.start = str(s)
            m.end = str(s)
            m.chr = "1"
            m.ref_allele = refs[s - startWindow]
            m.alt_allele = alts[s - startWindow]

            m = gafDatasource.annotate_mutation(m)

            vc = m['variant_classification']
            vcs.append(vc)

            # Parenthesised single-argument form prints identically on Python 2 and 3.
            print(vc + "  " + m.start)
Ejemplo n.º 36
0
    def testSilentMutationGoingToSpliceSite(self):
        """Test that a silent mutation within 10 bp of a splice junction should become a splice site"""
        # chr1:28,233,780-28,233,805 Junction is at chr1:28,233,793 & 94

        refs = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alts = "TGGGCTCAGGCTCGCTGAAAAGAAAA"
        window_start = 28233780
        junction = 28233793
        tx_provider = TestUtils.createTranscriptProviderDatasource(self.config)

        classifications = []
        splice_site_count = 0
        silent_count = 0
        # Walk the 26-base window, pairing each position with its ref/alt base.
        for offset, (ref_base, alt_base) in enumerate(zip(refs, alts)):
            position = str(window_start + offset)
            mut = MutationDataFactory.default_create()
            mut.start = position
            mut.end = position
            mut.chr = "1"
            mut.ref_allele = ref_base
            mut.alt_allele = alt_base

            mut = tx_provider.annotate_mutation(mut)

            distance = abs(junction - int(mut.start))
            vc = mut['variant_classification']
            classifications.append(vc)

            vc_lowered = vc.lower()
            if vc_lowered == "splice_site":
                splice_site_count += 1
            elif vc_lowered == "silent":
                silent_count += 1
            print(vc + "  " + mut.start + "  " + str(distance))

        self.assertTrue(splice_site_count == 4,
                        "Should have seen 4 splice site mutations, but saw: " + str(splice_site_count))
        self.assertTrue(silent_count == 11,
                        "Should have seen 11 Silent mutations, but saw: " + str(silent_count))
Ejemplo n.º 37
0
 def test_range_fetch(self):
     """Annotate the range chr1:78978-79000 with the bigwig datasource and check the score."""
     mut = MutationDataFactory.default_create()
     for annotation_name, annotation_value in (('chr', '1'), ('start', 78978), ('end', 79000)):
         mut.createAnnotation(annotation_name, annotation_value)

     self.bigwig_datasource.annotate_mutation(mut)

     score = mut.get('TestBigWig_score')
     self.assertEqual(score, 0.75)
Ejemplo n.º 38
0
 def testPickleable(self):
     """Test that a near-empty MutationData can be pickled"""
     m = MutationDataFactory.default_create()
     m.chr = "2"
     m.createAnnotation("fake1", "1")
     m.addTagToAnnotation("fake1", "fakeTag")
     import cPickle
     # The context manager closes the file even if dump() raises (the original leaked the
     # handle); binary mode is the mode the pickle documentation recommends.
     with open("out/testMDPickle.pkl", 'wb') as pickle_file:
         cPickle.dump(m, pickle_file)
Ejemplo n.º 39
0
 def testPickleable(self):
     """Test that a near-empty MutationData can be pickled"""
     m = MutationDataFactory.default_create()
     m.chr = "2"
     m.createAnnotation("fake1", "1")
     m.addTagToAnnotation("fake1", "fakeTag")
     import cPickle
     # The context manager closes the file even if dump() raises (the original leaked the
     # handle); binary mode is the mode the pickle documentation recommends.
     with open("out/testMDPickle.pkl", 'wb') as pickle_file:
         cPickle.dump(m, pickle_file)
Ejemplo n.º 40
0
 def testIter(self):
     """Iterate over a mutation and check every key is a created annotation or a core attribute."""
     created_names = ["fake1", "fake2"]
     mut = MutationDataFactory.default_create()
     mut.createAnnotation("fake1", "1")
     mut.createAnnotation("fake2", "blah blah")

     for key in mut:
         is_expected = (key in created_names) or (key in MutationData.attributes)
         self.assertTrue(is_expected, "Key not present: " + key)
Ejemplo n.º 41
0
 def test_not_updating_annotation_source(self):
     """Test that do not have to update annotation source if columns are collapsed"""
     mut = MutationDataFactory.default_create(chr="1", start="10000", end="10000")
     mut.createAnnotation('ALT_F2R1', "|36", annotationSource="TEST")

     # No new annotation source is supplied, so the original one is expected to remain.
     collapser = ColumnCollapser()
     collapser.update_mutation(mut)

     source_after = mut.getAnnotation("ALT_F2R1").getDatasource()
     self.assertEqual(source_after, "TEST")
Ejemplo n.º 42
0
    def test_mutation_combiner_ordering(self):
        """Test that the ordering of combined attributes matches the original order"""
        first = MutationDataFactory.default_create(chr=1, start=100, end=100, ref_allele="G", alt_allele="A")
        first.createAnnotation("SomeDepth", "2")
        first.createAnnotation("AnotherDepth", "1")

        second = MutationDataFactory.default_create(chr=1, start=101, end=101, ref_allele="C", alt_allele="T")
        second.createAnnotation("SomeDepth", "1")
        second.createAnnotation("AnotherDepth", "2")

        factory = MutationDataFactory()
        combined = OnpQueue._combine_mutations([first, second], factory)

        # Expected: one mutation spanning both positions, with per-mutation values joined by "|".
        expected = MutationDataFactory.default_create(chr=1, start=100, end=101, ref_allele="GC", alt_allele="AT")
        expected.createAnnotation("SomeDepth", "2|1")
        expected.createAnnotation("AnotherDepth", "1|2")

        self.assertTrue(combined.attributesEqual(expected))
        self.assertEqual(combined, expected)
Ejemplo n.º 43
0
    def testRetrievePrecedingBaseFromAnnotationForInsertions(self):
        """Round-trip two insertions through the MutUtils preceding-bases helpers.

        For each alt allele the preceding bases are split off with
        ``retrievePrecedingBasesForInsertions``, stored as an annotation, and
        restored with ``retrievePrecedingBaseFromAnnotationForInsertions``;
        the restored start, ref and alt must equal the original input.
        """
        chrom = "1"
        start = 1234567
        end = 1234567  # incorrect, but doesn't matter for the purposes of testing
        ref_allele = "GTC"
        build = "19"

        for alt_allele in ("GTCT", "GTCTT"):
            mut = MutationDataFactory.default_create(chrom, start, end, ref_allele, alt_allele, build)
            preceding_bases, trimmed_alt, trimmed_start, trimmed_end = \
                MutUtils.retrievePrecedingBasesForInsertions(mut)

            # Rewrite the mutation with "-" as the ref allele and keep the removed bases as an annotation.
            mut.ref_allele = "-"
            mut.alt_allele = trimmed_alt
            mut.start = trimmed_start
            mut.end = trimmed_end
            mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                                 annotationValue=preceding_bases)

            restored_ref, restored_alt, restored_start = \
                MutUtils.retrievePrecedingBaseFromAnnotationForInsertions(mut)
            self.assertTrue(restored_start == start,
                            "Mut start should be %s but was %s." % (start, restored_start))
            self.assertTrue(restored_ref == ref_allele,
                            "Ref allele should be %s but was %s." % (ref_allele, restored_ref))
            self.assertTrue(restored_alt == alt_allele,
                            "Alt allele should be %s but was %s." % (alt_allele, restored_alt))
Ejemplo n.º 44
0
    def test_validation_correction(self):
        """ Test that the validation allele fields are determined automatically when not specified by the user for invalid mutation.
        """
        mut = MutationDataFactory.default_create()
        mut.chr = "3"
        mut.start = "178948145"
        mut.end = "178948145"
        mut.alt_allele = "A"
        mut.ref_allele = "G"
        mut['validation_status'] = "Invalid"
        mut['Match_Norm_Validation_Allele1'] = ""
        mut['Match_Norm_Validation_Allele2'] = ""
        mut['Tumor_Validation_Allele1'] = ""
        mut['Tumor_Validation_Allele2'] = ""
        mut['Mutation_Status'] = "Somatic"

        maf_path = os.path.join("out", "test_validation_correction1.maf.tsv")
        maf_config = os.path.join("configs", "tcgaMAF2.4_output.config")

        renderer = TcgaMafOutputRenderer(maf_path, configFile=maf_config)
        renderer.renderMutations(iter([mut]))

        # Pairs of output columns that must hold the same value, with the failure message for each.
        equal_columns = [
            ('Match_Norm_Validation_Allele1', 'Match_Norm_Validation_Allele2',
             "Matched norm alleles did not match."),
            ('Tumor_Validation_Allele1', 'Tumor_Validation_Allele2',
             "Tumor alleles did not match for an invalid validation result."),
            ('Match_Norm_Validation_Allele1', 'Tumor_Validation_Allele2',
             "Tumor alleles did not match normal alleles for an invalid validation result."),
        ]

        for row in GenericTsvReader(maf_path):
            for left_column, right_column, failure_message in equal_columns:
                self.assertTrue(row[left_column] == row[right_column], failure_message)
            self.assertTrue(
                row['Match_Norm_Validation_Allele1'] == row['Reference_Allele'],
                "Norm validation alleles did not match reference (norm, reference): (%s, %s)"
                % (row['Match_Norm_Validation_Allele1'], row['Reference_Allele']))
            self.assertTrue(
                "G" == row['Reference_Allele'],
                "Reference allele should have been G, but was " + row['Reference_Allele'])
            self.assertTrue(
                "None" == row['Mutation_Status'],
                "Mutation Status must be None when Validation Status is Invalid: " + row['Mutation_Status'])
    def testMissingAnnotations(self):
        ''' Tests that if the required annotations ("gene", "protein_change", and "other_transcripts") are missing, an exception is thrown.
        '''
        natvar_tsv = "testdata/simple_uniprot_natvar/simple_uniprot_natvar.tsv"
        natvar_ds = GenericGeneProteinPositionDatasource(natvar_tsv, title="SmallNatVar", version="test")

        # Only "gene" is set; "protein_change" is deliberately left off the mutation.
        mut = MutationDataFactory.default_create()
        mut.createAnnotation("gene", "TP53")

        self.assertRaisesRegexp(MissingAnnotationException, "protein_change",
                                natvar_ds.annotate_mutation, mut)
Ejemplo n.º 46
0
    def testBasicGeneTSVInit(self):
        """ Make sure that we can initialize a simple tsv data source """

        geneDS = DatasourceFactory.createDatasource("testdata/small_tsv_ds/small_tsv_ds.config", "testdata/small_tsv_ds/")
        # Identity check against None; the original "<>" operator does not exist in Python 3.
        self.assertTrue(geneDS is not None, "gene indexed datasource was None.")

        m = MutationDataFactory.default_create()
        m.createAnnotation('gene',"ABL1")
        m = geneDS.annotate_mutation(m)
        self.assertTrue(m['CGC_Abridged_Name'] == "v-abl Abelson murine leukemia viral oncogene homolog 1","Test gene TSV datasource did not annotate properly.")
Ejemplo n.º 47
0
 def testSetValues(self):
     """Read annotations through the dictionary interface, overwrite one, and read it back."""
     read_failure = "Could not properly retrieve annotation using the dictionary interface.  "
     mut = MutationDataFactory.default_create()
     mut.createAnnotation("fake1", "1")
     mut.createAnnotation("fake2", "blah blah")
     self.assertTrue(mut["fake1"] == "1", read_failure + str(mut["fake1"]))
     self.assertTrue(mut["fake2"] == "blah blah", read_failure + str(mut["fake2"]))

     # Overwrite an existing annotation through item assignment.
     mut["fake2"] = "Whoa"
     self.assertTrue(
         mut["fake2"] == "Whoa",
         "Could not properly retrieve annotation using the dictionary interface, after a value change.")
     print(str(mut))
Ejemplo n.º 48
0
 def test_annotation_copy(self):
     """Test that we can create a backup annotation with the old values after collapsing, if requested."""
     mut = MutationDataFactory.default_create(chr="1", start="10000", end="10000")
     mut.createAnnotation('ALT_F2R1', "|36", annotationSource="TEST")

     collapser = ColumnCollapser()
     collapser.update_mutation(mut, new_annotation_source="foo", copy_old_suffix="_full")

     # The suffixed copy keeps the old value; the original name holds the collapsed value.
     self.assertEqual(mut["ALT_F2R1_full"], "|36")
     self.assertEqual(mut["ALT_F2R1"], "36")

     backup_source = mut.getAnnotation("ALT_F2R1_full").getDatasource()
     self.assertEqual(backup_source, "TEST")
     collapsed_source = mut.getAnnotation("ALT_F2R1").getDatasource()
     self.assertTrue(collapsed_source != mut.getAnnotation("ALT_F2R1_full").getDatasource())
Ejemplo n.º 49
0
 def test_cached_annots_dummy_cache(self):
     """Test dummy cache.  Also, tests a simple store and retrieve, which should be None."""
     cache_manager = CacheManager()
     db_dir_key = "blah"
     # Initialized with None as the first argument and a fake db dir key.
     cache_manager.initialize(None, db_dir_key, is_read_only=False)

     mut = MutationDataFactory.default_create()
     mut.createAnnotation("blah1", "val1", annotationSource="INPUT")
     mut.createAnnotation("blah2", "val5", annotationSource="some_datasource")

     cache_manager.store_annotations_in_cache(mut)
     cached = cache_manager.retrieve_cached_annotations(mut)
     self.assertTrue(cached is None)
Ejemplo n.º 50
0
 def test_not_5_prime_flank_annotation_positive_strand(self):
     """Annotate a C>A SNP at chr3:180625088 (hg19) and expect it to be classified as IGR."""
     mut = MutationDataFactory.default_create(
         chr="3", start="180625088", end="180625088",
         ref_allele="C", alt_allele="A", build="hg19")
     tx_provider = TestUtils.createTranscriptProviderDatasource(self.config)
     mut = tx_provider.annotate_mutation(mut)
     self.assertEqual(mut['variant_classification'], "IGR")
Ejemplo n.º 51
0
 def test_3_prime_flank_annotation_negative_strand(self):
     """Annotate an A>T SNP at chr5:1253255 (hg19) and expect it to be classified as 3'Flank."""
     mut = MutationDataFactory.default_create(
         chr="5", start="1253255", end="1253255",
         ref_allele="A", alt_allele="T", build="hg19")
     tx_provider = TestUtils.createTranscriptProviderDatasource(self.config)
     mut = tx_provider.annotate_mutation(mut)
     self.assertEqual(mut['variant_classification'], "3'Flank")
Ejemplo n.º 52
0
 def test_cached_annots_dummy_cache(self):
     """Test dummy cache.  Also, tests a simple store and retrieve, which should be None."""
     cache_manager = CacheManager()
     db_dir_key = "blah"
     # Initialized with None as the first argument and a fake db dir key.
     cache_manager.initialize(None, db_dir_key, is_read_only=False)

     mut = MutationDataFactory.default_create()
     mut.createAnnotation("blah1", "val1", annotationSource="INPUT")
     mut.createAnnotation("blah2", "val5", annotationSource="some_datasource")

     cache_manager.store_annotations_in_cache(mut)
     cached = cache_manager.retrieve_cached_annotations(mut)
     self.assertTrue(cached is None)
    def testDatasourceCreator(self):
        """ Test that the datasource creator process will work for v1 of TranscriptToUniProtProteinPositionTransformingDatasource.  NOTE: This test needs to be updated to use sqlite instead of filesystem file.
        """
        ds_dir = "testdata/small_uniprot_prot_seq_ds/"
        ds_config = "testdata/small_uniprot_prot_seq_ds/small_uniprot_prot_seq_ds.config"
        uniprot_ds = DatasourceFactory.createDatasource(ds_config, ds_dir)

        output_annotation = "UniProt_aapos"
        mut = MutationDataFactory.default_create()
        mut.createAnnotation('transcript_id', 'uc009vvt.1')
        mut.createAnnotation('protein_change', 'p.T1105A')
        mut = uniprot_ds.annotate_mutation(mut)

        self.assertTrue(mut[output_annotation] == "969",
                        "Did not get proper value (969): " + mut[output_annotation])
Ejemplo n.º 54
0
 def testHeaderCreation(self):
     """Test that a tcga vcf header can be generated, even from a blank mutation. """
     vcfOR = TcgaVcfOutputRenderer("out/TCGAVCFHeader.out.txt")
     m = MutationDataFactory.default_create()
     m.createAnnotation('center', "broad.mit.edu")
     hdr = vcfOR.createVcfHeader(m)
     self.assertTrue(hdr is not None)
     # "!=" replaces the "<>" operator, which does not exist in Python 3.
     self.assertTrue(hdr != "")
     self.assertTrue(
         hdr.find("broad.mit.edu") != -1,
         "Could not find string that should have been in header.")
Ejemplo n.º 55
0
 def testAnnotationSourceIsPopulated(self):
     ''' Tests that the annotation source is not blank for the example tsv datasource. '''
     geneDS = DatasourceFactory.createDatasource("testdata/small_tsv_ds/small_tsv_ds.config", "testdata/small_tsv_ds/")
     # Identity check against None; the original "<>" operator does not exist in Python 3.
     self.assertTrue(geneDS is not None, "gene indexed datasource was None.")

     m = MutationDataFactory.default_create()
     m.createAnnotation('gene',"ABL1")
     m = geneDS.annotate_mutation(m)
     self.assertTrue(m['CGC_Abridged_Name'] == "v-abl Abelson murine leukemia viral oncogene homolog 1","Test gene TSV datasource did not annotate properly.")
     self.assertTrue(m.getAnnotation('CGC_Abridged_Name').getDatasource() != "Unknown", "Annotation source was unknown")
     self.assertTrue(m.getAnnotation('CGC_Abridged_Name').getDatasource().strip() != "", "Annotation source was blank")
Ejemplo n.º 56
0
 def test_no_data_fetch(self):
     """Test for value not found in bigwig.

     In this case, our test bigwig only has data for chr1, so None is the
     expected return value.
     """
     mut = MutationDataFactory.default_create()
     for annotation_name, annotation_value in (('chr', '13'), ('start', 78978), ('end', 79000)):
         mut.createAnnotation(annotation_name, annotation_value)

     self.bigwig_datasource.annotate_mutation(mut)

     score = mut.get('TestBigWig_score')
     self.assertEqual(score, None)
Ejemplo n.º 57
0
    def annotate_genes_given_txs(self, txs):
        """
        Given a list of Transcripts, create and annotate dummy mutations that represent only the gene.

        Transcripts are grouped by ``tx.get_gene()``.  For each gene (in sorted order) one dummy mutation
        is created carrying the gene, its sorted transcript ids, the strand / gene type / contig of the
        gene's first transcript, and a placeholder protein change ending at the longest protein length.
        Each transcript is then annotated with ``self.annotate_transcript`` and the resulting values are
        collapsed per annotation name into a sorted, "|"-joined string stored with source "OUTPUT".
        Finally all dummy mutations are passed to ``self._annotate_genes``.

        :param txs: list of Transcripts
        :type txs: list
        :return: dictionary mapping each gene to its dummy mutation
        """
        # Annotations coming from these sources are not collapsed onto the gene-level mutation.
        excluded_sources = ("INPUT", "OUTPUT", "Unknown")

        txs_by_gene = {}
        for tx in txs:
            txs_by_gene.setdefault(tx.get_gene(), []).append(tx)

        muts_dict = {}
        for gene in sorted(txs_by_gene):
            gene_txs = txs_by_gene[gene]
            first_tx = gene_txs[0]

            m = MutationDataFactory.default_create()
            m.createAnnotation("gene", gene)
            m.createAnnotation("transcripts", ",".join(sorted(tx.get_transcript_id() for tx in gene_txs)))
            m.createAnnotation("strand", first_tx.get_strand())
            m.createAnnotation("class", first_tx.get_gene_type())
            longest_protein = str(max(len(tx.get_protein_seq()) for tx in gene_txs))
            m.createAnnotation("protein_change", "p.DUMMY1_" + longest_protein)
            m.createAnnotation("chr", first_tx.get_contig())

            # Annotate each transcript, then gather the distinct values seen for every annotation name.
            tx_muts = [self.annotate_transcript(tx) for tx in gene_txs]
            values_by_annotation = defaultdict(set)
            for tx_mut in tx_muts:
                for annotation_name in tx_mut.keys():
                    source = tx_mut.getAnnotation(annotation_name).getDatasource()
                    if source in excluded_sources:
                        continue
                    values_by_annotation[annotation_name].add(tx_mut[annotation_name])

            # One collapsed annotation per name; blank values are dropped before joining.
            for annotation_name, values in values_by_annotation.items():
                non_blank = sorted(value for value in values if value != "")
                m.createAnnotation(annotation_name, "|".join(non_blank), annotationSource="OUTPUT")

            muts_dict[gene] = m

        self._annotate_genes(muts_dict.values())
        return muts_dict