Exemple #1
0
    def test_continuous_exons_in_segments(self):
        """Test that all exons are accounted for when annotating adjacent segments that skip an exon.

        The exon reported for the start of one segment and the exon reported
        for the end of the neighbouring segment are both checked against the
        same chosen transcript.
        """
        # Segment records this test was derived from
        # (hg19, sample CESC.TCGA.BI.A0VR.Tumor.SM.1RACM):
        #   chr22  24734447 (SPECC1L 10+)  ->  41783674 (TEF 1-)
        #   chr22  16282318 (POTEH 2-)     ->  24730543 (SPECC1L 8-)
        seg1 = MutationDataFactory.default_create()
        seg1.chr = "22"
        seg1.start = "24734447"  # Just past exon 9 (0-based)
        seg1.end = "41783674"

        seg2 = MutationDataFactory.default_create()
        seg2.chr = "22"
        seg2.start = "16282318"
        seg2.end = "24730543"  # Just past exon 8 (0-based)

        # 'ENST00000314328.9' for GENCODE v19
        chosen_tx, transcript_ds = self._get_chosen_tx_and_transcript_ds(
            seg1.chr, seg1.start)

        # assertEqual (rather than assertTrue(a == b)) reports both tuples
        # when the expectation is not met.
        start_result = transcript_ds._determine_exons_affected_by_start(
            seg1.start, chosen_tx)
        self.assertEqual(start_result, (10, '+'))

        end_result = transcript_ds._determine_exons_affected_by_end(
            seg2.end, chosen_tx)
        self.assertEqual(end_result, (8, '-'))
    def testMulticoreAnnotate(self):
        """Test a (too) simple annotating exercise from GAF on 2 cores

        The datasource proxy is first dumped to 'out/testGAFPickle.pkl', then
        two SNVs at adjacent positions are annotated through a two-process
        pool and the returned mutations are checked for gene/transcript
        annotations.
        """
        gafDatasource = TestUtils.createGafDatasourceProxy(self.config)

        # Test pickling.  The with-block guarantees the handle is flushed and
        # closed even when dump() raises.
        # NOTE(review): assumes dump is pickle/cPickle.dump, for which binary
        # mode is the safe choice -- confirm against the file's imports.
        with open('out/testGAFPickle.pkl', 'wb') as pickle_fp:
            dump(gafDatasource, pickle_fp)

        m1 = MutationDataFactory.default_create()
        m1.chr = '3'
        m1.start = '178866811'
        m1.end = '178866811'
        m1.ref_allele = "A"
        m1.alt_allele = "C"
        m1.build = "hg19"

        m2 = MutationDataFactory.default_create()
        m2.chr = '3'
        m2.start = '178866812'
        m2.end = '178866812'
        m2.ref_allele = "A"
        m2.alt_allele = "C"
        m2.build = "hg19"

        p = LoggingPool(processes=2)
        try:
            result = p.map(annotate_mutation_global,
                           [(gafDatasource, m1), (gafDatasource, m2)])
        finally:
            # Always shut the workers down, even if map() raised.
            p.close()
            p.join()

        for r in result:
            self.assertTrue("transcript_id" in r.keys())
            self.assertTrue("gene" in r.keys())
            self.assertEqual(r["gene"], "PIK3CA")
        # The two results must correspond to the two distinct inputs.
        self.assertNotEqual(result[0].start, result[1].start)
    def test_effect_tx_mode(self):
        """Test that the transcript mode changes the classification reported for one variant.

        The same chr2 G>T variant is annotated under TX_MODE_BEST_EFFECT
        (expected Missense_Mutation) and TX_MODE_CANONICAL (expected Intron).
        """
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)

        def annotate_variant():
            # A fresh mutation is built for every mode so that no annotation
            # from a previous run is carried over.
            m = MutationDataFactory.default_create()
            m.chr = '2'
            m.start = '219137340'
            m.end = '219137340'
            m.ref_allele = 'G'
            m.alt_allele = 'T'
            return gafDatasource.annotate_mutation(m)

        # Canonical mutation was Intron
        gafDatasource.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)
        m = annotate_variant()
        self.assertEqual(m['gene'], "PNKD")
        self.assertEqual(m['variant_classification'], "Missense_Mutation")

        gafDatasource.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
        m = annotate_variant()
        self.assertEqual(m['gene'], "PNKD")
        self.assertEqual(
            m['variant_classification'], "Intron",
            "Canonical no longer is Intron.  This test is no longer valid.  This failure can come up when changing the GAF datasource."
            "  Observed: %s" % m['variant_classification'])
    def testMulticoreAnnotate(self):
        """Test a (too) simple annotating exercise from GAF on 2 cores

        Dumps the datasource proxy to 'out/testGAFPickle.pkl', then annotates
        two adjacent chr3 SNVs through a two-process pool and checks the gene
        and transcript annotations of what comes back.
        """
        gafDatasource = TestUtils.createGafDatasourceProxy(self.config)

        # Test pickling.  Using a with-block closes the handle even when
        # dump() raises, instead of leaking it.
        # NOTE(review): assumes dump is pickle/cPickle.dump, for which binary
        # mode is the safe choice -- confirm against the file's imports.
        with open('out/testGAFPickle.pkl', 'wb') as pickle_fp:
            dump(gafDatasource, pickle_fp)

        # Two SNVs that differ only in position.
        muts = []
        for position in ('178866811', '178866812'):
            m = MutationDataFactory.default_create()
            m.chr = '3'
            m.start = position
            m.end = position
            m.ref_allele = "A"
            m.alt_allele = "C"
            m.build = "hg19"
            muts.append(m)

        p = LoggingPool(processes=2)
        try:
            result = p.map(annotate_mutation_global,
                           [(gafDatasource, m) for m in muts])
        finally:
            # Always shut the workers down, even if map() raised.
            p.close()
            p.join()

        for r in result:
            self.assertTrue("transcript_id" in r.keys())
            self.assertTrue("gene" in r.keys())
            self.assertEqual(r["gene"], "PIK3CA")
        # The two results must correspond to the two distinct inputs.
        self.assertNotEqual(result[0].start, result[1].start)
Exemple #5
0
 def _simple_annotate(self, is_skip_no_alts):
     """Annotate two mutations with no datasources and return how many come back.

     Both mutations share position and alleles; only the first carries an
     "alt_allele_seen" annotation (set to "False").  is_skip_no_alts is
     forwarded to the run specification.
     """
     run_spec = RunSpecification()
     run_spec.initialize(None,
                         None,
                         datasources=[],
                         is_skip_no_alts=is_skip_no_alts)
     # Initialize the annotator with the runspec
     annotator = Annotator()
     annotator.initialize(run_spec)

     def _new_mutation():
         mut = MutationDataFactory.default_create()
         mut.chr = "1"
         mut.start = "12941796"
         mut.end = "12941796"
         mut.alt_allele = "G"
         mut.ref_allele = "T"
         return mut

     flagged = _new_mutation()
     flagged.createAnnotation("alt_allele_seen", "False")
     unflagged = _new_mutation()

     annotated = annotator.annotate_mutations([flagged, unflagged])
     # Count by consuming the result; it is only ever iterated here.
     return sum(1 for _ in annotated)
Exemple #6
0
 def _simple_annotate(self, is_skip_no_alts):
     """Run the annotator over two identical-position mutations and count the output.

     The first mutation is annotated with "alt_allele_seen" = "False"; the
     second has no such annotation.  No datasources are configured, and
     is_skip_no_alts is passed straight to the run specification.
     """
     spec = RunSpecification()
     spec.initialize(None, None, datasources=[], is_skip_no_alts=is_skip_no_alts)
     # Initialize the annotator with the runspec
     annotator = Annotator()
     annotator.initialize(spec)

     with_flag = MutationDataFactory.default_create()
     with_flag.chr = "1"
     with_flag.start = "12941796"
     with_flag.end = "12941796"
     with_flag.alt_allele = "G"
     with_flag.ref_allele = "T"
     with_flag.createAnnotation("alt_allele_seen", "False")

     without_flag = MutationDataFactory.default_create()
     without_flag.chr = "1"
     without_flag.start = "12941796"
     without_flag.end = "12941796"
     without_flag.alt_allele = "G"
     without_flag.ref_allele = "T"

     # enumerate(..., 1) leaves the running total in `seen`; it stays 0 when
     # nothing is produced.
     seen = 0
     for seen, _ in enumerate(annotator.annotate_mutations([with_flag, without_flag]), 1):
         pass
     return seen
    def test_effect_tx_mode(self):
        """Test that switching the transcript mode changes the variant classification.

        One chr2 G>T variant is annotated twice: under TX_MODE_BEST_EFFECT it
        is expected to be a Missense_Mutation, under TX_MODE_CANONICAL an
        Intron.
        """
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)

        def annotate_variant():
            # Build a fresh mutation per mode so earlier annotations cannot leak in.
            m = MutationDataFactory.default_create()
            m.chr = '2'
            m.start = '219137340'
            m.end = '219137340'
            m.ref_allele = 'G'
            m.alt_allele = 'T'
            return gafDatasource.annotate_mutation(m)

        # Canonical mutation was Intron
        gafDatasource.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)
        m = annotate_variant()
        # assertEqual shows the observed value on failure; assertTrue(a == b) does not.
        self.assertEqual(m['gene'], "PNKD")
        self.assertEqual(m['variant_classification'], "Missense_Mutation")

        gafDatasource.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
        m = annotate_variant()
        self.assertEqual(m['gene'], "PNKD")
        self.assertEqual(
            m['variant_classification'], "Intron",
            "Canonical no longer is Intron.  This test is no longer valid.  This failure can come up when changing the GAF datasource."
            "  Observed: %s" % m['variant_classification'])
    def test_simple_collapse(self):
        """Ensure simple rules for numeric collapsing are honored

        Two mutations carrying "a|b" style annotation values are passed through
        a ColumnCollapser and the resulting values are compared with the
        expected collapsed ones; 'donotcollapse' must come back untouched.
        """
        first = MutationDataFactory.default_create(chr="1", start="10000", end="10000")
        for name, value in (('ALT_F2R1', "34|36"),
                            ('i_t_Foxog', ".509|.511"),
                            ('i_tumor_f', ".200|.210"),
                            ('hamilcar', "0|0"),
                            ('donotcollapse', "1|45")):
            first.createAnnotation(name, value)

        second = MutationDataFactory.default_create(chr="1", start="10000", end="10000")
        for name, value in (('ALT_F2R1', "36|38"),
                            ('i_t_Foxog', ".500|.510"),
                            ('i_tumor_f', ".100|.110"),
                            ('hamilcar', "0.01|0"),
                            ('barca', "0.02|0"),
                            ('donotcollapse', "100|4500")):
            second.createAnnotation(name, value)

        collapser = ColumnCollapser()

        collapser.update_mutation(first)
        self.assertEqual(first['ALT_F2R1'], "34")
        # Fractional fields are compared as floats, not as strings.
        for name, expected in (('i_t_Foxog', ".510"),
                               ('i_tumor_f', ".205"),
                               ('hamilcar', "0")):
            self.assertEqual(float(first[name]), float(expected))
        self.assertEqual(first['donotcollapse'], "1|45")

        collapser.update_mutation(second)
        self.assertEqual(second['ALT_F2R1'], "36")
        for name, expected in (('i_t_Foxog', ".505"),
                               ('i_tumor_f', ".105"),
                               ('hamilcar', "0.005"),
                               ('barca', "0.01")):
            self.assertEqual(float(second[name]), float(expected))
        self.assertEqual(second['donotcollapse'], "100|4500")
Exemple #9
0
    def initializeMutFromAttributes(chr, start, end, ref_allele, alt_allele, build, mutation_data_factory=None):
        """Create a mutation from raw attributes and attach its preceding-bases annotation.

        chr, start, end and build are handed to the factory as strings; a
        default MutationDataFactory() is used when none is supplied.  For xNPs
        the preceding-bases annotation is the empty string.  For deletions and
        insertions the alleles and coordinates are replaced by the values
        returned from MutUtils.retrievePrecedingBasesForDeletions /
        retrievePrecedingBasesForInsertions, with "-" as the alt allele
        (deletion) or ref allele (insertion).  Each replaced value is written
        both as an attribute and through item assignment.

        Returns the created mutation.
        """
        if mutation_data_factory is None:
            mutation_data_factory = MutationDataFactory()
        mut = mutation_data_factory.create(str(chr), str(start), str(end), ref_allele, alt_allele, str(build))
        variant_type = TranscriptProviderUtils.infer_variant_type(mut.ref_allele, mut.alt_allele)

        if TranscriptProviderUtils.is_xnp(variant_type):  # Snps and other xNPs
            mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME, annotationValue="")

        # (field, value) pairs to write back for indels; stays None otherwise.
        indel_fields = None
        if variant_type == VariantClassification.VT_DEL:  # deletion
            bases, new_ref, new_start, new_end = MutUtils.retrievePrecedingBasesForDeletions(mut)
            indel_fields = (("ref_allele", new_ref), ("alt_allele", "-"),
                            ("start", new_start), ("end", new_end))
        elif variant_type == VariantClassification.VT_INS:  # insertion
            bases, new_alt, new_start, new_end = MutUtils.retrievePrecedingBasesForInsertions(mut)
            indel_fields = (("ref_allele", "-"), ("alt_allele", new_alt),
                            ("start", new_start), ("end", new_end))

        if indel_fields is not None:
            for field, value in indel_fields:
                # Update the attribute first, then the item of the same name.
                setattr(mut, field, value)
                mut[field] = value
            mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                                 annotationValue=bases)

        return mut
Exemple #10
0
    def test_small_positive_strand_transcript_change(self):
        """Test one location on a transcript and make sure that the transcript change rendered properly

        Two SNVs are annotated with the GENCODE v19 test datasource and the
        resulting 'transcript_change' annotation is compared with the expected
        cDNA notation.
        """
        ds = TestUtils._create_test_gencode_v19_ds(
            "out/small_positive_strand_")

        # Negative strand: genomic T>G is expected to be reported as A>C.
        m = MutationDataFactory.default_create()
        m.chr = "22"
        m.start = "22221730"
        m.end = "22221730"
        m.ref_allele = "T"
        m.alt_allele = "G"
        m2 = ds.annotate_mutation(m)
        # %-formatting keeps the message buildable even when the annotation is
        # not a string, so the real mismatch is what gets reported.
        self.assertEqual(
            m2['transcript_change'], "c.1A>C",
            "Incorrect transcript change: %s" % m2['transcript_change'])

        # positive strand
        m = MutationDataFactory.default_create()
        m.chr = "3"
        m.start = "178916614"
        m.end = "178916614"
        m.ref_allele = "G"
        m.alt_allele = "T"
        m2 = ds.annotate_mutation(m)
        self.assertEqual(
            m2['transcript_change'], "c.1G>T",
            "Incorrect transcript change: %s" % m2['transcript_change'])
Exemple #11
0
    def test_denovo(self):
        """GAF de novo test

        Annotates three insertions starting at chr22:22221735 and checks the
        de novo start classification reported for each.
        """
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)

        # (end position, inserted sequence, expected variant classification)
        cases = (
            (22221737, 'CAT', 'De_novo_Start_OutOfFrame'),
            (22221740, 'AACATAA', 'De_novo_Start_OutOfFrame'),
            (22221739, 'ACATAA', 'De_novo_Start_InFrame'),
        )
        for end, alt_allele, expected_vc in cases:
            m = MutationDataFactory.default_create()
            m.start = str(22221735)
            m.end = str(end)
            m.chr = "22"
            m.ref_allele = ''
            m.alt_allele = alt_allele
            m = gafDatasource.annotate_mutation(m)
            # Name the failing case and the observed value in the message.
            self.assertEqual(
                m['variant_classification'], expected_vc,
                "alt_allele %s: expected %s but got %s" %
                (alt_allele, expected_vc, m['variant_classification']))
    def testRealWorld(self):
        """Test that the full COSMIC datasource can retrieve entries by both gp and gpp.

        Each mutation is annotated by the transcript datasource first and by
        the COSMIC datasource second.
        """
        gafDS = TestUtils.createTranscriptProviderDatasource(self.config)
        cosmicDS = TestUtils.createCosmicDatasource(self.config)

        # These values are not taken from a real world scenario, but are cooked for this test.

        m = MutationDataFactory.default_create()
        m.chr = '1'
        m.start = '12941796'
        m.end = '12941796'
        m.ref_allele = "G"
        m.alt_allele = "T"
        m = gafDS.annotate_mutation(m)
        m = cosmicDS.annotate_mutation(m)

        # assertEqual reports the observed count if the expectation is not met.
        self.assertEqual(m['COSMIC_n_overlapping_mutations'], '0')

        #1	150483621	150483621
        m = MutationDataFactory.default_create()
        m.chr = '1'
        m.start = '150483621'
        m.end = '150483621'
        m.ref_allele = "G"
        m.alt_allele = "T"
        m = gafDS.annotate_mutation(m)
        # NOTE(review): nothing is asserted about this second mutation; the
        # call below only verifies that annotation completes without raising.
        # Add the expected COSMIC values once they are known.
        m = cosmicDS.annotate_mutation(m)
    def test_continuous_exons_in_segments(self):
        """Test that all exons are accounted for when annotating adjacent segments that skip an exon.

        Looks up the exon affected by the start of the first segment and the
        exon affected by the end of the second segment, both on the same
        chosen transcript.
        """
        # Segment records this test was derived from
        # (hg19, sample CESC.TCGA.BI.A0VR.Tumor.SM.1RACM):
        #   chr22  24734447 (SPECC1L 10+)  ->  41783674 (TEF 1-)
        #   chr22  16282318 (POTEH 2-)     ->  24730543 (SPECC1L 8-)
        seg1 = MutationDataFactory.default_create()
        seg1.chr = "22"
        seg1.start = "24734447"  # Just past exon 9 (0-based)
        seg1.end = "41783674"

        seg2 = MutationDataFactory.default_create()
        seg2.chr = "22"
        seg2.start = "16282318"
        seg2.end = "24730543"  # Just past exon 8 (0-based)

        # 'ENST00000314328.9' for GENCODE v19
        chosen_tx, transcript_ds = self._get_chosen_tx_and_transcript_ds(seg1.chr, seg1.start)

        # assertEqual prints the mismatching tuples; assertTrue(a == b) would
        # only say "False is not true".
        affected_by_start = transcript_ds._determine_exons_affected_by_start(seg1.start, chosen_tx)
        self.assertEqual(affected_by_start, (10, '+'))

        affected_by_end = transcript_ds._determine_exons_affected_by_end(seg2.end, chosen_tx)
        self.assertEqual(affected_by_end, (8, '-'))
    def test_mutation_combiner_ordering(self):
        """Test that combined annotation values keep the order of the input mutations"""
        first = MutationDataFactory.default_create(
            chr=1, start=100, end=100, ref_allele="G", alt_allele="A")
        first.createAnnotation("SomeDepth", "2")
        first.createAnnotation("AnotherDepth", "1")

        second = MutationDataFactory.default_create(
            chr=1, start=101, end=101, ref_allele="C", alt_allele="T")
        second.createAnnotation("SomeDepth", "1")
        second.createAnnotation("AnotherDepth", "2")

        combined = OnpQueue._combine_mutations([first, second],
                                               MutationDataFactory())

        # Values are joined with "|" in input order: first mutation, then second.
        expected = MutationDataFactory.default_create(
            chr=1, start=100, end=101, ref_allele="GC", alt_allele="AT")
        expected.createAnnotation("SomeDepth", "2|1")
        expected.createAnnotation("AnotherDepth", "1|2")

        self.assertTrue(combined.attributesEqual(expected))
        self.assertEqual(combined, expected)
    def test_mutation_combiner_identical_annotation(self):
        """Test that annotations with all identical values are not repeated with | between them"""
        sample = "John Doe"

        first = MutationDataFactory.default_create(
            chr=1, start=100, end=100, ref_allele="G", alt_allele="A")
        first.createAnnotation("SampleName", sample)

        second = MutationDataFactory.default_create(
            chr=1, start=101, end=101, ref_allele="C", alt_allele="T")
        second.createAnnotation("SampleName", sample)

        combined = OnpQueue._combine_mutations([first, second],
                                               MutationDataFactory())

        # The shared value is expected once, not as "John Doe|John Doe".
        expected = MutationDataFactory.default_create(
            chr=1, start=100, end=101, ref_allele="GC", alt_allele="AT")
        expected.createAnnotation("SampleName", sample)

        self.assertTrue(combined.attributesEqual(expected))
        self.assertEqual(combined, expected)
    def test_mutation_combiner(self):
        """Test that attributes and annotations are set properly with combine mutations"""
        first = MutationDataFactory.default_create(
            chr=1, start=100, end=100, ref_allele="G", alt_allele="A")
        first.createAnnotation("SomeValue", "value1", "INPUT", "STRING", "a value")

        second = MutationDataFactory.default_create(
            chr=1, start=101, end=101, ref_allele="C", alt_allele="T")
        second.createAnnotation("SomeValue", "value2", tags=["IT"])
        second.createAnnotation("AnotherValue", "5")

        combined = OnpQueue._combine_mutations([first, second],
                                               MutationDataFactory())

        # Expected: merged span and alleles, "|"-joined SomeValue carrying the
        # extra positional arguments of the first input and the tags of the
        # second, and AnotherValue carried over unchanged.
        expected = MutationDataFactory.default_create(
            chr=1, start=100, end=101, ref_allele="GC", alt_allele="AT")
        expected.createAnnotation(
            "SomeValue", "value1|value2", "INPUT", "STRING", "a value",
            tags=["IT"])
        expected.createAnnotation("AnotherValue", "5")

        self.assertTrue(combined.attributesEqual(expected))
        self.assertEqual(combined, expected)
    def test_denovo(self):
        """GAF de novo test

        Three insertions that start at chr22:22221735 are annotated and the
        de novo start classification of each is checked.
        """
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)

        # (end position, inserted sequence, expected variant classification)
        cases = (
            (22221737, 'CAT', 'De_novo_Start_OutOfFrame'),
            (22221740, 'AACATAA', 'De_novo_Start_OutOfFrame'),
            (22221739, 'ACATAA', 'De_novo_Start_InFrame'),
        )
        for end, alt_allele, expected_vc in cases:
            m = MutationDataFactory.default_create()
            m.start = str(22221735)
            m.end = str(end)
            m.chr = "22"
            m.ref_allele = ''
            m.alt_allele = alt_allele
            m = gafDatasource.annotate_mutation(m)
            # assertEqual with an explicit message pinpoints which insertion
            # failed and what was observed instead.
            self.assertEqual(
                m['variant_classification'], expected_vc,
                "alt_allele %s: expected %s but got %s" %
                (alt_allele, expected_vc, m['variant_classification']))
Exemple #18
0
    def test_annotation_copy_collision(self):
        """Test that annotation copy will use the behavior of the mutation in case of collision due to suffix

        Copying the old value of 'ALT_F2R1' to 'ALT_F2R1_full' collides with an
        existing annotation.  A default mutation must raise
        DuplicateAnnotationException; a mutation created with
        allow_overwriting=True must have the existing value replaced.
        """
        m1 = MutationDataFactory.default_create(chr="1",
                                                start="10000",
                                                end="10000")
        m1.createAnnotation('ALT_F2R1', "30|36", annotationSource="TEST")
        m1.createAnnotation('ALT_F2R1_full',
                            "going_to_be_overwritten",
                            annotationSource="TEST")

        cc = ColumnCollapser()
        # assertRaises replaces the manual try/except flag and fails with the
        # exception name when nothing is raised.
        self.assertRaises(DuplicateAnnotationException,
                          cc.update_mutation, m1, copy_old_suffix="_full")

        m1 = MutationDataFactory.default_create(chr="1",
                                                start="10000",
                                                end="10000",
                                                allow_overwriting=True)
        m1.createAnnotation('ALT_F2R1', "30|36", annotationSource="TEST")
        m1.createAnnotation('ALT_F2R1_full',
                            "going_to_be_overwritten",
                            annotationSource="TEST")
        cc = ColumnCollapser()
        cc.update_mutation(m1, copy_old_suffix="_full")
        self.assertEqual(m1['ALT_F2R1_full'], "30|36")
        self.assertEqual(m1['ALT_F2R1'], "30")
Exemple #19
0
    def testRetrievePrecedingBasesForInsertions(self):
        """Test MutUtils.retrievePrecedingBasesForInsertions on one- and two-base insertions.

        Both cases share the reference allele "GTC" and are expected to yield
        the same updated start/end; only the trimmed alt allele differs.
        """
        chrom = "1"
        start = 1234567
        end = 1234567  # incorrect, but doesn't matter for the purposes of testing
        ref_allele = "GTC"
        build = "19"

        # (input alt allele, expected alt allele after trimming)
        for alt_allele, expected_alt in (("GTCT", "T"), ("GTCTT", "TT")):
            mut = MutationDataFactory.default_create(chrom, start, end,
                                                     ref_allele, alt_allele,
                                                     build)
            preceding_bases, updated_alt_allele, updated_start, updated_end = \
                MutUtils.retrievePrecedingBasesForInsertions(mut)
            mut.ref_allele = "-"
            mut.alt_allele = updated_alt_allele
            mut.start = updated_start
            mut.end = updated_end
            mut.createAnnotation(
                annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                annotationValue=preceding_bases)

            self.assertTrue("_preceding_bases" in mut,
                            "_preceding_bases is missing in the mutation data.")
            # The messages quote the values actually asserted; they previously
            # named 1234570 / 1234571, contradicting the checks themselves.
            self.assertEqual(mut.start, 1234569,
                             "Mut start should be 1234569 but was %s." % mut.start)
            self.assertEqual(mut.end, 1234570,
                             "Mut end should be 1234570 but was %s." % mut.end)
            self.assertEqual(mut.ref_allele, "-",
                             "Ref allele should be - but was %s." % mut.ref_allele)
            self.assertEqual(mut.alt_allele, expected_alt,
                             "Alt allele should be %s but was %s." %
                             (expected_alt, mut.alt_allele))
    def test_annotation_overwriting_on(self):
        """Test that the factory can produce a mutation that allows overwriting.  Just need to make sure no exception thrown."""
        mdf = MutationDataFactory(allow_overwriting=True)
        mut = mdf.create()

        mut.createAnnotation("blah", "123")
        # assertEqual reports the stored value if it differs.
        self.assertEqual(mut['blah'], "123")

        # Creating the same annotation again must replace the value, not raise.
        mut.createAnnotation("blah", "456")
        self.assertEqual(mut['blah'], "456")
    def test_annotation_overwriting_on(self):
        """Test that the factory can produce a mutation that allows overwriting.  Just need to make sure no exception thrown."""
        overwriting_factory = MutationDataFactory(allow_overwriting=True)
        mut = overwriting_factory.create()

        # Write the same annotation twice; the later value must win each time.
        # assertEqual is used so a mismatch shows what was actually stored.
        for value in ("123", "456"):
            mut.createAnnotation("blah", value)
            self.assertEqual(mut['blah'], value)
Exemple #22
0
    def test_mutation_combiner(self):
        """Test that attributes and annotations are set properly with combine mutations"""
        upstream = MutationDataFactory.default_create(
            chr=1, start=100, end=100, ref_allele="G", alt_allele="A")
        upstream.createAnnotation("SomeValue", "value1", "INPUT", "STRING", "a value")

        downstream = MutationDataFactory.default_create(
            chr=1, start=101, end=101, ref_allele="C", alt_allele="T")
        downstream.createAnnotation("SomeValue", "value2", tags=["IT"])
        downstream.createAnnotation("AnotherValue", "5")

        factory = MutationDataFactory()
        merged = OnpQueue._combine_mutations([upstream, downstream], factory)

        # The expected result spans both positions, concatenates the alleles
        # and joins the differing SomeValue entries with "|".
        expected = MutationDataFactory.default_create(
            chr=1, start=100, end=101, ref_allele="GC", alt_allele="AT")
        expected.createAnnotation(
            "SomeValue", "value1|value2", "INPUT", "STRING", "a value", tags=["IT"])
        expected.createAnnotation("AnotherValue", "5")

        self.assertTrue(merged.attributesEqual(expected))
        self.assertEqual(merged, expected)
Exemple #23
0
    def testOverwriteAnnotationsSupported(self):
        """Test that mutations support overwrite annotation in the VCFInputMutationCreator. (white box testing)

        Mutations produced with a default factory must have _new_required set;
        those produced with an allow_overwriting=True factory must not.
        """
        vcf_path = os.path.join("testdata", "vcf", "example.trailing_whitespace_in_alleles.vcf")

        strict_creator = VcfInputMutationCreator(vcf_path, MutationDataFactory())
        lenient_creator = VcfInputMutationCreator(vcf_path, MutationDataFactory(allow_overwriting=True))

        # Pair each creator with the assertion its mutations must satisfy.
        checks = ((strict_creator, self.assertTrue),
                  (lenient_creator, self.assertFalse))
        for creator, check in checks:
            for mut in creator.createMutations():
                check(mut._new_required)
Exemple #24
0
    def test_mutation_combiner_identical_annotation(self):
        """Test that annotations with all identical values are not repeated with | between them"""
        upstream = MutationDataFactory.default_create(
            chr=1, start=100, end=100, ref_allele="G", alt_allele="A")
        upstream.createAnnotation("SampleName", "John Doe")

        downstream = MutationDataFactory.default_create(
            chr=1, start=101, end=101, ref_allele="C", alt_allele="T")
        downstream.createAnnotation("SampleName", "John Doe")

        factory = MutationDataFactory()
        merged = OnpQueue._combine_mutations([upstream, downstream], factory)

        # Both inputs carry the same SampleName, so it must appear only once.
        expected = MutationDataFactory.default_create(
            chr=1, start=100, end=101, ref_allele="GC", alt_allele="AT")
        expected.createAnnotation("SampleName", "John Doe")

        self.assertTrue(merged.attributesEqual(expected))
        self.assertEqual(merged, expected)
    def test_annotation_overwriting_off(self):
        """Test that the factory can produce a mutation that does not allow overwriting.  Make sure DuplicateAnnotationException is thrown."""
        mdf = MutationDataFactory(allow_overwriting=False)
        mut = mdf.create()

        mut.createAnnotation("blah", "123")
        self.assertEqual(mut['blah'], "123")

        # assertRaises replaces the manual try/except flag; it fails naming
        # the expected exception if the second createAnnotation does not raise.
        self.assertRaises(DuplicateAnnotationException,
                          mut.createAnnotation, "blah", "456")
Exemple #26
0
    def initializeMutFromAttributes(chr,
                                    start,
                                    end,
                                    ref_allele,
                                    alt_allele,
                                    build,
                                    mutation_data_factory=None):
        """Build a mutation from raw attributes and record its preceding bases.

        The factory (a new MutationDataFactory() when none is given) receives
        chr, start, end and build converted to strings.  xNPs get an empty
        preceding-bases annotation.  Deletions and insertions take their
        alleles, coordinates and preceding bases from
        MutUtils.retrievePrecedingBasesForDeletions /
        retrievePrecedingBasesForInsertions, using "-" for the alt allele of a
        deletion and for the ref allele of an insertion.  Every replaced value
        is stored both as an attribute and via item assignment.

        Returns the created mutation.
        """
        if mutation_data_factory is None:
            mutation_data_factory = MutationDataFactory()
        mut = mutation_data_factory.create(str(chr), str(start), str(end),
                                           ref_allele, alt_allele, str(build))
        variant_type = TranscriptProviderUtils.infer_variant_type(
            mut.ref_allele, mut.alt_allele)

        def _store(field, value):
            # Keep the attribute and the same-named item in step.
            setattr(mut, field, value)
            mut[field] = value

        if TranscriptProviderUtils.is_xnp(variant_type):  # Snps and other xNPs
            mut.createAnnotation(
                annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                annotationValue="")

        if variant_type == VariantClassification.VT_DEL:  # deletion
            bases, new_ref, new_start, new_end = \
                MutUtils.retrievePrecedingBasesForDeletions(mut)
            _store("ref_allele", new_ref)
            _store("alt_allele", "-")
            _store("start", new_start)
            _store("end", new_end)
            mut.createAnnotation(
                annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                annotationValue=bases)
        elif variant_type == VariantClassification.VT_INS:  # insertion
            bases, new_alt, new_start, new_end = \
                MutUtils.retrievePrecedingBasesForInsertions(mut)
            _store("ref_allele", "-")
            _store("alt_allele", new_alt)
            _store("start", new_start)
            _store("end", new_end)
            mut.createAnnotation(
                annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                annotationValue=bases)

        return mut
Exemple #27
0
    def testFlank(self):
        """Annotate a window of SNPs on chr1 and print each variant classification.

        One SNP is created per base of ``refs``, starting at ``startWindow``,
        and annotated with the transcript provider datasource.

        NOTE(review): nothing is asserted on the collected classifications, so
        this only verifies that annotation does not raise -- confirm which
        classification was meant to be checked.
        """
        # NOTE(review): an earlier comment here gave chr1:28,233,780-28,233,805
        # (junction at 28,233,793 & 94), which does not match startWindow
        # below -- confirm the intended region.
        refs = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alts = "TGGGCTCAGGCTCTCTGAAAAGAAAA"
        vcs = []
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
        startWindow = 11042200
        for offset, (ref_base, alt_base) in enumerate(zip(refs, alts)):
            position = str(startWindow + offset)
            m = MutationDataFactory.default_create()
            m.start = position
            m.end = position
            m.chr = "1"
            m.ref_allele = ref_base
            m.alt_allele = alt_base

            m = gafDatasource.annotate_mutation(m)

            vc = m['variant_classification']
            vcs.append(vc)

            # Parenthesized form prints the same text under Python 2 and 3.
            print(vc + "  " + m.start)
Exemple #28
0
    def test_canonical_tx_list(self):
        """Test that a custom canonical transcript list changes the selected transcript.

        With ENST00000544786 in the custom list the SNP is annotated on that
        transcript as an intron variant; with an empty list it becomes a
        missense call on a different transcript.
        """
        datasource = TestUtils._create_test_gencode_v19_ds(
            "out/test_canonical_tx_list_")
        custom_tx = "ENST00000544786"

        snp = MutationDataFactory.default_create()
        snp.chr = "22"
        snp.start = "22142650"
        snp.end = "22142650"
        snp.ref_allele = "T"
        snp.alt_allele = "A"
        datasource.set_custom_canonical_txs([custom_tx])
        datasource.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)

        # NOTE: tx list overrides best effect
        annotated = datasource.annotate_mutation(snp)
        on_custom_tx = annotated['annotation_transcript'].startswith(custom_tx)
        self.assertTrue(on_custom_tx)
        is_intron = (annotated['variant_classification'] ==
                     VariantClassification.INTRON)
        self.assertTrue(is_intron)

        # Clearing the custom list lets the transcript mode decide again.
        datasource.set_custom_canonical_txs([])
        annotated = datasource.annotate_mutation(snp)
        is_missense = (annotated['variant_classification'] ==
                       VariantClassification.MISSENSE)
        self.assertTrue(is_missense)
        on_custom_tx = annotated['annotation_transcript'].startswith(custom_tx)
        self.assertFalse(on_custom_tx)
Exemple #29
0
    def testSpliceSiteWithinNBases(self):
        """Test that SNPs next to a splice junction are Splice_Site and never Silent.

        One SNP is annotated per position of chr21:10,998,326-10,998,346.  No
        position may be classified as Silent.  The positions at window indices
        8-11 (the two bases on either side of the junction) must be
        Splice_Site; indices 0-7 and 12-19 must not be.
        """
        # chr21:10,998,326-10,998,346
        # 10,998,336 is a splice site.  (Junction between 10998335 and 336)
        # AGTTCTCCTT C TGGAAAAAAG
        refs = 'AGTTCTCCTTCTGGAAAAAAG'
        alts = 'TCAGACTGAAAATACCCCCCT'
        window_start = 10998326
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
        vcs = []
        for offset, (ref_base, alt_base) in enumerate(zip(refs, alts)):
            position = str(window_start + offset)
            m = MutationDataFactory.default_create()
            m.start = position
            m.end = position
            m.chr = "21"
            m.ref_allele = ref_base
            m.alt_allele = alt_base

            m = gafDatasource.annotate_mutation(m)

            vc = m['variant_classification']
            self.assertTrue(vc != 'Silent', 'Silent mutation found when it should be a splice site.')

            vcs.append(vc)
            # Parenthesized form prints the same text under Python 2 and 3.
            print(vc + "  " + m.start)

        self.assertTrue(all(tmp == "Splice_Site" for tmp in vcs[8:12]), "Not all vcs within 2 bases were splice site: " + str(vcs[8:12]))
        self.assertTrue(all(tmp != "Splice_Site" for tmp in vcs[0:8]), "No splice sites should be seen: " + str(vcs[0:8]))
        # NOTE(review): the window has 21 positions, so index 20 is not covered
        # by this slice -- confirm whether that is intentional.
        self.assertTrue(all(tmp != "Splice_Site" for tmp in vcs[12:20]), "No splice sites should be seen: " + str(vcs[12:20]))
Exemple #30
0
    def testSilentMutationGoingToSpliceSite(self):
        """Test the Splice_Site and Silent counts for SNPs around a chr1 splice junction.

        One SNP is annotated per base of ``refs``, starting at
        chr1:28,233,780 (junction at chr1:28,233,793 & 94).  Exactly 4 of the
        calls must be splice sites and exactly 11 must be Silent.
        """
        refs = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alts = "TGGGCTCAGGCTCGCTGAAAAGAAAA"
        vcs = []
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
        numSpliceSites = 0
        numSilent = 0
        startWindow = 28233780
        # The window length follows refs (26 bases), so it ends at 28,233,805.
        for offset, (ref_base, alt_base) in enumerate(zip(refs, alts)):
            position = str(startWindow + offset)
            m = MutationDataFactory.default_create()
            m.start = position
            m.end = position
            m.chr = "1"
            m.ref_allele = ref_base
            m.alt_allele = alt_base

            m = gafDatasource.annotate_mutation(m)

            distanceFromSpliceSite = abs(28233793 - int(m.start))
            vc = m['variant_classification']
            vcs.append(vc)

            # Silent calls are expected for part of the window (counted and
            # asserted below), so there is no per-position assertion here.
            vc_lower = vc.lower()
            if vc_lower == "splice_site":
                numSpliceSites += 1
            if vc_lower == "silent":
                numSilent += 1
            # Parenthesized form prints the same text under Python 2 and 3.
            print(vc + "  " + m.start + "  " + str(distanceFromSpliceSite))

        self.assertTrue(numSpliceSites == 4, "Should have seen 4 splice site mutations, but saw: " + str(numSpliceSites))
        self.assertTrue(numSilent == 11, "Should have seen 11 Silent mutations, but saw: " + str(numSilent))
 def __init__(self, sourceFilename, mutation_data_factory, configFile="", genomeBuild="hg19", other_options=None):
     """
     Store the mutation data factory, falling back to a default one.

     When mutation_data_factory is None, this is logged at info level and a
     MutationDataFactory() with default settings is used instead.  The
     remaining parameters are not used in the lines shown here.
     """
     if mutation_data_factory is None:
         logging.getLogger(__name__).info("No mutation data factory provided, using default settings.")
         mutation_data_factory = MutationDataFactory()
     self._mutation_data_factory = mutation_data_factory
Exemple #32
0
    def testAnnotateListOfMutations(self):
        """Test that we can initialize an Annotator, without an input or output and then feed mutations,
        one at a time... using a runspec"""

        # Locate the datasource directory and create a runspec
        dbDir = self.config.get("DEFAULT", "dbDir")
        ds = DatasourceFactory.createDatasources(dbDir)
        runSpec = RunSpecification()
        runSpec.initialize(None, None, datasources=ds)

        # Initialize the annotator with the runspec
        annotator = Annotator()
        annotator.initialize(runSpec)

        m = MutationDataFactory.default_create()
        m.chr = "1"
        m.start = "12941796"
        m.end = "12941796"
        m.alt_allele = "G"
        m.ref_allele = "T"

        annotated_muts = annotator.annotate_mutations([m])
        # The builtin next() works on both Python 2.6+ and Python 3 iterators,
        # unlike the Python-2-only .next() method.
        m2 = next(annotated_muts)
        self.assertIsNotNone(m2.get("gene", None))
    def testESPCoverageAnnotationWithSNPAvgMatch(self):
        """Annotate a single-base position on chrX with the small ESP coverage
        datasource and compare three of the resulting annotations.
        """
        self.logger.info("Initializing ESP6500SI-V2 Coverage")
        ds_dir = os.path.join("testdata", "small_esp_coverage_avg_ds", "hg19")
        ds_config = os.path.join(ds_dir, "small_esp_coverage_avg_ds.config")
        datasource = DatasourceFactory.createDatasource(ds_config, ds_dir)

        snp = MutationDataFactory.default_create()
        snp.chr = "X"
        snp.start = "100075334"
        snp.end = "100075334"

        annotated = datasource.annotate_mutation(snp)

        # (annotation name, expected value, expected data type)
        expectations = [
            ("ESP_AvgAAsampleReadDepth", "75.0", "Float"),
            ("ESP_TotalAAsamplesCovered", "692.0", "Float"),
            ("ESP_Chromosome", "X", "String"),
        ]
        for name, value, data_type in expectations:
            actual = annotated.getAnnotation(name)
            expected = Annotation(value=value, datasourceName="ESP", dataType=data_type,
                                  description="", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT], number=None)
            self.assertTrue(actual.isEqual(expected), "Annotations do not match.")
    def testdbNSFPNoRefAltAnnotationWithExactMatch(self):
        """Annotate chr1:35140 (no ref/alt alleles set) with the small dbNSFP
        datasource and compare three of the resulting annotations.
        """
        self.logger.info("Initializing dbNSFP")
        ds_name = "dbNSFP_chr1_chr3_100vars_exact_no_ref_alt_ds"
        ds_dir = os.path.join("testdata", ds_name, "hg19")
        ds_config = os.path.join(ds_dir, ds_name + ".config")
        datasource = DatasourceFactory.createDatasource(ds_config, ds_dir)

        position = MutationDataFactory.default_create()
        position.chr = "1"
        position.start = "35140"
        position.end = "35140"

        annotated = datasource.annotate_mutation(position)

        # (annotation name, expected value); all are String annotations.
        expectations = [
            ("dbNSFP_codonpos", "1|1|1"),
            ("dbNSFP_refcodon", "TAA|TAA|TAA"),
            ("dbNSFP_cds_strand", "-|-|-"),
        ]
        for name, value in expectations:
            actual = annotated.getAnnotation(name)
            expected = Annotation(value=value, datasourceName="dbNSFP", dataType="String",
                                  description="", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT], number=None)
            self.assertTrue(actual.isEqual(expected), "Annotations do not match.")
Exemple #35
0
    def testExtentOutOfRangeError(self):
        """Test that a window extending past the start of the reference is truncated.

        The truncated ref_context is checked exactly, and gc_content is
        checked against 5/9, i.e. it is computed from what is left as well.
        """
        datasource = ReferenceDatasource('testdata/reference_ds',
                                         windowSizeRef=6,
                                         windowSizeGCContent=5)
        mutation = MutationDataFactory.default_create()
        mutation.chr = "22"
        mutation.start = "4"
        mutation.end = "4"

        # "CCCAAGCTAAACCCAGGCCAC"
        expected_context = "CCCAAGCTAA"

        annotated = datasource.annotate_mutation(mutation)

        context_msg = ("ref_context was not populated properly: " +
                       str(annotated['ref_context']))
        self.assertTrue(annotated['ref_context'] == expected_context,
                        context_msg)

        # gc_content is rounded to 3 decimal places
        expected_gc = float(5) / float(9)
        gc_error = fabs(float(annotated['gc_content']) - expected_gc)
        gc_msg = ("gc_content was not populated properly: " +
                  str(annotated['gc_content']))
        self.assertTrue(gc_error < .001, gc_msg)
    def testESPCoverageAnnotationWithMissingAnnotationValuesIndelAvgMatch(
            self):
        """Annotate the range chrX:100075350-100075356 with the small ESP
        coverage datasource and check the ESP_AvgSampleReadDepth annotation.
        """
        self.logger.info("Initializing ESP6500SI-V2 Coverage")
        ds_dir = os.path.join("testdata", "small_esp_coverage_avg_ds", "hg19")
        ds_config = os.path.join(ds_dir, "small_esp_coverage_avg_ds.config")
        datasource = DatasourceFactory.createDatasource(ds_config, ds_dir)

        region = MutationDataFactory.default_create()
        region.chr = "X"
        region.start = "100075350"
        region.end = "100075356"

        annotated = datasource.annotate_mutation(region)
        actual = annotated.getAnnotation("ESP_AvgSampleReadDepth")
        expected_tags = [TagConstants.INFO, TagConstants.NOT_SPLIT]
        expected = Annotation(value="91.25",
                              datasourceName="ESP",
                              dataType="Float",
                              description="",
                              tags=expected_tags,
                              number=None)
        self.assertTrue(actual.isEqual(expected), "Annotations do not match.")
    def testFlank(self):
        """Annotate a window of SNPs on chr1 and print each variant classification.

        A SNP is built for every base of ``refs`` (positions counted up from
        ``startWindow``) and run through the transcript provider datasource.

        NOTE(review): the collected classifications are never asserted on, so
        the test only shows that annotation completes -- confirm what was
        meant to be checked.
        """
        # NOTE(review): the previous comment cited chr1:28,233,780-28,233,805
        # (junction at 28,233,793 & 94); that region does not match
        # startWindow below -- confirm which one is intended.
        refs = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alts = "TGGGCTCAGGCTCTCTGAAAAGAAAA"
        vcs = []
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)
        startWindow = 11042200
        for offset, (ref_base, alt_base) in enumerate(zip(refs, alts)):
            position = str(startWindow + offset)
            m = MutationDataFactory.default_create()
            m.start = position
            m.end = position
            m.chr = "1"
            m.ref_allele = ref_base
            m.alt_allele = alt_base

            m = gafDatasource.annotate_mutation(m)

            vc = m['variant_classification']
            vcs.append(vc)

            # Parenthesized form prints the same text under Python 2 and 3.
            print(vc + "  " + m.start)
 def test_appris_selects_transcript(self):
     """Annotate a two-base deletion on chr2 and check that transcript ENST00000321356.4 is chosen."""
     deletion = MutationDataFactory.default_create(chr="2", start="201722365", end="201722366", ref_allele="AC", alt_allele="-", build="hg19")
     datasource = TestUtils.createTranscriptProviderDatasource(self.config)
     annotated = datasource.annotate_mutation(deletion)
     chosen_tx_id = annotated['annotation_transcript']
     tx = datasource.get_transcript(chosen_tx_id)
     # Fail with a readable message before dereferencing the transcript.
     self.assertTrue(tx is not None, "Transcript was None when it should have been found.  Does the ground truth transcript above need to be updated?")
     self.assertEqual(tx._transcript_id,'ENST00000321356.4')
Exemple #39
0
    def testAnnotateListOfMutations(self):
        """Test that we can initialize an Annotator, without an input or output and then feed mutations,
        one at a time... using a runspec"""

        # Locate the datasource directory and create a runspec
        dbDir = self.config.get("DEFAULT", "dbDir")
        ds = DatasourceFactory.createDatasources(dbDir)
        runSpec = RunSpecification()
        runSpec.initialize(None, None, datasources=ds)

        # Initialize the annotator with the runspec
        annotator = Annotator()
        annotator.initialize(runSpec)

        m = MutationDataFactory.default_create()
        m.chr = "1"
        m.start = "12941796"
        m.end = "12941796"
        m.alt_allele = "G"
        m.ref_allele = "T"

        muts = [m]

        muts = annotator.annotate_mutations(muts)
        # Use the next() builtin rather than the Python-2-only .next() method
        # so the same line works on Python 2.6+ and Python 3.
        m2 = next(muts)
        self.assertIsNotNone(m2.get("gene", None))
 def test_not_updating_annotation_source(self):
     """Test that collapsing columns without a new annotation source keeps the original source."""
     mutation = MutationDataFactory.default_create(chr="1", start="10000", end="10000")
     mutation.createAnnotation('ALT_F2R1', "|36", annotationSource="TEST")
     collapser = ColumnCollapser()
     collapser.update_mutation(mutation)
     source_after = mutation.getAnnotation("ALT_F2R1").getDatasource()
     self.assertEqual(source_after, "TEST")
Exemple #41
0
    def testAnnotationRoundTripEmpty(self):
        """Annotate a VCF with a non-existent datasource directory, write it
        back out as VCF, and check that the output can be read again and
        yields at least one mutation.
        """
        input_vcf = os.path.join("testdata", "m2_support",
                                 "NA12878.ob_filtered.vcf")
        output_vcf = os.path.join("out", "test_round_trip_empty_annotated.vcf")

        options = {OptionConstants.COLLAPSE_NUMBER_ANNOTATIONS: True}

        spec = RunSpecificationFactory.create_run_spec(
            "VCF",
            "VCF",
            input_vcf,
            output_vcf,
            datasource_dir="THIS_DIR_DOES_NOT_EXIST__",
            genomeBuild="hg19",
            other_opts=options)
        annotator = Annotator()
        annotator.initialize(spec)
        written_vcf = annotator.annotate()

        # Read the freshly written file back in with the same options.
        factory = MutationDataFactory(allow_overwriting=True)
        reader = VcfInputMutationCreator(written_vcf,
                                         factory,
                                         other_options=options)
        round_tripped = list(reader.createMutations())
        self.assertTrue(len(round_tripped) > 0)
    def testMixedAnnotation(self):
        """Test that the COSMIC datasource finds entries by genomic position
        and by gene/protein position for the same mutation."""
        tabix_dir = "testdata/small_cosmic_with_gp_and_gpp/"
        gp_file = tabix_dir + "small_cosmic_trimmed_for_sorting.txt.tbi.gz"
        gpp_file = (tabix_dir +
                    "small_cosmic_trimmed_for_sorting.txt.tbi.byAA.sorted.tsv.gz")
        cosmic = Cosmic(src_file=gp_file,
                        title="Cosmic",
                        version="test",
                        gpp_tabix_file=gpp_file)

        # These values are not taken from a real world scenario, but are cooked for this test.
        # Line 9 should get picked up genomic coords
        # Lines 7,8 should get picked up by the protein position
        mutation = MutationDataFactory.default_create()
        mutation.createAnnotation("gene", "A2M")
        mutation.createAnnotation("transcript_protein_position_start", "1300")
        mutation.createAnnotation("transcript_protein_position_end", "1400")
        mutation.chr = '12'
        mutation.start = '9227220'
        mutation.end = '9227230'
        mutation = cosmic.annotate_mutation(mutation)

        self.assertTrue(mutation['COSMIC_n_overlapping_mutations'] == '3')

        aa_hit = mutation['COSMIC_overlapping_mutation_AAs'].find('1229')
        self.assertTrue(
            aa_hit != -1,
            "Could not find the entry specified by genomic coords.")

        sites_msg = (
            "Did not have the correct primary sites annotation (lung(3)): " +
            mutation['COSMIC_overlapping_primary_sites'])
        self.assertTrue(
            mutation['COSMIC_overlapping_primary_sites'] == "lung(3)",
            sites_msg)
Exemple #43
0
 def testAddTag(self):
     """Test that a tag added to an annotation shows up in that annotation's tags."""
     mutation = MutationDataFactory.default_create()
     mutation.createAnnotation("fake1", "1")
     mutation.addTagToAnnotation("fake1", "fakeTag")
     tags = mutation.getAnnotation("fake1").getTags()
     self.assertTrue("fakeTag" in tags, "Tag was not added properly.")
 def _onp_ordered_combiner_test(self, inputs, expected):
     """Run OnpQueue over `inputs` and assert the combined mutations equal `expected`.

     Both arguments are converted with self._tuples_to_MutationData; the
     converted inputs are handed to the queue as an iterator.
     """
     mutation_iter = iter(self._tuples_to_MutationData(inputs))
     expected_mutations = self._tuples_to_MutationData(expected)
     factory = MutationDataFactory()
     queue = OnpQueue(mutation_iter, factory)
     combined = list(queue.get_combined_mutations())
     self._assert_mutation_lists_equal(expected_mutations, combined)
    def test_validation_correction_valid(self):
        """ Test that the validation allele fields are determined automatically when not specified by the user for a valid mutation.

        A single mutation with blank validation alleles is rendered to a TCGA
        MAF file, which is then read back row by row.  The test also fails if
        no rows are read back, so the per-row checks cannot pass vacuously.
        """
        m = MutationDataFactory.default_create()
        m.chr = "3"
        m.start = "178948145"
        m.end = "178948145"
        m.alt_allele = "A"
        m.ref_allele = "G"
        m['validation_status'] = "Valid"
        m['Match_Norm_Validation_Allele1'] = ""
        m['Match_Norm_Validation_Allele2'] = ""
        m['Tumor_Validation_Allele1'] = ""
        m['Tumor_Validation_Allele2'] = ""
        m['Mutation_Status'] = "Somatic"

        output_filename = os.path.join("out", "test_validation_correction2.maf.tsv")

        outputRenderer = TcgaMafOutputRenderer(output_filename,
                                               configFile=os.path.join("configs", "tcgaMAF2.4_output.config"))
        outputRenderer.renderMutations(iter([m]))

        tsv_reader = GenericTsvReader(output_filename)

        rows_checked = 0
        for line_dict in tsv_reader:
            rows_checked += 1
            self.assertTrue(line_dict['Match_Norm_Validation_Allele1'] == line_dict['Match_Norm_Validation_Allele2'], "Matched norm alleles did not match.")
            self.assertTrue(line_dict['Tumor_Validation_Allele1'] == line_dict['Reference_Allele'], "Tumor validation allele 1 did not match reference for a valid validation result.")
            self.assertTrue(line_dict['Tumor_Validation_Allele2'] == line_dict['Tumor_Seq_Allele2'], "Tumor validation allele 2 did not match Tumor_Seq_Allele2 for a valid validation result.")
            self.assertTrue(line_dict['Match_Norm_Validation_Allele1'] == line_dict['Tumor_Validation_Allele1'], "Tumor allele 1 did not match normal alleles for a valid validation result.")
            self.assertTrue(line_dict['Match_Norm_Validation_Allele1'] == line_dict['Reference_Allele'], "Norm validation alleles did not match reference (norm, reference): (%s, %s)" %(line_dict['Match_Norm_Validation_Allele1'] ,line_dict['Reference_Allele']) )
            self.assertTrue("G" == line_dict['Reference_Allele'], "Reference allele should have been G, but was " + line_dict['Reference_Allele'])
            self.assertTrue("A" == line_dict['Tumor_Seq_Allele2'], "Alt allele should have been A, but was " + line_dict['Tumor_Seq_Allele2'])

        # Guard against an empty output file silently skipping every check.
        self.assertTrue(rows_checked > 0, "No rows were read back from " + output_filename)
    def test_tnp_blank_snp(self):
        """Test a harder scenario for ONP combination.

        Six SNPs are queued: three adjacent ones sharing a phasing ID, one
        with a different phasing ID, one more with the first ID, and one on a
        different chromosome.  The alt alleles of the combined output must be
        exactly ``gt_alts`` -- same values, same order, same count.
        """
        # (chr, position, ref_allele, alt_allele, phasing_id)
        specs = [
            (1, 100, "G", "A", "value1"),
            (1, 101, "C", "T", "value1"),
            (1, 102, "C", "T", "value1"),
            (1, 103, "C", "T", "value2"),  # Note the differing ID
            (1, 104, "C", "T", "value1"),
            (2, 105, "C", "T", "value1"),  # Note separate chromosome
        ]
        mutations = []
        for chrom, position, ref, alt, phasing_id in specs:
            mut = MutationData(chr=chrom,
                               start=position,
                               end=position,
                               ref_allele=ref,
                               alt_allele=alt)
            mut.createAnnotation("phasing_id", phasing_id, "INPUT")
            mut.createAnnotation("phasing_genotype", "0|1", "INPUT")
            mutations.append(mut)

        gt_alts = ["ATT", "T", "T", "T"]
        mdf = MutationDataFactory()
        queue = OnpQueue(mutations, mdf)

        # Compare the complete list so that missing combined mutations fail
        # the test instead of being skipped by a short loop.
        combined_alts = [mut.alt_allele
                         for mut in queue.get_combined_mutations()]
        self.assertEqual(combined_alts, gt_alts)
Exemple #47
0
 def test_range_fetch(self):
     """Annotate the range chr1:78978-79000 with the bigwig datasource and check TestBigWig_score is 0.75."""
     region = MutationDataFactory.default_create()
     for name, value in (('chr', '1'), ('start', 78978), ('end', 79000)):
         region.createAnnotation(name, value)

     self.bigwig_datasource.annotate_mutation(region)
     score = region.get('TestBigWig_score')
     self.assertEqual(score, 0.75)
Exemple #48
0
 def testIter(self):
     """Test that iterating a mutation yields only created annotations or MutationData.attributes keys."""
     mutation = MutationDataFactory.default_create()
     mutation.createAnnotation("fake1", "1")
     mutation.createAnnotation("fake2", "blah blah")
     created = ["fake1", "fake2"]
     for key in mutation:
         is_known = (key in created) or (key in MutationData.attributes)
         self.assertTrue(is_known, "Key not present: " + key)
Exemple #49
0
 def testPickleable(self):
     """Test that a near-empty MutationData can be pickled"""
     m = MutationDataFactory.default_create()
     m.chr = "2"
     m.createAnnotation("fake1", "1")
     m.addTagToAnnotation("fake1", "fakeTag")
     import cPickle
     # Close the output file deterministically instead of leaking the handle.
     with open("out/testMDPickle.pkl", 'w') as pickle_file:
         cPickle.dump(m, pickle_file)
Exemple #50
0
 def testPickleable(self):
     """Test that a near-empty MutationData can be pickled"""
     m = MutationDataFactory.default_create()
     m.chr = "2"
     m.createAnnotation("fake1", "1")
     m.addTagToAnnotation("fake1", "fakeTag")
     import cPickle
     # The with-block closes (and flushes) the pickle file even if dump()
     # raises, rather than leaving the handle open.
     with open("out/testMDPickle.pkl", 'w') as out_fp:
         cPickle.dump(m, out_fp)
    def test_annotation_overwriting_off(self):
        """Test that the factory can produce a mutation that does not allow overwriting.  Make sure DuplicateAnnotationException is thrown."""
        mdf = MutationDataFactory(allow_overwriting=False)
        mut = mdf.create()

        mut.createAnnotation("blah", "123")
        self.assertEqual(mut['blah'], "123")

        # Creating the same annotation a second time must raise; assertRaises
        # fails the test if no DuplicateAnnotationException is seen.
        with self.assertRaises(DuplicateAnnotationException):
            mut.createAnnotation("blah", "456")
Exemple #52
0
    def test_mutation_combiner_ordering(self):
        """Test that combined annotation values follow the order of the input mutations."""
        first = MutationDataFactory.default_create(chr=1, start=100, end=100, ref_allele="G", alt_allele="A")
        first.createAnnotation("SomeDepth", "2")
        first.createAnnotation("AnotherDepth", "1")

        second = MutationDataFactory.default_create(chr=1, start=101, end=101, ref_allele="C", alt_allele="T")
        second.createAnnotation("SomeDepth", "1")
        second.createAnnotation("AnotherDepth", "2")

        factory = MutationDataFactory()
        combined = OnpQueue._combine_mutations([first, second], factory)

        # Alleles are concatenated and annotation values joined with "|".
        wanted = MutationDataFactory.default_create(chr=1, start=100, end=101, ref_allele="GC", alt_allele="AT")
        wanted.createAnnotation("SomeDepth", "2|1")
        wanted.createAnnotation("AnotherDepth", "1|2")
        self.assertTrue(combined.attributesEqual(wanted))
        self.assertEqual(combined, wanted)
Exemple #53
0
    def testRetrievePrecedingBaseFromAnnotationForInsertions(self):
        """Round-trip two insertions through the preceding-bases helpers.

        Each mutation is rewritten with the values from
        MutUtils.retrievePrecedingBasesForInsertions, and
        MutUtils.retrievePrecedingBaseFromAnnotationForInsertions must then
        give back the original start, ref allele and alt allele.
        """
        chrom = "1"
        start = 1234567
        end = 1234567  # incorrect, but doesn't matter for the purposes of testing
        ref_allele = "GTC"
        build = "19"

        # A one-base and a two-base insertion after the same reference bases.
        for alt_allele in ("GTCT", "GTCTT"):
            mut = MutationDataFactory.default_create(chrom, start, end, ref_allele, alt_allele, build)
            preceding_bases, stripped_alt, stripped_start, stripped_end = \
                MutUtils.retrievePrecedingBasesForInsertions(mut)
            mut.ref_allele = "-"
            mut.alt_allele = stripped_alt
            mut.start = stripped_start
            mut.end = stripped_end
            mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME, annotationValue=preceding_bases)

            restored_ref, restored_alt, restored_start = \
                MutUtils.retrievePrecedingBaseFromAnnotationForInsertions(mut)
            self.assertTrue(restored_start == start, "Mut start should be %s but was %s." % (start, restored_start))
            self.assertTrue(restored_ref == ref_allele, "Ref allele should be %s but was %s."
                                                        % (ref_allele, restored_ref))
            self.assertTrue(restored_alt == alt_allele, "Alt allele should be %s but was %s."
                                                        % (alt_allele, restored_alt))
    def testMissingAnnotations(self):
        """Test that annotating a mutation that has "gene" but no "protein_change"
        raises a MissingAnnotationException mentioning protein_change.
        """
        datasource = GenericGeneProteinPositionDatasource("testdata/simple_uniprot_natvar/simple_uniprot_natvar.tsv", title="SmallNatVar", version="test")

        mutation = MutationDataFactory.default_create()
        # Only "gene" is set; "protein_change" is deliberately left out.
        mutation.createAnnotation("gene", "TP53")

        with self.assertRaisesRegexp(MissingAnnotationException, "protein_change"):
            datasource.annotate_mutation(mutation)
Exemple #55
0
 def testSetValues(self):
     """Test reading and overwriting annotation values through the dictionary interface."""
     mutation = MutationDataFactory.default_create()
     mutation.createAnnotation("fake1", "1")
     mutation.createAnnotation("fake2", "blah blah")
     retrieval_msg = "Could not properly retrieve annotation using the dictionary interface.  "
     self.assertTrue(mutation["fake1"] == "1", retrieval_msg + str(mutation["fake1"]))
     self.assertTrue(mutation["fake2"] == "blah blah", retrieval_msg + str(mutation["fake2"]))

     # Overwrite an existing annotation value and read it back.
     mutation["fake2"] = "Whoa"
     self.assertTrue(mutation["fake2"] == "Whoa", "Could not properly retrieve annotation using the dictionary interface, after a value change.")
     print(str(mutation))
    def testBasicGeneTSVInit(self):
        """ Make sure that we can initialize a simple tsv data source and annotate a mutation by gene with it """

        geneDS = DatasourceFactory.createDatasource("testdata/small_tsv_ds/small_tsv_ds.config", "testdata/small_tsv_ds/")
        # Identity check against None replaces the "<>" operator, which was
        # removed in Python 3.
        self.assertTrue(geneDS is not None, "gene indexed datasource was None.")

        m = MutationDataFactory.default_create()
        m.createAnnotation('gene',"ABL1")
        m = geneDS.annotate_mutation(m)
        self.assertTrue(m['CGC_Abridged_Name'] == "v-abl Abelson murine leukemia viral oncogene homolog 1","Test gene TSV datasource did not annotate properly.")
 def test_annotation_copy(self):
     """Test that collapsing can keep a backup annotation holding the old value and source.

     With copy_old_suffix="_full" the original value and datasource are kept
     under ALT_F2R1_full, while ALT_F2R1 holds the collapsed value and a
     different datasource.
     """
     mutation = MutationDataFactory.default_create(chr="1", start="10000", end="10000")
     mutation.createAnnotation('ALT_F2R1', "|36", annotationSource="TEST")
     collapser = ColumnCollapser()
     collapser.update_mutation(mutation, new_annotation_source="foo", copy_old_suffix="_full")
     self.assertEqual(mutation["ALT_F2R1_full"], "|36")
     self.assertEqual(mutation["ALT_F2R1"], "36")
     backup_source = mutation.getAnnotation("ALT_F2R1_full").getDatasource()
     self.assertEqual(backup_source, "TEST")
     collapsed_source = mutation.getAnnotation("ALT_F2R1").getDatasource()
     backup_source = mutation.getAnnotation("ALT_F2R1_full").getDatasource()
     self.assertTrue(collapsed_source != backup_source)
 def testAnnotationSourceIsPopulated(self):
     ''' Tests that the annotation source is neither "Unknown" nor blank for the example tsv datasource. '''
     geneDS = DatasourceFactory.createDatasource("testdata/small_tsv_ds/small_tsv_ds.config", "testdata/small_tsv_ds/")
     # "is not None" / "!=" replace the "<>" operator, which was removed in
     # Python 3.
     self.assertTrue(geneDS is not None, "gene indexed datasource was None.")

     m = MutationDataFactory.default_create()
     m.createAnnotation('gene',"ABL1")
     m = geneDS.annotate_mutation(m)
     self.assertTrue(m['CGC_Abridged_Name'] == "v-abl Abelson murine leukemia viral oncogene homolog 1","Test gene TSV datasource did not annotate properly.")
     self.assertTrue(m.getAnnotation('CGC_Abridged_Name').getDatasource() != "Unknown", "Annotation source was unknown")
     self.assertTrue(m.getAnnotation('CGC_Abridged_Name').getDatasource().strip() != "", "Annotation source was blank")