Esempio n. 1
0
    def testMulticoreAnnotate(self):
        """Test a (too) simple annotating exercise from GAF on 2 cores.

        The GAF datasource proxy is first dumped to
        ``out/testGAFPickle.pkl``.  Two chr3 hg19 A>C mutations are then
        annotated through ``annotate_mutation_global`` on a two-process
        ``LoggingPool``.  Every result must expose ``transcript_id`` and a
        ``gene`` equal to ``PIK3CA``, and the two results must keep distinct
        start positions.
        """
        gafDatasource = TestUtils.createGafDatasourceProxy(self.config)

        # Test pickling.  The ``with`` block closes the handle as soon as the
        # dump finishes instead of leaking it.
        # NOTE(review): assumes ``dump`` is the pickle dump; binary mode is
        # what pickle expects for its output file -- confirm.
        with open('out/testGAFPickle.pkl', 'wb') as pickle_file:
            dump(gafDatasource, pickle_file)

        m1 = MutationData()
        m1.chr = '3'
        m1.start = '178866811'
        m1.end = '178866811'
        m1.ref_allele = "A"
        m1.alt_allele = "C"
        m1.build = "hg19"

        m2 = MutationData()
        m2.chr = '3'
        m2.start = '178866812'
        m2.end = '178866812'
        m2.ref_allele = "A"
        m2.alt_allele = "C"
        m2.build = "hg19"

        p = LoggingPool(processes=2)
        try:
            result = p.map(annotate_mutation_global,
                           [(gafDatasource, m1), (gafDatasource, m2)])
        finally:
            # Release the worker processes even when map() raises.
            p.close()
            p.join()

        for r in result:
            self.assertTrue("transcript_id" in r.keys())
            self.assertTrue("gene" in r.keys())
            self.assertTrue(r["gene"] == "PIK3CA")
        self.assertTrue(result[0].start != result[1].start)
    def testRealWorld(self):
        """Run two chr1 G>T mutations through the transcript provider and COSMIC.

        Only the first mutation is checked: its
        ``COSMIC_n_overlapping_mutations`` annotation must be ``'0'``.  The
        second mutation is annotated without any assertion.
        """
        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
        cosmic_ds = TestUtils.createCosmicDatasource(self.config)

        def annotate_g_to_t(position):
            """Build a chr1 G>T mutation at ``position`` and annotate it with both datasources."""
            mut = MutationData()
            mut.chr = '1'
            mut.start = position
            mut.end = position
            mut.ref_allele = "G"
            mut.alt_allele = "T"
            mut = transcript_ds.annotate_mutation(mut)
            return cosmic_ds.annotate_mutation(mut)

        # These values are not taken from a real world scenario, but are cooked for this test.
        first = annotate_g_to_t('12941796')
        self.assertTrue(first['COSMIC_n_overlapping_mutations'] == '0')

        # 1	150483621	150483621
        annotate_g_to_t('150483621')
Esempio n. 3
0
    def test_effect_tx_mode(self):
        """Annotate the same chr2 G>T mutation under two transcript modes.

        With ``TX_MODE_BEST_EFFECT`` the variant classification must be
        ``Missense_Mutation``; with ``TX_MODE_CANONICAL`` it must be
        ``Intron``.  The gene must be ``PNKD`` in both modes.
        """
        provider = TestUtils.createTranscriptProviderDatasource(self.config)

        def annotated_variant():
            """Create a fresh chr2:219137340 G>T mutation and annotate it."""
            mut = MutationData()
            mut.chr = '2'
            mut.start = '219137340'
            mut.end = '219137340'
            mut.ref_allele = 'G'
            mut.alt_allele = 'T'
            return provider.annotate_mutation(mut)

        # Canonical mutation was Intron
        provider.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)
        best_effect = annotated_variant()
        self.assertTrue(best_effect['gene'] == "PNKD")
        self.assertTrue(
            best_effect['variant_classification'] == "Missense_Mutation")

        provider.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
        canonical = annotated_variant()
        self.assertTrue(canonical['gene'] == "PNKD")
        is_intron = canonical['variant_classification'] == "Intron"
        self.assertTrue(
            is_intron,
            "Canonical no longer is Intron.  This test is no longer valid.  This failure can come up when changing the GAF datasource."
        )
Esempio n. 4
0
 def _simple_annotate(self, is_skip_no_alts):
     """Annotate two identical chr1 mutations with no datasources and count the output.

     The first mutation carries an ``alt_allele_seen`` annotation of
     ``"False"``; the second does not.  Returns the number of mutations
     produced by ``Annotator.annotate_mutations`` for a run specification
     initialized with the given ``is_skip_no_alts`` value.
     """
     spec = RunSpecification()
     spec.initialize(None, None, datasources=[], is_skip_no_alts=is_skip_no_alts)

     # Initialize the annotator with the runspec
     engine = Annotator()
     engine.initialize(spec)

     def build_mutation():
         """Create the chr1:12941796 mutation shared by both inputs."""
         mut = MutationData()
         mut.chr = "1"
         mut.start = "12941796"
         mut.end = "12941796"
         mut.alt_allele = "G"
         mut.ref_allele = "T"
         return mut

     flagged = build_mutation()
     flagged.createAnnotation("alt_allele_seen", "False")
     unflagged = build_mutation()

     annotated = engine.annotate_mutations([flagged, unflagged])
     return sum(1 for _ in annotated)
Esempio n. 5
0
    def testMulticoreAnnotate(self):
        """Test a (too) simple annotating exercise from GAF on 2 cores.

        The GAF datasource proxy is first dumped to
        ``out/testGAFPickle.pkl``.  Two chr3 hg19 A>C mutations are then
        annotated through ``annotate_mutation_global`` on a two-process
        ``LoggingPool``.  Every result must expose ``transcript_id`` and a
        ``gene`` equal to ``PIK3CA``, and the two results must keep distinct
        start positions.
        """
        gafDatasource = TestUtils.createGafDatasourceProxy(self.config)

        # Test pickling.  The ``with`` block closes the handle as soon as the
        # dump finishes instead of leaking it.
        # NOTE(review): assumes ``dump`` is the pickle dump; binary mode is
        # what pickle expects for its output file -- confirm.
        with open('out/testGAFPickle.pkl', 'wb') as pickle_file:
            dump(gafDatasource, pickle_file)

        m1 = MutationData()
        m1.chr = '3'
        m1.start = '178866811'
        m1.end = '178866811'
        m1.ref_allele = "A"
        m1.alt_allele = "C"
        m1.build = "hg19"

        m2 = MutationData()
        m2.chr = '3'
        m2.start = '178866812'
        m2.end = '178866812'
        m2.ref_allele = "A"
        m2.alt_allele = "C"
        m2.build = "hg19"

        p = LoggingPool(processes=2)
        try:
            result = p.map(annotate_mutation_global,
                           [(gafDatasource, m1), (gafDatasource, m2)])
        finally:
            # Release the worker processes even when map() raises.
            p.close()
            p.join()

        for r in result:
            self.assertTrue("transcript_id" in r.keys())
            self.assertTrue("gene" in r.keys())
            self.assertTrue(r["gene"] == "PIK3CA")
        self.assertTrue(result[0].start != result[1].start)
Esempio n. 6
0
    def test_denovo(self):
        """GAF de novo test.

        Annotates three chr22 mutations that start at 22221735 with an empty
        reference allele and checks the ``variant_classification`` each one
        receives.
        """
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)

        # (end position, alt allele, expected variant classification)
        scenarios = (
            (22221737, 'CAT', 'De_novo_Start_OutOfFrame'),
            (22221740, 'AACATAA', 'De_novo_Start_OutOfFrame'),
            (22221739, 'ACATAA', 'De_novo_Start_InFrame'),
        )

        for end_position, inserted_bases, expected_vc in scenarios:
            mut = MutationData()
            mut.start = str(22221735)
            mut.end = str(end_position)
            mut.chr = "22"
            mut.ref_allele = ''
            mut.alt_allele = inserted_bases
            annotated = gafDatasource.annotate_mutation(mut)
            self.assertTrue(annotated['variant_classification'] == expected_vc)
Esempio n. 7
0
    def testRealWorld(self):
        """Run two chr1 G>T mutations through the transcript provider and COSMIC.

        Only the first mutation is checked: its
        ``COSMIC_n_overlapping_mutations`` annotation must be ``'0'``.  The
        second mutation is annotated without any assertion.
        """
        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
        cosmic_ds = TestUtils.createCosmicDatasource(self.config)

        def annotate_g_to_t(position):
            """Build a chr1 G>T mutation at ``position`` and annotate it with both datasources."""
            mut = MutationData()
            mut.chr = '1'
            mut.start = position
            mut.end = position
            mut.ref_allele = "G"
            mut.alt_allele = "T"
            mut = transcript_ds.annotate_mutation(mut)
            return cosmic_ds.annotate_mutation(mut)

        # These values are not taken from a real world scenario, but are cooked for this test.
        first = annotate_g_to_t('12941796')
        self.assertTrue(first['COSMIC_n_overlapping_mutations'] == '0')

        # 1	150483621	150483621
        annotate_g_to_t('150483621')
Esempio n. 8
0
    def initializeMutFromAttributes(chrom, startPos, endPos, ref, alt, build):
        """Create a ``MutationData`` and attach its preceding-bases annotation.

        The variant type comes from ``MutUtils.determineVariantType``:

        * ``"snp"``: the preceding-bases annotation is created with an empty
          value.
        * ``"del"`` / ``"ins"``: alleles, start and end are overwritten (both
          as attributes and as items) with the values returned by the matching
          ``MutUtils.retrievePrecedingBasesFor...`` helper, and the returned
          preceding bases are stored in the annotation.
        * anything else: the mutation is returned as constructed.

        Returns the (possibly updated) mutation.
        """
        mut = MutationData(chrom, startPos, endPos, ref, alt, build)
        varType = MutUtils.determineVariantType(mut)

        if varType == "snp":  # Snps
            mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME, annotationValue="")

        if varType == "del":  # deletion
            preceding_bases, new_ref, new_start, new_end = \
                MutUtils.retrievePrecedingBasesForDeletions(mut)
            new_alt = "-"
        elif varType == "ins":  # insertion
            preceding_bases, new_alt, new_start, new_end = \
                MutUtils.retrievePrecedingBasesForInsertions(mut)
            new_ref = "-"
        else:
            # No coordinate or allele rewrite for the remaining variant types.
            return mut

        # Keep the attribute view and the item view of the mutation in step.
        updates = (("ref_allele", new_ref), ("alt_allele", new_alt),
                   ("start", new_start), ("end", new_end))
        for field_name, field_value in updates:
            setattr(mut, field_name, field_value)
            mut[field_name] = field_value

        mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                             annotationValue=preceding_bases)
        return mut
Esempio n. 9
0
    def testRetrievePrecedingBaseFromAnnotationForInsertions(self):
        """Round-trip two insertions through the preceding-bases helpers.

        Each mutation is first rewritten with the values returned by
        ``MutUtils.retrievePrecedingBasesForInsertions``; then
        ``MutUtils.retrievePrecedingBaseFromAnnotationForInsertions`` must
        give back the original start, reference allele and alternate allele.
        """
        chrom = "1"
        start = 1234567
        end = 1234567  # incorrect, but doesn't matter for the purposes of testing
        ref_allele = "GTC"
        build = "19"

        for alt_allele in ("GTCT", "GTCTT"):
            mut = MutationData(chrom, start, end, ref_allele, alt_allele, build)
            preceding_bases, trimmed_alt, shifted_start, shifted_end = \
                MutUtils.retrievePrecedingBasesForInsertions(mut)
            mut.ref_allele = "-"
            mut.alt_allele = trimmed_alt
            mut.start = shifted_start
            mut.end = shifted_end
            mut.createAnnotation(
                annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                annotationValue=preceding_bases)

            restored_ref, restored_alt, restored_start = \
                MutUtils.retrievePrecedingBaseFromAnnotationForInsertions(mut)

            start_msg = "Mut start should be %s but was %s." % (start, restored_start)
            self.assertTrue(restored_start == start, start_msg)
            ref_msg = "Ref allele should be %s but was %s." % (ref_allele, restored_ref)
            self.assertTrue(restored_ref == ref_allele, ref_msg)
            alt_msg = "Alt allele should be %s but was %s." % (alt_allele, restored_alt)
            self.assertTrue(restored_alt == alt_allele, alt_msg)
Esempio n. 10
0
    def initializeMutFromAttributes(chr, start, end, ref_allele, alt_allele, build):
        """Create a ``MutationData`` and attach its preceding-bases annotation.

        ``chr``, ``start``, ``end`` and ``build`` are passed through ``str``
        before the mutation is constructed.  The variant type comes from
        ``TranscriptProviderUtils.infer_variant_type``:

        * xNP (per ``TranscriptProviderUtils.is_xnp``): the preceding-bases
          annotation is created with an empty value.
        * ``VT_DEL`` / ``VT_INS``: alleles, start and end are overwritten
          (both as attributes and as items) with the values returned by the
          matching ``MutUtils.retrievePrecedingBasesFor...`` helper, and the
          returned preceding bases are stored in the annotation.
        * anything else: no allele or coordinate rewrite is performed.

        Returns the (possibly updated) mutation.
        """
        mut = MutationData(str(chr), str(start), str(end), ref_allele, alt_allele, str(build))
        varType = TranscriptProviderUtils.infer_variant_type(mut.ref_allele, mut.alt_allele)

        if TranscriptProviderUtils.is_xnp(varType):  # Snps and other xNPs
            mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME, annotationValue="")

        if varType == VariantClassification.VT_DEL:  # deletion
            preceding_bases, new_ref, new_start, new_end = \
                MutUtils.retrievePrecedingBasesForDeletions(mut)
            new_alt = "-"
        elif varType == VariantClassification.VT_INS:  # insertion
            preceding_bases, new_alt, new_start, new_end = \
                MutUtils.retrievePrecedingBasesForInsertions(mut)
            new_ref = "-"
        else:
            # No coordinate or allele rewrite for the remaining variant types.
            return mut

        # Keep the attribute view and the item view of the mutation in step.
        updates = (("ref_allele", new_ref), ("alt_allele", new_alt),
                   ("start", new_start), ("end", new_end))
        for field_name, field_value in updates:
            setattr(mut, field_name, field_value)
            mut[field_name] = field_value

        mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                             annotationValue=preceding_bases)
        return mut
Esempio n. 11
0
    def test_denovo(self):
        """GAF de novo test.

        Annotates three chr22 mutations that start at 22221735 with an empty
        reference allele and checks the ``variant_classification`` each one
        receives.
        """
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)

        # (end position, alt allele, expected variant classification)
        scenarios = (
            (22221737, 'CAT', 'De_novo_Start_OutOfFrame'),
            (22221740, 'AACATAA', 'De_novo_Start_OutOfFrame'),
            (22221739, 'ACATAA', 'De_novo_Start_InFrame'),
        )

        for end_position, inserted_bases, expected_vc in scenarios:
            mut = MutationData()
            mut.start = str(22221735)
            mut.end = str(end_position)
            mut.chr = "22"
            mut.ref_allele = ''
            mut.alt_allele = inserted_bases
            annotated = gafDatasource.annotate_mutation(mut)
            self.assertTrue(
                annotated['variant_classification'] == expected_vc)
Esempio n. 12
0
 def _simple_annotate(self, is_skip_no_alts):
     """Annotate two identical chr1 mutations with no datasources and count the output.

     The first mutation carries an ``alt_allele_seen`` annotation of
     ``"False"``; the second does not.  Returns the number of mutations
     produced by ``Annotator.annotate_mutations`` for a run specification
     initialized with the given ``is_skip_no_alts`` value.
     """
     spec = RunSpecification()
     spec.initialize(None, None, datasources=[], is_skip_no_alts=is_skip_no_alts)

     # Initialize the annotator with the runspec
     engine = Annotator()
     engine.initialize(spec)

     def build_mutation():
         """Create the chr1:12941796 mutation shared by both inputs."""
         mut = MutationData()
         mut.chr = "1"
         mut.start = "12941796"
         mut.end = "12941796"
         mut.alt_allele = "G"
         mut.ref_allele = "T"
         return mut

     flagged = build_mutation()
     flagged.createAnnotation("alt_allele_seen", "False")
     unflagged = build_mutation()

     annotated = engine.annotate_mutations([flagged, unflagged])
     return sum(1 for _ in annotated)
Esempio n. 13
0
    def test_effect_tx_mode(self):
        """Annotate the same chr2 G>T mutation under two transcript modes.

        With ``TX_MODE_BEST_EFFECT`` the variant classification must be
        ``Missense_Mutation``; with ``TX_MODE_CANONICAL`` it must be
        ``Intron``.  The gene must be ``PNKD`` in both modes.
        """
        provider = TestUtils.createTranscriptProviderDatasource(self.config)

        def annotated_variant():
            """Create a fresh chr2:219137340 G>T mutation and annotate it."""
            mut = MutationData()
            mut.chr = '2'
            mut.start = '219137340'
            mut.end = '219137340'
            mut.ref_allele = 'G'
            mut.alt_allele = 'T'
            return provider.annotate_mutation(mut)

        # Canonical mutation was Intron
        provider.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)
        best_effect = annotated_variant()
        self.assertTrue(best_effect['gene'] == "PNKD")
        self.assertTrue(
            best_effect['variant_classification'] == "Missense_Mutation")

        provider.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
        canonical = annotated_variant()
        self.assertTrue(canonical['gene'] == "PNKD")
        is_intron = canonical['variant_classification'] == "Intron"
        self.assertTrue(
            is_intron,
            "Canonical no longer is Intron.  This test is no longer valid.  This failure can come up when changing the GAF datasource."
        )
Esempio n. 14
0
    def testRetrievePrecedingBasesForInsertions(self):
        """Check the rewrite produced by ``MutUtils.retrievePrecedingBasesForInsertions``.

        Two insertions on top of reference ``GTC`` at position 1234567 are
        rewritten with the values the helper returns.  Afterwards the
        mutation must contain ``_preceding_bases``, start at 1234569, end at
        1234570, have ``-`` as reference allele and only the inserted bases
        (``T`` / ``TT``) as alternate allele.

        Failure messages are built from the same expected values that are
        asserted, so the reported expectation always matches the check.
        """
        chrom = "1"
        start = 1234567
        end = 1234567  # incorrect, but doesn't matter for the purposes of testing
        ref_allele = "GTC"
        build = "19"
        expected_start = 1234569
        expected_end = 1234570

        # (input alt allele, alt allele expected after the rewrite)
        cases = (("GTCT", "T"), ("GTCTT", "TT"))

        for alt_allele, expected_alt in cases:
            mut = MutationData(chrom, start, end, ref_allele, alt_allele, build)
            preceding_bases, updated_alt_allele, updated_start, updated_end = \
                MutUtils.retrievePrecedingBasesForInsertions(mut)
            mut.ref_allele = "-"
            mut.alt_allele = updated_alt_allele
            mut.start = updated_start
            mut.end = updated_end
            mut.createAnnotation(
                annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                annotationValue=preceding_bases)

            self.assertTrue("_preceding_bases" in mut,
                            "_preceding_bases is missing in the mutation data.")
            self.assertTrue(mut.start == expected_start,
                            "Mut start should be %s but was %s." % (expected_start, mut.start))
            self.assertTrue(mut.end == expected_end,
                            "Mut end should be %s but was %s." % (expected_end, mut.end))
            self.assertTrue(mut.ref_allele == "-",
                            "Ref allele should be - but was %s." % mut.ref_allele)
            self.assertTrue(mut.alt_allele == expected_alt,
                            "Alt allele should be %s but was %s." % (expected_alt, mut.alt_allele))
Esempio n. 15
0
    def testFlank(self):
        """Test that we can see a Flank mutation.

        Annotates one single-base mutation per position of a 26-base window
        on chr1 starting at 11042200 and prints each variant classification
        next to its start position.

        NOTE(review): nothing is asserted here; the test only fails if the
        annotation raises.  ``vcs`` is kept as the hook for a real check.
        """
        # NOTE(review): an earlier comment quoted chr1:28,233,780-28,233,805,
        # which does not match the window used below -- confirm the locus.
        refs = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alts = "TGGGCTCAGGCTCTCTGAAAAGAAAA"
        vcs = []
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
        startWindow = 11042200
        for offset, (ref_base, alt_base) in enumerate(zip(refs, alts)):
            position = str(startWindow + offset)
            m = MutationData()
            m.start = position
            m.end = position
            m.chr = "1"
            m.ref_allele = ref_base
            m.alt_allele = alt_base

            m = gafDatasource.annotate_mutation(m)

            vc = m['variant_classification']
            vcs.append(vc)

            # Single-argument call form prints identically on Python 2 and 3.
            print(vc + "  " + m.start)
Esempio n. 16
0
    def testAnnotateListOfMutations(self):
        """Feed a one-element mutation list to an Annotator configured only through a runspec.

        The datasources are created from the ``dbDir`` entry of the
        ``DEFAULT`` config section.  The first annotated mutation must have a
        ``gene`` value that is not ``None``.
        """
        # Locate the datasource directory and create a runspec
        datasource_dir = self.config.get("DEFAULT", "dbDir")
        datasources = DatasourceFactory.createDatasources(datasource_dir)
        spec = RunSpecification()
        spec.initialize(None, None, datasources=datasources)

        # Initialize the annotator with the runspec
        engine = Annotator()
        engine.initialize(spec)

        mut = MutationData()
        mut.chr = "1"
        mut.start = "12941796"
        mut.end = "12941796"
        mut.alt_allele = "G"
        mut.ref_allele = "T"

        annotated_muts = engine.annotate_mutations([mut])
        first_annotated = next(annotated_muts)
        gene = first_annotated.get("gene", None)
        self.assertTrue(gene is not None)
    def testdbNSFPAnnotationWithMissingExactMatch(self):  # SNPs only
        """Annotate a chr1:35138 T>C SNP with the dbNSFP exact-match test datasource.

        Three annotations (``dbNSFP_codonpos``, ``dbNSFP_refcodon``,
        ``dbNSFP_cds_strand``) must each equal an ``Annotation`` with an empty
        value and the data type listed below.
        """
        self.logger.info("Initializing dbNSFP")
        datasource_dir = os.path.join("testdata", "dbNSFP_chr1_6vars_exact_ds", "hg19")
        config_path = os.path.join(datasource_dir, "dbNSFP_chr1_6vars_exact_ds.config")
        datasource = DatasourceFactory.createDatasource(config_path, datasource_dir)

        m1 = MutationData()
        m1.chr = "1"
        m1.start = "35138"
        m1.end = "35138"
        m1.ref_allele = "T"
        m1.alt_allele = "C"

        m1_annotated = datasource.annotate_mutation(m1)

        # (annotation name, data type of the expected annotation)
        expectations = (
            ("dbNSFP_codonpos", "Integer"),
            ("dbNSFP_refcodon", "String"),
            ("dbNSFP_cds_strand", "Float"),
        )
        for annotation_name, data_type in expectations:
            actual = m1_annotated.getAnnotation(annotation_name)
            expected = Annotation(value="", datasourceName="dbNSFP", dataType=data_type,
                                  description="", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
                                  number=None)
            self.assertTrue(actual.isEqual(expected), "Annotations do not match.")
Esempio n. 18
0
    def testSpliceSiteWithinNBases(self):
        """Test that no mutation near a chr21 splice junction is classified Silent.

        One single-base mutation is annotated for each position in
        chr21:10,998,326-10,998,346.  None may be ``Silent``; the four
        classifications at indices 8-11 must all be ``Splice_Site``, and no
        ``Splice_Site`` may appear at indices 0-7 or 12-19.
        """
        # chr21:10,998,326-10,998,346
        # 10,998,336 is a splice site.  (Junction between 10998335 and 336)
        # AGTTCTCCTT C TGGAAAAAAG
        refs = 'AGTTCTCCTTCTGGAAAAAAG'
        alts = 'TCAGACTGAAAATACCCCCCT'
        window_start = 10998326
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
        vcs = []
        for offset, (ref_base, alt_base) in enumerate(zip(refs, alts)):
            position = str(window_start + offset)
            m = MutationData()
            m.start = position
            m.end = position
            m.chr = "21"
            m.ref_allele = ref_base
            m.alt_allele = alt_base

            m = gafDatasource.annotate_mutation(m)

            vc = m['variant_classification']
            self.assertTrue(vc != 'Silent', 'Silent mutation found when it should be a splice site.')

            vcs.append(vc)
            # Single-argument call form prints identically on Python 2 and 3.
            print(vc + "  " + m.start)

        self.assertTrue(all(tmp == "Splice_Site" for tmp in vcs[8:12]), "Not all vcs within 2 bases were splice site: " + str(vcs[8:12]))
        self.assertTrue(all(tmp != "Splice_Site" for tmp in vcs[0:8]), "No splice sites should be seen: " + str(vcs[0:8]))
        # NOTE(review): vcs has 21 entries, so the slice below leaves the last
        # one (index 20) unchecked -- confirm whether that is intended.
        self.assertTrue(all(tmp != "Splice_Site" for tmp in vcs[12:20]), "No splice sites should be seen: " + str(vcs[12:20]))
    def test_validation_correction_valid(self):
        """Render a "Valid" somatic mutation with blank validation alleles and check the MAF output.

        The mutation is written with ``TcgaMafOutputRenderer`` to
        ``out/test_validation_correction2.maf.tsv`` and read back with
        ``GenericTsvReader``.  For every row the validation allele columns
        must agree with the reference/alt columns as asserted below.
        """
        mut = MutationData()
        mut.chr = "3"
        mut.start = "178948145"
        mut.end = "178948145"
        mut.alt_allele = "A"
        mut.ref_allele = "G"
        mut['validation_status'] = "Valid"
        mut['Match_Norm_Validation_Allele1'] = ""
        mut['Match_Norm_Validation_Allele2'] = ""
        mut['Tumor_Validation_Allele1'] = ""
        mut['Tumor_Validation_Allele2'] = ""
        mut['Mutation_Status'] = "Somatic"

        output_filename = os.path.join("out", "test_validation_correction2.maf.tsv")
        config_path = os.path.join("configs", "tcgaMAF2.4_output.config")

        renderer = TcgaMafOutputRenderer(output_filename, configFile=config_path)
        renderer.renderMutations(iter([mut]))

        # (left column, right column, failure message) for the plain equality checks.
        column_pairs = (
            ('Match_Norm_Validation_Allele1', 'Match_Norm_Validation_Allele2',
             "Matched norm alleles did not match."),
            ('Tumor_Validation_Allele1', 'Reference_Allele',
             "Tumor validation allele 1 did not match reference for a valid validation result."),
            ('Tumor_Validation_Allele2', 'Tumor_Seq_Allele2',
             "Tumor validation allele 2 did not match Tumor_Seq_Allele2 for a valid validation result."),
            ('Match_Norm_Validation_Allele1', 'Tumor_Validation_Allele1',
             "Tumor allele 1 did not match normal alleles for a valid validation result."),
        )

        for row in GenericTsvReader(output_filename):
            for left_column, right_column, failure_msg in column_pairs:
                self.assertTrue(row[left_column] == row[right_column], failure_msg)
            self.assertTrue(
                row['Match_Norm_Validation_Allele1'] == row['Reference_Allele'],
                "Norm validation alleles did not match reference (norm, reference): (%s, %s)"
                % (row['Match_Norm_Validation_Allele1'], row['Reference_Allele']))
            self.assertTrue(
                "G" == row['Reference_Allele'],
                "Reference allele should have been G, but was " + row['Reference_Allele'])
            self.assertTrue(
                "A" == row['Tumor_Seq_Allele2'],
                "Alt allele should have been A, but was " + row['Tumor_Seq_Allele2'])
Esempio n. 20
0
    def testFlank(self):
        """Test that we can see a Flank mutation.

        Annotates one single-base mutation per position of a 26-base window
        on chr1 starting at 11042200 and prints each variant classification
        next to its start position.

        NOTE(review): nothing is asserted here; the test only fails if the
        annotation raises.  ``vcs`` is kept as the hook for a real check.
        """
        # NOTE(review): an earlier comment quoted chr1:28,233,780-28,233,805,
        # which does not match the window used below -- confirm the locus.
        refs = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alts = "TGGGCTCAGGCTCTCTGAAAAGAAAA"
        vcs = []
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)
        startWindow = 11042200
        for offset, (ref_base, alt_base) in enumerate(zip(refs, alts)):
            position = str(startWindow + offset)
            m = MutationData()
            m.start = position
            m.end = position
            m.chr = "1"
            m.ref_allele = ref_base
            m.alt_allele = alt_base

            m = gafDatasource.annotate_mutation(m)

            vc = m['variant_classification']
            vcs.append(vc)

            # Single-argument call form prints identically on Python 2 and 3.
            print(vc + "  " + m.start)
Esempio n. 21
0
    def testSilentMutationGoingToSpliceSite(self):
        """Count splice-site and silent calls across a 26-base chr1 window.

        One single-base mutation is annotated per position in
        chr1:28,233,780-28,233,805.  Exactly 4 classifications must be
        ``splice_site`` and exactly 11 must be ``silent`` (compared
        case-insensitively).
        """
        # chr1:28,233,780-28,233,805 Junction is at chr1:28,233,793 & 94
        refs = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alts = "TGGGCTCAGGCTCGCTGAAAAGAAAA"
        junction = 28233793
        startWindow = 28233780
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)

        vcs = []
        for offset, (ref_base, alt_base) in enumerate(zip(refs, alts)):
            position = str(startWindow + offset)
            mut = MutationData()
            mut.start = position
            mut.end = position
            mut.chr = "1"
            mut.ref_allele = ref_base
            mut.alt_allele = alt_base

            mut = gafDatasource.annotate_mutation(mut)

            distance = abs(junction - int(mut.start))
            classification = mut['variant_classification']
            vcs.append(classification)
            print(classification + "  " + mut.start + "  " + str(distance))

        lowered = [classification.lower() for classification in vcs]
        numSpliceSites = lowered.count("splice_site")
        numSilent = lowered.count("silent")

        self.assertTrue(numSpliceSites == 4, "Should have seen 4 splice site mutations, but saw: " + str(numSpliceSites))
        self.assertTrue(numSilent == 11, "Should have seen 11 Silent mutations, but saw: " + str(numSilent))
Esempio n. 22
0
    def testAnnotateListOfMutations(self):
        """Feed a one-element mutation list to an Annotator configured only through a runspec.

        The datasources are created from the ``dbDir`` entry of the
        ``DEFAULT`` config section.  The first annotated mutation must have a
        ``gene`` value that is not ``None``.
        """
        # Locate the datasource directory and create a runspec
        datasource_dir = self.config.get("DEFAULT", "dbDir")
        datasources = DatasourceFactory.createDatasources(datasource_dir)
        spec = RunSpecification()
        spec.initialize(None, None, datasources=datasources)

        # Initialize the annotator with the runspec
        engine = Annotator()
        engine.initialize(spec)

        mut = MutationData()
        mut.chr = "1"
        mut.start = "12941796"
        mut.end = "12941796"
        mut.alt_allele = "G"
        mut.ref_allele = "T"

        annotated_muts = engine.annotate_mutations([mut])
        first_annotated = next(annotated_muts)
        gene = first_annotated.get("gene", None)
        self.assertTrue(gene is not None)
Esempio n. 23
0
    def testRetrievePrecedingBaseFromAnnotationForInsertions(self):
        """Round-trip two insertions through the preceding-bases helpers.

        Each mutation is first rewritten with the values returned by
        ``MutUtils.retrievePrecedingBasesForInsertions``; then
        ``MutUtils.retrievePrecedingBaseFromAnnotationForInsertions`` must
        give back the original start, reference allele and alternate allele.
        """
        chrom = "1"
        start = 1234567
        end = 1234567  # incorrect, but doesn't matter for the purposes of testing
        ref_allele = "GTC"
        build = "19"

        for alt_allele in ("GTCT", "GTCTT"):
            mut = MutationData(chrom, start, end, ref_allele, alt_allele, build)
            preceding_bases, trimmed_alt, shifted_start, shifted_end = \
                MutUtils.retrievePrecedingBasesForInsertions(mut)
            mut.ref_allele = "-"
            mut.alt_allele = trimmed_alt
            mut.start = shifted_start
            mut.end = shifted_end
            mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                                 annotationValue=preceding_bases)

            restored_ref, restored_alt, restored_start = \
                MutUtils.retrievePrecedingBaseFromAnnotationForInsertions(mut)

            start_msg = "Mut start should be %s but was %s." % (start, restored_start)
            self.assertTrue(restored_start == start, start_msg)
            ref_msg = "Ref allele should be %s but was %s." % (ref_allele, restored_ref)
            self.assertTrue(restored_ref == ref_allele, ref_msg)
            alt_msg = "Alt allele should be %s but was %s." % (alt_allele, restored_alt)
            self.assertTrue(restored_alt == alt_allele, alt_msg)
    def test_validation_correction(self):
        """Check that blank validation alleles are filled in when rendering an "Invalid" mutation.

        One mutation with validation_status "Invalid" and empty validation allele
        fields is rendered to a TCGA MAF file.  Every row read back must carry
        matching tumor/normal validation alleles equal to the reference allele
        ("G") and a Mutation_Status of "None".
        """
        mut = MutationData()
        mut.chr = "3"
        mut.start = "178948145"
        mut.end = "178948145"
        mut.alt_allele = "A"
        mut.ref_allele = "G"
        mut['validation_status'] = "Invalid"
        # The validation alleles are deliberately left blank.
        mut['Match_Norm_Validation_Allele1'] = ""
        mut['Match_Norm_Validation_Allele2'] = ""
        mut['Tumor_Validation_Allele1'] = ""
        mut['Tumor_Validation_Allele2'] = ""
        mut['Mutation_Status'] = "Somatic"

        maf_path = os.path.join("out", "test_validation_correction1.maf.tsv")
        maf_config = os.path.join("configs", "tcgaMAF2.4_output.config")

        renderer = TcgaMafOutputRenderer(maf_path, configFile=maf_config)
        renderer.renderMutations(iter([mut]))

        for row in GenericTsvReader(maf_path):
            norm_allele1 = row['Match_Norm_Validation_Allele1']
            norm_allele2 = row['Match_Norm_Validation_Allele2']
            self.assertTrue(norm_allele1 == norm_allele2,
                            "Matched norm alleles did not match.")

            tumor_allele1 = row['Tumor_Validation_Allele1']
            tumor_allele2 = row['Tumor_Validation_Allele2']
            self.assertTrue(tumor_allele1 == tumor_allele2,
                            "Tumor alleles did not match for an invalid validation result.")
            self.assertTrue(norm_allele1 == tumor_allele2,
                            "Tumor alleles did not match normal alleles for an invalid validation result.")

            reference = row['Reference_Allele']
            self.assertTrue(norm_allele1 == reference,
                            "Norm validation alleles did not match reference (norm, reference): (%s, %s)"
                            % (norm_allele1, reference))
            self.assertTrue("G" == reference,
                            "Reference allele should have been G, but was " + reference)

            status = row['Mutation_Status']
            self.assertTrue("None" == status,
                            "Mutation Status must be None when Validation Status is Invalid: " + status)
Esempio n. 25
0
 def testAKT1(self):
     """Annotate 14:105246407 G>A (a position reported by a website user) and expect gene AKT1."""
     mut = MutationData()
     mut.chr = '14'
     mut.start = '105246407'
     mut.end = '105246407'
     mut.ref_allele = 'G'
     mut.alt_allele = 'A'

     transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
     annotated = transcript_ds.annotate_mutation(mut)

     gene = annotated['gene']
     failure_msg = "Incorrect gene found: " + gene + "  If updating GAF, this may not be an error, but should be confirmed manually."
     self.assertTrue(gene == "AKT1", failure_msg)
Esempio n. 26
0
    def test_start_codon(self):
        """Annotate a start codon hit (22:22221729 A>T) and expect a missense classification."""
        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)

        position = "22221729"
        mut = MutationData()
        mut.start = position
        mut.end = position
        mut.chr = "22"
        mut.ref_allele = 'A'
        mut.alt_allele = 'T'

        annotated = transcript_ds.annotate_mutation(mut)
        classification = annotated['variant_classification']
        self.assertTrue(classification == VariantClassification.MISSENSE)
Esempio n. 27
0
    def testRetrievePrecedingBasesForInsertions(self):
        """Check that preceding bases are stripped from insertions and the coordinates shifted.

        Two insertions after the reference "GTC" at 1:1234567 are exercised
        (inserting "T" and "TT").  In both cases the values returned by
        MutUtils.retrievePrecedingBasesForInsertions are applied to the mutation,
        which must then start at 1234569, end at 1234570, have ref allele "-"
        and carry only the inserted bases as alt allele.
        """
        expected_start = 1234569
        expected_end = 1234570

        # (original alt allele, alt allele expected once the preceding bases are removed)
        cases = (("GTCT", "T"), ("GTCTT", "TT"))

        for alt_allele, expected_alt in cases:
            chrom = "1"
            start = 1234567
            end = 1234567  # incorrect, but doesn't matter for the purposes of testing
            ref_allele = "GTC"
            build = "19"
            mut = MutationData(chrom, start, end, ref_allele, alt_allele, build)

            preceding_bases, updated_alt_allele, updated_start, updated_end = \
                MutUtils.retrievePrecedingBasesForInsertions(mut)
            mut.ref_allele = "-"
            mut.alt_allele = updated_alt_allele
            mut.start = updated_start
            mut.end = updated_end
            mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                                 annotationValue=preceding_bases)

            self.assertTrue("_preceding_bases" in mut, "_preceding_bases is missing in the mutation data.")
            # The failure messages now quote the same values the assertions test.
            self.assertTrue(mut.start == expected_start,
                            "Mut start should be %s but was %s." % (expected_start, mut.start))
            self.assertTrue(mut.end == expected_end,
                            "Mut end should be %s but was %s." % (expected_end, mut.end))
            self.assertTrue(mut.ref_allele == "-", "Ref allele should be - but was %s." % mut.ref_allele)
            self.assertTrue(mut.alt_allele == expected_alt,
                            "Alt allele should be %s but was %s." % (expected_alt, mut.alt_allele))
Esempio n. 28
0
    def testdbNSFPAnnotationWithMissingExactMatch(self):  # SNPs only
        """Annotate 1:35138 T>C against the 6-variant dbNSFP test datasource.

        Three annotations (dbNSFP_codonpos, dbNSFP_refcodon, dbNSFP_cds_strand)
        are expected to come back with an empty value and the data type listed
        below.
        """
        self.logger.info("Initializing dbNSFP")
        ds_dir = os.path.join("testdata", "dbNSFP_chr1_6vars_exact_ds", "hg19")
        ds_config = os.path.join(ds_dir, "dbNSFP_chr1_6vars_exact_ds.config")
        datasource = DatasourceFactory.createDatasource(ds_config, ds_dir)

        snp = MutationData()
        snp.chr = "1"
        snp.start = "35138"
        snp.end = "35138"
        snp.ref_allele = "T"
        snp.alt_allele = "C"

        annotated = datasource.annotate_mutation(snp)

        # (annotation name, expected data type); every value is expected to be empty.
        expectations = (
            ("dbNSFP_codonpos", "Integer"),
            ("dbNSFP_refcodon", "String"),
            ("dbNSFP_cds_strand", "Float"),
        )
        for annotation_name, data_type in expectations:
            actual = annotated.getAnnotation(annotation_name)
            expected = Annotation(
                value="",
                datasourceName="dbNSFP",
                dataType=data_type,
                description="",
                tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
                number=None)
            self.assertTrue(actual.isEqual(expected), "Annotations do not match.")
 def testBasicAnnotation(self):
     """Annotate 18:48604683 G>A from the small COSMIC tsv and expect a truthy disease-count annotation."""
     cosmic_tsv = 'testdata/small_cosmic_2/cosmic_v65_chr18.tsv'
     datasource = GenericGenomicMutationDatasource(cosmic_tsv)

     mut = MutationData()
     mut.chr = '18'
     mut.start = '48604683'
     mut.end = '48604683'
     mut.ref_allele = 'G'
     mut.alt_allele = 'A'
     mut.createAnnotation('strand', '+')

     annotated = datasource.annotate_mutation(mut)
     disease_counts = annotated['_cosmic_muts_disease_counts']
     self.assertTrue(disease_counts, 'Unable to annotate mutation correctly')
    def test_simple_annotate(self):
        """Annotate 1:35138 T>A from the small dbNSFP tsv datasource and check one rank score."""
        src_tsv = "testdata/small_tsv_leveldb/dbNSFP2.4_variant.chr1_cut5000.tsv"
        out_dir = os.path.abspath("out/test_simple_annotate_snp_only_leveldb")
        index_columns = ["chr", "pos(1-coor)", "pos(1-coor)", "ref", "alt"]
        datasource = self._create_test_ds(src_tsv, out_dir, index_columns)

        # Source row: 1	35138	T	A
        mut = MutationData()
        mut.chr = "1"
        mut.start = "35138"
        mut.end = "35138"
        mut.ref_allele = "T"
        mut.alt_allele = "A"

        annotated = datasource.annotate_mutation(mut)
        rank_score = annotated['phyloP100way_vertebrate_rankscore']
        self.assertTrue(rank_score == "0.19875")
Esempio n. 31
0
    def testMC1R(self):
        """Pin the gene reported for 16:89985913 G>A.

        The desired answer is MC1R, but this test currently asserts TUBB3.
        """
        mut = MutationData()
        mut.chr = '16'
        mut.start = '89985913'
        mut.end = '89985913'
        mut.ref_allele = 'G'
        mut.alt_allele = 'A'

        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
        annotated = transcript_ds.annotate_mutation(mut)

        # At some point, we would expect this to be MC1R, not TUBB3
        gene = annotated['gene']
        failure_msg = "Incorrect gene found: " + gene + "  If updating GAF, this may not be an error, but should be confirmed manually."
        self.assertTrue(gene == "TUBB3", failure_msg)
    def test_small_positive_strand_transcript_change(self):
        """Check the rendered transcript change for one SNP on each strand."""
        ds = TestUtils._create_test_gencode_ds("out/small_positive_strand_")

        # (chr, position, ref, alt, expected transcript change)
        cases = (
            ("22", "22221730", "T", "G", "c.1A>C"),  # negative strand
            ("3", "178916614", "G", "T", "c.1G>T"),  # positive strand
        )
        for chrom, position, ref, alt, expected_change in cases:
            mut = MutationData()
            mut.chr = chrom
            mut.start = position
            mut.end = position
            mut.ref_allele = ref
            mut.alt_allele = alt

            annotated = ds.annotate_mutation(mut)
            self.assertTrue(annotated['transcript_change'] == expected_change,
                            "Incorrect transcript change: " + annotated['transcript_change'])
    def test_small_positive_strand_transcript_change(self):
        """Check the rendered transcript change for one SNP on each strand (GENCODE v19 test datasource)."""
        ds = TestUtils._create_test_gencode_v19_ds("out/small_positive_strand_")

        # (chr, position, ref, alt, expected transcript change)
        cases = (
            ("22", "22221730", "T", "G", "c.1A>C"),  # negative strand
            ("3", "178916614", "G", "T", "c.1G>T"),  # positive strand
        )
        for chrom, position, ref, alt, expected_change in cases:
            mut = MutationData()
            mut.chr = chrom
            mut.start = position
            mut.end = position
            mut.ref_allele = ref
            mut.alt_allele = alt

            annotated = ds.annotate_mutation(mut)
            self.assertTrue(annotated['transcript_change'] == expected_change,
                            "Incorrect transcript change: " + annotated['transcript_change'])
    def createMutations(self):
        """Yield one MutationData per row of the maflite file.

        Takes no inputs.  For every row of self._tsvReader, each column is stored
        on a new MutationData (built with self._build) as an 'INPUT' annotation:

          * a column that is an alias in self._reverseAlternativeDict is stored
            under the real mutation field name it maps to;
          * any other column is stored under its own name;
          * a value stored under "chr" is first passed through
            MutUtils.convertChromosomeStringToMutationDataFormat.

        Afterwards the ref/alt alleles are stripped of surrounding whitespace, an
        alt allele identical to the ref allele is replaced using
        self._find_alt_allele_in_other_field, and a missing start or end is
        back-filled from the other.
        """
        alias_to_field = self._reverseAlternativeDict
        # A set gives constant-time membership tests; the original tested against a key list per column.
        alias_keys = set(alias_to_field.keys())
        allColumns = self._tsvReader.getFieldNames()

        for line in self._tsvReader:

            # We only need to assign fields that are mutation attributes and have a different name in the maflite file.
            mut = MutationData(build=self._build)

            for col in allColumns:
                # Three scenarios:
                #   1) col is name of mutation data field -- simple createAnnotation
                #   2) col name is an alias for a mutation data field -- do lookup then createAnnotation
                #   3) col name is not an alias for a mutation data field -- simple createAnnotation
                val = line[col]
                if col in alias_keys:
                    key = alias_to_field[col]
                    self.logger.debug(key + " found from " + col)
                else:
                    key = col

                # Make sure to convert chromosome values, whichever scenario produced the key.
                if key == "chr":
                    val = MutUtils.convertChromosomeStringToMutationDataFormat(val)
                mut.createAnnotation(key, val, 'INPUT')

            # Remove any surrounding whitespace from the alleles.
            mut.ref_allele, mut.alt_allele = mut.ref_allele.strip(), mut.alt_allele.strip()

            # if the alt allele == ref_allele, check that this is not a case where there is an alt_allele2 that is different.
            if mut.alt_allele == mut.ref_allele:
                mut.alt_allele = self._find_alt_allele_in_other_field(line, mut.ref_allele)

            # FIXME: Support more than one alias in the reverse dictionary.  Then this line can be removed.
            # Compare by value: `is ""` tests object identity and only works when the
            # empty string happens to be interned.
            if mut.start != "" and mut.end == "":
                mut.end = mut.start
            if mut.end != "" and mut.start == "":
                mut.start = mut.end

            yield mut
Esempio n. 35
0
    def test_start_codon(self):
        """Annotate a start codon hit (22:22221729 A>T) and expect a missense classification."""
        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)

        position = "22221729"
        mut = MutationData()
        mut.start = position
        mut.end = position
        mut.chr = "22"
        mut.ref_allele = 'A'
        mut.alt_allele = 'T'

        annotated = transcript_ds.annotate_mutation(mut)
        classification = annotated['variant_classification']
        self.assertTrue(classification == VariantClassification.MISSENSE)
    def test_simple_annotate_with_nonhuman(self):
        """Annotate I:500 C>A on a nonhuman genome (saccer) and expect transcript and gene YAL069W."""
        saccer_ds = self._create_ensembl_ds_from_saccer()

        mut = MutationData()
        mut.chr = "I"
        mut.start = "500"
        mut.end = "500"
        mut.ref_allele = "C"
        mut.alt_allele = "A"

        annotated = saccer_ds.annotate_mutation(mut)

        expected_id = "YAL069W"
        self.assertTrue(annotated['annotation_transcript'] == expected_id)
        self.assertTrue(annotated['gene'] == expected_id)
    def test_simple_annotate_with_nonhuman(self):
        """Annotate I:500 C>A on a nonhuman genome (saccer) and expect transcript and gene YAL069W."""
        saccer_ds = self._create_ensembl_ds_from_saccer()

        mut = MutationData()
        mut.chr = "I"
        mut.start = "500"
        mut.end = "500"
        mut.ref_allele = "C"
        mut.alt_allele = "A"

        annotated = saccer_ds.annotate_mutation(mut)

        expected_id = "YAL069W"
        self.assertTrue(annotated['annotation_transcript'] == expected_id)
        self.assertTrue(annotated['gene'] == expected_id)
Esempio n. 38
0
    def testMicroRNA(self):
        """Annotate 12:31379268 A>G and expect the miRNA gene hsa-miR-3194-3p (case-insensitive)."""
        # Source record:
        # uc021qwk.1	chr12:31379258-31379277:-	hsa-miR-3194-3p|?	chr12:31379258-31379277:-		Confidence=100
        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)

        position = 31379268
        mut = MutationData()
        mut.start = position
        mut.end = position
        mut.chr = "12"
        mut.alt_allele = 'G'

        # This is accurate
        mut.ref_allele = 'A'
        annotated = transcript_ds.annotate_mutation(mut)

        is_expected_gene = annotated['gene'].lower() == "hsa-mir-3194-3p"
        failure_msg = "Wrong gene (GT: hsa-mir-3194-3p): " + annotated['gene'] + "   -- if updating GAF, this test may fail as this result may not be appropriate."
        self.assertTrue(is_expected_gene, failure_msg)
Esempio n. 39
0
    def testFlank2(self):
        """Second real-world flank scenario: 1:228646357 C>T should be a 5'Flank of HIST3H2A."""
        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)

        # 1	228646357 nearest Gene=HIST3H2A C>T
        position = "228646357"
        mut = MutationData()
        mut.start = position
        mut.end = position
        mut.chr = "1"
        mut.ref_allele = 'C'
        mut.alt_allele = 'T'
        annotated = transcript_ds.annotate_mutation(mut)

        gene_msg = "Wrong gene (GT: HIST3H2A): " + annotated['gene'] + "   -- if updating GAF, this test may fail as this gene may not be appropriate."
        self.assertTrue(annotated['gene'] == "HIST3H2A", gene_msg)

        flank_msg = "Should be 5'Flank, but was " + annotated['variant_classification'] + " -- if updating GAF, this test may fail as this test is data specific.  Also, this may fail if padding parameters are changed."
        self.assertTrue(annotated['variant_classification'] == "5'Flank", flank_msg)
Esempio n. 40
0
    def testBasicAnnotation(self):
        """Annotate 18:48604683 G>A from the small COSMIC tsv and expect a truthy disease-count annotation."""
        cosmic_tsv = 'testdata/small_cosmic_2/cosmic_v65_chr18.tsv'
        datasource = GenericGenomicMutationDatasource(cosmic_tsv)

        mut = MutationData()
        mut.chr = '18'
        mut.start = '48604683'
        mut.end = '48604683'
        mut.ref_allele = 'G'
        mut.alt_allele = 'A'
        mut.createAnnotation('strand', '+')

        annotated = datasource.annotate_mutation(mut)
        disease_counts = annotated['_cosmic_muts_disease_counts']
        self.assertTrue(disease_counts, 'Unable to annotate mutation correctly')
    def test_hgvs_annotations_simple_SNP(self):
        """Check that the three HGVS annotations (genomic, coding DNA, protein) appear on an annotated SNP.

        Their presence, including the protein change, is taken as evidence that
        the Transcript objects are populated properly.
        """
        ds = TestUtils._create_test_gencode_ds("out/test_hgvs_annotations_")

        # Negative-strand SNP
        snp = MutationData()
        snp.chr = "22"
        snp.start = "22221730"
        snp.end = "22221730"
        snp.ref_allele = "T"
        snp.alt_allele = "G"
        snp.build = "hg19"
        annotated = ds.annotate_mutation(snp)

        expected_hgvs = (
            ('HGVS_genomic_change', 'chr22.hg19:g.22221730T>G'),
            ('HGVS_coding_DNA_change', 'ENST00000215832.6:c.1A>C'),
            ('HGVS_protein_change', 'ENSP00000215832:p.Met1Leu'),
        )
        for annotation_name, expected_value in expected_hgvs:
            self.assertEqual(annotated.get(annotation_name, None), expected_value)
Esempio n. 42
0
 def testAKT1(self):
     """Annotate 14:105246407 G>A (a position reported by a website user) and expect gene AKT1."""
     mut = MutationData()
     mut.chr = '14'
     mut.start = '105246407'
     mut.end = '105246407'
     mut.ref_allele = 'G'
     mut.alt_allele = 'A'

     transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
     annotated = transcript_ds.annotate_mutation(mut)

     gene = annotated['gene']
     failure_msg = "Incorrect gene found: " + gene + "  If updating GAF, this may not be an error, but should be confirmed manually."
     self.assertTrue(gene == "AKT1", failure_msg)
    def test_hgvs_annotations_simple_SNP(self):
        """Check that the three HGVS annotations (genomic, coding DNA, protein) appear on an annotated SNP.

        Their presence, including the protein change, is taken as evidence that
        the Transcript objects are populated properly.
        """
        ds = TestUtils._create_test_gencode_v19_ds("out/test_hgvs_annotations_SNP_")

        # Negative-strand SNP
        snp = MutationData()
        snp.chr = "22"
        snp.start = "22221730"
        snp.end = "22221730"
        snp.ref_allele = "T"
        snp.alt_allele = "G"
        snp.build = "hg19"
        annotated = ds.annotate_mutation(snp)

        expected_hgvs = (
            ('HGVS_genomic_change', 'chr22.hg19:g.22221730T>G'),
            ('HGVS_coding_DNA_change', 'ENST00000215832.6:c.1A>C'),
            ('HGVS_protein_change', 'ENSP00000215832:p.Met1Leu'),
        )
        for annotation_name, expected_value in expected_hgvs:
            self.assertEqual(annotated.get(annotation_name, None), expected_value)
Esempio n. 44
0
    def testMC1R(self):
        """Pin the gene reported for 16:89985913 G>A.

        The desired answer is MC1R, but this test currently asserts TUBB3.
        """
        mut = MutationData()
        mut.chr = '16'
        mut.start = '89985913'
        mut.end = '89985913'
        mut.ref_allele = 'G'
        mut.alt_allele = 'A'

        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
        annotated = transcript_ds.annotate_mutation(mut)

        # At some point, we would expect this to be MC1R, not TUBB3
        gene = annotated['gene']
        failure_msg = "Incorrect gene found: " + gene + "  If updating GAF, this may not be an error, but should be confirmed manually."
        self.assertTrue(gene == "TUBB3", failure_msg)
    def test_no_mapping_file(self):
        """Test that a datasource built without a protein mapping file still yields HGVS annotations.

        The datasource is created from scratch with protein_id_mapping_file=None
        (i.e. limited HGVS support).  The genomic and coding DNA changes are
        unaffected; the protein change is reported against
        "unknown_prot_seq_id".
        """
        ds = TestUtils._create_test_gencode_ds("out/test_hgvs_annotations_no_mapping_",
                                               protein_id_mapping_file=None)

        # Negative-strand SNP
        snp = MutationData()
        snp.chr = "22"
        snp.start = "22221730"
        snp.end = "22221730"
        snp.ref_allele = "T"
        snp.alt_allele = "G"
        snp.build = "hg19"
        annotated = ds.annotate_mutation(snp)

        expected_hgvs = (
            ('HGVS_genomic_change', 'chr22.hg19:g.22221730T>G'),
            ('HGVS_coding_DNA_change', 'ENST00000215832.6:c.1A>C'),
            ('HGVS_protein_change', 'unknown_prot_seq_id:p.Met1Leu'),
        )
        for annotation_name, expected_value in expected_hgvs:
            self.assertEqual(annotated.get(annotation_name, None), expected_value)
    def test_no_mapping_file(self):
        """Test that a datasource built without a protein mapping file still yields HGVS annotations.

        The datasource is created from scratch with protein_id_mapping_file=None
        (i.e. limited HGVS support).  The genomic and coding DNA changes are
        unaffected; the protein change is reported against
        "unknown_prot_seq_id".
        """
        ds = TestUtils._create_test_gencode_v19_ds("out/test_hgvs_annotations_no_mapping_file_",
                                                   protein_id_mapping_file=None)

        # Negative-strand SNP
        snp = MutationData()
        snp.chr = "22"
        snp.start = "22221730"
        snp.end = "22221730"
        snp.ref_allele = "T"
        snp.alt_allele = "G"
        snp.build = "hg19"
        annotated = ds.annotate_mutation(snp)

        expected_hgvs = (
            ('HGVS_genomic_change', 'chr22.hg19:g.22221730T>G'),
            ('HGVS_coding_DNA_change', 'ENST00000215832.6:c.1A>C'),
            ('HGVS_protein_change', 'unknown_prot_seq_id:p.Met1Leu'),
        )
        for annotation_name, expected_value in expected_hgvs:
            self.assertEqual(annotated.get(annotation_name, None), expected_value)
    def test_protein_position_off_by_one(self, chrom, start, end, ref, alt, gt_prot_change):
        """Check the protein change reported for one mutation under a custom canonical transcript list.

        The canonical list is read from testdata/tx_exact_uniprot_matches.txt
        (each line cut at its last "."), plus ENST00000338368.

        :param chrom: chromosome of the mutation
        :param start: start position of the mutation
        :param end: end position of the mutation
        :param ref: reference allele
        :param alt: alternate allele
        :param gt_prot_change: ground-truth value expected in the 'protein_change' annotation
        """
        config = TestUtils.createUnitTestConfig()
        transcript_ds = TestUtils.createTranscriptProviderDatasource(config)

        # The context manager closes the file even if reading raises, and open()
        # replaces the file() builtin that only exists in Python 2.
        with open("testdata/tx_exact_uniprot_matches.txt", 'r') as cc_txs_fp:
            cc_txs = [tx.rsplit(".", 1)[0] for tx in cc_txs_fp]
        cc_txs.append("ENST00000338368")  # Add a transcript that is not exactly the same, but close
        transcript_ds.set_custom_canonical_txs(cc_txs)

        m = MutationData()
        m.chr = chrom
        m.start = start
        m.end = end
        m.ref_allele = ref
        m.alt_allele = alt

        m2 = transcript_ds.annotate_mutation(m)

        self.assertEqual(m2['protein_change'], gt_prot_change)
    def test_canonical_tx_list_empty(self):
        """Test that an unset or empty canonical transcript list does not select ENST00000544786."""
        ds = TestUtils._create_test_gencode_v19_ds("out/test_canonical_tx_list_")

        mut = MutationData()
        mut.chr = "22"
        mut.start = "22142650"
        mut.end = "22142650"
        mut.ref_allele = "T"
        mut.alt_allele = "A"

        unwanted_tx = "ENST00000544786"

        # No canonical list has been set at all.
        unset_result = ds.annotate_mutation(mut)
        self.assertFalse(unset_result['annotation_transcript'].startswith(unwanted_tx))
        self.assertFalse(unset_result['variant_classification'] == VariantClassification.INTRON)

        # An explicitly empty canonical list.
        ds.set_custom_canonical_txs([])
        empty_result = ds.annotate_mutation(mut)
        self.assertTrue(empty_result['variant_classification'] == VariantClassification.MISSENSE)
        self.assertFalse(empty_result['annotation_transcript'].startswith(unwanted_tx))
    def test_simple_annotate(self):
        """Annotate a simple example with an EnsemblTranscriptDatasource built from the saccer test config.

        Nothing is asserted on the result; the test passes as long as
        construction and annotation do not raise.
        """
        config_path = "testdata/ensembl/saccer/" + "ensembl.config"
        parser = ConfigUtils.createConfigParser(config_path)
        general = dict(
            title=parser.get("general", "title"),
            version=parser.get("general", "version"),
            src_file=parser.get("general", "src_file"),
        )

        ensembl_ds = EnsemblTranscriptDatasource(**general)

        mut = MutationData()
        mut.chr = "22"
        mut.start = "22161963"
        mut.end = "22161963"
        mut.ref_allele = "C"
        mut.alt_allele = "A"

        ensembl_ds.annotate_mutation(mut)
    def test_simple_annotate(self):
        """Annotate a simple example with an EnsemblTranscriptDatasource built from the saccer test config.

        Nothing is asserted on the result; the test passes as long as
        construction and annotation do not raise.
        """
        config_path = "testdata/ensembl/saccer/" + "ensembl.config"
        parser = ConfigUtils.createConfigParser(config_path)
        general = dict(
            title=parser.get("general", "title"),
            version=parser.get("general", "version"),
            src_file=parser.get("general", "src_file"),
        )

        ensembl_ds = EnsemblTranscriptDatasource(**general)

        mut = MutationData()
        mut.chr = "22"
        mut.start = "22161963"
        mut.end = "22161963"
        mut.ref_allele = "C"
        mut.alt_allele = "A"

        ensembl_ds.annotate_mutation(mut)
Esempio n. 51
0
    def testMicroRNA(self):
        """Annotate 12:31379268 A>G and expect the miRNA gene hsa-miR-3194-3p (case-insensitive)."""
        # Source record:
        # uc021qwk.1	chr12:31379258-31379277:-	hsa-miR-3194-3p|?	chr12:31379258-31379277:-		Confidence=100
        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)

        position = 31379268
        mut = MutationData()
        mut.start = position
        mut.end = position
        mut.chr = "12"
        mut.alt_allele = 'G'

        # This is accurate
        mut.ref_allele = 'A'
        annotated = transcript_ds.annotate_mutation(mut)

        is_expected_gene = annotated['gene'].lower() == "hsa-mir-3194-3p"
        failure_msg = "Wrong gene (GT: hsa-mir-3194-3p): " + annotated['gene'] + "   -- if updating GAF, this test may fail as this result may not be appropriate."
        self.assertTrue(is_expected_gene, failure_msg)
Esempio n. 52
0
    def testSilentMutationGoingToSpliceSite(self):
        """Count splice-site and silent classifications for SNPs around a chr1 splice junction.

        Every position in chr1:28,233,780-28,233,805 is annotated (the junction is
        at chr1:28,233,793 & 94); exactly 4 Splice_Site and 11 Silent
        classifications are expected, compared case-insensitively.
        """
        ref_bases = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alt_bases = "TGGGCTCAGGCTCGCTGAAAAGAAAA"
        window_start = 28233780
        junction = 28233793

        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
        classifications = []

        # The two base strings have one entry per position of the window.
        for offset, (ref_base, alt_base) in enumerate(zip(ref_bases, alt_bases)):
            position = str(window_start + offset)
            mut = MutationData()
            mut.start = position
            mut.end = position
            mut.chr = "1"
            mut.ref_allele = ref_base
            mut.alt_allele = alt_base

            mut = transcript_ds.annotate_mutation(mut)

            distance_from_junction = abs(junction - int(mut.start))
            classification = mut['variant_classification']
            classifications.append(classification)
            print(classification + "  " + mut.start + "  " + str(distance_from_junction))

        lowered = [classification.lower() for classification in classifications]
        splice_site_count = lowered.count("splice_site")
        silent_count = lowered.count("silent")

        self.assertTrue(splice_site_count == 4,
                        "Should have seen 4 splice site mutations, but saw: " + str(splice_site_count))
        self.assertTrue(silent_count == 11,
                        "Should have seen 11 Silent mutations, but saw: " + str(silent_count))
    def test_canonical_tx_list(self):
        """Test that specifying the canonical list actually changes the transcript selected."""
        ds = TestUtils._create_test_gencode_v19_ds("out/test_canonical_tx_list_")

        mut = MutationData()
        mut.chr = "22"
        mut.start = "22142650"
        mut.end = "22142650"
        mut.ref_allele = "T"
        mut.alt_allele = "A"

        preferred_tx = "ENST00000544786"
        ds.set_custom_canonical_txs([preferred_tx])
        ds.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)

        # NOTE: tx list overrides best effect
        with_list = ds.annotate_mutation(mut)
        self.assertTrue(with_list['annotation_transcript'].startswith(preferred_tx))
        self.assertTrue(with_list['variant_classification'] == VariantClassification.INTRON)

        # Clearing the list must release the forced transcript again.
        ds.set_custom_canonical_txs([])
        without_list = ds.annotate_mutation(mut)
        self.assertTrue(without_list['variant_classification'] == VariantClassification.MISSENSE)
        self.assertFalse(without_list['annotation_transcript'].startswith(preferred_tx))
Esempio n. 54
0
    def testSkippingAltsForSingleMut(self):
        """A lone mutation with alt_allele_seen of "False" must not be produced when is_skip_no_alts is set."""
        run_spec = RunSpecification()
        run_spec.initialize(None, None, datasources=[], is_skip_no_alts=True)

        # Initialize the annotator with the runspec
        annotator = Annotator()
        annotator.initialize(run_spec)

        unseen = MutationData()
        unseen.chr = "1"
        unseen.start = "12941796"
        unseen.end = "12941796"
        unseen.alt_allele = "G"
        unseen.ref_allele = "T"
        unseen.createAnnotation("alt_allele_seen", "False")

        annotated = annotator.annotate_mutations([unseen])

        # The very first next() raising StopIteration shows nothing was produced.
        self.assertRaises(StopIteration, annotated.next)
Esempio n. 55
0
    def testSkippingAltsForSingleMut(self):
        """A lone mutation with alt_allele_seen of "False" must not be produced when is_skip_no_alts is set."""
        run_spec = RunSpecification()
        run_spec.initialize(None, None, datasources=[], is_skip_no_alts=True)

        # Initialize the annotator with the runspec
        annotator = Annotator()
        annotator.initialize(run_spec)

        unseen = MutationData()
        unseen.chr = "1"
        unseen.start = "12941796"
        unseen.end = "12941796"
        unseen.alt_allele = "G"
        unseen.ref_allele = "T"
        unseen.createAnnotation("alt_allele_seen", "False")

        annotated = annotator.annotate_mutations([unseen])

        # The very first next() raising StopIteration shows nothing was produced.
        self.assertRaises(StopIteration, annotated.next)
Esempio n. 56
0
    def testFlank2(self):
        """Second real-world flank scenario: 1:228646357 C>T should be a 5'Flank of HIST3H2A."""
        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)

        # 1	228646357 nearest Gene=HIST3H2A C>T
        position = "228646357"
        mut = MutationData()
        mut.start = position
        mut.end = position
        mut.chr = "1"
        mut.ref_allele = 'C'
        mut.alt_allele = 'T'
        annotated = transcript_ds.annotate_mutation(mut)

        gene_msg = "Wrong gene (GT: HIST3H2A): " + annotated['gene'] + "   -- if updating GAF, this test may fail as this gene may not be appropriate."
        self.assertTrue(annotated['gene'] == "HIST3H2A", gene_msg)

        flank_msg = "Should be 5'Flank, but was " + annotated['variant_classification'] + " -- if updating GAF, this test may fail as this test is data specific.  Also, this may fail if padding parameters are changed."
        self.assertTrue(annotated['variant_classification'] == "5'Flank", flank_msg)
Esempio n. 57
0
    def testSpliceSiteWithinNBases(self):
        """Check classifications of SNPs around the chr21 splice junction between 10998335 and 10998336.

        Every position in chr21:10,998,326-10,998,346 is annotated.  None may be
        Silent, the four positions at indices 8-11 must be Splice_Site, and the
        checked positions on either side must not be.
        """
        # AGTTCTCCTT C TGGAAAAAAG
        ref_bases = 'AGTTCTCCTTCTGGAAAAAAG'
        alt_bases = 'TCAGACTGAAAATACCCCCCT'
        window_start = 10998326
        splice_site = 10998336

        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
        classifications = []

        # The two base strings have one entry per position of the window.
        for offset, (ref_base, alt_base) in enumerate(zip(ref_bases, alt_bases)):
            position = str(window_start + offset)
            mut = MutationData()
            mut.start = position
            mut.end = position
            mut.chr = "21"
            mut.ref_allele = ref_base
            mut.alt_allele = alt_base

            mut = transcript_ds.annotate_mutation(mut)

            distance_from_splice_site = abs(splice_site - int(mut.start))
            classification = mut['variant_classification']
            self.assertTrue(classification != 'Silent',
                            'Silent mutation found when it should be a splice site.')

            classifications.append(classification)
            print(classification + "  " + mut.start)

        near = classifications[8:12]
        upstream = classifications[0:8]
        # NOTE(review): this slice stops before the final position (index 20) -- confirm whether that is intended.
        downstream = classifications[12:20]

        self.assertTrue(all(vc == "Splice_Site" for vc in near),
                        "Not all vcs within 2 bases were splice site: " + str(near))
        self.assertTrue(all(vc != "Splice_Site" for vc in upstream),
                        "No splice sites should be seen: " + str(upstream))
        self.assertTrue(all(vc != "Splice_Site" for vc in downstream),
                        "No splice sites should be seen: " + str(downstream))