Esempio n. 1
0
    def test_effect_tx_mode(self):
        """Annotate one chr2 SNP under two transcript modes and compare results.

        The same mutation is annotated with the datasource in best-effect mode
        (expected classification: Missense_Mutation) and again in canonical
        mode (expected classification: Intron).  The gene is expected to be
        PNKD in both cases.
        """
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)

        def annotate_in_mode(tx_mode):
            # Switch the datasource mode first, then annotate a freshly built
            # mutation so the two passes do not share a MutationData instance.
            gafDatasource.set_tx_mode(tx_mode)
            m = MutationData()
            m.chr = '2'
            m.start = '219137340'
            m.end = '219137340'
            m.ref_allele = 'G'
            m.alt_allele = 'T'
            return gafDatasource.annotate_mutation(m)

        # Canonical mutation was Intron
        m = annotate_in_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)
        # assertEqual reports both operands on failure, unlike assertTrue(a == b).
        self.assertEqual(m['gene'], "PNKD")
        self.assertEqual(m['variant_classification'], "Missense_Mutation")

        m = annotate_in_mode(TranscriptProvider.TX_MODE_CANONICAL)
        self.assertEqual(m['gene'], "PNKD")
        self.assertEqual(
            m['variant_classification'], "Intron",
            "Canonical no longer is Intron.  This test is no longer valid.  This failure can come up when changing the GAF datasource."
        )
Esempio n. 2
0
    def test_effect_tx_mode(self):
        """Check that the transcript mode changes the classification of one SNP.

        A single chr2 SNP is annotated once per transcript mode.  Both modes
        are expected to report gene PNKD; best-effect mode is expected to
        classify it as Missense_Mutation and canonical mode as Intron.
        """
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)

        canonical_msg = "Canonical no longer is Intron.  This test is no longer valid.  This failure can come up when changing the GAF datasource."

        # Canonical mutation was Intron
        # Each entry: (transcript mode, expected classification, failure message)
        expectations = [
            (TranscriptProvider.TX_MODE_BEST_EFFECT, "Missense_Mutation", None),
            (TranscriptProvider.TX_MODE_CANONICAL, "Intron", canonical_msg),
        ]

        for tx_mode, expected_vc, failure_msg in expectations:
            gafDatasource.set_tx_mode(tx_mode)

            m = MutationData()
            m.chr = '2'
            m.start = '219137340'
            m.end = '219137340'
            m.ref_allele = 'G'
            m.alt_allele = 'T'
            m = gafDatasource.annotate_mutation(m)

            # assertEqual shows the actual value when the comparison fails.
            self.assertEqual(m['gene'], "PNKD")
            self.assertEqual(m['variant_classification'], expected_vc, failure_msg)
    def testRealWorld(self):
        """Annotate two chr1 SNPs with the transcript datasource followed by the COSMIC datasource.

        The first SNP is expected to report zero overlapping COSMIC mutations.
        The second SNP is annotated but nothing is asserted about the result in
        the code visible here, so it only verifies that annotation completes
        without raising.
        """
        gafDS = TestUtils.createTranscriptProviderDatasource(self.config)
        cosmicDS = TestUtils.createCosmicDatasource(self.config)

        def annotate_chr1_g_to_t(position):
            # Build a G>T SNP on chr1 at `position` and run it through both
            # datasources, transcript provider first.
            m = MutationData()
            m.chr = '1'
            m.start = position
            m.end = position
            m.ref_allele = "G"
            m.alt_allele = "T"
            m = gafDS.annotate_mutation(m)
            return cosmicDS.annotate_mutation(m)

        # These values are not taken from a real world scenario, but are cooked for this test.
        m = annotate_chr1_g_to_t('12941796')

        # assertEqual reports the actual count on failure.
        self.assertEqual(m['COSMIC_n_overlapping_mutations'], '0')

        # 1	150483621	150483621
        # NOTE(review): no assertion follows this second annotation.
        m = annotate_chr1_g_to_t('150483621')
Esempio n. 4
0
    def test_denovo(self):
        """GAF de novo test.

        Annotates three chr22 mutations with an empty reference allele, all
        starting at 22221735, and checks the de novo start classification
        reported for each.
        """
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)

        start = 22221735
        # Each entry: (end position, inserted alt allele, expected classification)
        cases = [
            (22221737, 'CAT', 'De_novo_Start_OutOfFrame'),
            (22221740, 'AACATAA', 'De_novo_Start_OutOfFrame'),
            (22221739, 'ACATAA', 'De_novo_Start_InFrame'),
        ]

        for end, alt_allele, expected_vc in cases:
            m = MutationData()
            m.start = str(start)
            m.end = str(end)
            m.chr = "22"
            m.ref_allele = ''
            m.alt_allele = alt_allele
            m = gafDatasource.annotate_mutation(m)
            # assertEqual reports the actual classification on failure.
            self.assertEqual(m['variant_classification'], expected_vc)
Esempio n. 5
0
    def testMulticoreAnnotate(self):
        """Test a (too) simple annotating exercise from GAF on 2 cores.

        The datasource proxy is first pickled to disk, then two chr3 SNPs at
        adjacent positions are annotated through a two-process LoggingPool.
        Both results are expected to carry a transcript_id, report gene
        PIK3CA, and keep their distinct start positions.
        """
        gafDatasource = TestUtils.createGafDatasourceProxy(self.config)

        # Test pickling
        # The context manager closes the file even if dump() raises; binary
        # mode is what pickle streams expect.
        with open('out/testGAFPickle.pkl', 'wb') as pickle_fp:
            dump(gafDatasource, pickle_fp)

        m1 = MutationData()
        m1.chr = '3'
        m1.start = '178866811'
        m1.end = '178866811'
        m1.ref_allele = "A"
        m1.alt_allele = "C"
        m1.build = "hg19"

        m2 = MutationData()
        m2.chr = '3'
        m2.start = '178866812'
        m2.end = '178866812'
        m2.ref_allele = "A"
        m2.alt_allele = "C"
        m2.build = "hg19"

        p = LoggingPool(processes=2)
        try:
            result = p.map(annotate_mutation_global, [(gafDatasource, m1),
                                                      (gafDatasource, m2)])
        finally:
            # Shut the workers down even when map() raises.
            p.close()
            p.join()

        for r in result:
            self.assertIn("transcript_id", r.keys())
            self.assertIn("gene", r.keys())
            self.assertEqual(r["gene"], "PIK3CA")
        self.assertNotEqual(result[0].start, result[1].start)
Esempio n. 6
0
    def testRealWorld(self):
        """Run two chr1 G>T SNPs through the transcript and COSMIC datasources.

        Only the first SNP has an expectation: zero overlapping COSMIC
        mutations.  The second SNP is annotated with no assertion in the code
        visible here, so it acts as a does-not-raise check.
        """
        gafDS = TestUtils.createTranscriptProviderDatasource(self.config)
        cosmicDS = TestUtils.createCosmicDatasource(self.config)

        # These values are not taken from a real world scenario, but are cooked for this test.
        # The second position is 1	150483621	150483621
        annotated = []
        for position in ('12941796', '150483621'):
            m = MutationData()
            m.chr = '1'
            m.start = position
            m.end = position
            m.ref_allele = "G"
            m.alt_allele = "T"
            # Transcript annotation runs before COSMIC annotation.
            m = gafDS.annotate_mutation(m)
            m = cosmicDS.annotate_mutation(m)
            annotated.append(m)

        # assertEqual reports the actual count on failure.
        # NOTE(review): nothing is asserted about annotated[1].
        self.assertEqual(annotated[0]['COSMIC_n_overlapping_mutations'], '0')
Esempio n. 7
0
 def _simple_annotate(self, is_skip_no_alts):
     """Annotate two identical chr1 mutations with no datasources and count the output.

     The first mutation additionally carries an "alt_allele_seen" annotation
     set to "False"; the second does not.

     :param is_skip_no_alts: forwarded unchanged to RunSpecification.initialize.
     :return: number of mutations yielded by Annotator.annotate_mutations.
     """
     def build_mutation():
         # Both inputs share the same coordinates and alleles.
         mut = MutationData()
         mut.chr = "1"
         mut.start = "12941796"
         mut.end = "12941796"
         mut.alt_allele = "G"
         mut.ref_allele = "T"
         return mut

     run_spec = RunSpecification()
     run_spec.initialize(None, None, datasources=[],
                         is_skip_no_alts=is_skip_no_alts)

     # The annotator is configured from the run specification.
     annotator = Annotator()
     annotator.initialize(run_spec)

     flagged = build_mutation()
     flagged.createAnnotation("alt_allele_seen", "False")
     unflagged = build_mutation()

     annotated = annotator.annotate_mutations([flagged, unflagged])
     # Consume the result and count how many mutations were produced.
     return sum(1 for _ in annotated)
Esempio n. 8
0
    def test_denovo(self):
        """GAF de novo test.

        Three chr22 mutations with an empty reference allele (all starting at
        22221735) are annotated, and the de novo start classification of each
        is compared with the expected value.
        """
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)

        def classify(end, alt_allele):
            # Annotate one mutation and return its variant classification.
            m = MutationData()
            m.start = str(22221735)
            m.end = str(end)
            m.chr = "22"
            m.ref_allele = ''
            m.alt_allele = alt_allele
            m = gafDatasource.annotate_mutation(m)
            return m['variant_classification']

        # assertEqual reports the actual classification on failure.
        self.assertEqual(classify(22221737, 'CAT'), 'De_novo_Start_OutOfFrame')
        self.assertEqual(classify(22221740, 'AACATAA'), 'De_novo_Start_OutOfFrame')
        self.assertEqual(classify(22221739, 'ACATAA'), 'De_novo_Start_InFrame')
    def test_continuous_exons_in_segments(self):
        """Test that all exons are accounted when annotating adjacent segments that skip an exon.

        The start of the first segment and the end of the second segment are
        resolved against the same chosen transcript; they are expected to map
        to (10, '+') and (8, '-') respectively.
        """
        # SPECC1L 10+      22 24734447 SPECC1L 10+ 41783674 TEF 1- 1215.0 -0.04975556624325125  hg19 CESC.TCGA.BI.A0VR.Tumor.SM.1RACM
        # SPECC1L 8-       22 16282318 POTEH 2- 24730543 SPECC1L 8- 433.0 -0.00781166374668759  hg19 CESC.TCGA.BI.A0VR.Tumor.SM.1RACM
        # SPECC1L-ADORA2A  22 24734447 SPECC1L 10+ 41783674 TEF 1- 1215.0 -0.04975556624325125  hg19 CESC.TCGA.BI.A0VR.Tumor.SM.1RACM

        seg1 = MutationData()
        seg1.chr = "22"
        seg1.start = "24734447"  # Just past exon 9 (0-based)
        seg1.end = "41783674"

        seg2 = MutationData()
        seg2.chr = "22"
        seg2.start = "16282318"
        seg2.end = "24730543"  # Just past exon 8 (0-based)

        # 'ENST00000314328.9' for GENCODE v19
        chosen_tx, transcript_ds = self._get_chosen_tx_and_transcript_ds(seg1.chr, seg1.start)

        # assertEqual shows the tuple actually returned when the check fails.
        result_tuple = transcript_ds._determine_exons_affected_by_start(seg1.start, chosen_tx)
        self.assertEqual(result_tuple, (10, '+'))

        result_tuple = transcript_ds._determine_exons_affected_by_end(seg2.end, chosen_tx)
        self.assertEqual(result_tuple, (8, '-'))
    def test_continuous_exons_in_segments(self):
        """Test that all exons are accounted when annotating adjacent segments that skip an exon.

        Two chr22 segments are defined.  Using the transcript chosen for the
        first segment's start, the test expects the first segment's start to
        resolve to (10, '+') and the second segment's end to (8, '-').
        """
        # SPECC1L 10+      22 24734447 SPECC1L 10+ 41783674 TEF 1- 1215.0 -0.04975556624325125  hg19 CESC.TCGA.BI.A0VR.Tumor.SM.1RACM
        # SPECC1L 8-       22 16282318 POTEH 2- 24730543 SPECC1L 8- 433.0 -0.00781166374668759  hg19 CESC.TCGA.BI.A0VR.Tumor.SM.1RACM
        # SPECC1L-ADORA2A  22 24734447 SPECC1L 10+ 41783674 TEF 1- 1215.0 -0.04975556624325125  hg19 CESC.TCGA.BI.A0VR.Tumor.SM.1RACM

        seg1 = MutationData()
        seg1.chr = "22"
        seg1.start = "24734447"  # Just past exon 9 (0-based)
        seg1.end = "41783674"

        seg2 = MutationData()
        seg2.chr = "22"
        seg2.start = "16282318"
        seg2.end = "24730543"  # Just past exon 8 (0-based)

        # 'ENST00000314328.9' for GENCODE v19
        chosen_tx, transcript_ds = self._get_chosen_tx_and_transcript_ds(seg1.chr, seg1.start)

        start_hit = transcript_ds._determine_exons_affected_by_start(seg1.start, chosen_tx)
        # assertEqual prints the mismatching tuple on failure.
        self.assertEqual(start_hit, (10, '+'))

        end_hit = transcript_ds._determine_exons_affected_by_end(seg2.end, chosen_tx)
        self.assertEqual(end_hit, (8, '-'))
Esempio n. 11
0
 def _simple_annotate(self, is_skip_no_alts):
     """Count the mutations an Annotator with no datasources yields for two inputs.

     Both inputs describe the same chr1 position and alleles; only the first
     is given an "alt_allele_seen" annotation with value "False".

     :param is_skip_no_alts: passed through to RunSpecification.initialize.
     :return: how many mutations annotate_mutations produced.
     """
     spec = RunSpecification()
     spec.initialize(None, None, datasources=[], is_skip_no_alts=is_skip_no_alts)

     # Initialize the annotator with the runspec
     annotator = Annotator()
     annotator.initialize(spec)

     first = MutationData()
     first.chr = "1"
     first.start = "12941796"
     first.end = "12941796"
     first.alt_allele = "G"
     first.ref_allele = "T"
     first.createAnnotation("alt_allele_seen", "False")

     second = MutationData()
     second.chr = "1"
     second.start = "12941796"
     second.end = "12941796"
     second.alt_allele = "G"
     second.ref_allele = "T"

     # Exhaust the returned iterable, tallying one per mutation.
     produced = 0
     for _ in annotator.annotate_mutations([first, second]):
         produced = produced + 1
     return produced
Esempio n. 12
0
    def testMulticoreAnnotate(self):
        """Test a (too) simple annotating exercise from GAF on 2 cores.

        Pickles the datasource proxy to disk, then annotates two adjacent
        chr3 SNPs in a two-process LoggingPool.  Each result is expected to
        have a transcript_id and gene PIK3CA, and the two results are expected
        to keep different start positions.
        """
        gafDatasource = TestUtils.createGafDatasourceProxy(self.config)

        # Test pickling
        # Open in binary mode and close the handle deterministically.
        with open('out/testGAFPickle.pkl', 'wb') as out_fp:
            dump(gafDatasource, out_fp)

        mutations = []
        for position in ('178866811', '178866812'):
            mut = MutationData()
            mut.chr = '3'
            mut.start = position
            mut.end = position
            mut.ref_allele = "A"
            mut.alt_allele = "C"
            mut.build = "hg19"
            mutations.append(mut)

        p = LoggingPool(processes=2)
        try:
            result = p.map(annotate_mutation_global,
                           [(gafDatasource, mut) for mut in mutations])
        finally:
            # Release the worker processes even if map() fails.
            p.close()
            p.join()

        for r in result:
            self.assertIn("transcript_id", r.keys())
            self.assertIn("gene", r.keys())
            self.assertEqual(r["gene"], "PIK3CA")
        self.assertNotEqual(result[0].start, result[1].start)
Esempio n. 13
0
    def testAnnotateListOfMutations(self):
        """Test that we can initialize an Annotator, without an input or output and then feed mutations,
        one at a time... using a runspec.

        A single chr1 mutation is annotated with every datasource found under
        the configured dbDir; the first annotated result is expected to have a
        non-None "gene" value.
        """

        # Locate the datasource directory and create a runspec
        dbDir = self.config.get("DEFAULT", "dbDir")
        ds = DatasourceFactory.createDatasources(dbDir)
        runSpec = RunSpecification()
        runSpec.initialize(None, None, datasources=ds)

        # Initialize the annotator with the runspec
        annotator = Annotator()
        annotator.initialize(runSpec)

        m = MutationData()
        m.chr = "1"
        m.start = "12941796"
        m.end = "12941796"
        m.alt_allele = "G"
        m.ref_allele = "T"

        muts = annotator.annotate_mutations([m])
        # The next() builtin works on both Python 2 and 3 iterators, unlike
        # the Python-2-only .next() method.
        m2 = next(muts)
        self.assertIsNotNone(m2.get("gene", None))
Esempio n. 14
0
    def testFlank(self):
        """Test that we can see a Flank mutation.

        Annotates one single-base mutation per position in a 26-base window on
        chr1 starting at 11042200 and prints each variant classification.

        NOTE(review): nothing is asserted, so this only fails if annotation
        raises or 'variant_classification' is missing.
        """
        # NOTE(review): the original comment cited chr1:28,233,780-28,233,805
        # with a junction at chr1:28,233,793 & 94.  That does not match the
        # window used below and looks copied from another test.
        refs = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alts = "TGGGCTCAGGCTCTCTGAAAAGAAAA"
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
        startWindow = 11042200
        for s in range(startWindow, startWindow + len(refs)):
            m = MutationData()
            m.start = str(s)
            m.end = str(s)
            m.chr = "1"
            m.ref_allele = refs[s - startWindow]
            m.alt_allele = alts[s - startWindow]

            m = gafDatasource.annotate_mutation(m)

            vc = m['variant_classification']

            # A single parenthesized argument prints identically on Python 2 and 3.
            print(vc + "  " + m.start)
Esempio n. 15
0
    def testSpliceSiteWithinNBases(self):
        """Test that a silent mutation is changed to splice site w/in 10 bases of a splice site.

        One single-base mutation is annotated per position in a 21-base chr21
        window.  No position may be classified 'Silent'.  Window indices 8-11
        must be 'Splice_Site' and the checked indices on either side must not be.
        """
        # chr21:10,998,326-10,998,346
        # 10,998,336 is a splice site.  (Junction between 10998335 and 336)
        # AGTTCTCCTT C TGGAAAAAAG
        refs = 'AGTTCTCCTTCTGGAAAAAAG'
        alts = 'TCAGACTGAAAATACCCCCCT'
        window_start = 10998326
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
        vcs = []
        for offset, (ref, alt) in enumerate(zip(refs, alts)):
            position = str(window_start + offset)
            m = MutationData()
            m.start = position
            m.end = position
            m.chr = "21"
            m.ref_allele = ref
            m.alt_allele = alt

            m = gafDatasource.annotate_mutation(m)

            vc = m['variant_classification']
            self.assertNotEqual(vc, 'Silent', 'Silent mutation found when it should be a splice site.')

            vcs.append(vc)
            # A single parenthesized argument prints identically on Python 2 and 3.
            print(vc + "  " + m.start)

        self.assertTrue(all(tmp == "Splice_Site" for tmp in vcs[8:12]), "Not all vcs within 2 bases were splice site: " + str(vcs[8:12]))
        self.assertTrue(all(tmp != "Splice_Site" for tmp in vcs[0:8]), "No splice sites should be seen: " + str(vcs[0:8]))
        # NOTE(review): vcs has 21 entries, so vcs[12:20] leaves the last one
        # (index 20) unchecked.  Confirm whether vcs[12:] was intended.
        self.assertTrue(all(tmp != "Splice_Site" for tmp in vcs[12:20]), "No splice sites should be seen: " + str(vcs[12:20]))
Esempio n. 16
0
    def testMixedAnnotation(self):
        """Test that the COSMIC datasource can retrieve entries by both gp and gpp.

        A mutation carrying gene and protein-position annotations plus genomic
        coordinates is annotated against a small COSMIC test datasource.  It
        is expected to report three overlapping mutations, an amino-acid entry
        containing '1229', and primary sites "lung(3)".
        """
        tabixDir = "testdata/small_cosmic_with_gp_and_gpp/"
        cosmicDS = Cosmic(
            src_file=tabixDir + "small_cosmic_trimmed_for_sorting.txt.tbi.gz",
            title="Cosmic",
            version="test",
            gpp_tabix_file=tabixDir +
            "small_cosmic_trimmed_for_sorting.txt.tbi.byAA.sorted.tsv.gz")

        # These values are not taken from a real world scenario, but are cooked for this test.
        # Line 9 should get picked up genomic coords
        # Lines 7,8 should get picked up by the protein position
        m = MutationData()
        m.createAnnotation("gene", "A2M")
        m.createAnnotation("transcript_protein_position_start", "1300")
        m.createAnnotation("transcript_protein_position_end", "1400")
        m.chr = '12'
        m.start = '9227220'
        m.end = '9227230'
        m = cosmicDS.annotate_mutation(m)

        # assertEqual / assertIn report the offending value on failure.
        self.assertEqual(m['COSMIC_n_overlapping_mutations'], '3')
        self.assertIn(
            '1229', m['COSMIC_overlapping_mutation_AAs'],
            "Could not find the entry specified by genomic coords.")
        self.assertEqual(
            m['COSMIC_overlapping_primary_sites'], "lung(3)",
            "Did not have the correct primary sites annotation (lung(3)): " +
            m['COSMIC_overlapping_primary_sites'])
    def test_validation_correction_valid(self):
        """ Test that the validation allele fields are determined automatically when not specified by the user for a valid mutation.

        A mutation with validation_status "Valid" and blank validation allele
        fields is rendered through TcgaMafOutputRenderer.  The output TSV is
        then read back and every row's validation allele columns are compared
        with the reference and tumor alleles.
        """
        m = MutationData()
        m.chr = "3"
        m.start = "178948145"
        m.end = "178948145"
        m.alt_allele = "A"
        m.ref_allele = "G"
        m['validation_status'] = "Valid"
        m['Match_Norm_Validation_Allele1'] = ""
        m['Match_Norm_Validation_Allele2'] = ""
        m['Tumor_Validation_Allele1'] = ""
        m['Tumor_Validation_Allele2'] = ""
        m['Mutation_Status'] = "Somatic"

        output_filename = os.path.join("out", "test_validation_correction2.maf.tsv")
        config_filename = os.path.join("configs", "tcgaMAF2.4_output.config")

        outputRenderer = TcgaMafOutputRenderer(output_filename, configFile=config_filename)
        outputRenderer.renderMutations(iter([m]))

        tsv_reader = GenericTsvReader(output_filename)

        # NOTE(review): if the reader yields no rows, none of these checks run.
        for line_dict in tsv_reader:
            # assertEqual reports both compared values on failure.
            self.assertEqual(line_dict['Match_Norm_Validation_Allele1'], line_dict['Match_Norm_Validation_Allele2'], "Matched norm alleles did not match.")
            self.assertEqual(line_dict['Tumor_Validation_Allele1'], line_dict['Reference_Allele'], "Tumor validation allele 1 did not match reference for a valid validation result.")
            self.assertEqual(line_dict['Tumor_Validation_Allele2'], line_dict['Tumor_Seq_Allele2'], "Tumor validation allele 2 did not match Tumor_Seq_Allele2 for a valid validation result.")
            self.assertEqual(line_dict['Match_Norm_Validation_Allele1'], line_dict['Tumor_Validation_Allele1'], "Tumor allele 1 did not match normal alleles for a valid validation result.")
            self.assertEqual(line_dict['Match_Norm_Validation_Allele1'], line_dict['Reference_Allele'], "Norm validation alleles did not match reference (norm, reference): (%s, %s)" % (line_dict['Match_Norm_Validation_Allele1'], line_dict['Reference_Allele']))
            self.assertEqual("G", line_dict['Reference_Allele'], "Reference allele should have been G, but was " + line_dict['Reference_Allele'])
            self.assertEqual("A", line_dict['Tumor_Seq_Allele2'], "Alt allele should have been A, but was " + line_dict['Tumor_Seq_Allele2'])
    def testdbNSFPNoRefAltAnnotationWithExactMatch(self):
        """Annotate chr1:35140 (no ref/alt alleles set) against a small dbNSFP test datasource.

        Three annotations (codon position, reference codon and CDS strand) are
        each expected to equal a pipe-delimited String annotation.
        """
        self.logger.info("Initializing dbNSFP")
        ds_dir = os.path.join("testdata", "dbNSFP_chr1_chr3_100vars_exact_no_ref_alt_ds", "hg19")
        config_path = os.path.join(ds_dir, "dbNSFP_chr1_chr3_100vars_exact_no_ref_alt_ds.config")
        datasource = DatasourceFactory.createDatasource(config_path, ds_dir)

        mutation = MutationData()
        mutation.chr = "1"
        mutation.start = "35140"
        mutation.end = "35140"

        annotated = datasource.annotate_mutation(mutation)

        # Annotation name -> expected value.  Order matches the original checks.
        expectations = (
            ("dbNSFP_codonpos", "1|1|1"),
            ("dbNSFP_refcodon", "TAA|TAA|TAA"),
            ("dbNSFP_cds_strand", "-|-|-"),
        )
        for annotation_name, expected_value in expectations:
            actual = annotated.getAnnotation(annotation_name)
            expected = Annotation(value=expected_value, datasourceName="dbNSFP", dataType="String",
                                  description="", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT], number=None)
            self.assertTrue(actual.isEqual(expected), "Annotations do not match.")
Esempio n. 19
0
    def testSilentMutationGoingToSpliceSite(self):
        """Test that a silent mutation within 10 bp of a splice junction should become a splice site.

        Annotates one single-base mutation per position in a 26-base chr1
        window and counts the classifications case-insensitively.  Exactly 4
        'splice_site' and 11 'silent' results are expected.
        """
        # chr1:28,233,780-28,233,805 Junction is at chr1:28,233,793 & 94
        refs = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alts = "TGGGCTCAGGCTCGCTGAAAAGAAAA"
        vcs = []
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
        startWindow = 28233780
        junction = 28233793
        # The window length comes from the allele strings (26 bases), which
        # matches the original hard-coded end of 28233806.
        for s in range(startWindow, startWindow + len(refs)):
            m = MutationData()
            m.start = str(s)
            m.end = str(s)
            m.chr = "1"
            m.ref_allele = refs[s - startWindow]
            m.alt_allele = alts[s - startWindow]

            m = gafDatasource.annotate_mutation(m)

            distanceFromSpliceSite = abs(junction - int(m.start))
            vc = m['variant_classification']
            vcs.append(vc)

            # A single parenthesized argument prints identically on Python 2 and 3.
            print(vc + "  " + m.start + "  " + str(distanceFromSpliceSite))

        numSpliceSites = sum(1 for vc in vcs if vc.lower() == "splice_site")
        numSilent = sum(1 for vc in vcs if vc.lower() == "silent")

        # assertEqual reports the actual count alongside the custom message.
        self.assertEqual(numSpliceSites, 4, "Should have seen 4 splice site mutations, but saw: " + str(numSpliceSites))
        self.assertEqual(numSilent, 11, "Should have seen 11 Silent mutations, but saw: " + str(numSilent))
Esempio n. 20
0
    def testFlank(self):
        """Test that we can see a Flank mutation.

        Walks a 26-base window on chr1 beginning at 11042200, annotates a
        single-base mutation at each position and prints the resulting
        variant classification.

        NOTE(review): there are no assertions, so the test passes unless
        annotation raises or 'variant_classification' is absent.
        """
        # NOTE(review): the original comment referred to
        # chr1:28,233,780-28,233,805 (junction at chr1:28,233,793 & 94).  That
        # does not match the window below and is presumably stale.
        refs = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alts = "TGGGCTCAGGCTCTCTGAAAAGAAAA"
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)
        startWindow = 11042200
        for offset, (ref, alt) in enumerate(zip(refs, alts)):
            position = str(startWindow + offset)
            m = MutationData()
            m.start = position
            m.end = position
            m.chr = "1"
            m.ref_allele = ref
            m.alt_allele = alt

            m = gafDatasource.annotate_mutation(m)

            vc = m['variant_classification']

            # A single parenthesized argument prints identically on Python 2 and 3.
            print(vc + "  " + m.start)
Esempio n. 21
0
    def testAnnotateListOfMutations(self):
        """Test that we can initialize an Annotator, without an input or output and then feed mutations,
        one at a time... using a runspec.

        Datasources are created from the configured dbDir, a single chr1
        mutation is annotated, and the first result is expected to expose a
        non-None "gene" value.
        """

        # Locate the datasource directory and create a runspec
        dbDir = self.config.get("DEFAULT", "dbDir")
        ds = DatasourceFactory.createDatasources(dbDir)
        runSpec = RunSpecification()
        runSpec.initialize(None, None, datasources=ds)

        # Initialize the annotator with the runspec
        annotator = Annotator()
        annotator.initialize(runSpec)

        m = MutationData()
        m.chr = "1"
        m.start = "12941796"
        m.end = "12941796"
        m.alt_allele = "G"
        m.ref_allele = "T"

        muts = [m]

        muts = annotator.annotate_mutations(muts)
        # Use the next() builtin rather than the Python-2-only .next() method.
        m2 = next(muts)
        # assertIsNotNone states the intent directly and reports the value on failure.
        self.assertIsNotNone(m2.get("gene", None))
Esempio n. 22
0
    def testESPCoverageAnnotationWithMissingAnnotationValuesIndelAvgMatch(
            self):
        """Annotate the chrX interval 100075350-100075356 against the small ESP coverage test datasource.

        The resulting ESP_AvgSampleReadDepth annotation is expected to equal a
        Float annotation with value "91.25".
        """
        self.logger.info("Initializing ESP6500SI-V2 Coverage")
        ds_dir = os.path.join("testdata", "small_esp_coverage_avg_ds", "hg19")
        config_path = os.path.join(ds_dir, "small_esp_coverage_avg_ds.config")
        datasource = DatasourceFactory.createDatasource(config_path, ds_dir)

        interval = MutationData()
        interval.chr = "X"
        interval.start = "100075350"
        interval.end = "100075356"

        annotated = datasource.annotate_mutation(interval)
        actual = annotated.getAnnotation("ESP_AvgSampleReadDepth")

        expected_tags = [TagConstants.INFO, TagConstants.NOT_SPLIT]
        expected = Annotation(value="91.25", datasourceName="ESP",
                              dataType="Float", description="",
                              tags=expected_tags, number=None)
        matches = actual.isEqual(expected)
        self.assertTrue(matches, "Annotations do not match.")
    def testESPCoverageAnnotationWithSNPAvgMatch(self):
        """Annotate the single position chrX:100075334 against the small ESP coverage test datasource.

        Three resulting annotations (AA sample read depth, total AA samples
        covered and chromosome) are compared with their expected values and
        data types.
        """
        self.logger.info("Initializing ESP6500SI-V2 Coverage")
        ds_dir = os.path.join("testdata", "small_esp_coverage_avg_ds", "hg19")
        config_path = os.path.join(ds_dir, "small_esp_coverage_avg_ds.config")
        datasource = DatasourceFactory.createDatasource(config_path, ds_dir)

        snp = MutationData()
        snp.chr = "X"
        snp.start = "100075334"
        snp.end = "100075334"

        annotated = datasource.annotate_mutation(snp)

        # (annotation name, expected value, expected data type), in the
        # original check order.
        expectations = (
            ("ESP_AvgAAsampleReadDepth", "75.0", "Float"),
            ("ESP_TotalAAsamplesCovered", "692.0", "Float"),
            ("ESP_Chromosome", "X", "String"),
        )
        for annotation_name, expected_value, expected_type in expectations:
            actual = annotated.getAnnotation(annotation_name)
            expected = Annotation(value=expected_value, datasourceName="ESP", dataType=expected_type,
                                  description="", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT], number=None)
            self.assertTrue(actual.isEqual(expected), "Annotations do not match.")
Esempio n. 24
0
 def testPickleable(self):
     """Test that a near-empty MutationData can be pickled.

     The mutation has only a chromosome and one tagged annotation.  The test
     passes if dumping it to out/testMDPickle.pkl does not raise.
     """
     m = MutationData()
     m.chr = "2"
     m.createAnnotation("fake1", "1")
     m.addTagToAnnotation("fake1", "fakeTag")
     import cPickle
     # The context manager closes the file even if dump() raises; pickle
     # streams should be written in binary mode.
     with open("out/testMDPickle.pkl", 'wb') as pickle_fp:
         cPickle.dump(m, pickle_fp)
Esempio n. 25
0
 def testPickleable(self):
     """Test that a near-empty MutationData can be pickled.

     Builds a mutation with a chromosome and a single tagged annotation, then
     dumps it to out/testMDPickle.pkl.  Success means no exception is raised.
     """
     import cPickle

     m = MutationData()
     m.chr = "2"
     m.createAnnotation("fake1", "1")
     m.addTagToAnnotation("fake1", "fakeTag")

     # Binary mode plus a with-block: the handle is always closed and the
     # pickle stream is not subject to newline translation.
     with open("out/testMDPickle.pkl", 'wb') as out_fp:
         cPickle.dump(m, out_fp)
    def test_validation_correction(self):
        """ Test that the validation allele fields are determined automatically when not specified by the user for invalid mutation.

        A mutation with validation_status "Invalid" and blank validation
        allele fields is rendered through TcgaMafOutputRenderer.  The output
        TSV is read back and each row is checked: all validation alleles are
        expected to equal the reference allele, and Mutation_Status is
        expected to be "None".
        """
        m = MutationData()
        m.chr = "3"
        m.start = "178948145"
        m.end = "178948145"
        m.alt_allele = "A"
        m.ref_allele = "G"
        m['validation_status'] = "Invalid"
        m['Match_Norm_Validation_Allele1'] = ""
        m['Match_Norm_Validation_Allele2'] = ""
        m['Tumor_Validation_Allele1'] = ""
        m['Tumor_Validation_Allele2'] = ""
        m['Mutation_Status'] = "Somatic"

        output_filename = os.path.join("out",
                                       "test_validation_correction1.maf.tsv")
        config_filename = os.path.join("configs", "tcgaMAF2.4_output.config")

        outputRenderer = TcgaMafOutputRenderer(output_filename,
                                               configFile=config_filename)
        outputRenderer.renderMutations(iter([m]))

        tsv_reader = GenericTsvReader(output_filename)

        # NOTE(review): if the reader yields no rows, none of these checks run.
        for line_dict in tsv_reader:
            # assertEqual reports both compared values on failure.
            self.assertEqual(
                line_dict['Match_Norm_Validation_Allele1'],
                line_dict['Match_Norm_Validation_Allele2'],
                "Matched norm alleles did not match.")
            self.assertEqual(
                line_dict['Tumor_Validation_Allele1'],
                line_dict['Tumor_Validation_Allele2'],
                "Tumor alleles did not match for an invalid validation result."
            )
            self.assertEqual(
                line_dict['Match_Norm_Validation_Allele1'],
                line_dict['Tumor_Validation_Allele2'],
                "Tumor alleles did not match normal alleles for an invalid validation result."
            )
            self.assertEqual(
                line_dict['Match_Norm_Validation_Allele1'],
                line_dict['Reference_Allele'],
                "Norm validation alleles did not match reference (norm, reference): (%s, %s)"
                % (line_dict['Match_Norm_Validation_Allele1'],
                   line_dict['Reference_Allele']))
            self.assertEqual(
                "G", line_dict['Reference_Allele'],
                "Reference allele should have been G, but was " +
                line_dict['Reference_Allele'])
            self.assertEqual(
                "None", line_dict['Mutation_Status'],
                "Mutation Status must be None when Validation Status is Invalid: "
                + line_dict['Mutation_Status'])
Esempio n. 27
0
 def testAKT1(self):
     """ Test that this version of the GAF produces the up to date gene for a position given from a website user.

     A chr14 G>A SNP at 105246407 is annotated and the reported gene is
     expected to be AKT1.
     """
     m = MutationData()
     m.chr = '14'
     m.start = '105246407'
     m.end = '105246407'
     m.ref_allele = 'G'
     m.alt_allele = 'A'
     gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
     m = gafDatasource.annotate_mutation(m)
     # assertEqual states the comparison directly; the custom message still
     # carries the gene that was actually found.
     self.assertEqual(m['gene'], "AKT1", "Incorrect gene found: " + m['gene'] + "  If updating GAF, this may not be an error, but should be confirmed manually.")
Esempio n. 28
0
    def test_start_codon(self):
        """Test a start codon hit in a GAF datasource.

        A chr22 A>T SNP at 22221729 is annotated and its variant
        classification is expected to equal VariantClassification.MISSENSE.
        """
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)

        m = MutationData()
        m.start = str(22221729)
        m.end = str(22221729)
        m.chr = "22"
        m.ref_allele = 'A'
        m.alt_allele = 'T'
        m = gafDatasource.annotate_mutation(m)
        # assertEqual reports the actual classification on failure.
        self.assertEqual(m['variant_classification'], VariantClassification.MISSENSE)
Esempio n. 29
0
    def testdbNSFPAnnotationWithMissingExactMatch(self):  # SNPs only
        """Annotate chr1:35138 T>C against a six-variant dbNSFP test datasource.

        Three annotations (codon position, reference codon and CDS strand) are
        each expected to equal an empty-valued annotation of the given data type.
        """
        self.logger.info("Initializing dbNSFP")
        ds_dir = os.path.join("testdata", "dbNSFP_chr1_6vars_exact_ds", "hg19")
        config_path = os.path.join(ds_dir, "dbNSFP_chr1_6vars_exact_ds.config")
        datasource = DatasourceFactory.createDatasource(config_path, ds_dir)

        snp = MutationData()
        snp.chr = "1"
        snp.start = "35138"
        snp.end = "35138"
        snp.ref_allele = "T"
        snp.alt_allele = "C"

        annotated = datasource.annotate_mutation(snp)

        # (annotation name, expected data type); every expected value is the
        # empty string.  Order matches the original checks.
        expectations = (
            ("dbNSFP_codonpos", "Integer"),
            ("dbNSFP_refcodon", "String"),
            ("dbNSFP_cds_strand", "Float"),
        )
        for annotation_name, expected_type in expectations:
            actual = annotated.getAnnotation(annotation_name)
            expected = Annotation(
                value="",
                datasourceName="dbNSFP",
                dataType=expected_type,
                description="",
                tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
                number=None)
            self.assertTrue(actual.isEqual(expected),
                            "Annotations do not match.")
    def test_simple_annotate(self):
        """Annotate the SNP chr1:35138 T>A and check one annotated rank score.

        The datasource is built by self._create_test_ds from a small dbNSFP
        TSV excerpt.
        """
        source_tsv = "testdata/small_tsv_leveldb/dbNSFP2.4_variant.chr1_cut5000.tsv"
        output_dir = os.path.abspath("out/test_simple_annotate_snp_only_leveldb")
        # NOTE(review): presumably the columns used as chr/start/end/ref/alt -- confirm against _create_test_ds.
        columns = ["chr", "pos(1-coor)", "pos(1-coor)", "ref", "alt"]
        ds = self._create_test_ds(source_tsv, output_dir, columns)

        # Record under test: 1	35138	T	A
        snp = MutationData()
        snp.chr, snp.start, snp.end = "1", "35138", "35138"
        snp.ref_allele, snp.alt_allele = "T", "A"
        annotated = ds.annotate_mutation(snp)

        rank_score = annotated['phyloP100way_vertebrate_rankscore']
        self.assertTrue(rank_score == "0.19875")
 def testBasicAnnotation(self):
     """Annotate chr18:48604683 G>A against a small COSMIC TSV.

     Passes when the '_cosmic_muts_disease_counts' annotation on the result
     is truthy.
     """
     cosmic_ds = GenericGenomicMutationDatasource('testdata/small_cosmic_2/cosmic_v65_chr18.tsv')

     mutation = MutationData()
     mutation.chr, mutation.start, mutation.end = '18', '48604683', '48604683'
     mutation.ref_allele, mutation.alt_allele = 'G', 'A'
     mutation.createAnnotation('strand', '+')

     annotated = cosmic_ds.annotate_mutation(mutation)
     disease_counts = annotated['_cosmic_muts_disease_counts']
     self.assertTrue(disease_counts, 'Unable to annotate mutation correctly')
Esempio n. 32
0
    def testMC1R(self):
        """Pin the gene the current GAF reports for chr16:89985913 G>A.

        The assertion currently expects TUBB3.  The authors anticipated that a
        later GAF would report MC1R instead, at which point the expectation
        here must be updated after manual confirmation.
        """
        mutation = MutationData()
        mutation.chr, mutation.start, mutation.end = '16', '89985913', '89985913'
        mutation.ref_allele, mutation.alt_allele = 'G', 'A'

        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
        annotated = transcript_ds.annotate_mutation(mutation)

        # At some point, we would expect this to be MC1R, not TUBB3
        gene = annotated['gene']
        failure_msg = ("Incorrect gene found: " + gene +
                       "  If updating GAF, this may not be an error, but should be confirmed manually.")
        self.assertTrue(gene == "TUBB3", failure_msg)
Esempio n. 33
0
    def testESPCoverageAnnotationWithMissingIndelOverlapMatch(self):
        """Check ESP coverage annotations for the range chrX:100075300-100075336.

        No alleles are set on the mutation.  Three annotations are inspected
        in turn; each expected value is a '|'-delimited string with three
        entries.
        """
        self.logger.info("Initializing ESP6500SI-V2 Coverage")
        ds_dir = os.path.join("testdata", "small_esp_coverage_overlap_ds", "hg19")
        config_path = os.path.join(ds_dir, "small_esp_coverage_overlap_ds.config")
        datasource = DatasourceFactory.createDatasource(config_path, ds_dir)

        region = MutationData()
        region.chr, region.start, region.end = "X", "100075300", "100075336"

        annotated = datasource.annotate_mutation(region)

        # (annotation name, expected value); checked in this order.
        expected_values = (
            ("ESP_AvgAAsampleReadDepth", "75.0|81.0|81.0"),
            ("ESP_TotalAAsamplesCovered", "692|692|692"),
            ("ESP_Chromosome", "X|X|X"),
        )
        for annotation_name, value in expected_values:
            actual = annotated.getAnnotation(annotation_name)
            expected = Annotation(
                value=value,
                datasourceName="ESP",
                dataType="String",
                description="",
                tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
                number=None)
            self.assertTrue(actual.isEqual(expected),
                            "Annotations do not match.")
    def test_small_positive_strand_transcript_change(self):
        """Check that the transcript change is rendered properly for two SNPs.

        One location is on a negative-strand transcript and one on a
        positive-strand transcript; both are annotated with the same test
        GENCODE v19 datasource.
        """
        ds = TestUtils._create_test_gencode_v19_ds("out/small_positive_strand_")

        # (chr, position, ref, alt, expected transcript change)
        cases = (
            ("22", "22221730", "T", "G", "c.1A>C"),   # negative strand
            ("3", "178916614", "G", "T", "c.1G>T"),   # positive strand
        )
        for chrom, position, ref, alt, expected_change in cases:
            snp = MutationData()
            snp.chr, snp.start, snp.end = chrom, position, position
            snp.ref_allele, snp.alt_allele = ref, alt
            annotated = ds.annotate_mutation(snp)
            self.assertTrue(annotated['transcript_change'] == expected_change,
                            "Incorrect transcript change: " + annotated['transcript_change'])
Esempio n. 35
0
 def testEmptyAnswer(self):
     """The Reference Datasource should return a blank ref_context if the chromosome is not found.

     Note: A log entry should also be written, but this is not tested.
     """
     self.logger.info("Please ignore the next logging warning: testdata/reference_ds/chrTHIS_DOES_NOT_EXIST.txt not found.  Please add it.")
     reference_ds = ReferenceDatasource('testdata/reference_ds')

     mutation = MutationData()
     mutation.chr, mutation.start, mutation.end = "THIS_DOES_NOT_EXIST", "11", "11"

     expected_context = ""
     # The annotated result is indexed directly by annotation name.
     annotated = reference_ds.annotate_mutation(mutation)
     self.assertTrue(annotated['ref_context'] == expected_context,
                     "ref_context was not populated properly -- should be blank: " + str(annotated['ref_context']))
Esempio n. 36
0
 def testBasicCosmicInit(self):
     """Create a COSMIC datasource from a sample datasource directory and annotate one position.

     The directory conforms to the standard datasource structure, including
     placement of the config file.  Position 19:58858921 (given as integers)
     is expected to yield 'p.P426P(1)' as the overlapping-mutation AAs.
     """
     cosmic_ds = DatasourceFactory.createDatasource('testdata/small_cosmic/small_cosmic.config', "testdata/small_cosmic")

     mutation = MutationData()
     mutation.chr, mutation.start, mutation.end = 19, 58858921, 58858921

     annotated = cosmic_ds.annotate_mutation(mutation)

     overlapping_aas = annotated['COSMIC_overlapping_mutation_AAs']
     self.assertTrue(overlapping_aas == 'p.P426P(1)',
                     "Did not properly annotate mutation: " + overlapping_aas)
    def test_small_positive_strand_transcript_change(self):
        """Check that the transcript change is rendered properly for two SNPs.

        One location is on a negative-strand transcript and one on a
        positive-strand transcript; both are annotated with the same test
        GENCODE datasource.
        """
        ds = TestUtils._create_test_gencode_ds("out/small_positive_strand_")

        # (chr, position, ref, alt, expected transcript change)
        cases = (
            ("22", "22221730", "T", "G", "c.1A>C"),   # negative strand
            ("3", "178916614", "G", "T", "c.1G>T"),   # positive strand
        )
        for chrom, position, ref, alt, expected_change in cases:
            snp = MutationData()
            snp.chr, snp.start, snp.end = chrom, position, position
            snp.ref_allele, snp.alt_allele = ref, alt
            annotated = ds.annotate_mutation(snp)
            self.assertTrue(annotated['transcript_change'] == expected_change,
                            "Incorrect transcript change: " + annotated['transcript_change'])
Esempio n. 38
0
    def test_start_codon(self):
        """Test a start codon hit in a GAF datasource.

        Annotates chr22:22221729 A>T and expects the variant classification
        to equal VariantClassification.MISSENSE.
        """
        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)

        snp = MutationData()
        snp.start, snp.end = str(22221729), str(22221729)
        snp.chr = "22"
        snp.ref_allele, snp.alt_allele = 'A', 'T'

        annotated = transcript_ds.annotate_mutation(snp)
        classification = annotated['variant_classification']
        self.assertTrue(classification == VariantClassification.MISSENSE)
    def test_simple_annotate_with_nonhuman(self):
        """Test a very simple annotation with a nonhuman genome (saccer).

        Annotates chrI:500 C>A and expects both the annotation transcript and
        the gene to be "YAL069W".
        """
        saccer_ds = self._create_ensembl_ds_from_saccer()

        snp = MutationData()
        snp.chr, snp.start, snp.end = "I", "500", "500"
        snp.ref_allele, snp.alt_allele = "C", "A"

        annotated = saccer_ds.annotate_mutation(snp)

        expected_id = "YAL069W"
        self.assertTrue(annotated['annotation_transcript'] == expected_id)
        self.assertTrue(annotated['gene'] == expected_id)
Esempio n. 40
0
    def testMicroRNA(self):
        """Test proper annotation of miRNA.

        Annotates chr12:31379268 A>G (positions given as integers) and expects
        the gene, compared case-insensitively, to be hsa-mir-3194-3p.
        """
        # GAF record: uc021qwk.1  chr12:31379258-31379277:-  hsa-miR-3194-3p|?  chr12:31379258-31379277:-  Confidence=100
        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)

        mutation = MutationData()
        mutation.start, mutation.end = 31379268, 31379268
        mutation.chr = "12"
        mutation.alt_allele = 'G'
        # This is accurate
        mutation.ref_allele = 'A'

        annotated = transcript_ds.annotate_mutation(mutation)
        gene_matches = annotated['gene'].lower() == "hsa-mir-3194-3p"
        failure_msg = ("Wrong gene (GT: hsa-mir-3194-3p): " + annotated['gene'] +
                       "   -- if updating GAF, this test may fail as this result may not be appropriate.")
        self.assertTrue(gene_matches, failure_msg)
    def test_simple_annotate_with_nonhuman(self):
        """Test a very simple annotation with a nonhuman genome (saccer).

        Annotates chrI:500 C>A and expects both the annotation transcript and
        the gene to be "YAL069W".
        """
        saccer_ds = self._create_ensembl_ds_from_saccer()

        snp = MutationData()
        snp.chr, snp.start, snp.end = "I", "500", "500"
        snp.ref_allele, snp.alt_allele = "C", "A"

        annotated = saccer_ds.annotate_mutation(snp)

        expected_id = "YAL069W"
        self.assertTrue(annotated['annotation_transcript'] == expected_id)
        self.assertTrue(annotated['gene'] == expected_id)
Esempio n. 42
0
    def testFlank2(self):
        """Test a second real-world flank scenario.

        chr1:228646357 C>T is expected to be annotated with gene HIST3H2A and
        classified as 5'Flank.  Both expectations are data specific and may
        change with the GAF or with the padding parameters.
        """
        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)

        # 1	228646357 nearest Gene=HIST3H2A C>T
        snp = MutationData()
        snp.start, snp.end = str(228646357), str(228646357)
        snp.chr = "1"
        snp.ref_allele, snp.alt_allele = 'C', 'T'
        annotated = transcript_ds.annotate_mutation(snp)

        gene_msg = ("Wrong gene (GT: HIST3H2A): " + annotated['gene'] +
                    "   -- if updating GAF, this test may fail as this gene may not be appropriate.")
        self.assertTrue(annotated['gene'] == "HIST3H2A", gene_msg)

        classification_msg = ("Should be 5'Flank, but was " + annotated['variant_classification'] +
                              " -- if updating GAF, this test may fail as this test is data specific.  Also, this may fail if padding parameters are changed.")
        self.assertTrue(annotated['variant_classification'] == "5'Flank", classification_msg)
Esempio n. 43
0
    def testBasicAnnotation(self):
        """Annotate chr18:48604683 G>A against a small COSMIC TSV.

        Passes when the '_cosmic_muts_disease_counts' annotation on the result
        is truthy.
        """
        cosmic_ds = GenericGenomicMutationDatasource('testdata/small_cosmic_2/cosmic_v65_chr18.tsv')

        mutation = MutationData()
        mutation.chr, mutation.start, mutation.end = '18', '48604683', '48604683'
        mutation.ref_allele, mutation.alt_allele = 'G', 'A'
        mutation.createAnnotation('strand', '+')

        annotated = cosmic_ds.annotate_mutation(mutation)
        disease_counts = annotated['_cosmic_muts_disease_counts']
        self.assertTrue(disease_counts, 'Unable to annotate mutation correctly')
    def testSimpleRendering(self):
        """Render one mutation to a BED file and verify the first output line.

        A mutation at chr1 with start = end = 1000000 is rendered with
        SimpleBedOutputRenderer; the first line of the output file is expected
        to hold the space-separated fields "chr1", "999999" and "1000000".
        """
        m = MutationData()
        m.chr = '1'
        m.start = 1000000
        m.end = 1000000
        outputFilename = "out/simpleBEDTest.bed"
        outputRenderer = SimpleBedOutputRenderer(outputFilename)

        outputRenderer.renderMutations([m], Metadata())

        # open() inside a with-block replaces the Python 2-only file() builtin
        # and closes the handle even if reading raises.
        with open(outputFilename, 'r') as fp:
            mOut = fp.readline().strip().split(' ')

        # assertEqual reports both values on failure, unlike assertTrue(a == b).
        self.assertEqual(mOut[0], "chr1")
        self.assertEqual(mOut[1], "999999")
        self.assertEqual(mOut[2], "1000000")
Esempio n. 45
0
    def testBasicAnnotate(self):
        """Test that the COSMIC datasource can be initialized with two index files (gp and gpp) and a simple annotation performed.

        The mutation carries pre-set gene and protein-position annotations and
        is expected to come back with two overlapping mutations.
        """
        tabix_dir = "testdata/small_cosmic_with_gp_and_gpp/"
        gp_index = tabix_dir + "small_cosmic_trimmed_for_sorting.txt.tbi.gz"
        gpp_index = tabix_dir + "small_cosmic_trimmed_for_sorting.txt.tbi.byAA.sorted.tsv.gz"
        cosmic_ds = Cosmic(src_file=gp_index, title="Cosmic", version="test", gpp_tabix_file=gpp_index)

        # These values are not taken from a real world scenario, but are cooked for this test.
        mutation = MutationData()
        preset_annotations = (
            ("gene", "EGFR"),
            ("transcript_protein_position_start", "747"),
            ("transcript_protein_position_end", "747"),
        )
        for annotation_name, value in preset_annotations:
            mutation.createAnnotation(annotation_name, value)
        mutation.chr, mutation.start, mutation.end = '7', '55259560', '55259560'

        annotated = cosmic_ds.annotate_mutation(mutation)

        overlap_count = annotated['COSMIC_n_overlapping_mutations']
        self.assertTrue(overlap_count == '2')
    def test_hgvs_annotations_simple_SNP(self):
        """Test that HGVS annotations appear (incl. protein change) in a mutation.

        If they do, we believe that the Transcript objects are populated
        properly.  Uses the test GENCODE v19 datasource and a SNP on a
        negative-strand transcript (chr22:22221730 T>G, build hg19).
        """
        ds = TestUtils._create_test_gencode_v19_ds("out/test_hgvs_annotations_SNP_")

        # Now for a negative strand
        snp = MutationData()
        snp.chr, snp.start, snp.end = "22", "22221730", "22221730"
        snp.ref_allele, snp.alt_allele = "T", "G"
        snp.build = "hg19"
        annotated = ds.annotate_mutation(snp)

        # (annotation name, expected value); checked in this order.
        expected_hgvs = (
            ('HGVS_genomic_change', 'chr22.hg19:g.22221730T>G'),
            ('HGVS_coding_DNA_change', 'ENST00000215832.6:c.1A>C'),
            ('HGVS_protein_change', 'ENSP00000215832:p.Met1Leu'),
        )
        for annotation_name, expected in expected_hgvs:
            self.assertEqual(annotated.get(annotation_name, None), expected)
Esempio n. 47
0
    def testSimpleGLAnnotate(self):
        """Test a simple annotation case on contig GL000211.1.

        Makes sure that the ref_context and gc_content annotations are correct
        for position 11 with a GC-content window size of 5.
        """
        ds = ReferenceDatasource('testdata/reference_ds', windowSizeGCContent=5)
        m = MutationData()
        m.chr = "GL000211.1"
        m.start = "11"
        m.end = "11"

        groundTruth = "gaattctttttcaagtaagtc"

        guess = ds.annotate_mutation(m)

        # The failure messages read from `guess`, the object being asserted on,
        # so the reported value always matches the value that was compared.
        self.assertTrue(guess['ref_context'] == groundTruth, "ref_context was not populated properly: " + str(guess['ref_context']))

        # gc_content is rounded to 3 decimal places
        self.assertTrue(fabs(float(guess['gc_content']) - (float(3)/float(11))) < .001, "gc_content was not populated properly: " + str(guess['gc_content']))
Esempio n. 48
0
 def testAKT1(self):
     """Check that chr14:105246407 G>A is annotated with the gene AKT1.

     The position was supplied by a website user.  The expected gene depends
     on the GAF version in use, so a failure after a GAF update is not
     necessarily a bug and should be confirmed manually.
     """
     mutation = MutationData()
     mutation.chr, mutation.start, mutation.end = '14', '105246407', '105246407'
     mutation.ref_allele, mutation.alt_allele = 'G', 'A'

     transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
     annotated = transcript_ds.annotate_mutation(mutation)

     gene = annotated['gene']
     failure_msg = ("Incorrect gene found: " + gene +
                    "  If updating GAF, this may not be an error, but should be confirmed manually.")
     self.assertTrue(gene == "AKT1", failure_msg)
Esempio n. 49
0
    def testSimpleRendering(self):
        """Render one mutation to a BED file and verify the first output line.

        A mutation at chr1 with start = end = 1000000 is rendered with
        SimpleBedOutputRenderer; the first line of the output file is expected
        to hold the space-separated fields "chr1", "999999" and "1000000".
        """
        m = MutationData()
        m.chr = '1'
        m.start = 1000000
        m.end = 1000000
        outputFilename = "out/simpleBEDTest.bed"
        outputRenderer = SimpleBedOutputRenderer(outputFilename)

        outputRenderer.renderMutations([m], Metadata())

        # open() inside a with-block replaces the Python 2-only file() builtin
        # and closes the handle even if reading raises.
        with open(outputFilename, 'r') as fp:
            mOut = fp.readline().strip().split(' ')

        # assertEqual reports both values on failure, unlike assertTrue(a == b).
        self.assertEqual(mOut[0], "chr1")
        self.assertEqual(mOut[1], "999999")
        self.assertEqual(mOut[2], "1000000")
    def test_hgvs_annotations_simple_SNP(self):
        """Test that HGVS annotations appear (incl. protein change) in a mutation.

        If they do, we believe that the Transcript objects are populated
        properly.  Uses the test GENCODE datasource and a SNP on a
        negative-strand transcript (chr22:22221730 T>G, build hg19).
        """
        ds = TestUtils._create_test_gencode_ds("out/test_hgvs_annotations_")

        # Now for a negative strand
        snp = MutationData()
        snp.chr, snp.start, snp.end = "22", "22221730", "22221730"
        snp.ref_allele, snp.alt_allele = "T", "G"
        snp.build = "hg19"
        annotated = ds.annotate_mutation(snp)

        # (annotation name, expected value); checked in this order.
        expected_hgvs = (
            ('HGVS_genomic_change', 'chr22.hg19:g.22221730T>G'),
            ('HGVS_coding_DNA_change', 'ENST00000215832.6:c.1A>C'),
            ('HGVS_protein_change', 'ENSP00000215832:p.Met1Leu'),
        )
        for annotation_name, expected in expected_hgvs:
            self.assertEqual(annotated.get(annotation_name, None), expected)
Esempio n. 51
0
    def testMC1R(self):
        """Pin the gene the current GAF reports for chr16:89985913 G>A.

        The assertion currently expects TUBB3.  The authors anticipated that a
        later GAF would report MC1R instead, at which point the expectation
        here must be updated after manual confirmation.
        """
        mutation = MutationData()
        mutation.chr, mutation.start, mutation.end = '16', '89985913', '89985913'
        mutation.ref_allele, mutation.alt_allele = 'G', 'A'

        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
        annotated = transcript_ds.annotate_mutation(mutation)

        # At some point, we would expect this to be MC1R, not TUBB3
        gene = annotated['gene']
        failure_msg = ("Incorrect gene found: " + gene +
                       "  If updating GAF, this may not be an error, but should be confirmed manually.")
        self.assertTrue(gene == "TUBB3", failure_msg)
    def test_no_mapping_file(self):
        """Test that we can still create (from scratch) and instantiate a EnsemblDatasource when no protein mapping is specified (i.e. limited HGVS support).

        The genomic and coding-DNA HGVS annotations are expected as usual; the
        protein change is expected to carry the 'unknown_prot_seq_id' prefix.
        """
        ds = TestUtils._create_test_gencode_ds("out/test_hgvs_annotations_no_mapping_", protein_id_mapping_file=None)

        # Now for a negative strand
        m = MutationData()
        m.chr = "22"
        m.start = "22221730"
        m.end = "22221730"
        m.ref_allele = "T"
        m.alt_allele = "G"
        m.build = "hg19"
        m2 = ds.annotate_mutation(m)
        self.assertEqual(m2.get('HGVS_genomic_change', None), 'chr22.hg19:g.22221730T>G')
        self.assertEqual(m2.get('HGVS_coding_DNA_change', None), 'ENST00000215832.6:c.1A>C')
        self.assertEqual(m2.get('HGVS_protein_change', None), 'unknown_prot_seq_id:p.Met1Leu')
    def test_no_mapping_file(self):
        """Test that we can still create (from scratch) and instantiate a EnsemblDatasource when no protein mapping is specified (i.e. limited HGVS support).

        The genomic and coding-DNA HGVS annotations are expected as usual; the
        protein change is expected to carry the 'unknown_prot_seq_id' prefix.
        """
        ds = TestUtils._create_test_gencode_v19_ds("out/test_hgvs_annotations_no_mapping_file_", protein_id_mapping_file=None)

        # Now for a negative strand
        m = MutationData()
        m.chr = "22"
        m.start = "22221730"
        m.end = "22221730"
        m.ref_allele = "T"
        m.alt_allele = "G"
        m.build = "hg19"
        m2 = ds.annotate_mutation(m)
        self.assertEqual(m2.get('HGVS_genomic_change', None), 'chr22.hg19:g.22221730T>G')
        self.assertEqual(m2.get('HGVS_coding_DNA_change', None), 'ENST00000215832.6:c.1A>C')
        self.assertEqual(m2.get('HGVS_protein_change', None), 'unknown_prot_seq_id:p.Met1Leu')
Esempio n. 54
0
 def testBasicRefInit(self):
     """Create a reference datasource from a sample datasource directory and annotate one position.

     The directory conforms to the standard datasource structure, including
     placement of the config file.  Position chr22:11 is expected to yield
     the ref_context "CCCAAGCTAAACCCAGGCCAC".
     """
     reference_ds = DatasourceFactory.createDatasource('testdata/reference_ds/reference_ds.config', "testdata/reference_ds")

     mutation = MutationData()
     mutation.chr, mutation.start, mutation.end = "22", "11", "11"

     expected_context = "CCCAAGCTAAACCCAGGCCAC"

     # The annotated result is indexed directly by annotation name.
     annotated = reference_ds.annotate_mutation(mutation)

     self.assertTrue(annotated['ref_context'] == expected_context,
                     "ref_context was not populated properly: " + str(annotated['ref_context']))
Esempio n. 55
0
    def testSimpleAnnotate(self):
        """Perform a simple test of one of the aligned chromosomes (chr22) and make sure that we get a reasonable answer.

        Checks both ref_context and gc_content at position 11 with a
        GC-content window size of 5.
        """
        reference_ds = ReferenceDatasource('testdata/reference_ds', windowSizeGCContent=5)

        mutation = MutationData()
        mutation.chr, mutation.start, mutation.end = "22", "11", "11"

        expected_context = "CCCAAGCTAAACCCAGGCCAC"

        annotated = reference_ds.annotate_mutation(mutation)

        self.assertTrue(annotated['ref_context'] == expected_context,
                        "ref_context was not populated properly: " + str(annotated['ref_context']))

        # gc_content is rounded to 3 decimal places
        expected_gc = float(6) / float(11)
        gc_error = fabs(float(annotated['gc_content']) - expected_gc)
        self.assertTrue(gc_error < .001,
                        "gc_content was not populated properly: " + str(annotated['gc_content']))
Esempio n. 56
0
    def testExtentOutOfRangeError(self):
        """If a window extends beyond the beginning or end of a file, the ref_context is truncated.

        What is left is used for gc_content as well.  Position chr22:4 is
        annotated with windowSizeRef=6 and windowSizeGCContent=5.
        """
        reference_ds = ReferenceDatasource('testdata/reference_ds', windowSizeRef=6, windowSizeGCContent=5)

        mutation = MutationData()
        mutation.chr, mutation.start, mutation.end = "22", "4", "4"

        # Untruncated context for comparison: "CCCAAGCTAAACCCAGGCCAC"
        expected_context = "CCCAAGCTAA"

        annotated = reference_ds.annotate_mutation(mutation)

        self.assertTrue(annotated['ref_context'] == expected_context,
                        "ref_context was not populated properly: " + str(annotated['ref_context']))

        # gc_content is rounded to 3 decimal places
        expected_gc = float(5) / float(9)
        gc_error = fabs(float(annotated['gc_content']) - expected_gc)
        self.assertTrue(gc_error < .001,
                        "gc_content was not populated properly: " + str(annotated['gc_content']))
    def test_canonical_tx_list_empty(self):
        """Test that not specifying the canonical list will do nothing.

        The same mutation (chr22:22142650 T>A) is annotated before and after
        an empty custom canonical transcript list is set.  In neither case may
        the chosen transcript start with ENST00000544786.
        """
        excluded_tx = "ENST00000544786"
        ds = TestUtils._create_test_gencode_v19_ds("out/test_canonical_tx_list_")

        snp = MutationData()
        snp.chr, snp.start, snp.end = "22", "22142650", "22142650"
        snp.ref_allele, snp.alt_allele = "T", "A"

        # No custom canonical list has been set yet.
        before = ds.annotate_mutation(snp)
        self.assertFalse(before['annotation_transcript'].startswith(excluded_tx))
        self.assertFalse(before['variant_classification'] == VariantClassification.INTRON)

        # An explicitly empty list.
        ds.set_custom_canonical_txs([])
        after = ds.annotate_mutation(snp)
        self.assertTrue(after['variant_classification'] == VariantClassification.MISSENSE)
        self.assertFalse(after['annotation_transcript'].startswith(excluded_tx))
    def test_protein_position_off_by_one(self, chrom, start, end, ref, alt, gt_prot_change):
        """Check the protein change when a custom canonical transcript list is set.

        NOTE(review): the extra parameters suggest this method is driven by a
        data-provider style decorator that is not visible here -- confirm.

        Args:
            chrom: value assigned to the mutation's chr.
            start: value assigned to the mutation's start.
            end: value assigned to the mutation's end.
            ref: reference allele.
            alt: alternate allele.
            gt_prot_change: expected (ground truth) 'protein_change' annotation.
        """
        config = TestUtils.createUnitTestConfig()
        transcript_ds = TestUtils.createTranscriptProviderDatasource(config)

        # open() inside a with-block replaces the Python 2-only file() builtin
        # and closes the handle even if reading raises.
        with open("testdata/tx_exact_uniprot_matches.txt", 'r') as cc_txs_fp:
            # Keep only the text before the last "." on each line.
            cc_txs = [tx.rsplit(".", 1)[0] for tx in cc_txs_fp]
        cc_txs.append("ENST00000338368") # Add a transcript that is not exactly the same, but close
        transcript_ds.set_custom_canonical_txs(cc_txs)

        m = MutationData()
        m.chr = chrom
        m.start = start
        m.end = end
        m.ref_allele = ref
        m.alt_allele = alt

        m2 = transcript_ds.annotate_mutation(m)

        self.assertEqual(m2['protein_change'], gt_prot_change)
    def test_simple_annotate(self):
        """Annotate a simple example.

        Builds an EnsemblTranscriptDatasource from the title, version and
        src_file found in the "general" section of the saccer ensembl.config
        and annotates chr22:22161963 C>A.  No assertion is made on the result;
        the test passes when annotation completes without raising.
        """
        config_dir = "testdata/ensembl/saccer/"
        parser = ConfigUtils.createConfigParser(config_dir + "ensembl.config")
        ds_title = parser.get("general", "title")
        ds_version = parser.get("general", "version")
        ds_src_file = parser.get("general", "src_file")

        ensembl_ds = EnsemblTranscriptDatasource(title=ds_title, version=ds_version, src_file=ds_src_file)

        snp = MutationData()
        snp.chr, snp.start, snp.end = "22", "22161963", "22161963"
        snp.ref_allele, snp.alt_allele = "C", "A"

        ensembl_ds.annotate_mutation(snp)
    def test_simple_annotate(self):
        """ Annotate a simple example.
        """
        base_config_location = "testdata/ensembl/saccer/"
        config_parser = ConfigUtils.createConfigParser(base_config_location + "ensembl.config")
        title = config_parser.get("general", "title")
        version = config_parser.get("general", "version")
        src_file = config_parser.get("general", "src_file")

        ensembl_ds = EnsemblTranscriptDatasource(title=title, version=version, src_file=src_file)

        m = MutationData()
        m.chr = "22"
        m.start = "22161963"
        m.end = "22161963"
        m.ref_allele = "C"
        m.alt_allele = "A"

        m2 = ensembl_ds.annotate_mutation(m)