Esempio n. 1
0
    def testESPCoverageAnnotationWithMissingAnnotationValuesIndelAvgMatch(
            self):
        """Check ESP_AvgSampleReadDepth for a multi-base range on chrX.

        Builds the small_esp_coverage_avg_ds test datasource, annotates the
        range X:100075350-100075356 and expects the annotation to equal a
        Float annotation with value "91.25".
        """
        self.logger.info("Initializing ESP6500SI-V2 Coverage")
        ds_dir = os.path.join("testdata", "small_esp_coverage_avg_ds", "hg19")
        config_path = os.path.join(ds_dir, "small_esp_coverage_avg_ds.config")
        datasource = DatasourceFactory.createDatasource(config_path, ds_dir)

        mutation = MutationData()
        mutation.chr = "X"
        mutation.start = "100075350"
        mutation.end = "100075356"

        annotated = datasource.annotate_mutation(mutation)
        observed = annotated.getAnnotation("ESP_AvgSampleReadDepth")
        expected = Annotation(value="91.25",
                              datasourceName="ESP",
                              dataType="Float",
                              description="",
                              tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
                              number=None)
        self.assertTrue(observed.isEqual(expected),
                        "Annotations do not match.")
    def test_continuous_exons_in_segments(self):
        """Test that all exons are accounted when annotating adjacent segments that skip an exon.

        seg1 starts where seg2's neighbourhood ends on chr22; the start of
        seg1 is expected to map to exon (10, '+') and the end of seg2 to
        exon (8, '-') of the same chosen transcript.
        """
        # SPECC1L 10+	    22	24734447	SPECC1L	10+	41783674	TEF	1-	1215.0	-0.04975556624325125		hg19	CESC.TCGA.BI.A0VR.Tumor.SM.1RACM
        # SPECC1L 8-	    22	16282318	POTEH	2-	24730543	SPECC1L	8-	433.0	-0.00781166374668759		hg19	CESC.TCGA.BI.A0VR.Tumor.SM.1RACM
        # SPECC1L-ADORA2A	22	24734447	SPECC1L	10+	41783674	TEF	1-	1215.0	-0.04975556624325125		hg19	CESC.TCGA.BI.A0VR.Tumor.SM.1RACM

        seg1 = MutationData()
        seg1.chr = "22"
        seg1.start = "24734447" # Just passed the exon 9 (0-based)
        seg1.end = "41783674"

        seg2 = MutationData()
        seg2.chr = "22"
        seg2.start = "16282318"
        seg2.end = "24730543" # Just passed the exon 8 (0-based)

        # 'ENST00000314328.9' for GENCODE v19
        chosen_tx, transcript_ds = self._get_chosen_tx_and_transcript_ds(seg1.chr, seg1.start)

        # assertEqual reports both tuples on failure, unlike assertTrue(a == b).
        result_tuple = transcript_ds._determine_exons_affected_by_start(seg1.start, chosen_tx)
        self.assertEqual(result_tuple, (10, '+'))

        result_tuple = transcript_ds._determine_exons_affected_by_end(seg2.end, chosen_tx)
        self.assertEqual(result_tuple, (8, '-'))
    def testESPCoverageAnnotationWithSNPAvgMatch(self):
        """Check three ESP annotations for a single-base position on chrX.

        Annotates X:100075334 with the small_esp_coverage_avg_ds test
        datasource and compares each annotation listed in ``expectations``
        against a hand-built Annotation.
        """
        self.logger.info("Initializing ESP6500SI-V2 Coverage")
        ds_dir = os.path.join("testdata", "small_esp_coverage_avg_ds", "hg19")
        datasource = DatasourceFactory.createDatasource(
            os.path.join(ds_dir, "small_esp_coverage_avg_ds.config"), ds_dir)

        snp = MutationData()
        snp.chr = "X"
        snp.start = "100075334"
        snp.end = "100075334"
        annotated = datasource.annotate_mutation(snp)

        # (annotation name, expected value, expected data type)
        expectations = (
            ("ESP_AvgAAsampleReadDepth", "75.0", "Float"),
            ("ESP_TotalAAsamplesCovered", "692.0", "Float"),
            ("ESP_Chromosome", "X", "String"),
        )
        for name, value, data_type in expectations:
            observed = annotated.getAnnotation(name)
            expected = Annotation(value=value,
                                  datasourceName="ESP",
                                  dataType=data_type,
                                  description="",
                                  tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
                                  number=None)
            self.assertTrue(observed.isEqual(expected), "Annotations do not match.")
    def testdbNSFPNoRefAltAnnotationWithExactMatch(self):
        """Check three dbNSFP annotations for position 1:35140.

        Uses the dbNSFP_chr1_chr3_100vars_exact_no_ref_alt_ds test datasource
        and compares each annotation listed in ``expectations`` against a
        hand-built String Annotation.
        """
        self.logger.info("Initializing dbNSFP")
        ds_dir = os.path.join("testdata", "dbNSFP_chr1_chr3_100vars_exact_no_ref_alt_ds", "hg19")
        config_path = os.path.join(ds_dir, "dbNSFP_chr1_chr3_100vars_exact_no_ref_alt_ds.config")
        datasource = DatasourceFactory.createDatasource(config_path, ds_dir)

        mutation = MutationData()
        mutation.chr = "1"
        mutation.start = "35140"
        mutation.end = "35140"
        annotated = datasource.annotate_mutation(mutation)

        # (annotation name, expected pipe-delimited value)
        expectations = (
            ("dbNSFP_codonpos", "1|1|1"),
            ("dbNSFP_refcodon", "TAA|TAA|TAA"),
            ("dbNSFP_cds_strand", "-|-|-"),
        )
        for name, value in expectations:
            observed = annotated.getAnnotation(name)
            expected = Annotation(value=value,
                                  datasourceName="dbNSFP",
                                  dataType="String",
                                  description="",
                                  tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
                                  number=None)
            self.assertTrue(observed.isEqual(expected), "Annotations do not match.")
Esempio n. 5
0
    def _combine_mutations(mutations):
        """
        Merge multiple adjacent mutations into a single new mutation.

        :param mutations: an ordered list of MutationData
        :returns a new MutationData

        :warning: _combine_mutations does not make any attempt to sanity check input mutations
        it will happily combine overlapping and non-adjacent mutations on disparate chromosomes
        """
        if len(mutations) == 0:
            return None
        if len(mutations) == 1:
            return mutations[0]

        # special logic for the attributes
        start = min([mut.start for mut in mutations])
        end = max([mut.end for mut in mutations])
        chr = mutations[0].chr
        ref = "".join([mut.ref_allele for mut in mutations])
        alt = "".join([mut.alt_allele for mut in mutations])
        build = "|".join(set([x.build for x in mutations]))

        #create the new mutation
        newmut = MutationData(chr=chr,
                              start=start,
                              end=end,
                              ref_allele=ref,
                              alt_allele=alt,
                              build=build)

        #add annotations to the mutation
        allAnnotations = set(flatmap(lambda x: x.keys(), mutations))
        annotationNames = allAnnotations - set(
            mutations[0].getAttributeNames())
        for annotName in annotationNames:
            annotations = []
            for mut in mutations:
                try:
                    annotations.append(mut.getAnnotation(annotName))
                except KeyError:
                    pass

            values = sorted(
                (set([x.getValue() for x in annotations if x.getValue()])))
            value = "|".join(values)
            tags = sorted(set(flatmap(lambda x: x.getTags(), annotations)))
            source = annotations[0].getDatasource()
            datatype = annotations[0].getDataType()
            number = annotations[0].getNumber()
            description = annotations[0].getDescription()
            newmut.createAnnotation(annotationName=annotName,
                                    annotationValue=value,
                                    annotationSource=source,
                                    annotationDataType=datatype,
                                    annotationDescription=description,
                                    tags=tags,
                                    number=number)
        return newmut
def generateTranscriptMuts(gafDS,uniprotDS):
    """Yield one uniprot-annotated mutation per transcript known to gafDS.

    For every transcript id in ``gafDS.getTranscriptDict()`` a fresh
    MutationData is given 'gene' and 'transcript_id' annotations, passed
    through ``uniprotDS.annotate_mutation`` and yielded.
    """
    transcripts = gafDS.getTranscriptDict()
    for tx_id in transcripts.keys():
        mutation = MutationData()
        gene_name = transcripts[tx_id]['gene']
        mutation.createAnnotation('gene', gene_name)
        mutation.createAnnotation('transcript_id', tx_id)
        yield uniprotDS.annotate_mutation(mutation)
 def testHeaderCreation(self):
     """Test that a tcga vcf header can be generated, even from a blank mutation.

     The mutation carries only a 'center' annotation; the header must be
     non-None, non-empty and contain that center string.
     """
     vcfOR = TcgaVcfOutputRenderer("out/TCGAVCFHeader.out.txt")
     m = MutationData()
     m.createAnnotation('center', "broad.mit.edu")
     hdr = vcfOR.createVcfHeader(m)
     self.assertTrue(hdr is not None)
     # `!=` replaces the Python-2-only `<>` operator.
     self.assertTrue(hdr != "")
     self.assertTrue(hdr.find("broad.mit.edu") != -1, "Could not find string that should have been in header.")
 def testHeaderCreation(self):
     """Test that a tcga vcf header can be generated, even from a blank mutation.

     Only a 'center' annotation is set on the mutation; the resulting header
     is expected to be non-None, non-empty and to mention the center.
     """
     vcfOR = TcgaVcfOutputRenderer("out/TCGAVCFHeader.out.txt")
     m = MutationData()
     m.createAnnotation('center', "broad.mit.edu")
     hdr = vcfOR.createVcfHeader(m)
     self.assertTrue(hdr is not None)
     # Use `!=`; the `<>` spelling does not exist in Python 3.
     self.assertTrue(hdr != "")
     self.assertTrue(hdr.find("broad.mit.edu") != -1, "Could not find string that should have been in header.")
    def testBasicGeneTSVInit(self):
        """ Make sure that we can initialize a simple tsv data source.

        Creates the small_tsv_ds datasource, annotates a mutation whose gene
        is ABL1 and checks the resulting CGC_Abridged_Name value.
        """

        geneDS = DatasourceFactory.createDatasource("testdata/small_tsv_ds/small_tsv_ds.config", "testdata/small_tsv_ds/")
        # Identity check against None instead of the Python-2-only `<>`.
        self.assertTrue(geneDS is not None, "gene indexed datasource was None.")

        m = MutationData()
        m.createAnnotation('gene',"ABL1")
        m = geneDS.annotate_mutation(m)
        self.assertTrue(m['CGC_Abridged_Name'] == "v-abl Abelson murine leukemia viral oncogene homolog 1","Test gene TSV datasource did not annotate properly.")
    def testMissingAnnotations(self):
        """Annotating without "protein_change" must raise MissingAnnotationException.

        The mutation is given only a "gene" annotation; the raised exception
        is expected to match the pattern "protein_change".
        """
        natvar_ds = GenericGeneProteinPositionDatasource(
            "testdata/simple_uniprot_natvar/simple_uniprot_natvar.tsv",
            title="SmallNatVar",
            version="test")

        gene_only = MutationData()
        gene_only.createAnnotation("gene", "TP53")
        # "protein_change" is deliberately not set on this mutation.

        self.assertRaisesRegexp(MissingAnnotationException,
                                "protein_change",
                                natvar_ds.annotate_mutation,
                                gene_only)
Esempio n. 11
0
 def testSetValues(self):
     """Annotations can be read and overwritten through the dictionary interface."""
     mutation = MutationData()
     initial = (("fake1", "1"), ("fake2", "blah blah"))
     for key, value in initial:
         mutation.createAnnotation(key, value)
     for key, value in initial:
         self.assertTrue(mutation[key] == value, "Could not properly retrieve annotation using the dictionary interface.  " + str(mutation[key]))

     # Overwrite an existing annotation value via item assignment.
     mutation["fake2"] = "Whoa"
     self.assertTrue(mutation["fake2"] == "Whoa", "Could not properly retrieve annotation using the dictionary interface, after a value change.")
     print(str(mutation))
    def testMissingAnnotations(self):
        """A mutation lacking "protein_change" must trigger MissingAnnotationException.

        Only the "gene" annotation is created; the exception message is
        matched against the pattern "protein_change".
        """
        source_path = "testdata/simple_uniprot_natvar/simple_uniprot_natvar.tsv"
        datasource = GenericGeneProteinPositionDatasource(source_path, title="SmallNatVar", version="test")

        incomplete = MutationData()
        incomplete.createAnnotation("gene", "TP53")
        # No "protein_change" annotation is created, on purpose.

        self.assertRaisesRegexp(MissingAnnotationException, "protein_change",
                                datasource.annotate_mutation, incomplete)
    def testDatasourceCreator(self):
        """Create the small_uniprot_prot_seq_ds datasource through the factory and annotate with it.

        A mutation on transcript uc009vvt.1 with protein change p.T1105A is
        expected to receive UniProt_aapos == "969".
        NOTE: This test needs to be updated to use sqlite instead of filesystem file.
        """
        ds_dir = "testdata/small_uniprot_prot_seq_ds/"
        datasource = DatasourceFactory.createDatasource(
            "testdata/small_uniprot_prot_seq_ds/small_uniprot_prot_seq_ds.config",
            ds_dir)
        result_key = "UniProt_aapos"

        mutation = MutationData()
        mutation.createAnnotation("transcript_id", "uc009vvt.1")
        mutation.createAnnotation("protein_change", "p.T1105A")
        mutation = datasource.annotate_mutation(mutation)

        self.assertTrue(mutation[result_key] == "969",
                        "Did not get proper value (969): " + mutation[result_key])
Esempio n. 14
0
 def test_cached_annots_dummy_cache(self):
     """Test dummy cache: with no cache URL, a store followed by a retrieve yields None."""
     manager = CacheManager()
     db_dir_key = "blah"
     # Passing None as the first argument is what this test treats as the dummy cache.
     manager.initialize(None, db_dir_key, is_read_only=False)

     mutation = MutationData()
     for name, value, source in (("blah1", "val1", "INPUT"),
                                 ("blah2", "val5", "some_datasource")):
         mutation.createAnnotation(name, value, annotationSource=source)

     manager.store_annotations_in_cache(mutation)
     cached = manager.retrieve_cached_annotations(mutation)
     self.assertTrue(cached is None)
Esempio n. 15
0
 def testAnnotationSourceIsPopulated(self):
     """Tests that the annotation source is not blank for the example tsv datasource.

     After annotating an ABL1 mutation, the datasource recorded on the
     CGC_Abridged_Name annotation must be neither "Unknown" nor blank.
     """
     geneDS = DatasourceFactory.createDatasource("testdata/small_tsv_ds/small_tsv_ds.config", "testdata/small_tsv_ds/")
     # `is not None` / `!=` replace the Python-2-only `<>` operator.
     self.assertTrue(geneDS is not None, "gene indexed datasource was None.")

     m = MutationData()
     m.createAnnotation('gene',"ABL1")
     m = geneDS.annotate_mutation(m)
     self.assertTrue(m['CGC_Abridged_Name'] == "v-abl Abelson murine leukemia viral oncogene homolog 1","Test gene TSV datasource did not annotate properly.")
     self.assertTrue(m.getAnnotation('CGC_Abridged_Name').getDatasource() != "Unknown", "Annotation source was unknown")
     self.assertTrue(m.getAnnotation('CGC_Abridged_Name').getDatasource().strip() != "", "Annotation source was blank")
 def test_range_fetch(self):
     """Annotating the range 1:78978-79000 is expected to give TestBigWig_score == 0.75."""
     mutation = MutationData()
     for name, value in (('chr', '1'), ('start', 78978), ('end', 79000)):
         mutation.createAnnotation(name, value)

     # The datasource annotates the mutation in place; the return value is not used.
     self.bigwig_datasource.annotate_mutation(mutation)
     score = mutation.get('TestBigWig_score')
     self.assertEqual(score, 0.75)
    def testBasicAnnotationWithChange(self):
        """Test whether we can translate from one coordinate system to another, for a known change.

        A mutation on transcript uc009vvt.1 with protein change p.T1105A is
        expected to receive UniProt_aapos == "969".
        """
        datasource = TranscriptToUniProtProteinPositionTransformingDatasource(
            title="UniProt",
            version="test",
            src_file="file://testdata/small_uniprot_prot_seq_ds/db")

        # Must correspond to what the datasource is going to generate.
        result_key = "UniProt_aapos"

        mutation = MutationData()
        mutation.createAnnotation("transcript_id", "uc009vvt.1")
        mutation.createAnnotation("protein_change", "p.T1105A")
        mutation = datasource.annotate_mutation(mutation)

        self.assertTrue(mutation[result_key] == "969",
                        "Did not get proper value (969): " + mutation[result_key])
Esempio n. 18
0
    def testdbNSFPNoRefAltAnnotationWithExactMatch(self):
        """Verify dbNSFP codonpos, refcodon and cds_strand for position 1:35140.

        The dbNSFP_chr1_chr3_100vars_exact_no_ref_alt_ds test datasource is
        loaded, the mutation annotated, and each annotation compared with a
        hand-built String Annotation.
        """
        self.logger.info("Initializing dbNSFP")
        ds_name = "dbNSFP_chr1_chr3_100vars_exact_no_ref_alt_ds"
        ds_dir = os.path.join("testdata", ds_name, "hg19")
        datasource = DatasourceFactory.createDatasource(
            os.path.join(ds_dir, ds_name + ".config"), ds_dir)

        query = MutationData()
        query.chr = "1"
        query.start = "35140"
        query.end = "35140"
        annotated = datasource.annotate_mutation(query)

        # Checked in this order: (annotation name, expected value).
        checks = [
            ("dbNSFP_codonpos", "1|1|1"),
            ("dbNSFP_refcodon", "TAA|TAA|TAA"),
            ("dbNSFP_cds_strand", "-|-|-"),
        ]
        for annotation_name, expected_value in checks:
            actual = annotated.getAnnotation(annotation_name)
            wanted = Annotation(
                value=expected_value,
                datasourceName="dbNSFP",
                dataType="String",
                description="",
                tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
                number=None)
            self.assertTrue(actual.isEqual(wanted),
                            "Annotations do not match.")
Esempio n. 19
0
    def testESPCoverageAnnotationWithMissingIndelOverlapMatch(self):
        """Check pipe-joined ESP annotations for the range X:100075300-100075336.

        Uses the small_esp_coverage_overlap_ds test datasource; each expected
        annotation is a String holding three "|"-separated values.
        """
        self.logger.info("Initializing ESP6500SI-V2 Coverage")
        ds_dir = os.path.join("testdata", "small_esp_coverage_overlap_ds",
                              "hg19")
        config_path = os.path.join(ds_dir,
                                   "small_esp_coverage_overlap_ds.config")
        datasource = DatasourceFactory.createDatasource(config_path, ds_dir)

        indel = MutationData()
        indel.chr = "X"
        indel.start = "100075300"
        indel.end = "100075336"
        annotated = datasource.annotate_mutation(indel)

        # (annotation name, expected pipe-delimited value)
        expectations = (
            ("ESP_AvgAAsampleReadDepth", "75.0|81.0|81.0"),
            ("ESP_TotalAAsamplesCovered", "692|692|692"),
            ("ESP_Chromosome", "X|X|X"),
        )
        for name, value in expectations:
            observed = annotated.getAnnotation(name)
            expected = Annotation(
                value=value,
                datasourceName="ESP",
                dataType="String",
                description="",
                tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
                number=None)
            self.assertTrue(observed.isEqual(expected),
                            "Annotations do not match.")
Esempio n. 20
0
 def testBasicCosmicInit(self):
     """Create a datasource from the small_cosmic sample directory and annotate one position.

     The directory conforms to the standard datasource structure, including
     placement of the config file.  Position 19:58858921 is expected to get
     COSMIC_overlapping_mutation_AAs == 'p.P426P(1)'.
     """
     cosmic_ds = DatasourceFactory.createDatasource("testdata/small_cosmic/small_cosmic.config", "testdata/small_cosmic")

     mutation = MutationData()
     mutation.chr = 19
     mutation.start = 58858921
     mutation.end = 58858921

     mutation = cosmic_ds.annotate_mutation(mutation)

     key = "COSMIC_overlapping_mutation_AAs"
     self.assertTrue(mutation[key] == "p.P426P(1)", "Did not properly annotate mutation: " + mutation[key])
Esempio n. 21
0
 def testEmptyAnswer(self):
     """The Reference Datasource should return a blank result if the chromosome is not found.

     Note: A log entry should also be written, but this is not tested.
     """
     self.logger.info("Please ignore the next logging warning: testdata/reference_ds/chrTHIS_DOES_NOT_EXIST.txt not found.  Please add it.")
     reference_ds = ReferenceDatasource("testdata/reference_ds")

     mutation = MutationData()
     mutation.chr = "THIS_DOES_NOT_EXIST"
     mutation.start = "11"
     mutation.end = "11"

     expected_context = ""
     # The returned object is indexed directly by annotation name.
     annotated = reference_ds.annotate_mutation(mutation)
     self.assertTrue(annotated["ref_context"] == expected_context, "ref_context was not populated properly -- should be blank: " + str(annotated["ref_context"]))
Esempio n. 22
0
    def test_cached_annots(self):
        """Store and retrieve annotations through a file-backed cache.

        Two annotations are created, one with source "INPUT" and one with
        source "some_datasource"; only the latter ("blah2") is expected to
        come back from the cache.
        """
        cache_path = "out/shove.managertest.annots.cache"
        db_dir_key = "blah"
        manager = CacheManager()
        manager.initialize("file://" + cache_path, db_dir_key, is_read_only=False)

        mutation = MutationData()
        mutation.createAnnotation("blah1", "val1", annotationSource="INPUT")
        mutation.createAnnotation("blah2", "val5", annotationSource="some_datasource")
        manager.store_annotations_in_cache(mutation)

        cached = manager.retrieve_cached_annotations(mutation)
        cached_names = cached.keys()
        self.assertTrue(len(cached_names) == 1)
        self.assertTrue(cached["blah2"].getValue() == "val5")
    def createMutations(self):
        """ No inputs.
        Returns a generator of mutations built from the specified maflite file.

        For each row of ``self._tsvReader`` a MutationData is created with
        ``self._build`` and one 'INPUT' annotation per column.  Column names
        found in ``self._reverseAlternativeDict`` are stored under the name
        they map to.  Values destined for "chr" are converted with
        MutUtils.convertChromosomeStringToMutationDataFormat.
        """

        aliasToField = self._reverseAlternativeDict
        allColumns = self._tsvReader.getFieldNames()

        for line in self._tsvReader:

            # We only need to assign fields that are mutation attributes and have a different name in the maflite file.
            mut = MutationData(build=self._build)

            for col in allColumns:
                # Three scenarios:
                #   1) col is name of mutation data field -- simple createAnnotation
                #   2) col name is an alias for a mutation data field -- do lookup then createAnnotation
                #   3) col name is not an alias for a mutation data field -- simple createAnnotation
                if col in aliasToField:
                    fieldName = aliasToField[col]
                    self.logger.debug(fieldName + " found from " + col)
                else:
                    # Scenario 1 and 3
                    fieldName = col

                # Make sure to convert chromosome values.
                val = line[col]
                if fieldName == "chr":
                    val = MutUtils.convertChromosomeStringToMutationDataFormat(val)
                mut.createAnnotation(fieldName, val, 'INPUT')

            # remove any surrounding whitespace if present
            mut.ref_allele = mut.ref_allele.strip()
            mut.alt_allele = mut.alt_allele.strip()

            # if the alt allele == ref_allele, check that this is not a case where there is an alt_allele2 that is different.
            if mut.alt_allele == mut.ref_allele:
                mut.alt_allele = self._find_alt_allele_in_other_field(
                    line, mut.ref_allele)

            # FIXME: Support more than one alias in the reverse dictionary.  Then this line can be removed.
            # Compare by value: `is ""` tests object identity and only works
            # when the interpreter happens to intern the empty string.
            if mut.start != "" and mut.end == "":
                mut.end = mut.start
            if mut.end != "" and mut.start == "":
                mut.start = mut.end

            yield mut
    def test_continuous_exons_in_segments(self):
        """Test that all exons are accounted when annotating adjacent segments that skip an exon.

        The start of seg1 must resolve to exon (10, '+') and the end of seg2
        to exon (8, '-') on the transcript chosen for seg1's start position.
        """
        # SPECC1L 10+	    22	24734447	SPECC1L	10+	41783674	TEF	1-	1215.0	-0.04975556624325125		hg19	CESC.TCGA.BI.A0VR.Tumor.SM.1RACM
        # SPECC1L 8-	    22	16282318	POTEH	2-	24730543	SPECC1L	8-	433.0	-0.00781166374668759		hg19	CESC.TCGA.BI.A0VR.Tumor.SM.1RACM
        # SPECC1L-ADORA2A	22	24734447	SPECC1L	10+	41783674	TEF	1-	1215.0	-0.04975556624325125		hg19	CESC.TCGA.BI.A0VR.Tumor.SM.1RACM

        seg1 = MutationData()
        seg1.chr = "22"
        seg1.start = "24734447" # Just passed the exon 9 (0-based)
        seg1.end = "41783674"

        seg2 = MutationData()
        seg2.chr = "22"
        seg2.start = "16282318"
        seg2.end = "24730543" # Just passed the exon 8 (0-based)

        # 'ENST00000314328.9' for GENCODE v19
        chosen_tx, transcript_ds = self._get_chosen_tx_and_transcript_ds(seg1.chr, seg1.start)

        # assertEqual shows the actual tuple when the expectation is not met.
        start_result = transcript_ds._determine_exons_affected_by_start(seg1.start, chosen_tx)
        self.assertEqual(start_result, (10, '+'))

        end_result = transcript_ds._determine_exons_affected_by_end(seg2.end, chosen_tx)
        self.assertEqual(end_result, (8, '-'))
 def test_no_data_fetch(self):
     """Test for value not found in bigwig.

     In this case, our test bigwig only has data for chr1, so None is the
     expected TestBigWig_score for a range on chr13.
     """
     mutation = MutationData()
     for name, value in (('chr', '13'), ('start', 78978), ('end', 79000)):
         mutation.createAnnotation(name, value)

     # The datasource annotates the mutation in place; the return value is not used.
     self.bigwig_datasource.annotate_mutation(mutation)
     score = mutation.get('TestBigWig_score')
     self.assertEqual(score, None)
Esempio n. 26
0
    def test_copy(self):
        """Test annotation copy: a copy stored under a new name equals the original."""
        mutation = MutationData()
        mutation.createAnnotation("foo",
                                  "3",
                                  "blah_source",
                                  annotationDescription="testing",
                                  tags=["superblah"],
                                  number="A")
        original = mutation.getAnnotation("foo")
        mutation.createCopyAnnotation(original, "bar")

        # getAnnotation yields Annotation instances, so whole annotations are compared here.
        self.assertEqual(mutation.getAnnotation("foo"), mutation.getAnnotation("bar"))
Esempio n. 27
0
    def testBasicGeneTSVInit(self):
        """ Make sure that we can initialize a simple tsv data source.

        The small_tsv_ds datasource is created, an ABL1 mutation annotated,
        and the CGC_Abridged_Name value checked.
        """

        geneDS = DatasourceFactory.createDatasource(
            "testdata/small_tsv_ds/small_tsv_ds.config",
            "testdata/small_tsv_ds/")
        # `is not None` replaces the Python-2-only `<> None` comparison.
        self.assertTrue(geneDS is not None,
                        "gene indexed datasource was None.")

        m = MutationData()
        m.createAnnotation('gene', "ABL1")
        m = geneDS.annotate_mutation(m)
        self.assertTrue(
            m['CGC_Abridged_Name'] ==
            "v-abl Abelson murine leukemia viral oncogene homolog 1",
            "Test gene TSV datasource did not annotate properly.")
Esempio n. 28
0
    def testSimpleGLAnnotate(self):
        """Test a simple annotation case.  Make sure that the ref_context and gc_content annotations are correct.

        Annotates GL000211.1:11 with a reference datasource created with
        windowSizeGCContent=5.
        """
        ds = ReferenceDatasource('testdata/reference_ds', windowSizeGCContent=5)
        m = MutationData()
        m.chr = "GL000211.1"
        m.start = "11"
        m.end = "11"

        groundTruth = "gaattctttttcaagtaagtc"

        guess = ds.annotate_mutation(m)

        # Failure messages report the object under test (`guess`), not the input `m`.
        self.assertTrue(guess['ref_context'] == groundTruth, "ref_context was not populated properly: " + str(guess['ref_context']))

        # gc_content is rounded to 3 decimal places
        self.assertTrue(fabs(float(guess['gc_content']) - (float(3)/float(11))) < .001, "gc_content was not populated properly: " + str(guess['gc_content']))
    def _is_matching(self, mut, tsv_record):

        chrom = tsv_record[self.tsv_index["chrom"]]
        startPos = tsv_record[self.tsv_index["start"]]
        endPos = tsv_record[self.tsv_index["end"]]
        build = "hg19"

        if self.match_mode == "exact":
            if "ref" in self.tsv_index and "alt" in self.tsv_index:  # ref and alt information is present
                ref = tsv_record[self.tsv_index["ref"]]
                alt = tsv_record[self.tsv_index["alt"]]
                if ref == "-" or alt == "-":  # addresses Mutation Annotation Format based tsv records

                    # TODO: This looks risky to be calling the MutationData constructor directly
                    ds_mut = MutationData(chrom, startPos, endPos, ref, alt,
                                          build)
                else:  # addresses tsv records where the input isn't a Mutation Annotation Format file
                    ds_mut = MutUtils.initializeMutFromAttributes(
                        chrom, startPos, endPos, ref, alt, build)

                if mut.chr == ds_mut.chr and mut.ref_allele == ds_mut.ref_allele \
                    and mut.alt_allele == ds_mut.alt_allele and int(mut.start) == int(ds_mut.start) \
                    and int(mut.end) == int(ds_mut.end):
                    return True
            else:  # do not use ref and alt information
                if mut.chr == chrom and int(
                        mut.start) == int(startPos) and int(
                            mut.end) == int(endPos):
                    return True
        else:
            return TranscriptProviderUtils.test_overlap(
                int(mut.start), int(mut.end), int(startPos), int(endPos))
        return False
    def testSimpleRendering(self):
        """Render one mutation at 1:1000000 to BED and check the first output line.

        The first three space-separated fields are expected to be "chr1",
        "999999" and "1000000".
        """
        m = MutationData()
        m.chr = '1'
        m.start = 1000000
        m.end = 1000000
        outputFilename = "out/simpleBEDTest.bed"
        outputRenderer = SimpleBedOutputRenderer(outputFilename)

        outputRenderer.renderMutations([m], Metadata())

        # open() works on Python 2 and 3 (file() is Python 2 only); the
        # with-block closes the handle even if an assertion below fails.
        with open(outputFilename, 'r') as fp:
            mOut = fp.readline().strip().split(' ')
        self.assertEqual(mOut[0], "chr1")
        self.assertEqual(mOut[1], "999999")
        self.assertEqual(mOut[2], "1000000")
Esempio n. 31
0
    def testSimpleRendering(self):
        """Write a single mutation (1:1000000) as BED and verify the first line.

        Expected space-separated fields: "chr1", "999999", "1000000".
        """
        m = MutationData()
        m.chr = '1'
        m.start = 1000000
        m.end = 1000000
        outputFilename = "out/simpleBEDTest.bed"
        outputRenderer = SimpleBedOutputRenderer(outputFilename)

        outputRenderer.renderMutations([m], Metadata())

        # Read via a context manager so the file is always closed; open()
        # replaces the Python-2-only file() builtin.
        with open(outputFilename, 'r') as fp:
            mOut = fp.readline().strip().split(' ')
        self.assertEqual(mOut[0], "chr1")
        self.assertEqual(mOut[1], "999999")
        self.assertEqual(mOut[2], "1000000")
Esempio n. 32
0
 def testSimpleAnnotation(self):
     """Create a dummy mutation and make sure it gets annotated properly.

     A mutation with transcript_id uc001hms.3 is annotated by the
     small_transcript_tsv_ds datasource; both refseq ids are checked.
     """
     mutation = MutationData()
     mutation.createAnnotation('transcript_id', 'uc001hms.3')
     datasource = DatasourceFactory.createDatasource(
         "testdata/small_transcript_tsv_ds/small_transcript_tsv_ds.config",
         "testdata/small_transcript_tsv_ds/")
     mutation = datasource.annotate_mutation(mutation)

     # (annotation name, expected value)
     expected_ids = (
         ('refseq_test_mRNA_Id', 'NM_022746'),
         ('refseq_test_prot_Id', 'NP_073583'),
     )
     for key, expected in expected_ids:
         self.assertTrue(
             mutation[key] == expected,
             "Transcript-based annotation did not populate properly: " +
             mutation[key])
 def testSimpleAnnotation(self):
     """Annotate a dummy mutation by transcript id and verify the refseq ids.

     Uses the small_transcript_tsv_ds datasource with transcript_id
     uc001hms.3.
     """
     dummy = MutationData()
     dummy.createAnnotation("transcript_id", "uc001hms.3")
     ds_dir = "testdata/small_transcript_tsv_ds/"
     transcript_ds = DatasourceFactory.createDatasource(
         "testdata/small_transcript_tsv_ds/small_transcript_tsv_ds.config", ds_dir
     )
     annotated = transcript_ds.annotate_mutation(dummy)

     failure_prefix = "Transcript-based annotation did not populate properly: "
     for field, wanted in [("refseq_test_mRNA_Id", "NM_022746"),
                           ("refseq_test_prot_Id", "NP_073583")]:
         self.assertTrue(annotated[field] == wanted, failure_prefix + annotated[field])
Esempio n. 34
0
    def testBasicAnnotate(self):
        """Initialize the COSMIC datasource from its two index files (gp and gpp) and run one simple annotation."""
        cosmic_dir = "testdata/small_cosmic_with_gp_and_gpp/"
        gp_index = cosmic_dir + "small_cosmic_trimmed_for_sorting.txt.tbi.gz"
        gpp_index = cosmic_dir + "small_cosmic_trimmed_for_sorting.txt.tbi.byAA.sorted.tsv.gz"
        cosmic = Cosmic(src_file=gp_index, title="Cosmic", version="test", gpp_tabix_file=gpp_index)

        # The inputs below are contrived for this test rather than drawn from real data.
        mutation = MutationData()
        seed_annotations = (
            ("gene", "EGFR"),
            ("transcript_protein_position_start", "747"),
            ("transcript_protein_position_end", "747"),
        )
        for annotation_name, annotation_value in seed_annotations:
            mutation.createAnnotation(annotation_name, annotation_value)
        mutation.chr = '7'
        mutation.start = '55259560'
        mutation.end = '55259560'

        annotated = cosmic.annotate_mutation(mutation)
        overlap_count = annotated['COSMIC_n_overlapping_mutations']
        self.assertTrue(overlap_count == '2')
 def test_appris_selects_transcript(self):
     """Annotate a two-base deletion on chr2 (hg19) and check the chosen annotation transcript.

     The transcript named by ``annotation_transcript`` must exist in the
     transcript datasource and have id ENST00000321356.4.
     """
     deletion = MutationData(chr="2", start="201722365", end="201722366", ref_allele="AC", alt_allele="-", build="hg19")
     provider = TestUtils.createTranscriptProviderDatasource(self.config)
     deletion = provider.annotate_mutation(deletion)
     chosen_id = deletion['annotation_transcript']
     tx = provider.get_transcript(chosen_id)
     self.assertTrue(
         tx is not None,
         "Transcript was None when it should have been found.  Does the ground truth transcript above need to be updated?")
     self.assertEqual(tx._transcript_id, 'ENST00000321356.4')
Esempio n. 36
0
    def test_copy(self):
        """Copy annotation "foo" to "bar" and check that the two annotations compare equal."""
        mutation = MutationData()
        mutation.createAnnotation("foo", "3", "blah_source",
                                  annotationDescription="testing", tags=["superblah"], number="A")
        source_annotation = mutation.getAnnotation("foo")
        mutation.createCopyAnnotation(source_annotation, "bar")

        # getAnnotation hands back Annotation instances (not bare values), so
        # this compares the whole annotation objects.
        first = mutation.getAnnotation("foo")
        second = mutation.getAnnotation("bar")
        self.assertEqual(first, second)
Esempio n. 37
0
    def testSimpleAnnotate(self):
        """Annotate one position on chr22 of the test reference and check ref_context and gc_content."""
        reference = ReferenceDatasource('testdata/reference_ds', windowSizeGCContent=5)
        mutation = MutationData()
        mutation.chr = "22"
        mutation.start = "11"
        mutation.end = "11"

        expected_context = "CCCAAGCTAAACCCAGGCCAC"

        annotated = reference.annotate_mutation(mutation)

        observed_context = annotated['ref_context']
        self.assertTrue(
            observed_context == expected_context,
            "ref_context was not populated properly: " + str(observed_context))

        # gc_content is rounded to 3 decimal places, hence the tolerance.
        observed_gc = annotated['gc_content']
        expected_gc = float(6) / float(11)
        gc_error = fabs(float(observed_gc) - expected_gc)
        self.assertTrue(
            gc_error < .001,
            "gc_content was not populated properly: " + str(observed_gc))
Esempio n. 38
0
 def testBasicRefInit(self):
     """Build a reference datasource from a sample datasource directory and annotate one position.

     The directory follows the standard datasource layout, with the config
     file inside it.
     """
     config_path = 'testdata/reference_ds/reference_ds.config'
     reference = DatasourceFactory.createDatasource(config_path, "testdata/reference_ds")

     mutation = MutationData()
     mutation.chr = "22"
     mutation.start = "11"
     mutation.end = "11"

     expected_context = "CCCAAGCTAAACCCAGGCCAC"

     # The annotated mutation is used directly and indexed by annotation name.
     mutation = reference.annotate_mutation(mutation)

     observed_context = mutation['ref_context']
     self.assertTrue(
         observed_context == expected_context,
         "ref_context was not populated properly: " + str(observed_context))
Esempio n. 39
0
    def testFlank(self):
        """Annotate a run of single-base mutations on chr1 and print each variant classification.

        NOTE(review): despite the name, nothing is asserted about Flank
        classifications; the test only verifies that annotating every
        position completes without raising.
        """
        refs = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alts = "TGGGCTCAGGCTCTCTGAAAAGAAAA"
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)
        # One mutation per base of ``refs``, starting at chr1:11042200.
        startWindow = 11042200
        for s in range(startWindow, startWindow + len(refs)):
            m = MutationData()
            m.start = str(s)
            m.end = str(s)
            m.chr = "1"
            m.ref_allele = refs[s - startWindow]
            m.alt_allele = alts[s - startWindow]

            m = gafDatasource.annotate_mutation(m)

            vc = m['variant_classification']

            # The parenthesized single-argument form prints the same text
            # under the Python 2 print statement and the Python 3 function.
            print(vc + "  " + m.start)
Esempio n. 40
0
 def testAddTag(self):
     """Add a tag to an existing annotation and check that the annotation reports it."""
     mutation = MutationData()
     mutation.createAnnotation("fake1", "1")
     mutation.addTagToAnnotation("fake1", "fakeTag")
     current_tags = mutation.getAnnotation("fake1").getTags()
     tag_present = "fakeTag" in current_tags
     self.assertTrue(tag_present, "Tag was not added properly.")
Esempio n. 41
0
    def testAnnotateListOfMutations(self):
        """Test that we can initialize an Annotator, without an input or output and then feed mutations,
        one at a time... using a runspec.

        The first annotated mutation must carry a non-None ``gene`` value.
        """

        # Locate the datasource directory and create a runspec
        dbDir = self.config.get("DEFAULT", "dbDir")
        ds = DatasourceFactory.createDatasources(dbDir)
        runSpec = RunSpecification()
        runSpec.initialize(None, None, datasources=ds)

        # Initialize the annotator with the runspec
        annotator = Annotator()
        annotator.initialize(runSpec)

        m = MutationData()
        m.chr = "1"
        m.start = "12941796"
        m.end = "12941796"
        m.alt_allele = "G"
        m.ref_allele = "T"

        muts = [m]

        muts = annotator.annotate_mutations(muts)
        # The builtin next() works with both Python 2.6+ and Python 3
        # iterators; the .next() method exists only on Python 2 iterators.
        m2 = next(muts)
        self.assertTrue(m2.get("gene", None) is not None)
Esempio n. 42
0
    def testExtentOutOfRangeError(self):
        """Check that a window reaching past the start or end of the file yields a truncated ref_context.

        gc_content is expected to be computed from whatever bases remain.
        """
        reference = ReferenceDatasource('testdata/reference_ds', windowSizeRef=6, windowSizeGCContent=5)
        mutation = MutationData()
        mutation.chr = "22"
        mutation.start = "4"
        mutation.end = "4"

        # Untruncated sequence for comparison: "CCCAAGCTAAACCCAGGCCAC"
        expected_context = "CCCAAGCTAA"

        annotated = reference.annotate_mutation(mutation)

        observed_context = annotated['ref_context']
        self.assertTrue(
            observed_context == expected_context,
            "ref_context was not populated properly: " + str(observed_context))

        # gc_content is rounded to 3 decimal places, hence the tolerance.
        observed_gc = annotated['gc_content']
        expected_gc = float(5) / float(9)
        gc_error = fabs(float(observed_gc) - expected_gc)
        self.assertTrue(
            gc_error < .001,
            "gc_content was not populated properly: " + str(observed_gc))
    def testPopulatedButNullValuesInInitNLod(self):
        """Test that a blank or "." LOD annotation causes no error in _extract_lod.

        For both ``init_n_lod`` and ``t_lod_fstar``: "" and "." are expected
        to give 50, and numeric strings are expected to come back as integers
        ("6.8" -> 6, "-12.8" -> -12).
        """
        # (raw annotation value, expected extracted LOD), in the order applied.
        lod_cases = (
            ("", 50),
            ('.', 50),
            ('6', 6),
            ('6.8', 6),
            ('-12.8', -12),
        )
        mutation = MutationData()
        renderer = TcgaVcfOutputRenderer("out/blank.vcf")

        for lod_field in ("init_n_lod", "t_lod_fstar"):
            for case_index, (raw_value, expected_lod) in enumerate(lod_cases):
                if case_index == 0:
                    # The first case creates the annotation; later ones overwrite it.
                    mutation.createAnnotation(lod_field, raw_value)
                else:
                    mutation[lod_field] = raw_value
                extracted = renderer._extract_lod(mutation, lod_field)
                self.assertEqual(extracted, expected_lod)
Esempio n. 44
0
 def testPickleable(self):
     """Test that a near-empty MutationData can be pickled.

     Passes when ``cPickle.dump`` completes without raising; the pickle is
     written to ``out/testMDPickle.pkl``.
     """
     m = MutationData()
     m.chr = "2"
     m.createAnnotation("fake1", "1")
     m.addTagToAnnotation("fake1", "fakeTag")
     import cPickle
     # The context manager closes (and flushes) the file even when dump()
     # raises; previously the handle was never closed.
     with open("out/testMDPickle.pkl", 'w') as fp:
         cPickle.dump(m, fp)
Esempio n. 45
0
    def testBasicAnnotation(self):
        """Annotate a chr18 G>A mutation from the small COSMIC TSV and expect a truthy disease-count annotation."""
        cosmic_tsv = 'testdata/small_cosmic_2/cosmic_v65_chr18.tsv'
        datasource = GenericGenomicMutationDatasource(cosmic_tsv)

        mutation = MutationData()
        mutation.chr = '18'
        mutation.start = '48604683'
        mutation.end = '48604683'
        mutation.ref_allele = 'G'
        mutation.alt_allele = 'A'
        mutation.createAnnotation('strand', '+')

        annotated = datasource.annotate_mutation(mutation)
        disease_counts = annotated['_cosmic_muts_disease_counts']
        self.assertTrue(disease_counts, 'Unable to annotate mutation correctly')
Esempio n. 46
0
    def testBasicCosmicInit(self):
        """ Very simple test that will create a datasource from a sample datasource directory.
        The directory conforms to the standard datasource structure, including placement of the config file.

        A mutation at chr19:58858921 is annotated and its
        ``COSMIC_overlapping_mutation_AAs`` value is checked.
        """
        ds = DatasourceFactory.createDatasource(
            'testdata/small_cosmic/small_cosmic.config',
            "testdata/small_cosmic")

        m = MutationData()
        m.chr = 19
        m.start = 58858921
        m.end = 58858921

        m = ds.annotate_mutation(m)

        overlapping_aas = m['COSMIC_overlapping_mutation_AAs']
        # str() keeps message construction from raising TypeError (and hiding
        # the assertion failure) when the annotation value is not a string.
        self.assertEqual(
            overlapping_aas, 'p.P426P(1)',
            "Did not properly annotate mutation: " + str(overlapping_aas))
Esempio n. 47
0
    def testRetrieveMissingAnnotations(self):
        """Check MutUtils.retrieveMissingAnnotations when nothing is missing and when several names are missing."""
        mutation = MutationData()
        for present_name in ("a1", "a2", "a3", "a4"):
            mutation.createAnnotation(present_name, "1")

        # Every requested name exists on the mutation: expect an empty, non-None result.
        missing = MutUtils.retrieveMissingAnnotations(mutation, ["a3", "a2"])
        self.assertIsNotNone(missing)
        self.assertTrue(len(missing) == 0, "Result was not empty: " + str(missing))

        # Only "a1" exists here; the other three names should come back, sorted.
        requested = ["zztop", "a1", "blah", "dummy"]
        missing = MutUtils.retrieveMissingAnnotations(mutation, requested)
        self.assertTrue(missing[0] == "blah", "Result was not sorted")
        expected_missing = ("blah", "dummy", "zztop")
        self.assertTrue(
            all(name in missing for name in expected_missing),
            "Incorrect elements (Truth: [zztop, blah, dummy]): " + str(missing))
 def testBasicAnnotation(self):
     """Annotate a chr18 G>A mutation from the small COSMIC TSV and expect a truthy disease-count annotation."""
     cosmic_tsv = 'testdata/small_cosmic_2/cosmic_v65_chr18.tsv'
     datasource = GenericGenomicMutationDatasource(cosmic_tsv)

     mutation = MutationData()
     mutation.chr = '18'
     mutation.start = '48604683'
     mutation.end = '48604683'
     mutation.ref_allele = 'G'
     mutation.alt_allele = 'A'
     mutation.createAnnotation('strand', '+')

     annotated = datasource.annotate_mutation(mutation)
     disease_counts = annotated['_cosmic_muts_disease_counts']
     self.assertTrue(disease_counts, 'Unable to annotate mutation correctly')
Esempio n. 49
0
    def testMixedAnnotation(self):
        """Test that the COSMIC datasource can retrieve entries by both gp and gpp."""
        tabixDir = "testdata/small_cosmic_with_gp_and_gpp/"
        cosmicDS = Cosmic(src_file=tabixDir + "small_cosmic_trimmed_for_sorting.txt.tbi.gz", title="Cosmic", version="test", gpp_tabix_file= tabixDir + "small_cosmic_trimmed_for_sorting.txt.tbi.byAA.sorted.tsv.gz")

        # These values are not taken from a real world scenario, but are cooked for this test.
        # Line 9 should get picked up genomic coords
        # Lines 7,8 should get picked up by the protein position
        m = MutationData()
        m.createAnnotation("gene", "A2M")
        m.createAnnotation("transcript_protein_position_start", "1300")
        m.createAnnotation("transcript_protein_position_end", "1400")
        m.chr = '12'
        m.start = '9227220'
        m.end = '9227230'
        m = cosmicDS.annotate_mutation(m)

        # assertEqual reports both operands on failure, unlike assertTrue(a == b).
        self.assertEqual(m['COSMIC_n_overlapping_mutations'], '3')
        self.assertTrue('1229' in m['COSMIC_overlapping_mutation_AAs'], "Could not find the entry specified by genomic coords.")
        primary_sites = m['COSMIC_overlapping_primary_sites']
        # str() keeps message construction from raising TypeError (and hiding
        # the assertion failure) when the annotation value is not a string.
        self.assertEqual(primary_sites, "lung(3)", "Did not have the correct primary sites annotation (lung(3)): " + str(primary_sites))
    def test_validation_correction(self):
        """Check that blank validation allele fields are filled in automatically for an Invalid mutation.

        Each row of the rendered MAF must have all four validation alleles
        equal to the reference allele (G) and a Mutation_Status of "None".
        """
        mutation = MutationData()
        mutation.chr = "3"
        mutation.start = "178948145"
        mutation.end = "178948145"
        mutation.alt_allele = "A"
        mutation.ref_allele = "G"
        user_fields = (
            ('validation_status', "Invalid"),
            ('Match_Norm_Validation_Allele1', ""),
            ('Match_Norm_Validation_Allele2', ""),
            ('Tumor_Validation_Allele1', ""),
            ('Tumor_Validation_Allele2', ""),
            ('Mutation_Status', "Somatic"),
        )
        for field_name, field_value in user_fields:
            mutation[field_name] = field_value

        maf_path = os.path.join("out", "test_validation_correction1.maf.tsv")
        maf_config = os.path.join("configs", "tcgaMAF2.4_output.config")

        renderer = TcgaMafOutputRenderer(maf_path, configFile=maf_config)
        renderer.renderMutations(iter([mutation]))

        for row in GenericTsvReader(maf_path):
            norm_allele1 = row['Match_Norm_Validation_Allele1']
            self.assertTrue(
                norm_allele1 == row['Match_Norm_Validation_Allele2'],
                "Matched norm alleles did not match.")
            self.assertTrue(
                row['Tumor_Validation_Allele1'] == row['Tumor_Validation_Allele2'],
                "Tumor alleles did not match for an invalid validation result.")
            self.assertTrue(
                norm_allele1 == row['Tumor_Validation_Allele2'],
                "Tumor alleles did not match normal alleles for an invalid validation result.")
            reference = row['Reference_Allele']
            self.assertTrue(
                norm_allele1 == reference,
                "Norm validation alleles did not match reference (norm, reference): (%s, %s)"
                % (norm_allele1, reference))
            self.assertTrue(
                "G" == reference,
                "Reference allele should have been G, but was " + reference)
            status = row['Mutation_Status']
            self.assertTrue(
                "None" == status,
                "Mutation Status must be None when Validation Status is Invalid: " + status)
Esempio n. 51
0
    def testBasicRefInit(self):
        """Build a reference datasource from a sample datasource directory and annotate one position.

        The directory follows the standard datasource layout, with the config
        file inside it.
        """
        config_path = 'testdata/reference_ds/reference_ds.config'
        reference = DatasourceFactory.createDatasource(config_path, "testdata/reference_ds")

        mutation = MutationData()
        mutation.chr = "22"
        mutation.start = "11"
        mutation.end = "11"

        expected_context = "CCCAAGCTAAACCCAGGCCAC"

        # The annotated mutation is used directly and indexed by annotation name.
        mutation = reference.annotate_mutation(mutation)

        observed_context = mutation['ref_context']
        self.assertTrue(
            observed_context == expected_context,
            "ref_context was not populated properly: " + str(observed_context))
Esempio n. 52
0
    def testSpliceSiteWithinNBases(self):
        """Test variant classifications in a 21-base window around a chr21 splice junction.

        No position in the window may be classified Silent, the positions
        within 2 bases of the junction must be Splice_Site, and the checked
        positions further away must not be Splice_Site.
        """
        # chr21:10,998,326-10,998,346
        # 10,998,336 is a splice site.  (Junction between 10998335 and 336)
        # AGTTCTCCTT C TGGAAAAAAG
        refs = 'AGTTCTCCTTCTGGAAAAAAG'
        alts = 'TCAGACTGAAAATACCCCCCT'
        startWindow = 10998326
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
        vcs = []
        for s in range(startWindow, startWindow + len(refs)):
            m = MutationData()
            m.start = str(s)
            m.end = str(s)
            m.chr = "21"
            m.ref_allele = refs[s - startWindow]
            m.alt_allele = alts[s - startWindow]

            m = gafDatasource.annotate_mutation(m)

            vc = m['variant_classification']
            self.assertTrue(vc != 'Silent', 'Silent mutation found when it should be a splice site.')

            vcs.append(vc)
            # The parenthesized single-argument form prints the same text
            # under the Python 2 print statement and the Python 3 function.
            print(vc + "  " + m.start)

        self.assertTrue(all(tmp == "Splice_Site" for tmp in vcs[8:12]), "Not all vcs within 2 bases were splice site: " + str(vcs[8:12]))
        self.assertTrue(all(tmp != "Splice_Site" for tmp in vcs[0:8]), "No splice sites should be seen: " + str(vcs[0:8]))
        # NOTE(review): vcs[12:20] leaves the final position (index 20)
        # unchecked -- confirm whether that is intentional.
        self.assertTrue(all(tmp != "Splice_Site" for tmp in vcs[12:20]), "No splice sites should be seen: " + str(vcs[12:20]))
Esempio n. 53
0
    def testAnnotateListOfMutations(self):
        """Test that we can initialize an Annotator, without an input or output and then feed mutations,
        one at a time... using a runspec.

        The first annotated mutation must carry a non-None ``gene`` value.
        """

        # Locate the datasource directory and create a runspec
        dbDir = self.config.get("DEFAULT", "dbDir")
        ds = DatasourceFactory.createDatasources(dbDir)
        runSpec = RunSpecification()
        runSpec.initialize(None, None, datasources=ds)

        # Initialize the annotator with the runspec
        annotator = Annotator()
        annotator.initialize(runSpec)

        m = MutationData()
        m.chr = "1"
        m.start = "12941796"
        m.end = "12941796"
        m.alt_allele = "G"
        m.ref_allele = "T"

        muts = [m]

        muts = annotator.annotate_mutations(muts)
        # Use the builtin next(): it is available on Python 2.6+ and Python 3,
        # whereas the iterator method .next() is Python 2 only.
        m2 = next(muts)
        self.assertTrue(m2.get("gene", None) is not None)
Esempio n. 54
0
    def testSilentMutationGoingToSpliceSite(self):
        """Count Splice_Site and Silent classifications across a window spanning a chr1 splice junction.

        Exactly 4 splice site and 11 silent classifications are expected.
        """
        # chr1:28,233,780-28,233,805; the junction is at chr1:28,233,793 & 94
        refs = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alts = "TGGGCTCAGGCTCGCTGAAAAGAAAA"
        window_start = 28233780
        provider = TestUtils.createTranscriptProviderDatasource(self.config)
        vcs = []
        # Lower-cased classification -> number of times it was seen.
        tally = {}
        for position in range(window_start, 28233806):
            offset = position - window_start
            mutation = MutationData()
            mutation.start = str(position)
            mutation.end = str(position)
            mutation.chr = "1"
            mutation.ref_allele = refs[offset]
            mutation.alt_allele = alts[offset]

            mutation = provider.annotate_mutation(mutation)

            junction_distance = abs(28233793 - int(mutation.start))
            vc = mutation['variant_classification']
            vcs.append(vc)

            vc_key = vc.lower()
            tally[vc_key] = tally.get(vc_key, 0) + 1
            print(vc + "  " + mutation.start + "  " + str(junction_distance))

        splice_total = tally.get("splice_site", 0)
        silent_total = tally.get("silent", 0)
        self.assertTrue(splice_total == 4, "Should have seen 4 splice site mutations, but saw: " + str(splice_total))
        self.assertTrue(silent_total == 11, "Should have seen 11 Silent mutations, but saw: " + str(silent_total))
Esempio n. 55
0
    def testFlank(self):
        """Annotate a run of single-base mutations on chr1 and print each variant classification.

        NOTE(review): despite the name, nothing is asserted about Flank
        classifications; the test only verifies that annotating every
        position completes without raising.
        """
        refs = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alts = "TGGGCTCAGGCTCTCTGAAAAGAAAA"
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
        # One mutation per base of ``refs``, starting at chr1:11042200.
        startWindow = 11042200
        for s in range(startWindow, startWindow+len(refs)):
            m = MutationData()
            m.start = str(s)
            m.end = str(s)
            m.chr="1"
            m.ref_allele = refs[s-startWindow]
            m.alt_allele = alts[s-startWindow]

            m = gafDatasource.annotate_mutation(m)

            vc = m['variant_classification']

            # The parenthesized single-argument form prints the same text
            # under the Python 2 print statement and the Python 3 function.
            print(vc + "  " + m.start)
    def test_validation_correction_valid(self):
        """Check that blank validation allele fields are filled in automatically for a Valid mutation.

        Each row of the rendered MAF must have the normal validation alleles
        and tumor validation allele 1 equal to the reference (G), and tumor
        validation allele 2 equal to Tumor_Seq_Allele2 (A).
        """
        mutation = MutationData()
        mutation.chr = "3"
        mutation.start = "178948145"
        mutation.end = "178948145"
        mutation.alt_allele = "A"
        mutation.ref_allele = "G"
        user_fields = (
            ('validation_status', "Valid"),
            ('Match_Norm_Validation_Allele1', ""),
            ('Match_Norm_Validation_Allele2', ""),
            ('Tumor_Validation_Allele1', ""),
            ('Tumor_Validation_Allele2', ""),
            ('Mutation_Status', "Somatic"),
        )
        for field_name, field_value in user_fields:
            mutation[field_name] = field_value

        maf_path = os.path.join("out", "test_validation_correction2.maf.tsv")
        maf_config = os.path.join("configs", "tcgaMAF2.4_output.config")

        renderer = TcgaMafOutputRenderer(maf_path, configFile=maf_config)
        renderer.renderMutations(iter([mutation]))

        for row in GenericTsvReader(maf_path):
            norm_allele1 = row['Match_Norm_Validation_Allele1']
            self.assertTrue(
                norm_allele1 == row['Match_Norm_Validation_Allele2'],
                "Matched norm alleles did not match.")
            tumor_allele1 = row['Tumor_Validation_Allele1']
            reference = row['Reference_Allele']
            self.assertTrue(
                tumor_allele1 == reference,
                "Tumor validation allele 1 did not match reference for a valid validation result.")
            tumor_allele2 = row['Tumor_Validation_Allele2']
            tumor_seq_allele2 = row['Tumor_Seq_Allele2']
            self.assertTrue(
                tumor_allele2 == tumor_seq_allele2,
                "Tumor validation allele 2 did not match Tumor_Seq_Allele2 for a valid validation result.")
            self.assertTrue(
                norm_allele1 == tumor_allele1,
                "Tumor allele 1 did not match normal alleles for a valid validation result.")
            self.assertTrue(
                norm_allele1 == reference,
                "Norm validation alleles did not match reference (norm, reference): (%s, %s)" % (norm_allele1, reference))
            self.assertTrue(
                "G" == reference,
                "Reference allele should have been G, but was " + reference)
            self.assertTrue(
                "A" == tumor_seq_allele2,
                "Alt allele should have been A, but was " + tumor_seq_allele2)
Esempio n. 57
0
 def testPickleable(self):
     """Test that a near-empty MutationData can be pickled.

     There is no explicit assertion: the test passes when ``cPickle.dump``
     finishes writing ``out/testMDPickle.pkl`` without raising.
     """
     m = MutationData()
     m.chr = "2"
     m.createAnnotation("fake1", "1")
     m.addTagToAnnotation("fake1", "fakeTag")
     import cPickle
     # Open the output through a context manager so the handle is closed
     # (and its contents flushed) even if dump() raises.
     with open("out/testMDPickle.pkl", 'w') as pickle_file:
         cPickle.dump(m, pickle_file)
def generateTranscriptMuts(gafDS, uniprotDS):
    """Yield one annotated MutationData per transcript known to ``gafDS``.

    Each mutation is given ``gene`` and ``transcript_id`` annotations taken
    from the transcript dictionary and is then passed through
    ``uniprotDS.annotate_mutation`` before being yielded.
    """
    transcripts = gafDS.getTranscriptDict()
    for tx_id in transcripts.keys():
        mutation = MutationData()
        gene_name = transcripts[tx_id]['gene']
        mutation.createAnnotation('gene', gene_name)
        mutation.createAnnotation('transcript_id', tx_id)
        yield uniprotDS.annotate_mutation(mutation)
Esempio n. 59
0
 def testIter(self):
     """Iterate a MutationData and check every key is a created annotation or a MutationData attribute."""
     mutation = MutationData()
     mutation.createAnnotation("fake1", "1")
     mutation.createAnnotation("fake2", "blah blah")
     created_names = ["fake1", "fake2"]
     for key in mutation:
         recognized = (key in created_names) or (key in MutationData.attributes)
         self.assertTrue(recognized, "Key not present: " + key)
    def testBasicAnnotation(self):
        ''' Test an extremely simple case.

        Annotates a TP53 p.S376C mutation with the simple UniProt natural
        variation TSV and checks the set of "|"-separated values in the
        UniProt_NatVar_natural_variations annotation.
        '''
        datasource = GenericGeneProteinPositionDatasource("testdata/simple_uniprot_natvar/simple_uniprot_natvar.tsv", title="UniProt_NatVar", version="2011_09")

        m = MutationData()
        m.createAnnotation("gene", "TP53")
        m.createAnnotation("protein_change", "p.S376C")
        m.createAnnotation("other_transcripts", "TP53_uc002gig.1_Intron|TP53_uc002gih.2_Intron|TP53_uc010cne.1_RNA|TP53_uc010cnf.1_3'UTR|TP53_uc010cng.1_3'UTR|TP53_uc002gii.1_Missense_Mutation_p.S244C|TP53_uc010cnh.1_3'UTR|TP53_uc010cni.1_3'UTR|TP53_uc002gij.2_Missense_Mutation_p.S376C")

        # NOTE(review): m2 is never read; the assertion below inspects m, which
        # presumably is annotated in place -- confirm against the datasource.
        m2 = datasource.annotate_mutation(m)
        annotationName= "UniProt_NatVar_natural_variations"
        # Both sides are split on "|" and sorted, so the order of the entries does not matter.
        self.assertTrue(sorted(m[annotationName].split("|")) == sorted("S -> T (in a sporadic cancer; somatic mutation).|S -> A (in a sporadic cancer; somatic mutation).".split("|")), "Incorrect annotation value seen: " + m[annotationName])