コード例 #1
0
    def testRetrievePrecedingBaseFromAnnotationForInsertions(self):
        """Round-trip an insertion through the two preceding-bases helpers.

        For each alt allele the mutation is first rewritten with the values
        returned by MutUtils.retrievePrecedingBasesForInsertions (ref allele
        set to "-", preceding bases stored as an annotation).  Then
        MutUtils.retrievePrecedingBaseFromAnnotationForInsertions is expected
        to hand back the original start, ref allele and alt allele.
        """
        chrom = "1"
        start = 1234567
        end = 1234567  # incorrect, but doesn't matter for the purposes of testing
        ref_allele = "GTC"
        build = "19"

        for alt_allele in ("GTCT", "GTCTT"):
            mut = MutationData(chrom, start, end, ref_allele, alt_allele, build)

            # Forward direction: strip the shared bases off the insertion.
            stripped = MutUtils.retrievePrecedingBasesForInsertions(mut)
            preceding_bases, stripped_alt, stripped_start, stripped_end = stripped
            mut.ref_allele = "-"
            mut.alt_allele = stripped_alt
            mut.start = stripped_start
            mut.end = stripped_end
            mut.createAnnotation(
                annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                annotationValue=preceding_bases)

            # Reverse direction: rebuild the alleles from the annotation.
            restored = MutUtils.retrievePrecedingBaseFromAnnotationForInsertions(mut)
            restored_ref, restored_alt, restored_start = restored

            self.assertTrue(
                restored_start == start,
                "Mut start should be %s but was %s." % (start, restored_start))
            self.assertTrue(
                restored_ref == ref_allele,
                "Ref allele should be %s but was %s." % (ref_allele, restored_ref))
            self.assertTrue(
                restored_alt == alt_allele,
                "Alt allele should be %s but was %s." % (alt_allele, restored_alt))
コード例 #2
0
ファイル: AnnotatorTest.py プロジェクト: ihuerga/oncotator
 def _simple_annotate(self, is_skip_no_alts):
     """Annotate two identical chr1:12941796 T>G mutations and count the output.

     Only the first mutation carries an "alt_allele_seen" annotation (set to
     "False").  The annotator is initialized with no datasources and the
     given is_skip_no_alts flag.

     :param is_skip_no_alts: forwarded to RunSpecification.initialize
     :returns: number of mutations produced by annotate_mutations
     """
     run_spec = RunSpecification()
     run_spec.initialize(None,
                         None,
                         datasources=[],
                         is_skip_no_alts=is_skip_no_alts)

     # Initialize the annotator with the runspec
     annotator = Annotator()
     annotator.initialize(run_spec)

     flagged = MutationData()
     flagged.chr = "1"
     flagged.start = "12941796"
     flagged.end = "12941796"
     flagged.alt_allele = "G"
     flagged.ref_allele = "T"
     flagged.createAnnotation("alt_allele_seen", "False")

     unflagged = MutationData()
     unflagged.chr = "1"
     unflagged.start = "12941796"
     unflagged.end = "12941796"
     unflagged.alt_allele = "G"
     unflagged.ref_allele = "T"

     annotated = annotator.annotate_mutations([flagged, unflagged])
     return sum(1 for _ in annotated)
コード例 #3
0
ファイル: MutationDataTest.py プロジェクト: ihuerga/oncotator
 def testAddTag(self):
     """Check that a tag added to an annotation shows up in its tag list."""
     mutation = MutationData()
     mutation.createAnnotation("fake1", "1")
     mutation.addTagToAnnotation("fake1", "fakeTag")

     tags = mutation.getAnnotation("fake1").getTags()
     self.assertTrue("fakeTag" in tags, "Tag was not added properly.")
コード例 #4
0
ファイル: AnnotatorTest.py プロジェクト: alexramos/oncotator
 def _simple_annotate(self, is_skip_no_alts):
     """Run the annotator over two identical mutations and count the results.

     Both mutations are chr1:12941796 T>G; only the first one has the
     "alt_allele_seen" annotation, with the value "False".  No datasources
     are configured.

     :param is_skip_no_alts: passed through to RunSpecification.initialize
     :returns: how many mutations annotate_mutations yielded
     """
     spec = RunSpecification()
     spec.initialize(None, None, datasources=[], is_skip_no_alts=is_skip_no_alts)

     # Initialize the annotator with the runspec
     annotator = Annotator()
     annotator.initialize(spec)

     with_flag = MutationData()
     with_flag.chr = "1"
     with_flag.start = "12941796"
     with_flag.end = "12941796"
     with_flag.alt_allele = "G"
     with_flag.ref_allele = "T"
     with_flag.createAnnotation("alt_allele_seen", "False")

     without_flag = MutationData()
     without_flag.chr = "1"
     without_flag.start = "12941796"
     without_flag.end = "12941796"
     without_flag.alt_allele = "G"
     without_flag.ref_allele = "T"

     results = annotator.annotate_mutations([with_flag, without_flag])
     return len([None for _ in results])
コード例 #5
0
    def _combine_mutations(mutations):
        """
        Build one MutationData out of an ordered list of mutations.

        An empty list gives None and a one-element list gives that element
        back unchanged.  Otherwise a new mutation is created with:

        * start / end: the minimum start and maximum end over all inputs
        * chr: taken from the first mutation
        * ref / alt allele: the input alleles concatenated in list order
        * build: the distinct input builds joined with "|"

        Every annotation key that is not one of the first mutation's
        attribute names is merged as well: distinct non-empty values are
        sorted and joined with "|", tags are the sorted union, and the
        datasource, data type, number and description come from the first
        mutation in the list that carries the annotation.

        :param mutations: an ordered list of MutationData
        :returns: a new MutationData (or None / the single input, see above)

        :warning: _combine_mutations does not make any attempt to sanity check input mutations
        it will happily combine overlapping and non-adjacent mutations on disparate chromosomes
        """
        if len(mutations) == 0:
            return None
        if len(mutations) == 1:
            return mutations[0]

        first = mutations[0]
        distinct_builds = set(m.build for m in mutations)

        # core attributes of the merged mutation
        combined = MutationData(
            chr=first.chr,
            start=min(m.start for m in mutations),
            end=max(m.end for m in mutations),
            ref_allele="".join(m.ref_allele for m in mutations),
            alt_allele="".join(m.alt_allele for m in mutations),
            build="|".join(distinct_builds))

        # everything that is a key on some mutation but not a plain attribute
        every_key = set(flatmap(lambda x: x.keys(), mutations))
        for key in every_key - set(first.getAttributeNames()):
            carried = []
            for m in mutations:
                try:
                    carried.append(m.getAnnotation(key))
                except KeyError:
                    # this mutation does not have the annotation; skip it
                    pass

            distinct_values = set(a.getValue() for a in carried if a.getValue())
            merged_value = "|".join(sorted(distinct_values))
            merged_tags = sorted(set(flatmap(lambda x: x.getTags(), carried)))
            lead = carried[0]
            combined.createAnnotation(
                annotationName=key,
                annotationValue=merged_value,
                annotationSource=lead.getDatasource(),
                annotationDataType=lead.getDataType(),
                annotationDescription=lead.getDescription(),
                tags=merged_tags,
                number=lead.getNumber())
        return combined
コード例 #6
0
def generateTranscriptMuts(gafDS, uniprotDS):
    """Yield one annotated mutation per transcript in gafDS's transcript dict.

    Each mutation gets a 'gene' annotation (the transcript entry's 'gene'
    value) and a 'transcript_id' annotation, and is then passed through
    uniprotDS.annotate_mutation; the value returned by that call is yielded.
    """
    transcripts = gafDS.getTranscriptDict()
    for tid in transcripts.keys():
        mutation = MutationData()
        gene = transcripts[tid]['gene']
        mutation.createAnnotation('gene', gene)
        mutation.createAnnotation('transcript_id', tid)
        yield uniprotDS.annotate_mutation(mutation)
コード例 #7
0
ファイル: MutationDataTest.py プロジェクト: Tmacme/oncotator
    def test_copy(self):
        """Copying an annotation under a new name yields an equal Annotation."""
        mutation = MutationData()
        mutation.createAnnotation(
            "foo", "3", "blah_source",
            annotationDescription="testing", tags=["superblah"], number="A")
        source_annotation = mutation.getAnnotation("foo")
        mutation.createCopyAnnotation(source_annotation, "bar")

        # getAnnotation hands back Annotation instances, so this compares the
        # whole annotations and not just their values.
        self.assertEqual(mutation.getAnnotation("foo"),
                         mutation.getAnnotation("bar"))
コード例 #8
0
ファイル: MutationDataTest.py プロジェクト: ihuerga/oncotator
 def testPickleable(self):
     """Test that a near-empty MutationData can be pickled.

     The mutation only has chr set plus one tagged annotation.  The test
     passes as long as cPickle.dump does not raise; the file written to
     out/testMDPickle.pkl is not read back.
     """
     m = MutationData()
     m.chr = "2"
     m.createAnnotation("fake1", "1")
     m.addTagToAnnotation("fake1", "fakeTag")
     import cPickle
     # Use a context manager so the output file is closed even if dump raises.
     with open("out/testMDPickle.pkl", 'w') as fp:
         cPickle.dump(m, fp)
コード例 #9
0
def generateTranscriptMuts(gafDS, uniprotDS):
    """Generate a uniprot-annotated mutation for every transcript in gafDS.

    For each key of gafDS.getTranscriptDict() a fresh MutationData is given
    'gene' and 'transcript_id' annotations and handed to
    uniprotDS.annotate_mutation, whose return value is yielded.
    """
    by_transcript = gafDS.getTranscriptDict()
    for transcript in by_transcript.keys():
        mut = MutationData()
        for name, value in (('gene', by_transcript[transcript]['gene']),
                            ('transcript_id', transcript)):
            mut.createAnnotation(name, value)
        annotated = uniprotDS.annotate_mutation(mut)
        yield annotated
コード例 #10
0
ファイル: MutationDataTest.py プロジェクト: ihuerga/oncotator
 def testIter(self):
     """Every key yielded by iterating a MutationData must be known.

     A key is accepted when it is one of the two annotations created here
     or is listed in MutationData.attributes.
     """
     mutation = MutationData()
     mutation.createAnnotation("fake1", "1")
     mutation.createAnnotation("fake2", "blah blah")

     created = ["fake1", "fake2"]
     for key in mutation:
         is_known = (key in created) or (key in MutationData.attributes)
         self.assertTrue(is_known, "Key not present: " + key)
コード例 #11
0
ファイル: MutationDataTest.py プロジェクト: dhlbh/oncotator
 def testPickleable(self):
     """Test that a near-empty MutationData can be pickled.

     Only chr and a single tagged annotation are populated.  Success means
     cPickle.dump completed without raising; nothing is loaded back from
     out/testMDPickle.pkl.
     """
     m = MutationData()
     m.chr = "2"
     m.createAnnotation("fake1", "1")
     m.addTagToAnnotation("fake1", "fakeTag")
     import cPickle
     # The with-block guarantees the file handle is released after the dump.
     with open("out/testMDPickle.pkl", 'w') as fp:
         cPickle.dump(m, fp)
コード例 #12
0
 def testHeaderCreation(self):
     """Test that a tcga vcf header can be generated, even from a blank mutation.

     The mutation carries only a 'center' annotation; the generated header
     must be non-None, non-empty and contain that center string.
     """
     vcfOR = TcgaVcfOutputRenderer("out/TCGAVCFHeader.out.txt")
     m = MutationData()
     m.createAnnotation('center', "broad.mit.edu")
     hdr = vcfOR.createVcfHeader(m)
     self.assertTrue(hdr is not None)
     self.assertTrue(hdr != "")
     self.assertTrue(hdr.find("broad.mit.edu") != -1, "Could not find string that should have been in header.")
コード例 #13
0
 def testHeaderCreation(self):
     """Test that a tcga vcf header can be generated, even from a blank mutation.

     Only a 'center' annotation is set on the mutation.  The header returned
     by createVcfHeader has to be non-None, non-empty and mention the center.
     """
     vcfOR = TcgaVcfOutputRenderer("out/TCGAVCFHeader.out.txt")
     m = MutationData()
     m.createAnnotation('center', "broad.mit.edu")
     hdr = vcfOR.createVcfHeader(m)
     self.assertTrue(hdr is not None)
     self.assertTrue(hdr != "")
     self.assertTrue(hdr.find("broad.mit.edu") != -1, "Could not find string that should have been in header.")
コード例 #14
0
    def testBasicGeneTSVInit(self):
        """Make sure that we can initialize a simple tsv data source.

        The datasource is built from the small_tsv_ds test config, then used
        to annotate a mutation whose only annotation is gene=ABL1; the
        resulting CGC_Abridged_Name value is checked.
        """

        geneDS = DatasourceFactory.createDatasource("testdata/small_tsv_ds/small_tsv_ds.config", "testdata/small_tsv_ds/")
        self.assertTrue(geneDS is not None, "gene indexed datasource was None.")

        m = MutationData()
        m.createAnnotation('gene',"ABL1")
        m = geneDS.annotate_mutation(m)
        self.assertTrue(m['CGC_Abridged_Name'] == "v-abl Abelson murine leukemia viral oncogene homolog 1","Test gene TSV datasource did not annotate properly.")
コード例 #15
0
    def testMissingAnnotations(self):
        """Annotating without "protein_change" must raise MissingAnnotationException.

        The mutation only carries a "gene" annotation, and the exception
        message is expected to match "protein_change".
        """
        ds = GenericGeneProteinPositionDatasource(
            "testdata/simple_uniprot_natvar/simple_uniprot_natvar.tsv",
            title="SmallNatVar",
            version="test")

        # "protein_change" is deliberately left off this mutation.
        mutation = MutationData()
        mutation.createAnnotation("gene", "TP53")

        self.assertRaisesRegexp(
            MissingAnnotationException, "protein_change",
            ds.annotate_mutation, mutation)
コード例 #16
0
    def testMissingAnnotations(self):
        """Check that a missing "protein_change" annotation is reported.

        With only "gene" present on the mutation, annotate_mutation has to
        raise MissingAnnotationException with a message matching
        "protein_change".
        """
        tsv_path = "testdata/simple_uniprot_natvar/simple_uniprot_natvar.tsv"
        natvar_ds = GenericGeneProteinPositionDatasource(tsv_path, title="SmallNatVar", version="test")

        gene_only = MutationData()
        gene_only.createAnnotation("gene", "TP53")
        # No "protein_change" annotation on purpose: its absence is what is tested.

        self.assertRaisesRegexp(MissingAnnotationException, "protein_change", natvar_ds.annotate_mutation, gene_only)
コード例 #17
0
ファイル: MutationDataTest.py プロジェクト: dhlbh/oncotator
 def testSetValues(self):
     """Annotations can be read and overwritten through the dictionary interface."""
     mutation = MutationData()
     mutation.createAnnotation("fake1", "1")
     mutation.createAnnotation("fake2", "blah blah")

     read_failure = "Could not properly retrieve annotation using the dictionary interface.  "
     for name, expected in (("fake1", "1"), ("fake2", "blah blah")):
         self.assertTrue(mutation[name] == expected,
                         read_failure + str(mutation[name]))

     # Overwrite an existing annotation and read it back.
     mutation["fake2"] = "Whoa"
     self.assertTrue(
         mutation["fake2"] == "Whoa",
         "Could not properly retrieve annotation using the dictionary interface, after a value change.")
     print(str(mutation))
コード例 #18
0
 def testAnnotationSourceIsPopulated(self):
     """Tests that the annotation source is not blank for the example tsv datasource.

     After annotating a gene=ABL1 mutation, the datasource recorded on the
     CGC_Abridged_Name annotation must be neither "Unknown" nor blank.
     """
     geneDS = DatasourceFactory.createDatasource("testdata/small_tsv_ds/small_tsv_ds.config", "testdata/small_tsv_ds/")
     self.assertTrue(geneDS is not None, "gene indexed datasource was None.")

     m = MutationData()
     m.createAnnotation('gene',"ABL1")
     m = geneDS.annotate_mutation(m)
     self.assertTrue(m['CGC_Abridged_Name'] == "v-abl Abelson murine leukemia viral oncogene homolog 1","Test gene TSV datasource did not annotate properly.")
     self.assertTrue(m.getAnnotation('CGC_Abridged_Name').getDatasource() != "Unknown", "Annotation source was unknown")
     self.assertTrue(m.getAnnotation('CGC_Abridged_Name').getDatasource().strip() != "", "Annotation source was blank")
コード例 #19
0
    def testRetrievePrecedingBasesForInsertions(self):
        """Check the values produced by MutUtils.retrievePrecedingBasesForInsertions.

        For ref allele "GTC" at start 1234567 and alt alleles "GTCT" and
        "GTCTT", the alt/start/end returned by the helper are applied to the
        mutation (ref allele set to "-", preceding bases stored as an
        annotation).  The mutation must then sit at 1234569-1234570 with alt
        allele "T" and "TT" respectively.

        Failure messages are built from the same expected values that are
        asserted, so the two cannot drift apart.
        """
        chrom = "1"
        start = 1234567
        end = 1234567  # incorrect, but doesn't matter for the purposes of testing
        ref_allele = "GTC"
        build = "19"
        expected_start = 1234569
        expected_end = 1234570

        for alt_allele, expected_alt in (("GTCT", "T"), ("GTCTT", "TT")):
            mut = MutationData(chrom, start, end, ref_allele, alt_allele, build)
            preceding_bases, updated_alt_allele, updated_start, updated_end = \
                MutUtils.retrievePrecedingBasesForInsertions(mut)
            mut.ref_allele = "-"
            mut.alt_allele = updated_alt_allele
            mut.start = updated_start
            mut.end = updated_end
            mut.createAnnotation(
                annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                annotationValue=preceding_bases)

            self.assertTrue("_preceding_bases" in mut,
                            "_preceding_bases is missing in the mutation data.")
            self.assertTrue(mut.start == expected_start,
                            "Mut start should be %s but was %s." % (expected_start, mut.start))
            self.assertTrue(mut.end == expected_end,
                            "Mut end should be %s but was %s." % (expected_end, mut.end))
            self.assertTrue(mut.ref_allele == "-",
                            "Ref allele should be - but was %s." % mut.ref_allele)
            self.assertTrue(mut.alt_allele == expected_alt,
                            "Alt allele should be %s but was %s." % (expected_alt, mut.alt_allele))
コード例 #20
0
ファイル: CacheManagerTest.py プロジェクト: ihuerga/oncotator
 def test_cached_annots_dummy_cache(self):
     """Test the dummy cache: a store followed by a retrieve should give None.

     The CacheManager is initialized with None as its first argument.
     """
     db_dir_key = "blah"
     manager = CacheManager()
     manager.initialize(None, db_dir_key, is_read_only=False)

     mutation = MutationData()
     for name, value, source in (("blah1", "val1", "INPUT"),
                                 ("blah2", "val5", "some_datasource")):
         mutation.createAnnotation(name, value, annotationSource=source)

     manager.store_annotations_in_cache(mutation)
     cached = manager.retrieve_cached_annotations(mutation)
     self.assertTrue(cached is None)
コード例 #21
0
 def testAnnotationSourceIsPopulated(self):
     """Tests that the annotation source is not blank for the example tsv datasource.

     A gene=ABL1 mutation is annotated, and the datasource stored on the
     resulting CGC_Abridged_Name annotation is required to be something
     other than "Unknown" or whitespace.
     """
     geneDS = DatasourceFactory.createDatasource("testdata/small_tsv_ds/small_tsv_ds.config", "testdata/small_tsv_ds/")
     self.assertTrue(geneDS is not None, "gene indexed datasource was None.")

     m = MutationData()
     m.createAnnotation('gene',"ABL1")
     m = geneDS.annotate_mutation(m)
     self.assertTrue(m['CGC_Abridged_Name'] == "v-abl Abelson murine leukemia viral oncogene homolog 1","Test gene TSV datasource did not annotate properly.")
     self.assertTrue(m.getAnnotation('CGC_Abridged_Name').getDatasource() != "Unknown", "Annotation source was unknown")
     self.assertTrue(m.getAnnotation('CGC_Abridged_Name').getDatasource().strip() != "", "Annotation source was blank")
    def testDatasourceCreator(self):
        """Test that the datasource creator process will work for TranscriptToUniProtProteinPositionTransformingDatasource.

        NOTE: This test needs to be updated to use sqlite instead of filesystem file.
        """
        ds_dir = "testdata/small_uniprot_prot_seq_ds/"
        uniprot_ds = DatasourceFactory.createDatasource(
            "testdata/small_uniprot_prot_seq_ds/small_uniprot_prot_seq_ds.config", ds_dir)

        mutation = MutationData()
        mutation.createAnnotation('transcript_id', 'uc009vvt.1')
        mutation.createAnnotation('protein_change', 'p.T1105A')
        mutation = uniprot_ds.annotate_mutation(mutation)

        position_key = "UniProt_aapos"
        self.assertTrue(mutation[position_key] == "969",
                        "Did not get proper value (969): " + mutation[position_key])
コード例 #23
0
 def test_cached_annots_dummy_cache(self):
     """Test dummy cache; a simple store and retrieve should come back as None.

     None is passed as the first argument to CacheManager.initialize.
     """
     cache_manager = CacheManager()
     cache_manager.initialize(None, "blah", is_read_only=False)

     mut = MutationData()
     mut.createAnnotation("blah1", "val1", annotationSource="INPUT")
     mut.createAnnotation("blah2", "val5", annotationSource="some_datasource")
     cache_manager.store_annotations_in_cache(mut)

     retrieved = cache_manager.retrieve_cached_annotations(mut)
     self.assertTrue(retrieved is None)
コード例 #24
0
    def testRangeAnnotation(self):
        """Test a simple case where protein_change spans a range of positions."""
        natvar_ds = GenericGeneProteinPositionDatasource(
            "testdata/simple_uniprot_natvar/simple_uniprot_natvar.tsv",
            title="UniProt_NatVar",
            version="2011_09")

        mutation = MutationData()
        mutation.createAnnotation("gene", "TP53")
        # This is not a valid protein change, but it does the test.
        mutation.createAnnotation("protein_change", "p.SLEELEE370_376del")

        # The assertion below reads from the mutation that was passed in,
        # not from the value returned by annotate_mutation.
        natvar_ds.annotate_mutation(mutation)

        key = "UniProt_NatVar_natural_variations"
        expected = "K -> Q (in a sporadic cancer; somatic mutation).|S -> T (in a sporadic cancer; somatic mutation).|S -> A (in a sporadic cancer; somatic mutation)."
        # Order of the "|"-separated entries is not significant.
        self.assertTrue(
            sorted(mutation[key].split("|")) == sorted(expected.split("|")),
            "Incorrect annotation value seen: " + mutation[key])
コード例 #25
0
    def testBasicAnnotation(self):
        """Test an extremely simple case: a single-position protein change."""
        natvar_ds = GenericGeneProteinPositionDatasource(
            "testdata/simple_uniprot_natvar/simple_uniprot_natvar.tsv",
            title="UniProt_NatVar",
            version="2011_09")

        mutation = MutationData()
        mutation.createAnnotation("gene", "TP53")
        mutation.createAnnotation("protein_change", "p.S376C")

        # The check below looks at the mutation that was passed in rather
        # than at the return value of annotate_mutation.
        natvar_ds.annotate_mutation(mutation)

        key = "UniProt_NatVar_natural_variations"
        expected = "S -> T (in a sporadic cancer; somatic mutation).|S -> A (in a sporadic cancer; somatic mutation)."
        # Compare as sorted lists so entry order does not matter.
        self.assertTrue(
            sorted(mutation[key].split("|")) == sorted(expected.split("|")),
            "Incorrect annotation value seen: " + mutation[key])
    def testBasicAnnotationWithChange(self):
        """Test whether we can translate from one coordinate system to another.

        This tests a known change: protein change p.T1105A on transcript
        uc009vvt.1 is expected to produce a UniProt_aapos of "969".
        """
        uniprot_ds = TranscriptToUniProtProteinPositionTransformingDatasource(
            title="UniProt",
            version="test",
            src_file="file://testdata/small_uniprot_prot_seq_ds/db")

        mutation = MutationData()
        mutation.createAnnotation('transcript_id', 'uc009vvt.1')
        mutation.createAnnotation('protein_change', 'p.T1105A')
        mutation = uniprot_ds.annotate_mutation(mutation)

        # Must correspond to what the datasource is going to generate.
        position_key = "UniProt_aapos"
        self.assertTrue(mutation[position_key] == "969",
                        "Did not get proper value (969): " + mutation[position_key])
コード例 #27
0
    def test_copy(self):
        """Test that createCopyAnnotation produces an annotation equal to its source."""
        mut = MutationData()
        mut.createAnnotation("foo", "3", "blah_source", annotationDescription="testing", tags=["superblah"], number="A")
        mut.createCopyAnnotation(mut.getAnnotation("foo"), "bar")

        # Both sides are Annotation instances (getAnnotation does not return
        # the bare value), so equality covers the whole annotation.
        original = mut.getAnnotation("foo")
        duplicate = mut.getAnnotation("bar")
        self.assertEqual(original, duplicate)
コード例 #28
0
 def testBasicAnnotation(self):
     """Annotate chr18:48604683 G>A from the small COSMIC tsv.

     The annotated mutation must have a truthy
     '_cosmic_muts_disease_counts' value.
     """
     cosmic_ds = GenericGenomicMutationDatasource('testdata/small_cosmic_2/cosmic_v65_chr18.tsv')

     mutation = MutationData()
     mutation.chr = '18'
     mutation.start = '48604683'
     mutation.end = '48604683'
     mutation.ref_allele = 'G'
     mutation.alt_allele = 'A'
     mutation.createAnnotation('strand', '+')

     annotated = cosmic_ds.annotate_mutation(mutation)
     disease_counts = annotated['_cosmic_muts_disease_counts']
     self.assertTrue(disease_counts, 'Unable to annotate mutation correctly')
コード例 #29
0
    def createMutations(self):
        """Yield one MutationData per row of the maflite TSV reader.

        No inputs.  Every column reported by self._tsvReader.getFieldNames()
        becomes an annotation with source 'INPUT'.  A column that is a key of
        self._reverseAlternativeDict is stored under the name it maps to;
        any other column is stored under its own name.  Values destined for
        "chr" go through
        MutUtils.convertChromosomeStringToMutationDataFormat first.

        Once a row is loaded:

        * ref and alt alleles are stripped of surrounding whitespace;
        * if alt equals ref, alt is replaced by the result of
          self._find_alt_allele_in_other_field(line, ref);
        * if exactly one of start / end is the empty string, it is copied
          from the other.

        :returns: a generator of MutationData built from the maflite file
        """
        aliases = self._reverseAlternativeDict
        allColumns = self._tsvReader.getFieldNames()

        for line in self._tsvReader:

            # We only need to assign fields that are mutation attributes and have a different name in the maflite file.
            mut = MutationData(build=self._build)

            for col in allColumns:
                # Three scenarios:
                #   1) col is name of mutation data field -- simple createAnnotation
                #   2) col name is an alias for a mutation data field -- do lookup then createAnnotation
                #   3) col name is not an alias for a mutation data field -- simple createAnnotation
                if col in aliases:
                    realKey = aliases[col]
                    self.logger.debug(realKey + " found from " + col)
                else:
                    # Scenario 1 and 3: the column name is used as-is.
                    realKey = col

                # Make sure to convert chromosome values.
                val = line[col]
                if realKey == "chr":
                    val = MutUtils.convertChromosomeStringToMutationDataFormat(val)
                mut.createAnnotation(realKey, val, 'INPUT')

            # remove any surrounding whitespace if present
            mut.ref_allele, mut.alt_allele = (mut.ref_allele.strip(),
                                              mut.alt_allele.strip())

            # if the alt allele == ref_allele, check that this is not a case where there is an alt_allele2 that is different.
            if mut.alt_allele == mut.ref_allele:
                mut.alt_allele = self._find_alt_allele_in_other_field(
                    line, mut.ref_allele)

            # FIXME: Support more than one alias in the reverse dictionary.  Then this block can be removed.
            # Compare by value: identity checks against a "" literal only
            # work when the interpreter happens to share the empty string.
            if mut.start != "" and mut.end == "":
                mut.end = mut.start
            if mut.end != "" and mut.start == "":
                mut.start = mut.end

            yield mut
コード例 #30
0
    def test_cached_annots(self):
        """Test that annotations that should not be cached are not stored; also a simple store and retrieve.

        Of the two annotations created (sources "INPUT" and
        "some_datasource"), only "blah2" is expected to come back.
        """
        cache_url = "file://" + "out/shove.managertest.annots.cache"
        manager = CacheManager()
        manager.initialize(cache_url, "blah", is_read_only=False)

        mutation = MutationData()
        mutation.createAnnotation("blah1", "val1", annotationSource="INPUT")
        mutation.createAnnotation("blah2", "val5", annotationSource="some_datasource")
        manager.store_annotations_in_cache(mutation)

        cached = manager.retrieve_cached_annotations(mutation)
        cached_count = len(cached.keys())
        self.assertTrue(cached_count == 1)
        self.assertTrue(cached["blah2"].getValue() == "val5")
コード例 #31
0
ファイル: OnpCombinerTest.py プロジェクト: ihuerga/oncotator
    def test_mutation_combiner(self):
        """Test that attributes and annotations are set properly with combine mutations.

        Two adjacent SNPs (100 G>A and 101 C>T on chr 1) are combined and
        compared against a hand-built GC>AT mutation spanning 100-101.
        """
        left = MutationData(chr=1, start=100, end=100, ref_allele="G", alt_allele="A")
        left.createAnnotation("SomeValue", "value1", "INPUT", "STRING", "a value")

        right = MutationData(chr=1, start=101, end=101, ref_allele="C", alt_allele="T")
        right.createAnnotation("SomeValue", "value2", tags=["IT"])
        right.createAnnotation("AnotherValue", "5")

        combined = OnpQueue._combine_mutations([left, right])

        wanted = MutationData(chr=1, start=100, end=101, ref_allele="GC", alt_allele="AT")
        wanted.createAnnotation("SomeValue", "value1|value2", "INPUT", "STRING", "a value", tags=["IT"])
        wanted.createAnnotation("AnotherValue", "5")

        self.assertTrue(combined.attributesEqual(wanted))
        self.assertEqual(combined, wanted)
    def testDatasourceCreator(self):
        """Test that the datasource creator process will work for v1 of TranscriptToUniProtProteinPositionTransformingDatasource.

        NOTE: This test needs to be updated to use sqlite instead of filesystem file.
        """
        config_path = "testdata/small_uniprot_prot_seq_ds/small_uniprot_prot_seq_ds.config"
        uniprot_ds = DatasourceFactory.createDatasource(
            config_path, "testdata/small_uniprot_prot_seq_ds/")

        mut = MutationData()
        for name, value in (('transcript_id', 'uc009vvt.1'),
                            ('protein_change', 'p.T1105A')):
            mut.createAnnotation(name, value)
        mut = uniprot_ds.annotate_mutation(mut)

        aapos_key = "UniProt_aapos"
        self.assertTrue(mut[aapos_key] == "969",
                        "Did not get proper value (969): " + mut[aapos_key])
コード例 #33
0
    def testBasicGeneTSVInit(self):
        """Make sure that we can initialize a simple tsv data source.

        After creating the datasource from the small_tsv_ds test config, a
        mutation annotated only with gene=ABL1 is run through it and its
        CGC_Abridged_Name value is verified.
        """

        geneDS = DatasourceFactory.createDatasource(
            "testdata/small_tsv_ds/small_tsv_ds.config",
            "testdata/small_tsv_ds/")
        self.assertTrue(geneDS is not None, "gene indexed datasource was None.")

        m = MutationData()
        m.createAnnotation('gene', "ABL1")
        m = geneDS.annotate_mutation(m)
        self.assertTrue(
            m['CGC_Abridged_Name'] ==
            "v-abl Abelson murine leukemia viral oncogene homolog 1",
            "Test gene TSV datasource did not annotate properly.")
コード例 #34
0
    def testBasicAnnotation(self):
        """Check that a chr18 G>A mutation picks up COSMIC disease counts.

        Passes when the annotated mutation's '_cosmic_muts_disease_counts'
        value is truthy.
        """
        tsv_path = 'testdata/small_cosmic_2/cosmic_v65_chr18.tsv'
        ds = GenericGenomicMutationDatasource(tsv_path)

        mut = MutationData()
        mut.chr = '18'
        mut.start = '48604683'
        mut.end = '48604683'
        mut.ref_allele = 'G'
        mut.alt_allele = 'A'
        mut.createAnnotation('strand', '+')

        result = ds.annotate_mutation(mut)
        self.assertTrue(result['_cosmic_muts_disease_counts'], 'Unable to annotate mutation correctly')
コード例 #35
0
ファイル: CacheManagerTest.py プロジェクト: ihuerga/oncotator
    def test_cached_annots(self):
        """Test to make sure that we are not storing annotations that should not be cached.

        Also tests a simple store and retrieve: exactly one annotation
        ("blah2", value "val5") is expected back from the cache.
        """
        cache_file = "out/shove.managertest.annots.cache"
        cache_manager = CacheManager()
        cache_manager.initialize("file://" + cache_file, "blah", is_read_only=False)

        mut = MutationData()
        for name, value, source in (("blah1", "val1", "INPUT"),
                                    ("blah2", "val5", "some_datasource")):
            mut.createAnnotation(name, value, annotationSource=source)
        cache_manager.store_annotations_in_cache(mut)

        retrieved = cache_manager.retrieve_cached_annotations(mut)
        self.assertTrue(len(retrieved.keys()) == 1)
        self.assertTrue(retrieved["blah2"].getValue() == "val5")
コード例 #36
0
 def testSimpleAnnotation(self):
     """Create a dummy mutation and make sure it gets annotated properly.

     The mutation only has a transcript_id; after annotation both refseq
     test ids must carry the expected values.
     """
     mutation = MutationData()
     mutation.createAnnotation('transcript_id', 'uc001hms.3')

     transcript_ds = DatasourceFactory.createDatasource(
         "testdata/small_transcript_tsv_ds/small_transcript_tsv_ds.config",
         "testdata/small_transcript_tsv_ds/")
     mutation = transcript_ds.annotate_mutation(mutation)

     failure = "Transcript-based annotation did not populate properly: "
     for key, expected in (('refseq_test_mRNA_Id', 'NM_022746'),
                           ('refseq_test_prot_Id', 'NP_073583')):
         self.assertTrue(mutation[key] == expected, failure + mutation[key])
コード例 #37
0
 def testSimpleAnnotation(self):
     """Annotate a transcript_id-only mutation and verify both refseq test ids."""
     mut = MutationData()
     mut.createAnnotation("transcript_id", "uc001hms.3")

     ds_dir = "testdata/small_transcript_tsv_ds/"
     config = "testdata/small_transcript_tsv_ds/small_transcript_tsv_ds.config"
     mut = DatasourceFactory.createDatasource(config, ds_dir).annotate_mutation(mut)

     mrna_id = mut["refseq_test_mRNA_Id"]
     self.assertTrue(
         mrna_id == "NM_022746",
         "Transcript-based annotation did not populate properly: " + mut["refseq_test_mRNA_Id"])

     prot_id = mut["refseq_test_prot_Id"]
     self.assertTrue(
         prot_id == "NP_073583",
         "Transcript-based annotation did not populate properly: " + mut["refseq_test_prot_Id"])
    def testBasicAnnotationWithChange(self):
        """Test whether we can translate from one coordinate system to another (v1).

        This tests a known change: p.T1105A on uc009vvt.1 should yield a
        UniProt_aapos of "969".
        """
        ds = TranscriptToUniProtProteinPositionTransformingDatasource(
            title="UniProt", version="test",
            src_file="file://testdata/small_uniprot_prot_seq_ds/db")

        mut = MutationData()
        for name, value in (('transcript_id', 'uc009vvt.1'),
                            ('protein_change', 'p.T1105A')):
            mut.createAnnotation(name, value)
        mut = ds.annotate_mutation(mut)

        # Must correspond to what the datasource is going to generate.
        aapos_key = "UniProt_aapos"
        self.assertTrue(mut[aapos_key] == "969",
                        "Did not get proper value (969): " + mut[aapos_key])
    def test_basic_annotation_no_change_2(self):
        """Test whether we can translate from one coordinate system to another (v2 ... 2014).

        This tests no change: p.S50T on ENST00000264990.6 should keep
        position "50" in UniProt_aapos.
        """
        db_url = "file://testdata/small_uniprot_prot_seq_ds_blastp_2014/db"
        uniprot_ds = TranscriptToUniProtProteinPositionTransformingDatasource(
            title="UniProt", version="test", src_file=db_url)

        mutation = MutationData()
        mutation.createAnnotation('transcript_id', 'ENST00000264990.6')
        mutation.createAnnotation('protein_change', 'p.S50T')
        mutation = uniprot_ds.annotate_mutation(mutation)

        # Must correspond to what the datasource is going to generate
        aapos_key = "UniProt_aapos"
        self.assertTrue(mutation[aapos_key] == "50",
                        "Did not get proper value (50): " + mutation[aapos_key])
コード例 #40
0
    def testPopulatedButNullValuesInInitNLod(self):
        """Check that _extract_lod tolerates empty and "." values for the LOD annotations.

        For both init_n_lod and t_lod_fstar, an empty string or "." is expected to give 50, and
        numeric strings are expected to come back as integers truncated toward zero.
        """
        mutation = MutationData()
        renderer = TcgaVcfOutputRenderer("out/blank.vcf")

        # Values assigned after the annotation has been created empty, with the expected result.
        follow_up_cases = (('.', 50), ('6', 6), ('6.8', 6), ('-12.8', -12))

        for field in ("init_n_lod", "t_lod_fstar"):
            mutation.createAnnotation(field, "")
            self.assertEqual(renderer._extract_lod(mutation, field), 50)

            for raw_value, expected in follow_up_cases:
                mutation[field] = raw_value
                self.assertEqual(renderer._extract_lod(mutation, field), expected)
コード例 #41
0
    def testPopulatedButNullValuesInInitNLod(self):
        """Verify that blank or "." LOD annotations do not raise when extracted.

        Covers init_n_lod and t_lod_fstar: blank and "." must produce 50, while '6', '6.8' and
        '-12.8' must produce 6, 6 and -12 respectively.
        """
        mut = MutationData()
        vcf_renderer = TcgaVcfOutputRenderer("out/blank.vcf")

        def check(annotation, expected_lod):
            # Extract the LOD for the given annotation and compare with the expected integer.
            self.assertEqual(vcf_renderer._extract_lod(mut, annotation), expected_lod)

        for annotation in ("init_n_lod", "t_lod_fstar"):
            # The annotation starts out populated with an empty string.
            mut.createAnnotation(annotation, "")
            check(annotation, 50)

            for text, expected_lod in (('.', 50), ('6', 6), ('6.8', 6), ('-12.8', -12)):
                mut[annotation] = text
                check(annotation, expected_lod)
コード例 #42
0
ファイル: MutUtilsTest.py プロジェクト: alexramos/oncotator
    def testRetrievePrecedingBaseFromAnnotationForInsertions(self):
        """Round-trip insertions through the preceding-bases annotation.

        Each mutation is first trimmed with retrievePrecedingBasesForInsertions, then restored with
        retrievePrecedingBaseFromAnnotationForInsertions; the restored start, ref and alt must equal
        the values the mutation was created with.
        """
        chrom = "1"
        start = 1234567
        end = 1234567  # not a correct end position, but that is irrelevant for this test
        ref_allele = "GTC"
        build = "19"

        # One-base and two-base insertions.
        for alt_allele in ("GTCT", "GTCTT"):
            mut = MutationData(chrom, start, end, ref_allele, alt_allele, build)
            preceding_bases, trimmed_alt, trimmed_start, trimmed_end = \
                MutUtils.retrievePrecedingBasesForInsertions(mut)
            mut.ref_allele = "-"
            mut.alt_allele = trimmed_alt
            mut.start = trimmed_start
            mut.end = trimmed_end
            mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                                 annotationValue=preceding_bases)

            restored_ref, restored_alt, restored_start = \
                MutUtils.retrievePrecedingBaseFromAnnotationForInsertions(mut)
            self.assertTrue(restored_start == start,
                            "Mut start should be %s but was %s." % (start, restored_start))
            self.assertTrue(restored_ref == ref_allele,
                            "Ref allele should be %s but was %s." % (ref_allele, restored_ref))
            self.assertTrue(restored_alt == alt_allele,
                            "Alt allele should be %s but was %s." % (alt_allele, restored_alt))
コード例 #43
0
ファイル: MutationDataTest.py プロジェクト: ihuerga/oncotator
    def testSetValues(self):
        """Annotations must be readable, and updatable, through the dictionary interface."""
        mutation = MutationData()
        initial_annotations = (("fake1", "1"), ("fake2", "blah blah"))
        for name, value in initial_annotations:
            mutation.createAnnotation(name, value)

        retrieval_failure = "Could not properly retrieve annotation using the dictionary interface.  "
        for name, value in initial_annotations:
            self.assertTrue(mutation[name] == value, retrieval_failure + str(mutation[name]))

        # Overwrite an existing annotation via item assignment and read it back.
        mutation["fake2"] = "Whoa"
        self.assertTrue(
            mutation["fake2"] == "Whoa",
            "Could not properly retrieve annotation using the dictionary interface, after a value change.")
        print(str(mutation))
    def test_basic_annotation_with_change(self):
        """Coordinate translation (v2, 2014 data) for a known change: p.S178S must map to 293."""
        uniprot_ds = TranscriptToUniProtProteinPositionTransformingDatasource(
            title="UniProt", version="test",
            src_file="file://testdata/small_uniprot_prot_seq_ds_blastp_2014/db")

        # Name of the annotation the datasource is expected to produce.
        # Case under test: ENST00000545482.1_Silent_p.S178S
        result_key = "UniProt_aapos"

        mutation = MutationData()
        for name, value in (('transcript_id', 'ENST00000545482.1'), ('protein_change', 'p.S178S')):
            mutation.createAnnotation(name, value)
        mutation = uniprot_ds.annotate_mutation(mutation)

        self.assertTrue(mutation[result_key] == "293",
                        "Did not get proper value (293): " + mutation[result_key])
コード例 #45
0
    def createMutations(self):
        """Yield one MutationData per row of the maflite file.

        No inputs.  Every column of a row is stored as an 'INPUT' annotation.  A column whose name
        is a key of self._reverseAlternativeDict is stored under the mapped mutation field name;
        any other column is stored under its own name.  Values destined for the "chr" field are
        passed through MutUtils.convertChromosomeStringToMutationDataFormat first.

        Returns a generator of mutations built from the specified maflite file.
        """
        alias_to_field = self._reverseAlternativeDict
        allColumns = self._tsvReader.getFieldNames()

        for line in self._tsvReader:

            # We only need to assign fields that are mutation attributes and have a different name in the maflite file.
            mut = MutationData(build=self._build)

            for col in allColumns:
                # Three scenarios:
                #   1) col is name of mutation data field -- simple createAnnotation
                #   2) col name is an alias for a mutation data field -- do lookup then createAnnotation
                #   3) col name is not an alias for a mutation data field -- simple createAnnotation
                val = line[col]
                if col in alias_to_field:
                    # Scenario 2
                    key = alias_to_field[col]
                    self.logger.debug(key + " found from " + col)
                else:
                    # Scenario 1 and 3
                    key = col

                # Make sure to convert chromosome values.
                if key == "chr":
                    val = MutUtils.convertChromosomeStringToMutationDataFormat(val)
                mut.createAnnotation(key, val, 'INPUT')

            # if the alt allele == ref_allele, check that this is not a case where there is an alt_allele2 that is different.
            if mut.alt_allele == mut.ref_allele:
                mut.alt_allele = self._find_alt_allele_in_other_field(line, mut.ref_allele)

            # FIXME: Support more than one alias in the reverse dictionary.  Then this line can be removed.
            # Compare by value: identity checks against a "" literal depend on string interning.
            if mut.start != "" and mut.end == "":
                mut.end = mut.start
            if mut.end != "" and mut.start == "":
                mut.start = mut.end

            yield mut
コード例 #46
0
ファイル: MutUtilsTest.py プロジェクト: alexramos/oncotator
    def testRetrieveMissingAnnotations(self):
        """Exercise retrieveMissingAnnotations with nothing missing and with several names missing."""
        m = MutationData()
        for present_name in ("a1", "a2", "a3", "a4"):
            m.createAnnotation(present_name, "1")

        # Every requested name exists, so nothing should be reported.
        missing = MutUtils.retrieveMissingAnnotations(m, ["a3", "a2"])
        self.assertIsNotNone(missing)
        self.assertTrue(len(missing) == 0, "Result was not empty: " + str(missing))

        # Only "a1" exists; the other three should be reported, with "blah" first.
        missing = MutUtils.retrieveMissingAnnotations(m, ["zztop", "a1", "blah", "dummy"])
        self.assertTrue(missing[0] == "blah", "Result was not sorted")
        self.assertTrue(
            all(name in missing for name in ("blah", "dummy", "zztop")),
            "Incorrect elements (Truth: [zztop, blah, dummy]): " + str(missing))
コード例 #47
0
ファイル: MutUtils.py プロジェクト: ihuerga/oncotator
    def initializeMutFromAttributes(chr, start, end, ref_allele, alt_allele, build):
        """Build a MutationData from raw attributes and normalize insertions/deletions.

        chr, start, end and build are converted with str() before being handed to MutationData.
        The variant type is inferred from the ref/alt alleles, then:
          * xNPs receive an empty preceding-bases annotation;
          * deletions and insertions get ref_allele, alt_allele, start and end replaced, both as
            attributes and as annotations, using the values returned by
            MutUtils.retrievePrecedingBasesForDeletions / retrievePrecedingBasesForInsertions
            ("-" becomes the alt allele of a deletion and the ref allele of an insertion), and the
            returned preceding bases are stored as an annotation.

        Returns the resulting MutationData.
        """
        mut = MutationData(str(chr), str(start), str(end), ref_allele, alt_allele, str(build))
        varType = TranscriptProviderUtils.infer_variant_type(mut.ref_allele, mut.alt_allele)

        if TranscriptProviderUtils.is_xnp(varType):  # Snps and other xNPs
            mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME, annotationValue="")

        if varType == VariantClassification.VT_DEL:  # deletion
            preceding_bases, new_ref, new_start, new_end = MutUtils.retrievePrecedingBasesForDeletions(mut)
            new_alt = "-"
        elif varType == VariantClassification.VT_INS:  # insertion
            preceding_bases, new_alt, new_start, new_end = MutUtils.retrievePrecedingBasesForInsertions(mut)
            new_ref = "-"
        else:
            # Not an indel: nothing further to adjust.
            return mut

        # Keep the attribute and the annotation of each field in step with each other.
        replacements = (
            ("ref_allele", new_ref),
            ("alt_allele", new_alt),
            ("start", new_start),
            ("end", new_end),
        )
        for field, value in replacements:
            setattr(mut, field, value)
            mut[field] = value

        mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                             annotationValue=preceding_bases)
        return mut
コード例 #48
0
    def testMixedAnnotation(self):
        """Check that the COSMIC datasource returns entries found by both gp and gpp lookups."""
        data_dir = "testdata/small_cosmic_with_gp_and_gpp/"
        gp_file = data_dir + "small_cosmic_trimmed_for_sorting.txt.tbi.gz"
        gpp_file = data_dir + "small_cosmic_trimmed_for_sorting.txt.tbi.byAA.sorted.tsv.gz"
        cosmic = Cosmic(src_file=gp_file, title="Cosmic", version="test", gpp_tabix_file=gpp_file)

        # The inputs below are contrived for this test, not taken from a real-world scenario:
        #   line 9 should be picked up via the genomic coordinates,
        #   lines 7 and 8 should be picked up via the protein position.
        mutation = MutationData()
        mutation.createAnnotation("gene", "A2M")
        mutation.createAnnotation("transcript_protein_position_start", "1300")
        mutation.createAnnotation("transcript_protein_position_end", "1400")
        mutation.chr = '12'
        mutation.start = '9227220'
        mutation.end = '9227230'
        mutation = cosmic.annotate_mutation(mutation)

        self.assertTrue(mutation['COSMIC_n_overlapping_mutations'] == '3')
        self.assertTrue(mutation['COSMIC_overlapping_mutation_AAs'].find('1229') != -1,
                        "Could not find the entry specified by genomic coords.")
        self.assertTrue(mutation['COSMIC_overlapping_primary_sites'] == "lung(3)",
                        "Did not have the correct primary sites annotation (lung(3)): "
                        + mutation['COSMIC_overlapping_primary_sites'])
コード例 #49
0
ファイル: MutUtils.py プロジェクト: dhlbh/oncotator
    def initializeMutFromAttributes(chrom, startPos, endPos, ref, alt, build):
        """Create a MutationData from the given attributes and normalize insertions/deletions.

        The variant type comes from MutUtils.determineVariantType:
          * "snp": an empty preceding-bases annotation is created;
          * "del" / "ins": ref_allele, alt_allele, start and end are overwritten (attribute and
            annotation alike) with the values returned by
            MutUtils.retrievePrecedingBasesForDeletions / retrievePrecedingBasesForInsertions,
            using "-" for the alt allele of a deletion and the ref allele of an insertion, and the
            returned preceding bases are stored as an annotation.

        Returns the resulting MutationData.
        """
        mut = MutationData(chrom, startPos, endPos, ref, alt, build)
        varType = MutUtils.determineVariantType(mut)

        if varType == "snp":  # Snps
            mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME, annotationValue="")

        if varType not in ("del", "ins"):
            # Only indels need their alleles and coordinates rewritten.
            return mut

        if varType == "del":  # deletion
            preceding_bases, indel_ref, indel_start, indel_end = \
                MutUtils.retrievePrecedingBasesForDeletions(mut)
            indel_alt = "-"
        else:  # insertion
            preceding_bases, indel_alt, indel_start, indel_end = \
                MutUtils.retrievePrecedingBasesForInsertions(mut)
            indel_ref = "-"

        # Chained assignment sets the attribute first, then the annotation of the same name.
        mut.ref_allele = mut["ref_allele"] = indel_ref
        mut.alt_allele = mut["alt_allele"] = indel_alt
        mut.start = mut["start"] = indel_start
        mut.end = mut["end"] = indel_end
        mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                             annotationValue=preceding_bases)

        return mut
コード例 #50
0
ファイル: MutUtilsTest.py プロジェクト: alexramos/oncotator
    def testRetrievePrecedingBasesForInsertions(self):
        """Check the values produced by MutUtils.retrievePrecedingBasesForInsertions.

        Two insertions on ref "GTC" are trimmed (alt "GTCT" and alt "GTCTT").  After the returned
        values are applied, the mutation must carry the preceding-bases annotation, start 1234569,
        end 1234570, ref "-" and only the inserted bases as alt.  Failure messages are built from
        the same expected values that are asserted, so they cannot contradict the check.
        """
        chrom = "1"
        start = 1234567
        end = 1234567  # incorrect, but doesn't matter for the purposes of testing
        ref_allele = "GTC"
        build = "19"
        expected_start = 1234569
        expected_end = 1234570

        # (alt allele given to the mutation, alt allele expected after trimming)
        for alt_allele, expected_alt in (("GTCT", "T"), ("GTCTT", "TT")):
            mut = MutationData(chrom, start, end, ref_allele, alt_allele, build)
            preceding_bases, updated_alt_allele, updated_start, updated_end = \
                MutUtils.retrievePrecedingBasesForInsertions(mut)
            mut.ref_allele = "-"
            mut.alt_allele = updated_alt_allele
            mut.start = updated_start
            mut.end = updated_end
            mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                                 annotationValue=preceding_bases)

            self.assertTrue("_preceding_bases" in mut, "_preceding_bases is missing in the mutation data.")
            self.assertTrue(mut.start == expected_start,
                            "Mut start should be %s but was %s." % (expected_start, mut.start))
            self.assertTrue(mut.end == expected_end,
                            "Mut end should be %s but was %s." % (expected_end, mut.end))
            self.assertTrue(mut.ref_allele == "-", "Ref allele should be - but was %s." % mut.ref_allele)
            self.assertTrue(mut.alt_allele == expected_alt,
                            "Alt allele should be %s but was %s." % (expected_alt, mut.alt_allele))
コード例 #51
0
ファイル: AnnotatorTest.py プロジェクト: alexramos/oncotator
    def testSkippingAltsForSingleMut(self):
        """Test a simple case where a single mutation with alt_allele_seen of False is not produced.

        The annotator is initialized with is_skip_no_alts=True and no datasources, so the iterator
        returned by annotate_mutations is expected to be exhausted immediately.
        """

        runSpec = RunSpecification()
        runSpec.initialize(None, None, datasources=[], is_skip_no_alts=True)

        # Initialize the annotator with the runspec
        annotator = Annotator()
        annotator.initialize(runSpec)

        m = MutationData()
        m.chr = "1"
        m.start = "12941796"
        m.end = "12941796"
        m.alt_allele = "G"
        m.ref_allele = "T"
        m.createAnnotation("alt_allele_seen", "False")

        muts = [m]

        muts = annotator.annotate_mutations(muts)
        # Use the built-in next(): the .next method only exists on Python 2 iterators, whereas
        # next() works with both the Python 2 and Python 3 iterator protocols.
        self.assertRaises(StopIteration, next, muts)
コード例 #52
0
ファイル: AnnotatorTest.py プロジェクト: alexramos/oncotator
    def testDefaultAnnotations(self):
        """Check how manual overrides and default annotation values are applied.

        The expectations encoded below: the override for test3 is present on every mutation, and
        the default for test2 appears where test2 was blank or absent but does not replace an
        existing non-empty value.
        """
        annotator = Annotator()
        default_annotations = {"test2": "foo2", "test3": "Should not be seen"}
        overrides = {'test3': 'foo3'}

        # test2 present but blank
        first = MutationData()
        first.createAnnotation("test1", "foo1")
        first.createAnnotation("test2", "")

        # test2 absent
        second = MutationData()
        second.createAnnotation("test1", "")

        # test2 already populated
        third = MutationData()
        third.createAnnotation("test1", "")
        third.createAnnotation("test2", "foo2-original")

        overridden = annotator._applyManualAnnotations([first, second, third], overrides)
        defaulted = annotator._applyDefaultAnnotations(overridden, default_annotations)

        results = []
        for mut in defaulted:
            self.assertTrue(mut['test3'] == "foo3", "Override did not work")
            results.append(mut)

        # Expected (test1, test2, test3) values, in the order the mutations were supplied.
        expected_rows = (
            ("foo1", "foo2", "foo3"),
            ("", "foo2", "foo3"),
            ("", "foo2-original", "foo3"),
        )
        for index, expected_values in enumerate(expected_rows):
            result = results[index]
            for key, expected in zip(('test1', 'test2', 'test3'), expected_values):
                self.assertTrue(result[key] == expected)
コード例 #53
0
ファイル: AnnotatorTest.py プロジェクト: ihuerga/oncotator
    def testSkippingAltsForSingleMut(self):
        """Test a simple case where a single mutation with alt_allele_seen of False is not produced.

        With is_skip_no_alts=True and no datasources configured, the iterator returned by
        annotate_mutations is expected to raise StopIteration on the first request.
        """

        runSpec = RunSpecification()
        runSpec.initialize(None, None, datasources=[], is_skip_no_alts=True)

        # Initialize the annotator with the runspec
        annotator = Annotator()
        annotator.initialize(runSpec)

        m = MutationData()
        m.chr = "1"
        m.start = "12941796"
        m.end = "12941796"
        m.alt_allele = "G"
        m.ref_allele = "T"
        m.createAnnotation("alt_allele_seen", "False")

        muts = [m]

        muts = annotator.annotate_mutations(muts)
        # The built-in next() supports both Python 2 (.next) and Python 3 (.__next__) iterators;
        # referencing muts.next directly raises AttributeError on Python 3.
        self.assertRaises(StopIteration, next, muts)
コード例 #54
0
ファイル: PhasingUtilsTest.py プロジェクト: Tmacme/oncotator
    def test_phasing_info_missing(self):
        """Check that has_phasing_information is true only when both phasing annotations are present."""
        id_only = MutationData()
        complete = MutationData()
        empty = MutationData()
        genotype_only = MutationData()

        id_only.createAnnotation("phasing_id", "blah")
        complete.createAnnotation("phasing_id", "blah")
        complete.createAnnotation("phasing_genotype", "0|1")
        genotype_only.createAnnotation("phasing_genotype", "0|1")

        has_info = PhasingUtils.has_phasing_information
        self.assertFalse(has_info(id_only))        # genotype missing
        self.assertTrue(has_info(complete))        # both annotations present
        self.assertFalse(has_info(empty))          # nothing present
        self.assertFalse(has_info(genotype_only))  # id missing
コード例 #55
0
    def testInternalFields(self):
        """An annotation not listed in the required or optional columns must be rendered with i_ prepended."""
        maf_path = "out/testInternalFields_v2.4.maf.tsv"

        mutation = MutationData()
        mutation.createAnnotation("TEST", "THIS IS A TEST", "TESTING")
        # "gene" is a real annotation and should not be considered internal.
        mutation.createAnnotation("gene", "EGFR")

        renderer = TcgaMafOutputRenderer(maf_path, configFile='configs/tcgaMAF2.4_output.config')
        renderer.renderMutations(iter([mutation]), ['No comments'])

        config = ConfigUtils.createConfigParser('configs/tcgaMAF2.4_output.config')
        required_columns = config.get("general", "requiredColumns")
        self.assertTrue(
            "Hugo_Symbol" in required_columns,
            " This test assumes that Hugo_Symbol is a required column in the TCGA MAF.  If not, the test must be modified.")

        self.assertTrue(os.stat(maf_path).st_size > 0,
                        "Generated MAF file (" + maf_path + ") is empty.")

        headers = GenericTsvReader(maf_path).getFieldNames()
        self.assertTrue("Hugo_Symbol" in headers, "Hugo_Symbol not found in output headers")
        self.assertTrue("TEST" not in headers,
                        "TEST was found in output headers when it should have been renamed to i_TEST")
        self.assertTrue("i_TEST" in headers, "i_TEST not found in output headers")
コード例 #56
0
    def testInternalFieldsSkipPrepend(self):
        """With the NO_PREPEND option set, internal annotations must keep their name (no i_ prefix)."""
        maf_path = "out/testInternalFields_v2.4.maf.tsv"

        mutation = MutationData()
        mutation.createAnnotation("TEST", "THIS IS A TEST", "TESTING")
        # "gene" is a real annotation and should not be considered internal.
        mutation.createAnnotation("gene", "EGFR")

        renderer = TcgaMafOutputRenderer(
            maf_path,
            configFile='configs/tcgaMAF2.4_output.config',
            other_options={OptionConstants.NO_PREPEND: True})
        renderer.renderMutations(iter([mutation]), ['No comments'])

        config = ConfigUtils.createConfigParser('configs/tcgaMAF2.4_output.config')
        required_columns = config.get("general", "requiredColumns")
        self.assertTrue(
            "Hugo_Symbol" in required_columns,
            " This test assumes that Hugo_Symbol is a required column in the TCGA MAF.  If not, the test must be modified.")

        self.assertTrue(os.stat(maf_path).st_size > 0,
                        "Generated MAF file (" + maf_path + ") is empty.")

        headers = GenericTsvReader(maf_path).getFieldNames()
        self.assertTrue("Hugo_Symbol" in headers, "Hugo_Symbol not found in output headers")
        self.assertTrue("i_TEST" not in headers,
                        "i_TEST was found in output headers when prepend was disabled.")
        self.assertTrue("TEST" in headers, "TEST was not found in output headers.")
コード例 #57
0
    def testBasicAnnotation(self):
        '''Annotate a TP53 p.S376C mutation from the simple UniProt natural-variation file and check the result.'''
        datasource = GenericGeneProteinPositionDatasource(
            "testdata/simple_uniprot_natvar/simple_uniprot_natvar.tsv",
            title="UniProt_NatVar",
            version="2011_09")

        m = MutationData()
        m.createAnnotation("gene", "TP53")
        m.createAnnotation("protein_change", "p.S376C")
        m.createAnnotation(
            "other_transcripts",
            "TP53_uc002gig.1_Intron|TP53_uc002gih.2_Intron|TP53_uc010cne.1_RNA|TP53_uc010cnf.1_3'UTR|TP53_uc010cng.1_3'UTR|TP53_uc002gii.1_Missense_Mutation_p.S244C|TP53_uc010cnh.1_3'UTR|TP53_uc010cni.1_3'UTR|TP53_uc002gij.2_Missense_Mutation_p.S376C")

        # The return value is not needed: the assertion below reads from m itself.
        datasource.annotate_mutation(m)

        annotation_name = "UniProt_NatVar_natural_variations"
        # Compare as sorted lists so the order of the "|"-separated entries does not matter.
        observed = sorted(m[annotation_name].split("|"))
        expected = sorted(
            "S -> T (in a sporadic cancer; somatic mutation).|S -> A (in a sporadic cancer; somatic mutation).".split("|"))
        self.assertTrue(observed == expected, "Incorrect annotation value seen: " + m[annotation_name])
コード例 #58
0
    def testBasicAnnotate(self):
        '''Initialize the COSMIC datasource with two index files (gp and gpp) and perform a simple annotation.'''
        data_dir = "testdata/small_cosmic_with_gp_and_gpp/"
        gp_file = data_dir + "small_cosmic_trimmed_for_sorting.txt.tbi.gz"
        gpp_file = data_dir + "small_cosmic_trimmed_for_sorting.txt.tbi.byAA.sorted.tsv.gz"
        cosmic = Cosmic(src_file=gp_file, title="Cosmic", version="test", gpp_tabix_file=gpp_file)

        # The inputs below are contrived for this test, not taken from a real-world scenario.
        mutation = MutationData()
        mutation.createAnnotation("gene", "EGFR")
        for position_annotation in ("transcript_protein_position_start", "transcript_protein_position_end"):
            mutation.createAnnotation(position_annotation, "747")
        mutation.chr = '7'
        mutation.start = '55259560'
        mutation.end = '55259560'
        mutation = cosmic.annotate_mutation(mutation)

        self.assertTrue(mutation['COSMIC_n_overlapping_mutations'] == '2')
コード例 #59
0
    def testMixedAnnotation(self):
        """Verify that the COSMIC datasource retrieves entries through both the gp and the gpp index."""
        base_dir = "testdata/small_cosmic_with_gp_and_gpp/"
        cosmic_ds = Cosmic(
            src_file=base_dir + "small_cosmic_trimmed_for_sorting.txt.tbi.gz",
            title="Cosmic",
            version="test",
            gpp_tabix_file=base_dir + "small_cosmic_trimmed_for_sorting.txt.tbi.byAA.sorted.tsv.gz",
        )

        # These values were made up for this test rather than taken from a real-world scenario.
        # Line 9 should be matched through the genomic coordinates.
        # Lines 7 and 8 should be matched through the protein position.
        mut = MutationData()
        mut.createAnnotation("gene", "A2M")
        mut.createAnnotation("transcript_protein_position_start", "1300")
        mut.createAnnotation("transcript_protein_position_end", "1400")
        mut.chr = '12'
        mut.start = '9227220'
        mut.end = '9227230'
        mut = cosmic_ds.annotate_mutation(mut)

        self.assertTrue(mut['COSMIC_n_overlapping_mutations'] == '3')
        self.assertTrue(
            mut['COSMIC_overlapping_mutation_AAs'].find('1229') != -1,
            "Could not find the entry specified by genomic coords.")
        self.assertTrue(
            mut['COSMIC_overlapping_primary_sites'] == "lung(3)",
            "Did not have the correct primary sites annotation (lung(3)): " + mut['COSMIC_overlapping_primary_sites'])
コード例 #60
0
ファイル: MutationDataTest.py プロジェクト: dhlbh/oncotator
 def testDuplicateException(self):
     ''' Check that a DuplicateAnnotationException is raised by default when createAnnotation is called again for an existing annotation name with a different value. '''
     m = MutationData()
     m.createAnnotation("fake1", "1")
     # Creating "fake1" a second time, with a new value, is expected to raise.
     with self.assertRaises(DuplicateAnnotationException):
         m.createAnnotation("fake1", "blah blah")