コード例 #1
0
    def testRealWorld(self):
        """Annotate two chr1 G>T SNVs with the transcript provider and then the COSMIC datasource.

        Only the first variant is checked: it must report '0' overlapping
        COSMIC mutations.  The second variant is annotated without any
        assertion on its result.
        """
        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
        cosmic_ds = TestUtils.createCosmicDatasource(self.config)

        def annotate_snv(position):
            # Build the SNV, then annotate with the transcript provider first
            # and the COSMIC datasource second.
            mut = MutationData()
            mut.chr = '1'
            mut.start = position
            mut.end = position
            mut.ref_allele = "G"
            mut.alt_allele = "T"
            mut = transcript_ds.annotate_mutation(mut)
            return cosmic_ds.annotate_mutation(mut)

        # These values are not taken from a real world scenario, but are cooked for this test.
        first = annotate_snv('12941796')
        self.assertTrue(first['COSMIC_n_overlapping_mutations'] == '0')

        # chr1, start 150483621, end 150483621
        m = annotate_snv('150483621')
コード例 #2
0
ファイル: GafDatasourceTest.py プロジェクト: Tmacme/oncotator
    def testFlank(self):
        """Smoke-test annotation of single-base variants across a chr1 window.

        Every position of the 26-base window starting at 11042200 is annotated
        and its variant classification is printed next to the position.
        Nothing is asserted: the test only fails if annotation raises.
        """
        refs = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alts = "TGGGCTCAGGCTCTCTGAAAAGAAAA"
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
        startWindow = 11042200
        for offset, (ref, alt) in enumerate(zip(refs, alts)):
            position = str(startWindow + offset)
            m = MutationDataFactory.default_create()
            m.start = position
            m.end = position
            m.chr = "1"
            m.ref_allele = ref
            m.alt_allele = alt

            m = gafDatasource.annotate_mutation(m)

            vc = m['variant_classification']
            print(vc + "  " + m.start)
コード例 #3
0
ファイル: GafDatasourceTest.py プロジェクト: Tmacme/oncotator
    def test_effect_tx_mode(self):
        """Annotate the same chr2 SNV under best-effect and canonical transcript modes.

        In best-effect mode the variant must be a Missense_Mutation in PNKD;
        in canonical mode it must be an Intron variant in the same gene.
        """
        ds = TestUtils.createTranscriptProviderDatasource(self.config)

        def annotated_snv():
            # Fresh G>T SNV at chr2:219137340, annotated with the current tx-mode.
            mut = MutationDataFactory.default_create()
            mut.chr = '2'
            mut.start = '219137340'
            mut.end = '219137340'
            mut.ref_allele = 'G'
            mut.alt_allele = 'T'
            return ds.annotate_mutation(mut)

        # Canonical mutation was Intron
        ds.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)
        best_effect = annotated_snv()
        self.assertTrue(best_effect['gene'] == "PNKD")
        self.assertTrue(best_effect['variant_classification'] == "Missense_Mutation")

        ds.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
        canonical = annotated_snv()
        self.assertTrue(canonical['gene'] == "PNKD")
        stale_msg = "Canonical no longer is Intron.  This test is no longer valid.  This failure can come up when changing the GAF datasource."
        self.assertTrue(canonical['variant_classification'] == "Intron", stale_msg)
コード例 #4
0
    def testCreationAndAnnotation(self):
        """Create the UniProt natural-variation datasource and run one simple annotation.

        The output TSV must exist, contain exactly one row, and that row's
        natural-variation column must hold the two expected entries in any
        order.
        """
        out_path = 'out/genericGeneProteinPositionTest.out.tsv'

        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
        natvar_ds = DatasourceFactory.createDatasource("testdata/simple_uniprot_natvar/simple_uniprot_natvar.config", "testdata/simple_uniprot_natvar/")

        annotator = Annotator()
        annotator.setInputCreator(MafliteInputMutationCreator('testdata/maflite/tiny_maflite_natvar.maf.tsv'))
        annotator.setOutputRenderer(SimpleOutputRenderer(out_path))
        for ds in (transcript_ds, natvar_ds):
            annotator.addDatasource(ds)
        rendered_path = annotator.annotate()

        # Make sure that some values were populated
        self.assertTrue(os.path.exists(rendered_path))

        column = "UniProt_NatVar_natural_variations"
        # Order of the pipe-separated entries is not significant.
        expected_entries = sorted("R -> RR (in EDMD2).|R -> Q (in EDMD2).".split("|"))
        rows_seen = 0
        for row in GenericTsvReader(rendered_path):
            found_entries = sorted(row[column].split("|"))
            self.assertTrue(found_entries == expected_entries, "Annotation value did not match: " + row[column])
            rows_seen += 1

        self.assertTrue(rows_seen == 1, "Number of mutations incorrect (1): " + str(rows_seen) )
コード例 #5
0
ファイル: GafDatasourceTest.py プロジェクト: Tmacme/oncotator
    def testSpliceSiteWithinNBases(self):
        """Test that variants within 2 bases of a splice junction are classified as splice sites.

        Walks the 21-base window chr21:10,998,326-10,998,346 one SNV at a
        time.  10,998,336 is a splice site (junction between 10998335 and
        10998336).  No position may come back as Silent, the four positions
        around the junction must be Splice_Site, and no other position may be.
        """
        # AGTTCTCCTT C TGGAAAAAAG
        refs = 'AGTTCTCCTTCTGGAAAAAAG'
        alts = 'TCAGACTGAAAATACCCCCCT'
        window_start = 10998326
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
        vcs = []
        for offset, (ref, alt) in enumerate(zip(refs, alts)):
            position = str(window_start + offset)
            m = MutationDataFactory.default_create()
            m.start = position
            m.end = position
            m.chr = "21"
            m.ref_allele = ref
            m.alt_allele = alt

            m = gafDatasource.annotate_mutation(m)

            vc = m['variant_classification']
            self.assertTrue(vc != 'Silent', 'Silent mutation found when it should be a splice site.')

            vcs.append(vc)
            print(vc + "  " + m.start)

        near_junction = vcs[8:12]
        upstream = vcs[0:8]
        # Open-ended slice so the last position of the window is checked too
        # (a fixed end index of 20 would skip the 21st element).
        downstream = vcs[12:]
        self.assertTrue(all([tmp == "Splice_Site" for tmp in near_junction]), "Not all vcs within 2 bases were splice site: " + str(near_junction))
        self.assertTrue(all([tmp != "Splice_Site" for tmp in upstream]), "No splice sites should be seen: " + str(upstream))
        self.assertTrue(all([tmp != "Splice_Site" for tmp in downstream]), "No splice sites should be seen: " + str(downstream))
コード例 #6
0
ファイル: GafDatasourceTest.py プロジェクト: Tmacme/oncotator
    def testSilentMutationGoingToSpliceSite(self):
        """Count splice-site and silent classifications across a window spanning a splice junction.

        Window is chr1:28,233,780-28,233,805; the junction is at
        chr1:28,233,793 & 94.  Exactly 4 Splice_Site and 11 Silent
        classifications are expected (compared case-insensitively).
        """
        refs = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alts = "TGGGCTCAGGCTCGCTGAAAAGAAAA"
        junction = 28233793
        startWindow = 28233780
        classifications = []
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
        splice_count = 0
        silent_count = 0
        for offset, (ref, alt) in enumerate(zip(refs, alts)):
            pos = str(startWindow + offset)
            m = MutationDataFactory.default_create()
            m.start = pos
            m.end = pos
            m.chr = "1"
            m.ref_allele = ref
            m.alt_allele = alt

            m = gafDatasource.annotate_mutation(m)

            distance = abs(junction - int(m.start))
            vc = m['variant_classification']
            classifications.append(vc)

            lowered = vc.lower()
            if lowered == "splice_site":
                splice_count += 1
            elif lowered == "silent":
                silent_count += 1
            print(vc + "  " + m.start + "  " + str(distance))

        self.assertTrue(splice_count == 4, "Should have seen 4 splice site mutations, but saw: " + str(splice_count))
        self.assertTrue(silent_count == 11, "Should have seen 11 Silent mutations, but saw: " + str(silent_count))
コード例 #7
0
ファイル: GafDatasourceTest.py プロジェクト: Tmacme/oncotator
    def test_denovo(self):
        """Check de novo start classification for three insertions on chr22.

        Each case starts at 22221735 with an empty reference allele; the end
        position and inserted sequence decide whether the expected
        classification is out-of-frame or in-frame.
        """
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)

        # (end position, inserted sequence, expected variant classification)
        cases = (
            (22221737, 'CAT', 'De_novo_Start_OutOfFrame'),
            (22221740, 'AACATAA', 'De_novo_Start_OutOfFrame'),
            (22221739, 'ACATAA', 'De_novo_Start_InFrame'),
        )
        for end_pos, inserted, expected_vc in cases:
            m = MutationDataFactory.default_create()
            m.start = str(22221735)
            m.end = str(end_pos)
            m.chr = "22"
            m.ref_allele = ''
            m.alt_allele = inserted
            m = gafDatasource.annotate_mutation(m)
            self.assertTrue(m['variant_classification'] == expected_vc)
コード例 #8
0
ファイル: GafDatasourceTest.py プロジェクト: Tmacme/oncotator
    def testMulticoreAnnotate(self):
        """Test a (too) simple annotating exercise from GAF on 2 cores.

        The datasource proxy is first pickled to disk, then two adjacent chr3
        SNVs are annotated through a two-process pool.  Both results must
        carry a transcript id and the gene PIK3CA, and must remain distinct
        mutations.
        """
        gafDatasource = TestUtils.createGafDatasourceProxy(self.config)

        # Test pickling.  The handle is closed deterministically and opened in
        # binary mode, which is what pickle output expects.
        with open('out/testGAFPickle.pkl', 'wb') as pickle_fp:
            dump(gafDatasource, pickle_fp)

        def make_snv(position):
            # A>C SNV on chr3 (hg19) at the given position.
            mut = MutationDataFactory.default_create()
            mut.chr = '3'
            mut.start = position
            mut.end = position
            mut.ref_allele = "A"
            mut.alt_allele = "C"
            mut.build = "hg19"
            return mut

        m1 = make_snv('178866811')
        m2 = make_snv('178866812')

        p = LoggingPool(processes=2)
        try:
            result = p.map(annotate_mutation_global, [(gafDatasource, m1), (gafDatasource, m2)])
        finally:
            # Shut the workers down even when map() raises.
            p.close()
            p.join()

        for r in result:
            self.assertTrue("transcript_id" in r.keys())
            self.assertTrue("gene" in r.keys())
            self.assertTrue(r["gene"] == "PIK3CA")
        self.assertTrue(result[0].start != result[1].start)
コード例 #9
0
ファイル: GafDatasourceTest.py プロジェクト: Tmacme/oncotator
 def testExonRetrievalForGene(self):
     """Check that exon retrieval returns a non-None result for a few known genes.

     The retrieved exons are printed for each gene; coding-only exons are
     requested.
     """
     gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
     for gene_symbol in ('CEBPA', 'BRCA1', 'PIK3CA'):
         retrieved = gafDatasource.retrieveExons(gene_symbol, isCodingOnly=True)
         self.assertTrue(retrieved is not None)
         print(str(retrieved))
コード例 #10
0
ファイル: GafDatasourceTest.py プロジェクト: Tmacme/oncotator
    def testChangeInTxModeChangesHashcode(self):
        """Switching the tx-mode from best-effect to canonical must yield a different hashcode."""
        ds = TestUtils.createTranscriptProviderDatasource(self.config)

        hashcodes = []
        for mode in (TranscriptProvider.TX_MODE_BEST_EFFECT, TranscriptProvider.TX_MODE_CANONICAL):
            ds.set_tx_mode(mode)
            hashcodes.append(ds.get_hashcode())

        best_effect_hash, canonical_hash = hashcodes
        self.assertTrue(best_effect_hash != canonical_hash)
コード例 #11
0
ファイル: AnnotatorTest.py プロジェクト: alexramos/oncotator
 def testVersionHeader(self):
     """Check that building the annotator's header string does not raise.

     Only minimal checks are made on the returned string: it must mention
     either "Gaf " or "GENCODE", and the word "Oncotator".  No input creator
     or output renderer is configured; only a transcript datasource is added.
     """
     annotator = Annotator()
     transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
     annotator.addDatasource(transcript_ds)
     header = annotator.createHeaderString()

     mentions_transcript_source = ("Gaf " in header) or ("GENCODE" in header)
     self.assertTrue(mentions_transcript_source, "Could not find Gaf or GENCODE version in header string.")
     self.assertTrue("Oncotator" in header, "Could not find the word Oncotator in header string.")
コード例 #12
0
    def testBasicDatasourceSorting(self):
        """Sorting must place the gene-based datasource second when it is given first.

        The sorted list must also still contain exactly two datasources.
        """
        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
        gene_ds = DatasourceFactory.createDatasource("testdata/small_tsv_ds/small_tsv_ds.config", "testdata/small_tsv_ds/")

        # Deliberately hand the datasources over in the wrong order.
        sorted_ds = DatasourceFactory.sortDatasources([gene_ds, transcript_ds])

        self.assertTrue(sorted_ds[1] == gene_ds, "Sorting is incorrect.")
        num_sorted = len(sorted_ds)
        self.assertTrue(num_sorted == 2, "Sorting altered number of datasources (gt: 2): " + str(num_sorted))
コード例 #13
0
ファイル: GafDatasourceTest.py プロジェクト: Tmacme/oncotator
 def testNoUnknownAnnotations(self):
     """Annotate every mutation of a maflite file and require that none has unknown annotations.

     Each annotated mutation is validated, then checked with
     MutUtils.getUnknownAnnotations, which must return an empty collection.
     """
     creator = MafliteInputMutationCreator('testdata/maflite/Patient0.snp.maf.txt')
     transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
     for mutation in creator.createMutations():
         annotated = transcript_ds.annotate_mutation(mutation)
         MutUtils.validateMutation(annotated)
         unknowns = MutUtils.getUnknownAnnotations(annotated)
         self.assertTrue(len(unknowns) == 0, "Unknown annotations exist in mutation: " + str(unknowns))
コード例 #14
0
ファイル: GafDatasourceTest.py プロジェクト: Tmacme/oncotator
    def test_start_codon(self):
        """An A>T SNV at chr22:22221729 must be classified as VariantClassification.MISSENSE."""
        ds = TestUtils.createTranscriptProviderDatasource(self.config)

        position = str(22221729)
        snv = MutationDataFactory.default_create()
        snv.start = position
        snv.end = position
        snv.chr = "22"
        snv.ref_allele = 'A'
        snv.alt_allele = 'T'

        annotated = ds.annotate_mutation(snv)
        self.assertTrue(annotated['variant_classification'] == VariantClassification.MISSENSE)
コード例 #15
0
ファイル: GafDatasourceTest.py プロジェクト: Tmacme/oncotator
 def testAKT1(self):
     """A G>A SNV at chr14:105246407 (a position reported by a website user) must be annotated with gene AKT1."""
     snv = MutationDataFactory.default_create()
     snv.chr = '14'
     snv.start = '105246407'
     snv.end = '105246407'
     snv.ref_allele = 'G'
     snv.alt_allele = 'A'

     ds = TestUtils.createTranscriptProviderDatasource(self.config)
     annotated = ds.annotate_mutation(snv)

     gene_found = annotated['gene']
     failure_msg = "Incorrect gene found: " + gene_found + "  If updating GAF, this may not be an error, but should be confirmed manually."
     self.assertTrue(gene_found == "AKT1", failure_msg)
コード例 #16
0
ファイル: GafDatasourceTest.py プロジェクト: Tmacme/oncotator
 def testChrGLs(self):
     """Annotate mutations whose chromosome is a GL contig and require a non-empty gene.

     Any exception raised while annotating or validating a mutation is
     turned into a test failure that includes the traceback.
     """
     creator = MafliteInputMutationCreator('testdata/maflite/chrGLs.maf.tsv', "configs/maflite_input.config")
     transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
     for m in creator.createMutations():
         try:
             m = transcript_ds.annotate_mutation(m)
             MutUtils.validateMutation(m)
         except Exception as err:
             # Fail this test because an exception was thrown
             details = "Erroneous exception was thrown: " + str(err) + "\n" + traceback.format_exc()
             self.assertTrue(False, details)
         self.assertTrue(m['gene'] != '')
コード例 #17
0
ファイル: GafDatasourceTest.py プロジェクト: Tmacme/oncotator
    def testMC1R(self):
        """Pin the gene currently reported for a G>A SNV at chr16:89985913.

        The assertion expects TUBB3; the original author noted that MC1R is
        the eventual desired answer.
        """
        snv = MutationDataFactory.default_create()
        snv.chr = '16'
        snv.start = '89985913'
        snv.end = '89985913'
        snv.ref_allele = 'G'
        snv.alt_allele = 'A'

        ds = TestUtils.createTranscriptProviderDatasource(self.config)
        annotated = ds.annotate_mutation(snv)

        # At some point, we would expect this to be MC1R, not TUBB3
        gene_found = annotated['gene']
        failure_msg = "Incorrect gene found: " + gene_found + "  If updating GAF, this may not be an error, but should be confirmed manually."
        self.assertTrue(gene_found == "TUBB3", failure_msg)
コード例 #18
0
ファイル: GafDatasourceTest.py プロジェクト: Tmacme/oncotator
 def testExonRetrievalForGenesFromFile(self):
     """Retrieve coding exons for every gene listed in a simple text file.

     Each input line is stripped and used as a gene name.  The first four
     fields of every exon are written tab-separated to the output file, and
     genes with no exons are recorded in the error file.  All three files
     are closed when the test finishes, whether it passes or raises.
     """
     with open('testdata/testGeneList.txt', 'r') as inputGeneList:
         with open("out/testGeneListExons.txt", 'w') as outputFileFP:
             with open("out/testGeneListExons.err.txt", 'w') as errorFileFP:
                 gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
                 for line in inputGeneList:
                     gene = line.strip()
                     exons = gafDatasource.retrieveExons(gene, isCodingOnly=True)
                     if len(exons) == 0:
                         errorFileFP.write("Could not locate " + gene + "\n")
                     for e in exons:
                         outputFileFP.write('%s\t%s\t%s\t%s\n' % (e[0], e[1], e[2], e[3]))
コード例 #19
0
ファイル: GafDatasourceTest.py プロジェクト: Tmacme/oncotator
    def testNoLostMutations(self):
        """Run a simple transcript annotation and make sure that no mutations were lost.

        The number of annotated (and validated) mutations must equal the
        number of lines in the input file minus one.
        """
        inputFilename = 'testdata/maflite/Patient0.snp.maf.txt'
        inputCreator = MafliteInputMutationCreator(inputFilename, "configs/maflite_input.config")
        gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)

        # Close the handle explicitly instead of leaking it.
        with open(inputFilename, 'r') as input_fp:
            # One line is not a mutation -- presumably the header row.
            numMutsInput = len(input_fp.readlines()) - 1

        ctr = 0
        for m in inputCreator.createMutations():
            m = gafDatasource.annotate_mutation(m)
            MutUtils.validateMutation(m)
            ctr += 1
        self.assertEqual(ctr, numMutsInput, "Gaf data source altered mutation count.")
コード例 #20
0
    def testSimpleAnnotationWithExampleVcf(self):
        """
        Run a simple transcript annotation over the example VCF and render it as a TSV.

        Nothing is asserted; the test passes when the whole run completes
        without raising.
        """
        vcf_path = os.path.join("testdata", "vcf", "example.vcf")
        tsv_path = os.path.join("out", "simpleVCF.Gaf.annotated.out.tsv")

        creator = VcfInputMutationCreator(vcf_path)
        # The returned value is intentionally discarded, as in the original flow.
        creator.createMutations()
        renderer = SimpleOutputRenderer(tsv_path, [])

        annotator = Annotator()
        annotator.setInputCreator(creator)
        annotator.setOutputRenderer(renderer)
        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
        annotator.addDatasource(transcript_ds)
        annotator.annotate()
コード例 #21
0
ファイル: GafDatasourceTest.py プロジェクト: Tmacme/oncotator
    def testFlank2(self):
        """A C>T SNV at chr1:228646357 must be a 5'Flank variant of HIST3H2A."""
        ds = TestUtils.createTranscriptProviderDatasource(self.config)

        # 1	228646357 nearest Gene=HIST3H2A C>T
        position = str(228646357)
        snv = MutationDataFactory.default_create()
        snv.start = position
        snv.end = position
        snv.chr = "1"
        snv.ref_allele = 'C'
        snv.alt_allele = 'T'
        annotated = ds.annotate_mutation(snv)

        gene_found = annotated['gene']
        gene_msg = "Wrong gene (GT: HIST3H2A): " + gene_found + "   -- if updating GAF, this test may fail as this gene may not be appropriate."
        self.assertTrue(gene_found == "HIST3H2A", gene_msg)

        vc_found = annotated['variant_classification']
        vc_msg = "Should be 5'Flank, but was " + vc_found + " -- if updating GAF, this test may fail as this test is data specific.  Also, this may fail if padding parameters are changed."
        self.assertTrue(vc_found == "5'Flank", vc_msg)
コード例 #22
0
ファイル: GafDatasourceTest.py プロジェクト: Tmacme/oncotator
    def testMicroRNA(self):
        """An A>G SNV at chr12:31379268 must be annotated with the miRNA gene hsa-mir-3194-3p.

        The gene name is compared case-insensitively.
        """
        #uc021qwk.1	chr12:31379258-31379277:-	hsa-miR-3194-3p|?	chr12:31379258-31379277:-		Confidence=100
        ds = TestUtils.createTranscriptProviderDatasource(self.config)

        snv = MutationDataFactory.default_create()
        snv.start = 31379268
        snv.end = 31379268
        snv.chr = "12"
        snv.alt_allele = 'G'
        # This is accurate
        snv.ref_allele = 'A'

        annotated = ds.annotate_mutation(snv)
        gene_found = annotated['gene']
        failure_msg = "Wrong gene (GT: hsa-mir-3194-3p): " + gene_found + "   -- if updating GAF, this test may fail as this result may not be appropriate."
        self.assertTrue(gene_found.lower() == "hsa-mir-3194-3p", failure_msg)
コード例 #23
0
    def testNoLostMutations(self):
        """Run a simple transcript annotation and make sure that no mutations were lost.

        Every mutation produced by the input creator is annotated and
        validated; the resulting count must equal the number of lines in the
        input file minus one.
        """
        inputFilename = 'testdata/maflite/Patient0.snp.maf.txt'
        inputCreator = MafliteInputMutationCreator(
            inputFilename, "configs/maflite_input.config")
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)

        # Use a context manager so the file handle is not leaked.
        with open(inputFilename, 'r') as input_fp:
            # One line is not a mutation -- presumably the header row.
            numMutsInput = len(input_fp.readlines()) - 1

        ctr = 0
        for m in inputCreator.createMutations():
            m = gafDatasource.annotate_mutation(m)
            MutUtils.validateMutation(m)
            ctr += 1
        self.assertEqual(ctr, numMutsInput,
                         "Gaf data source altered mutation count.")
コード例 #24
0
 def testChrGLs(self):
     """Annotate mutations located on GL contigs and require that each gets a non-empty gene.

     An exception during annotation or validation fails the test with the
     exception text and traceback in the message.
     """
     maflite_creator = MafliteInputMutationCreator(
         'testdata/maflite/chrGLs.maf.tsv', "configs/maflite_input.config")
     transcript_ds = TestUtils.createTranscriptProviderDatasource(
         self.config)
     for m in maflite_creator.createMutations():
         try:
             m = transcript_ds.annotate_mutation(m)
             MutUtils.validateMutation(m)
         except Exception as err:
             # Fail this test because an exception was thrown
             report = ("Erroneous exception was thrown: " + str(err) + "\n" +
                       traceback.format_exc())
             self.assertTrue(False, report)
         self.assertTrue(m['gene'] != '')
    def test_annotating_uniprot_test_file(self):
        """Annotate variants that had known issues with older UniProt datasources.

        Runs a MAFLITE -> TCGAMAF annotation in best-effect mode and checks
        every output row: UniProt_AApos must not be "0" and, except for the
        row at index 4, SwissProt_entry_Id must end with "HUMAN".  This test
        will fail if using an older version of the uniprot datasource
        (pre-2014).
        """
        datasource_root = TestUtils.createUnitTestConfig().get('DEFAULT',"dbDir")
        annotator = Annotator()
        annotated_path = "out/uniprot_recovery.maf.annotated"
        run_spec = RunSpecificationFactory.create_run_spec(
            "MAFLITE", "TCGAMAF", "testdata/maflite/uniprot_recovery.maflite",
            annotated_path, datasource_dir=datasource_root,
            tx_mode=TranscriptProvider.TX_MODE_BEST_EFFECT)
        annotator.initialize(run_spec)
        annotator.annotate()

        for row_index, row in enumerate(GenericTsvReader(annotated_path)):
            self.assertTrue(row['UniProt_AApos'] != "0")

            #TODO: The fourth entry is currently not picking up the uniprot entry for this.  Remove the skip once issue #253 is addressed
            # NOTE(review): with zero-based enumerate, index 4 is the fifth row -- confirm which entry is meant.
            if row_index == 4:
                continue
            self.assertTrue(row['SwissProt_entry_Id'].endswith("HUMAN"))
コード例 #26
0
ファイル: GafDatasourceTest.py プロジェクト: Tmacme/oncotator
    def testMulticoreAnnotateFromChunkedFile(self):
        """Annotate a maflite file in chunks on a process pool and check the rendered row count.

        Mutations are pulled from the input creator in groups of up to
        ``chunkSize``; ``numChunks`` groups are dispatched per round to a pool
        of ``numChunks`` workers until the input is exhausted.  The flattened
        results are rendered to a TSV that must contain 730 rows.
        """
        #TODO: Add unit test that Mutation data is pickle-able
        inputFile = "testdata/maflite/Patient0.snp.maf.txt"
        outputFile = "out/testGAFMulticorePatient0.snp.maf.txt"
        chunkSize = 200
        numChunks = 4

        gafDatasource = TestUtils.createGafDatasourceProxy(self.config)
        input_creator = MafliteInputMutationCreator(inputFile)
        renderer = SimpleOutputRenderer(outputFile)

        # createChunks
        mutation_iter = input_creator.createMutations()

        annotated_rounds = []
        pool = LoggingPool(processes=numChunks)
        more_input = True
        while more_input:
            work_items = []
            for _chunk_index in xrange(0, numChunks):
                current = []
                for _slot in xrange(0, chunkSize):
                    try:
                        current.append(mutation_iter.next())
                    except StopIteration:
                        # Input exhausted: finish this round, then stop looping.
                        more_input = False
                        break

                work_items.append((current, gafDatasource))

            round_result = pool.map(annotate_mutations_global, work_items)
            annotated_rounds.append(self._flattenChunks(round_result))
        pool.close()
        pool.join()

        all_annotated = chain.from_iterable(annotated_rounds)

        renderer.renderMutations(all_annotated, Metadata())
        rows_read = sum(1 for _row in GenericTsvReader(outputFile))
        self.assertTrue(rows_read == 730, "Should have read 730 variants, but read " + str(rows_read))
コード例 #27
0
    def test_simple_seg_file_annotations(self):
        """Test that we can read in a seg file, do GENCODE annotation, and output as SIMPLE_TSV.

        The input must yield exactly 27 segments.  After annotation and
        rendering, the output must contain the required seg columns and every
        row must have non-blank start and end values and a "genes" column.
        """
        inputFilename = "testdata/seg/Patient0.seg.txt"
        output_filename = "out/test_simple_seg_file_annotations.tsv"
        if os.path.exists(output_filename):
            os.remove(output_filename)
        ic = MafliteInputMutationCreator(inputFilename, 'configs/seg_file_input.config')

        # Count by consuming the iterator, so an empty input is reported as 0
        # segments rather than a value derived from a pre-seeded loop index.
        num_segs = sum(1 for _seg in ic.createMutations())
        self.assertTrue(num_segs == 27, "Found %d segments when there should have been 27." % num_segs)

        # The first pass consumed the segments; build a fresh creator to annotate.
        ic = MafliteInputMutationCreator(inputFilename, 'configs/seg_file_input.config')
        segs = ic.createMutations()

        gencode_ds = TestUtils._create_test_gencode_ds("out/seg_file_gencode_ds")
        # NOTE(review): this instance is never used below; kept because it is
        # not visible from here whether constructing an Annotator has side
        # effects the test relies on.
        annotator = Annotator()

        segs_annotated = [gencode_ds.annotate_segment(seg) for seg in segs]

        outputRenderer = SimpleOutputRenderer(output_filename, '')
        outputRenderer.renderMutations(iter(segs_annotated))

        # Now check the output
        output_reader = GenericTsvReader(output_filename)

        required_cols = ["Sample", "Num_Probes", "Segment_Mean"]
        headers = output_reader.getFieldNames()
        for rcol in required_cols:
            self.assertTrue(rcol in headers)

        for line_dict in output_reader:
            self.assertTrue(line_dict['start'] is not None)
            self.assertTrue(line_dict['start'].strip() != "")
            self.assertTrue(line_dict['end'] is not None)
            self.assertTrue(line_dict['end'].strip() != "")
            self.assertTrue("genes" in line_dict.keys())
コード例 #28
0
    def testMicroRNA(self):
        """Check that a miRNA position is annotated with the expected miRNA gene.

        The A>G SNV at chr12:31379268 must come back with gene
        hsa-mir-3194-3p (compared case-insensitively).
        """
        #uc021qwk.1	chr12:31379258-31379277:-	hsa-miR-3194-3p|?	chr12:31379258-31379277:-		Confidence=100
        transcript_ds = TestUtils.createTranscriptProviderDatasource(
            self.config)

        mirna_snv = MutationDataFactory.default_create()
        mirna_snv.start = 31379268
        mirna_snv.end = 31379268
        mirna_snv.chr = "12"
        mirna_snv.alt_allele = 'G'
        # This is accurate
        mirna_snv.ref_allele = 'A'

        result = transcript_ds.annotate_mutation(mirna_snv)
        is_expected_gene = result['gene'].lower() == "hsa-mir-3194-3p"
        explanation = (
            "Wrong gene (GT: hsa-mir-3194-3p): " + result['gene'] +
            "   -- if updating GAF, this test may fail as this result may not be appropriate."
        )
        self.assertTrue(is_expected_gene, explanation)
コード例 #29
0
    def testSilentMutationGoingToSpliceSite(self):
        """Tally splice-site and silent calls for SNVs across a splice junction on chr1.

        Window is chr1:28,233,780-28,233,805 with the junction at
        chr1:28,233,793 & 94.  The run must produce exactly 4 Splice_Site and
        11 Silent classifications (case-insensitive comparison).
        """
        refs = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alts = "TGGGCTCAGGCTCGCTGAAAAGAAAA"
        startWindow = 28233780
        seen_classifications = []
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)
        tally = {"splice_site": 0, "silent": 0}
        for index in range(len(refs)):
            coordinate = str(startWindow + index)
            m = MutationDataFactory.default_create()
            m.start = coordinate
            m.end = coordinate
            m.chr = "1"
            m.ref_allele = refs[index]
            m.alt_allele = alts[index]

            m = gafDatasource.annotate_mutation(m)

            bases_from_junction = abs(28233793 - int(m.start))
            vc = m['variant_classification']
            seen_classifications.append(vc)

            # Only the two classifications of interest are tallied.
            key = vc.lower()
            if key in tally:
                tally[key] += 1
            print(vc + "  " + m.start + "  " + str(bases_from_junction))

        numSpliceSites = tally["splice_site"]
        numSilent = tally["silent"]
        self.assertTrue(
            numSpliceSites == 4,
            "Should have seen 4 splice site mutations, but saw: " +
            str(numSpliceSites))
        self.assertTrue(
            numSilent == 11,
            "Should have seen 11 Silent mutations, but saw: " + str(numSilent))
コード例 #30
0
    def testBasicAnnotation(self):
        """Annotate from a transcript-keyed generic TSV datasource and check the output headers.

        Only the presence of the two refseq_test columns in the rendered TSV
        header is confirmed.
        """
        # We need a gaf data source to annotate gene
        transcript_provider = TestUtils.createTranscriptProviderDatasource(config=self.config)
        transcript_tsv_ds = DatasourceFactory.createDatasource(
            "testdata/small_transcript_tsv_ds/small_transcript_tsv_ds.config", "testdata/small_transcript_tsv_ds/"
        )
        requested_path = "out/genericTranscriptTest.out.tsv"

        annotator = Annotator()
        annotator.setInputCreator(MafliteInputMutationCreator("testdata/maflite/Patient0.snp.maf.txt"))
        annotator.setOutputRenderer(SimpleOutputRenderer(requested_path))
        for ds in (transcript_provider, transcript_tsv_ds):
            annotator.addDatasource(ds)
        rendered_path = annotator.annotate()

        headers = GenericTsvReader(rendered_path).getFieldNames()
        for column in ("refseq_test_mRNA_Id", "refseq_test_prot_Id"):
            self.assertTrue(column in headers, column + " not found in headers: " + str(headers))
    def test_annotating_uniprot_test_file(self):
        """Annotate variants that had known issues with older UniProt datasources.

        A MAFLITE -> TCGAMAF run in best-effect mode is executed, then every
        output row is checked: UniProt_AApos must not be "0" and, except for
        the row at index 4, SwissProt_entry_Id must end with "HUMAN".  This
        test will fail if using an older version of the uniprot datasource
        (pre-2014).
        """
        unit_test_config = TestUtils.createUnitTestConfig()
        db_dir = unit_test_config.get('DEFAULT', "dbDir")
        annotator = Annotator()
        out_file_name = "out/uniprot_recovery.maf.annotated"
        spec_kwargs = dict(
            datasourceDir=db_dir,
            tx_mode=TranscriptProvider.TX_MODE_BEST_EFFECT)
        runSpec = RunSpecificationFactory.create_run_spec(
            "MAFLITE",
            "TCGAMAF",
            "testdata/maflite/uniprot_recovery.maflite",
            out_file_name,
            **spec_kwargs)
        annotator.initialize(runSpec)
        annotator.annotate()

        for position, record in enumerate(GenericTsvReader(out_file_name)):
            self.assertTrue(record['UniProt_AApos'] != "0")

            #TODO: The fourth entry is currently not picking up the uniprot entry for this.  Remove the exemption once issue #253 is addressed
            # NOTE(review): index 4 is the fifth row under zero-based enumerate -- confirm which entry is meant.
            exempt = (position == 4)
            if not exempt:
                self.assertTrue(
                    record['SwissProt_entry_Id'].endswith("HUMAN"))
コード例 #32
0
    def testBasicAnnotation(self):
        ''' Annotate from a basic tsv gene file.  Use the Gaf to annotate before trying the tsv -- required since the gene annotation must be populated.
        Using trimmed CancerGeneCensus as basis for this test.

        The rendered TSV must expose the CGC_Abridged columns, every row must
        have a gene column, and every row whose gene is in the trimmed census
        must have a non-empty CGC_Abridged_GeneID.  At least one such row must
        exist for the test to be meaningful.
        '''

        # cut -f 1 oncotator/test/testdata/small_tsv_ds/CancerGeneCensus_Table_1_full_2012-03-15_trim.txt | egrep -v Symbol | sed -r "s/^/'/g" | sed ':a;N;$!ba;s/\n/,/g' | sed -r "s/,'/','/g"
        # Frozen set: membership is tested once per output row.
        genesAvailable = frozenset(['ABL1','ABL2','ACSL3','AF15Q14','AF1Q','AF3p21','AF5q31','AKAP9','AKT1','AKT2','ALDH2','ALK','ALO17','APC','ARHGEF12','ARHH','ARID1A','ARID2','ARNT','ASPSCR1','ASXL1','ATF1','ATIC','ATM','ATRX','BAP1','BCL10','BCL11A','BCL11B'])

        # We need a gaf data source to annotate gene

        gafDatasource = TestUtils.createTranscriptProviderDatasource(config=self.config)
        geneDS = DatasourceFactory.createDatasource("testdata/small_tsv_ds/small_tsv_ds.config", "testdata/small_tsv_ds/")
        outputFilename = 'out/genericGeneTest.out.tsv'

        annotator = Annotator()
        annotator.setInputCreator(MafliteInputMutationCreator('testdata/maflite/Patient0.snp.maf.txt'))
        annotator.setOutputRenderer(SimpleOutputRenderer(outputFilename))
        annotator.addDatasource(gafDatasource)
        annotator.addDatasource(geneDS)
        annotator.annotate()

        # Check that there were actual annotations performed.
        tsvReader = GenericTsvReader(outputFilename)

        fields = tsvReader.getFieldNames()
        # The failure message names the same column that is actually checked.
        self.assertTrue('CGC_Abridged_Other Syndrome/Disease' in fields, "'CGC_Abridged_Other Syndrome/Disease' was not present in the header")
        self.assertTrue('CGC_Abridged_Mutation Type' in fields, "'CGC_Abridged_Mutation Type' was not present in the header")

        linesThatShouldBeAnnotated = 0
        # Line numbers in failure messages are 1-based.
        for ctr, lineDict in enumerate(tsvReader, 1):
            self.assertTrue('gene' in lineDict.keys())
            if lineDict['gene'] in genesAvailable:
                self.assertTrue(lineDict['CGC_Abridged_GeneID'] != '', "'CGC_Abridged_GeneID' was missing on a row that should have been populated.  Line: " + str(ctr))
                linesThatShouldBeAnnotated += 1
        self.assertTrue((linesThatShouldBeAnnotated) > 0, "Bad data -- cannot test missed detects.")
コード例 #33
0
    def testBasicAnnotation(self):
        """Annotate from a basic tsv gene file and check the gene-level columns.

        The GAF (transcript provider) datasource is added before the tsv
        datasource because the ``gene`` annotation must be populated first.
        A trimmed CancerGeneCensus table is the basis for this test.
        """

        # cut -f 1 oncotator/test/testdata/small_tsv_ds/CancerGeneCensus_Table_1_full_2012-03-15_trim.txt | egrep -v Symbol | sed -r "s/^/'/g" | sed ':a;N;$!ba;s/\n/,/g' | sed -r "s/,'/','/g"
        genesAvailable = frozenset(['ABL1','ABL2','ACSL3','AF15Q14','AF1Q','AF3p21','AF5q31','AKAP9','AKT1','AKT2','ALDH2','ALK','ALO17','APC','ARHGEF12','ARHH','ARID1A','ARID2','ARNT','ASPSCR1','ASXL1','ATF1','ATIC','ATM','ATRX','BAP1','BCL10','BCL11A','BCL11B'])

        # We need a gaf data source to annotate gene
        gafDatasource = TestUtils.createTranscriptProviderDatasource(config=self.config)
        geneDS = DatasourceFactory.createDatasource("testdata/small_tsv_ds/small_tsv_ds.config", "testdata/small_tsv_ds/")
        outputFilename = 'out/genericGeneTest.out.tsv'

        annotator = Annotator()
        annotator.setInputCreator(MafliteInputMutationCreator('testdata/maflite/Patient0.snp.maf.txt'))
        annotator.setOutputRenderer(SimpleOutputRenderer(outputFilename))
        annotator.addDatasource(gafDatasource)
        annotator.addDatasource(geneDS)
        annotator.annotate()

        # Check that there were actual annotations performed.
        tsvReader = GenericTsvReader(outputFilename)

        fields = tsvReader.getFieldNames()
        # The failure message now names the column that is actually checked.
        self.assertTrue('CGC_Abridged_Other Syndrome/Disease' in fields, "'CGC_Abridged_Other Syndrome/Disease' was not present in the header")
        self.assertTrue('CGC_Abridged_Mutation Type' in fields, "'CGC_Abridged_Mutation Type' was not present in the header")

        linesThatShouldBeAnnotated = 0
        # Line numbers are 1-based so that failure messages match the data rows.
        for lineNumber, lineDict in enumerate(tsvReader, 1):
            self.assertTrue('gene' in lineDict)
            if lineDict['gene'] in genesAvailable:
                # "!=" replaces the Python-2-only "<>" operator.
                self.assertTrue(lineDict['CGC_Abridged_GeneID'] != '', "'CGC_Abridged_GeneID' was missing on a row that should have been populated.  Line: " + str(lineNumber))
                linesThatShouldBeAnnotated += 1
        # Guard against a vacuous pass: at least one row must hit a known gene.
        self.assertTrue(linesThatShouldBeAnnotated > 0, "Bad data -- cannot test missed detects.")
コード例 #34
0
    def testSpliceSiteWithinNBases(self):
        """Test classification of point mutations around a chr21 splice junction.

        Every position in the window must be classified as something other
        than Silent, the four positions flanking the junction must be
        Splice_Site, and the checked positions further away must not be.
        """
        # chr21:10,998,326-10,998,346
        # 10,998,336 is a splice site.  (Junction between 10998335 and 336)
        # AGTTCTCCTT C TGGAAAAAAG
        refs = 'AGTTCTCCTTCTGGAAAAAAG'
        alts = 'TCAGACTGAAAATACCCCCCT'
        windowStart = 10998326
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)
        vcs = []
        # One SNP per reference base, so the window length follows the data.
        for s in range(windowStart, windowStart + len(refs)):
            m = MutationDataFactory.default_create()
            m.start = str(s)
            m.end = str(s)
            m.chr = "21"
            m.ref_allele = refs[s - windowStart]
            m.alt_allele = alts[s - windowStart]

            m = gafDatasource.annotate_mutation(m)

            vc = m['variant_classification']
            self.assertTrue(
                vc != 'Silent',
                'Silent mutation found when it should be a splice site.')

            vcs.append(vc)
            # Parenthesised single argument prints the same on Python 2 and 3.
            print(vc + "  " + m.start)

        self.assertTrue(
            all([tmp == "Splice_Site" for tmp in vcs[8:12]]),
            "Not all vcs within 2 bases were splice site: " + str(vcs[8:12]))
        self.assertTrue(all([tmp != "Splice_Site" for tmp in vcs[0:8]]),
                        "No splice sites should be seen: " + str(vcs[0:8]))
        # NOTE(review): the slice stops at index 20, so the last of the 21
        # positions is never checked -- confirm whether that is intentional.
        self.assertTrue(all([tmp != "Splice_Site" for tmp in vcs[12:20]]),
                        "No splice sites should be seen: " + str(vcs[12:20]))
コード例 #35
0
    def testFlank2(self):
        """Check a second real-world flank case: chr1:228646357 C>T."""
        datasource = TestUtils.createTranscriptProviderDatasource(self.config)

        # chr1:228646357, nearest gene is HIST3H2A, C>T
        position = str(228646357)
        mutation = MutationDataFactory.default_create()
        mutation.start = position
        mutation.end = position
        mutation.chr = "1"
        mutation.ref_allele = 'C'
        mutation.alt_allele = 'T'
        annotated = datasource.annotate_mutation(mutation)

        # The expected values are tied to the current GAF data and padding.
        gene = annotated['gene']
        gene_message = (
            "Wrong gene (GT: HIST3H2A): " + gene +
            "   -- if updating GAF, this test may fail as this gene may not be appropriate."
        )
        self.assertTrue(gene == "HIST3H2A", gene_message)

        classification = annotated['variant_classification']
        classification_message = (
            "Should be 5'Flank, but was " + classification +
            " -- if updating GAF, this test may fail as this test is data specific.  Also, this may fail if padding parameters are changed."
        )
        self.assertTrue(classification == "5'Flank", classification_message)
コード例 #36
0
    def tcp(self):
        """Run the TCP transfer with multipath disabled and dump the result.

        Starts the network and the bwmng process, runs the TCP server and
        client, then stops bwmng and the network before writing the timing
        result under ``self.result_dir``.

        The teardown runs in a ``finally`` block so that a failing transfer
        does not leave the bwmng process or the network running.
        """
        print('-' * 25)
        print('\nTCP (Multipath off)')
        MPTCP.disable()

        net, client, server = TestUtils.start_network(self.experiment_dir)
        bwmng_process = None
        try:
            bwmng_process = TestUtils.run_bwmng()
            TestUtils.run_tcp_server(server)
            time_taken = TestUtils.run_tcp_client(client, self.block_size,
                                                  self.delay_time,
                                                  self.iterations)
            print('Transfer complete')

            # NOTE(review): presumably gives bwmng time to record trailing
            # traffic before it is terminated -- confirm.
            sleep(2)
        finally:
            # bwmng is only stopped if it was actually started.
            if bwmng_process is not None:
                print('Stopping bwmng...', end='')
                bwmng_process.terminate()
                print('DONE!')
            TESTS_OVER.value = 1
            print('Stopping mininet network...', end='')
            net.stop()
            print('DONE!')

        print('Test complete. Preparing result...')
        TestUtils.dump_result(self.result_dir, 'tcp', time_taken,
                              self.block_size, self.iterations)
コード例 #37
0
class GafDatasourceTest(unittest.TestCase):
    """Unit tests for the GAF 3.0 transcript-provider datasource.

    Most tests are skipped unless the ``gafDir`` path from the ``gaf3.0``
    section of the unit test config exists on disk.  Many expected values
    are specific to the GAF data in use and may need updating with it.
    """

    # HACK: Allow config to be viewed by unittest decorators.
    globalConfig = TestUtils.createUnitTestConfig()

    # Shared skip decorator, built once instead of repeated on every test.
    _requiresGaf = unittest.skipIf(
        not os.path.exists(globalConfig.get("gaf3.0", "gafDir")),
        "Default Datasource, with GAF 3.0, corpus is needed to run this test")

    def setUp(self):
        """Create the logger and a fresh unit test config for each test."""
        self.logger = logging.getLogger(__name__)
        self.config = TestUtils.createUnitTestConfig()

    def tearDown(self):
        """No per-test cleanup is needed."""
        pass

    def _createMutation(self, chrom, start, end, ref_allele, alt_allele):
        """Return a default mutation with its locus and alleles filled in.

        The values are assigned as given (no type conversion).
        """
        m = MutationDataFactory.default_create()
        m.chr = chrom
        m.start = start
        m.end = end
        m.ref_allele = ref_allele
        m.alt_allele = alt_allele
        return m

    def _assertMutationsGetGene(self, inputFilename):
        """Annotate every mutation in a maflite file and require a gene.

        Any exception raised while annotating or validating fails the test
        with the exception text and traceback.
        """
        inputCreator = MafliteInputMutationCreator(
            inputFilename, "configs/maflite_input.config")
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)
        mutations = inputCreator.createMutations()
        for m in mutations:
            try:
                m = gafDatasource.annotate_mutation(m)
                MutUtils.validateMutation(m)
            except Exception as e:
                # Fail this test because an exception was thrown
                self.fail("Erroneous exception was thrown: " + str(e) + "\n" +
                          traceback.format_exc())
            self.assertTrue(m['gene'] != '')

    @_requiresGaf
    def testNoUnknownAnnotations(self):
        """ Make sure that the gaf 3.0 datasource does not annotate anything with source set to Unknown """
        inputCreator = MafliteInputMutationCreator(
            'testdata/maflite/Patient0.snp.maf.txt')
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)
        mutations = inputCreator.createMutations()
        for m in mutations:
            m = gafDatasource.annotate_mutation(m)
            MutUtils.validateMutation(m)
            unknownAnnotations = MutUtils.getUnknownAnnotations(m)
            self.assertTrue(
                len(unknownAnnotations) == 0,
                "Unknown annotations exist in mutation: " +
                str(unknownAnnotations))

    @_requiresGaf
    def testNoLostMutations(self):
        """ Does a simple gaf datasource annotation run and makes sure that no mutations were lost """
        inputFilename = 'testdata/maflite/Patient0.snp.maf.txt'
        inputCreator = MafliteInputMutationCreator(
            inputFilename, "configs/maflite_input.config")
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)

        # One header line precedes the mutations; close the file when done.
        with open(inputFilename, 'r') as inputFP:
            numMutsInput = len(inputFP.readlines()) - 1
        mutations = inputCreator.createMutations()
        ctr = 0
        for m in mutations:
            m = gafDatasource.annotate_mutation(m)
            MutUtils.validateMutation(m)
            ctr += 1
        self.assertEqual(ctr, numMutsInput,
                         "Gaf data source altered mutation count.")

    @_requiresGaf
    def testChrM(self):
        """ Test that mitochondrial mutations can be annotated properly. """
        self._assertMutationsGetGene('testdata/maflite/chrM.maf.txt')

    @_requiresGaf
    def testChrGLs(self):
        """ Test that mutations on unaligned transcripts can be annotated properly.  I.e. when chromosome = GL....."""
        self._assertMutationsGetGene('testdata/maflite/chrGLs.maf.tsv')

    @_requiresGaf
    def testMC1R(self):
        """Pin the gene currently reported at chr16:89985913 (TUBB3).

        A later GAF is expected to report MC1R here instead.
        """
        m = self._createMutation('16', '89985913', '89985913', 'G', 'A')
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)
        m = gafDatasource.annotate_mutation(m)

        # At some point, we would expect this to be MC1R, not TUBB3
        self.assertTrue(
            m['gene'] == "TUBB3", "Incorrect gene found: " + m['gene'] +
            "  If updating GAF, this may not be an error, but should be confirmed manually."
        )

    @_requiresGaf
    def testAKT1(self):
        """ Test that this version of the GAF produces the up to date gene for a position given from a website user.
        """
        m = self._createMutation('14', '105246407', '105246407', 'G', 'A')
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)
        m = gafDatasource.annotate_mutation(m)
        self.assertTrue(
            m['gene'] == "AKT1", "Incorrect gene found: " + m['gene'] +
            "  If updating GAF, this may not be an error, but should be confirmed manually."
        )

    @_requiresGaf
    def test_effect_tx_mode(self):
        """Test that the tx-mode changes the classification of one variant.

        The same chr2 SNP must be Missense_Mutation in best-effect mode and
        Intron in canonical mode, with gene PNKD in both.
        """
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)
        gafDatasource.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)

        # Canonical mutation was Intron
        m = self._createMutation('2', '219137340', '219137340', 'G', 'T')
        m = gafDatasource.annotate_mutation(m)
        self.assertTrue(m['gene'] == "PNKD")
        self.assertTrue(m['variant_classification'] == "Missense_Mutation")

        gafDatasource.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
        m = self._createMutation('2', '219137340', '219137340', 'G', 'T')
        m = gafDatasource.annotate_mutation(m)
        self.assertTrue(m['gene'] == "PNKD")
        self.assertTrue(
            m['variant_classification'] == "Intron",
            "Canonical no longer is Intron.  This test is no longer valid.  This failure can come up when changing the GAF datasource."
        )

    @_requiresGaf
    def testSpliceSiteWithinNBases(self):
        """Test classification of point mutations around a chr21 splice junction.

        Every position in the window must be classified as something other
        than Silent, the four positions flanking the junction must be
        Splice_Site, and the checked positions further away must not be.
        """
        # chr21:10,998,326-10,998,346
        # 10,998,336 is a splice site.  (Junction between 10998335 and 336)
        # AGTTCTCCTT C TGGAAAAAAG
        refs = 'AGTTCTCCTTCTGGAAAAAAG'
        alts = 'TCAGACTGAAAATACCCCCCT'
        windowStart = 10998326
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)
        vcs = []
        # One SNP per reference base, so the window length follows the data.
        for s in range(windowStart, windowStart + len(refs)):
            m = self._createMutation("21", str(s), str(s),
                                     refs[s - windowStart],
                                     alts[s - windowStart])
            m = gafDatasource.annotate_mutation(m)

            vc = m['variant_classification']
            self.assertTrue(
                vc != 'Silent',
                'Silent mutation found when it should be a splice site.')

            vcs.append(vc)
            print(vc + "  " + m.start)

        self.assertTrue(
            all([tmp == "Splice_Site" for tmp in vcs[8:12]]),
            "Not all vcs within 2 bases were splice site: " + str(vcs[8:12]))
        self.assertTrue(all([tmp != "Splice_Site" for tmp in vcs[0:8]]),
                        "No splice sites should be seen: " + str(vcs[0:8]))
        # NOTE(review): the slice stops at index 20, so the last of the 21
        # positions is never checked -- confirm whether that is intentional.
        self.assertTrue(all([tmp != "Splice_Site" for tmp in vcs[12:20]]),
                        "No splice sites should be seen: " + str(vcs[12:20]))

    @_requiresGaf
    def testSilentMutationGoingToSpliceSite(self):
        """Test that a silent mutation within 10 bp of a splice junction should become a splice site"""
        # chr1:28,233,780-28,233,805 Junction is at chr1:28,233,793 & 94

        refs = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alts = "TGGGCTCAGGCTCGCTGAAAAGAAAA"
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)
        numSpliceSites = 0
        numSilent = 0
        startWindow = 28233780
        # One mutation per reference base, so the window follows the data.
        for s in range(startWindow, startWindow + len(refs)):
            m = self._createMutation("1", str(s), str(s),
                                     refs[s - startWindow],
                                     alts[s - startWindow])
            m = gafDatasource.annotate_mutation(m)

            distanceFromSpliceSite = abs(28233793 - int(m.start))
            vc = m['variant_classification']

            # Silent calls are expected in this window; they are counted
            # rather than rejected.
            if vc.lower() == "splice_site":
                numSpliceSites += 1
            if vc.lower() == "silent":
                numSilent += 1
            print(vc + "  " + m.start + "  " + str(distanceFromSpliceSite))

        self.assertTrue(
            numSpliceSites == 4,
            "Should have seen 4 splice site mutations, but saw: " +
            str(numSpliceSites))
        self.assertTrue(
            numSilent == 11,
            "Should have seen 11 Silent mutations, but saw: " + str(numSilent))

    @_requiresGaf
    def testFlank(self):
        """Test that we can see a Flank mutation.

        NOTE(review): this test only prints the classifications and asserts
        nothing -- confirm which classification is expected and assert it.
        """
        refs = "TGGGCTCGGGCTCTCTGAAAAGAAAA"
        alts = "TGGGCTCAGGCTCTCTGAAAAGAAAA"
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)
        startWindow = 11042200
        for s in range(startWindow, startWindow + len(refs)):
            m = self._createMutation("1", str(s), str(s),
                                     refs[s - startWindow],
                                     alts[s - startWindow])
            m = gafDatasource.annotate_mutation(m)

            vc = m['variant_classification']
            print(vc + "  " + m.start)

    @_requiresGaf
    def testFlank2(self):
        """Test a second real-world flank scenario"""
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)

        # chr1:228646357, nearest gene is HIST3H2A, C>T
        m = self._createMutation("1", str(228646357), str(228646357),
                                 'C', 'T')
        m = gafDatasource.annotate_mutation(m)

        self.assertTrue(
            m['gene'] == "HIST3H2A",
            "Wrong gene (GT: HIST3H2A): " + m['gene'] +
            "   -- if updating GAF, this test may fail as this gene may not be appropriate."
        )
        self.assertTrue(
            m['variant_classification'] == "5'Flank",
            "Should be 5'Flank, but was " + m['variant_classification'] +
            " -- if updating GAF, this test may fail as this test is data specific.  Also, this may fail if padding parameters are changed."
        )

    @_requiresGaf
    def testMicroRNA(self):
        """Test proper annotation of miRNA
        """
        # uc021qwk.1  chr12:31379258-31379277:-  hsa-miR-3194-3p|?  chr12:31379258-31379277:-  Confidence=100
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)
        # The reference allele 'A' is accurate.  The positions are passed as
        # ints here, unlike the other tests, exactly as before.
        m = self._createMutation("12", 31379268, 31379268, 'A', 'G')
        m = gafDatasource.annotate_mutation(m)
        self.assertTrue(
            m['gene'].lower() == "hsa-mir-3194-3p",
            "Wrong gene (GT: hsa-mir-3194-3p): " + m['gene'] +
            "   -- if updating GAF, this test may fail as this result may not be appropriate."
        )

    @_requiresGaf
    def testExonRetrievalForGene(self):
        """Make sure that the GAF datasource can retrieve exons, given a gene"""
        testGeneList = ['CEBPA', 'BRCA1', 'PIK3CA']
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)
        for gene in testGeneList:
            exons = gafDatasource.retrieveExons(gene, isCodingOnly=True)
            self.assertTrue(exons is not None)
            print(str(exons))

    @_requiresGaf
    def testExonRetrievalForGenesFromFile(self):
        """Make sure that the GAF datasource can retrieve exons, given a list of genes from a simple file"""
        # Context managers make sure all three files are flushed and closed.
        with open('testdata/testGeneList.txt', 'r') as inputGeneList, \
                open("out/testGeneListExons.txt", 'w') as outputFileFP, \
                open("out/testGeneListExons.err.txt", 'w') as errorFileFP:
            gafDatasource = TestUtils.createTranscriptProviderDatasource(
                self.config)
            for line in inputGeneList:
                gene = line.strip()
                exons = gafDatasource.retrieveExons(gene, isCodingOnly=True)
                if len(exons) == 0:
                    errorFileFP.write("Could not locate " + gene + "\n")
                for e in exons:
                    outputFileFP.write('%s\t%s\t%s\t%s\n' %
                                       (e[0], e[1], e[2], e[3]))

    @unittest.skip("The backing code is experimental and should not be run.")
    def testMulticoreAnnotate(self):
        """Test a (too) simple annotating exercise from GAF on 2 cores"""
        gafDatasource = TestUtils.createGafDatasourceProxy(self.config)

        # Test pickling; binary mode keeps the dump valid on every platform.
        with open('out/testGAFPickle.pkl', 'wb') as pickleFP:
            dump(gafDatasource, pickleFP)

        m1 = self._createMutation('3', '178866811', '178866811', "A", "C")
        m1.build = "hg19"

        m2 = self._createMutation('3', '178866812', '178866812', "A", "C")
        m2.build = "hg19"

        p = LoggingPool(processes=2)
        result = p.map(annotate_mutation_global, [(gafDatasource, m1),
                                                  (gafDatasource, m2)])
        p.close()
        p.join()

        for r in result:
            self.assertTrue("transcript_id" in r.keys())
            self.assertTrue("gene" in r.keys())
            self.assertTrue(r["gene"] == "PIK3CA")
        self.assertTrue(result[0].start != result[1].start)

    @unittest.skip("The backing code is experimental and should not be run.")
    def testMulticoreAnnotateFromChunkedFile(self):
        """Annotate a maflite file in chunks on a pool and count the output rows."""
        #TODO: Add unit test that Mutation data is pickle-able
        inputFile = "testdata/maflite/Patient0.snp.maf.txt"
        outputFile = "out/testGAFMulticorePatient0.snp.maf.txt"
        chunkSize = 200
        numChunks = 4

        gafDatasource = TestUtils.createGafDatasourceProxy(self.config)
        ic = MafliteInputMutationCreator(inputFile)
        oc = SimpleOutputRenderer(outputFile)

        # createChunks
        muts = ic.createMutations()

        allAnnotatedChunksFlat = []
        are_mutations_remaining = True
        p = LoggingPool(processes=numChunks)
        while are_mutations_remaining:

            chunks = []
            for _ in range(numChunks):
                chunk = []
                for _ in range(chunkSize):
                    try:
                        chunk.append(next(muts))
                    except StopIteration:
                        # Input exhausted: finish this round, then stop.
                        are_mutations_remaining = False
                        break

                chunks.append((chunk, gafDatasource))

            annotatedChunks = p.map(annotate_mutations_global, chunks)
            annotatedChunksFlat = self._flattenChunks(annotatedChunks)
            allAnnotatedChunksFlat.append(annotatedChunksFlat)
        p.close()
        p.join()

        annotatedMuts = chain.from_iterable(allAnnotatedChunksFlat)

        oc.renderMutations(annotatedMuts, Metadata())
        tsvReader = GenericTsvReader(outputFile)
        ctr = sum(1 for _ in tsvReader)
        self.assertTrue(ctr == 730,
                        "Should have read 730 variants, but read " + str(ctr))

    def testChangeInTxModeChangesHashcode(self):
        """Test that a change in the tx-mode will change the hashcode"""
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)

        gafDatasource.set_tx_mode(TranscriptProvider.TX_MODE_BEST_EFFECT)
        old_hashcode = gafDatasource.get_hashcode()
        gafDatasource.set_tx_mode(TranscriptProvider.TX_MODE_CANONICAL)
        new_hashcode = gafDatasource.get_hashcode()
        self.assertTrue(old_hashcode != new_hashcode)

    def test_start_codon(self):
        """Test a start codon hit in a GAF datasource"""
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)

        m = self._createMutation("22", str(22221729), str(22221729),
                                 'A', 'T')
        m = gafDatasource.annotate_mutation(m)
        self.assertTrue(
            m['variant_classification'] == VariantClassification.MISSENSE)

    @unittest.skip(
        "GAF 3.0 datasources are not being supported much longer, but this test may have exposed a minor bug, so is being preserved if a bugfix is implemented."
    )
    def test_denovo(self):
        """GAF de novo test """
        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)

        m = self._createMutation("22", str(22221735), str(22221737),
                                 '', 'CAT')
        m = gafDatasource.annotate_mutation(m)
        self.assertTrue(
            m['variant_classification'] == 'De_novo_Start_OutOfFrame')

        m = self._createMutation("22", str(22221735), str(22221740),
                                 '', 'AACATAA')
        m = gafDatasource.annotate_mutation(m)
        self.assertTrue(
            m['variant_classification'] == 'De_novo_Start_OutOfFrame')

        m = self._createMutation("22", str(22221735), str(22221739),
                                 '', 'ACATAA')
        m = gafDatasource.annotate_mutation(m)
        self.assertTrue(m['variant_classification'] == 'De_novo_Start_InFrame')

    def _flattenChunks(self, chunks):
        """Yield every item of every chunk, in order.

        Written as plain loops because ``yield`` inside a list comprehension
        is a syntax error on Python 3.8 and later.
        """
        for c in chunks:
            for m in c:
                yield m
コード例 #38
0
ファイル: MutationDataTest.py プロジェクト: dhlbh/oncotator
7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
"""
from TestUtils import TestUtils


'''
Created on Oct 23, 2012

@author: gavlee
'''
import unittest
from oncotator.MutationData import MutationData
import logging
from oncotator.DuplicateAnnotationException import DuplicateAnnotationException

TestUtils.setupLogging(__file__, __name__)
class MutationDataTest(unittest.TestCase):
    """Unit tests exercising ``MutationData``."""

    def setUp(self):
        """Attach a logger named after this module to the test case."""
        self.logger = logging.getLogger(__name__)

    def tearDown(self):
        """Nothing to clean up."""
        return None

    def testSetValues(self):
        """Create the annotation ``fake1`` with value "1" on a new ``MutationData``."""
        mutation = MutationData()
        mutation.createAnnotation("fake1", "1")
コード例 #39
0
 def setUp(self):
     """Give the test case a module-named logger and the unit-test config."""
     module_logger = logging.getLogger(__name__)
     self.logger = module_logger
     self.config = TestUtils.createUnitTestConfig()
コード例 #40
0
 def setUp(self):
     """Store a logger named after this module and the unit-test config on the test case."""
     module_logger = logging.getLogger(__name__)
     self.logger = module_logger
     self.config = TestUtils.createUnitTestConfig()
コード例 #41
0
    def testRealWorld(self):
        """Test that the full COSMIC datasource can retrieve entries by both gp and gpp.

        The first three records are looked up by genomic position (after being
        annotated by the transcript datasource); the last one is looked up by
        gene and protein position only.
        """
        gafDS = TestUtils.createTranscriptProviderDatasource(self.config)
        cosmicDS = TestUtils.createCosmicDatasource(self.config)

        # These values are not taken from a real world scenario, but are cooked for this test.

        m = MutationDataFactory.default_create()
        m.chr = '1'
        m.start = '12941796'
        m.end = '12941796'
        m.ref_allele = "G"
        m.alt_allele = "T"
        m = gafDS.annotate_mutation(m)
        m = cosmicDS.annotate_mutation(m)

        # assertEqual / assertIn are used throughout so that a failure shows the
        # annotation value that was actually produced.
        self.assertEqual(m['COSMIC_n_overlapping_mutations'], '0')

        # # 7:140481411-140481411
        m = MutationDataFactory.default_create()
        m.chr = '7'
        m.start = '140481411'
        m.end = '140481411'
        m.ref_allele = "G"
        m.alt_allele = "T"
        m = gafDS.annotate_mutation(m)
        m = cosmicDS.annotate_mutation(m)

        # As a reminder, the COSMIC datasource does not check the ref and alt allele for a match. (14)
        self.assertEqual(m['COSMIC_n_overlapping_mutations'], '22')

        # # 7:140481411-140481411
        m = MutationDataFactory.default_create()
        m.chr = '7'
        m.start = '140481411'
        m.end = '140481411'
        m.ref_allele = "G"
        m.alt_allele = "A"
        m = gafDS.annotate_mutation(m)
        m = cosmicDS.annotate_mutation(m)

        # As a reminder, the COSMIC datasource does not check the ref and alt allele for a match. (7)
        self.assertEqual(m['COSMIC_n_overlapping_mutations'], '22')

        # Test a gpp record
        m = MutationDataFactory.default_create()
        m.createAnnotation("gene", "ABL1")
        m.createAnnotation("transcript_protein_position_start", "255")
        m.createAnnotation("transcript_protein_position_end", "255")

        # These values (chr, start, and end) are incorrect.  This is by design -- just being used as dummy values.
        m.chr = '9'
        m.start = '1'
        m.end = '1'

        m = cosmicDS.annotate_mutation(m)

        # $ grep -P "^ABL1\t" ~/broad_oncotator_configs/CosmicCompleteTargetedScreensMutantExport.tsv/v76/CosmicCompleteTargetedScreensMutantExport.tsv | cut -f 19,24 | sort | egrep "[A-Z]255[A-Z]" | wc
        # 157
        #
        # The rest:
        # $ grep -P "^ABL1\t" ~/broad_oncotator_configs/CosmicCompleteTargetedScreensMutantExport.tsv/v76/CosmicCompleteTargetedScreensMutantExport.tsv | cut -f 19,24 | sort | uniq | egrep "_"
        # p.H295_P296insH	9:133747575-133747576
        # p.K356_K357insE	9:133748407-133748408
        # p.K357_N358insK	9:133748407-133748408
        # p.L184_K274del	9:133738150-133738422
        # p.L248_K274del
        #
        # $ grep -P "^ABL1\t" ~/broad_oncotator_configs/CosmicCompleteTargetedScreensMutantExport.tsv/v76/CosmicCompleteTargetedScreensMutantExport.tsv | cut -f 19,24 | sort | egrep "_K274"
        # p.L184_K274del	9:133738150-133738422
        # p.L184_K274del	9:133738150-133738422
        # p.L248_K274del
        # p.L248_K274del
        #
        # four overlap p.255 + 157 that are exact matches for p.255.
        # 155 + 4 were hematopoietic_and_lymphoid_tissue

        self.assertEqual(m['COSMIC_n_overlapping_mutations'], str(157 + 4))
        self.assertIn('255', m['COSMIC_overlapping_mutation_AAs'])
        self.assertIn('p.L184_K274del', m['COSMIC_overlapping_mutation_AAs'])
        self.assertIn('p.L248_K274del', m['COSMIC_overlapping_mutation_AAs'])
        self.assertIn("haematopoietic_and_lymphoid_tissue(159)", m['COSMIC_overlapping_primary_sites'], "Did not have the correct primary sites annotation (haematopoietic_and_lymphoid_tissue(159)): " + m['COSMIC_overlapping_primary_sites'])
コード例 #42
0
    def testRealWorld(self):
        """Test that the full COSMIC datasource can retrieve entries by both gp and gpp.

        The genomic-position (gp) records are annotated by the transcript
        datasource first and then by COSMIC; the gene/protein-position (gpp)
        record is annotated by COSMIC only.
        """
        gafDS = TestUtils.createTranscriptProviderDatasource(self.config)
        cosmicDS = TestUtils.createCosmicDatasource(self.config)

        # These values are not taken from a real world scenario, but are cooked for this test.
        #
        # (chr, position, alt allele, expected COSMIC_n_overlapping_mutations)
        # As a reminder, the COSMIC datasource does not check the ref and alt
        # allele for a match, so both 7:140481411-140481411 records give '22'.
        gp_cases = [
            ('1', '12941796', "T", '0'),
            ('7', '140481411', "T", '22'),
            ('7', '140481411', "A", '22'),
        ]
        for chrom, position, alt_allele, expected_count in gp_cases:
            m = MutationDataFactory.default_create()
            m.chr = chrom
            m.start = position
            m.end = position
            m.ref_allele = "G"
            m.alt_allele = alt_allele
            m = gafDS.annotate_mutation(m)
            m = cosmicDS.annotate_mutation(m)

            # assertEqual shows the count that was actually annotated on failure.
            self.assertEqual(m['COSMIC_n_overlapping_mutations'],
                             expected_count)

        # Test a gpp record
        m = MutationDataFactory.default_create()
        m.createAnnotation("gene", "ABL1")
        m.createAnnotation("transcript_protein_position_start", "255")
        m.createAnnotation("transcript_protein_position_end", "255")

        # These values (chr, start, and end) are incorrect.  This is by design -- just being used as dummy values.
        m.chr = '9'
        m.start = '1'
        m.end = '1'

        m = cosmicDS.annotate_mutation(m)

        # $ grep -P "^ABL1\t" ~/broad_oncotator_configs/CosmicCompleteTargetedScreensMutantExport.tsv/v76/CosmicCompleteTargetedScreensMutantExport.tsv | cut -f 19,24 | sort | egrep "[A-Z]255[A-Z]" | wc
        # 157
        #
        # The rest:
        # $ grep -P "^ABL1\t" ~/broad_oncotator_configs/CosmicCompleteTargetedScreensMutantExport.tsv/v76/CosmicCompleteTargetedScreensMutantExport.tsv | cut -f 19,24 | sort | uniq | egrep "_"
        # p.H295_P296insH	9:133747575-133747576
        # p.K356_K357insE	9:133748407-133748408
        # p.K357_N358insK	9:133748407-133748408
        # p.L184_K274del	9:133738150-133738422
        # p.L248_K274del
        #
        # $ grep -P "^ABL1\t" ~/broad_oncotator_configs/CosmicCompleteTargetedScreensMutantExport.tsv/v76/CosmicCompleteTargetedScreensMutantExport.tsv | cut -f 19,24 | sort | egrep "_K274"
        # p.L184_K274del	9:133738150-133738422
        # p.L184_K274del	9:133738150-133738422
        # p.L248_K274del
        # p.L248_K274del
        #
        # four overlap p.255 + 157 that are exact matches for p.255.
        # 155 + 4 were hematopoietic_and_lymphoid_tissue

        self.assertEqual(m['COSMIC_n_overlapping_mutations'], str(157 + 4))

        overlapping_aas = m['COSMIC_overlapping_mutation_AAs']
        self.assertIn('255', overlapping_aas)
        self.assertIn('p.L184_K274del', overlapping_aas)
        self.assertIn('p.L248_K274del', overlapping_aas)

        self.assertIn(
            "haematopoietic_and_lymphoid_tissue(159)",
            m['COSMIC_overlapping_primary_sites'],
            "Did not have the correct primary sites annotation (haematopoietic_and_lymphoid_tissue(159)): "
            + m['COSMIC_overlapping_primary_sites'])
コード例 #43
0
    m = t[1]
    return ds.annotate_mutation(m)


def annotate_mutations_global(t):
    """Annotate from any datasource given a tuple that is (datasource, mutations).

    :param t: two-element sequence ``(datasource, mutations)``; ``datasource``
        must provide ``annotate_mutation`` and ``mutations`` is a list.
    :return: list holding the result of ``datasource.annotate_mutation`` for
        each mutation, in input order.
    """
    # The datasource comes first and the mutation list second, as documented.
    # The previous code read the two indices the other way round.
    ds = t[0]
    ms = t[1]
    return [ds.annotate_mutation(m) for m in ms]


# Module-level config: the skipIf decorator on the test class below reads it
# while the module is being imported, before any test instance exists.
globalConfig = TestUtils.createUnitTestConfig()

# NOTE(review): presumably configures logging for this test module; setupLogging
# is defined in TestUtils, which is not shown here -- confirm.
TestUtils.setupLogging(__file__, __name__)


# globalConfig = TestUtils.createUnitTestConfig()
# @unittest.skipIf(not os.path.exists(globalConfig.get("gaf3.0", "gafDir")), "Default Datasource, with GAF 3.0, corpus is needed to run this test.  GAF 3.0 will not be supported for much longer.")
@unittest.skipIf(
    not os.path.exists(globalConfig.get("gaf3.0", "gafDir")),
    "Default datasource corpus, with GAF 3.0, is needed to run this test.")
class GafDatasourceTest(unittest.TestCase):

    # HACK: Allow config to be viewed by unittest decorators.
    globalConfig = TestUtils.createUnitTestConfig()

    def setUp(self):
コード例 #44
0
 def _createGafDataSource(self):
     """Log the GAF 3.0 initialization and return a transcript-provider datasource built from ``self.config``."""
     self.logger.info("Initializing gaf 3.0")
     gaf_datasource = TestUtils.createTranscriptProviderDatasource(self.config)
     return gaf_datasource
コード例 #45
0
ファイル: TestRunner.py プロジェクト: dbm0204/Shipt-Challenge
 def browser_init(self):
     """Build a ``TestUtils`` helper around ``self.browser`` and keep it as ``self.testUtils``."""
     helper = TestUtils(self.browser)
     self.testUtils = helper
コード例 #46
0
class DatasourceFactoryTest(unittest.TestCase):
    """Tests for creating, sorting and inspecting datasources through ``DatasourceFactory``."""

    _multiprocess_can_split_ = True

    # HACK: Allow config to be viewed by unittest decorators.
    globalConfig = TestUtils.createUnitTestConfig()

    def setUp(self):
        """Attach a module-named logger and the unit-test config to the test case."""
        self.logger = logging.getLogger(__name__)
        self.config = TestUtils.createUnitTestConfig()

    def tearDown(self):
        """Nothing to clean up."""
        pass

    def testBasicCosmicInit(self):
        """ Very simple test that will create a datasource from a sample datasource directory.
        The directory conforms to the standard datasource structure, including placement of the config file.
        """
        ds = DatasourceFactory.createDatasource(
            'testdata/small_cosmic/small_cosmic.config',
            "testdata/small_cosmic")

        m = MutationDataFactory.default_create()
        m.chr = 19
        m.start = 58858921
        m.end = 58858921

        m = ds.annotate_mutation(m)

        self.assertEqual(
            m['COSMIC_overlapping_mutation_AAs'], 'p.P426P(1)',
            "Did not properly annotate mutation: " +
            m['COSMIC_overlapping_mutation_AAs'])

    def testBasicRefInit(self):
        """ Very simple test that will create a reference datasource from a sample datasource directory.
        The directory conforms to the standard datasource structure, including placement of the config file.
        """
        ds = DatasourceFactory.createDatasource(
            'testdata/reference_ds/reference_ds.config',
            "testdata/reference_ds")

        m = MutationDataFactory.default_create()
        m.chr = "22"
        m.start = "11"
        m.end = "11"

        groundTruth = "CCCAAGCTAAACCCAGGCCAC"

        # The value returned by annotate_mutation is indexed directly below,
        # i.e. it is used as the annotated mutation itself.
        m = ds.annotate_mutation(m)

        self.assertEqual(
            m['ref_context'], groundTruth,
            "ref_context was not populated properly: " + str(m['ref_context']))

    def testBasicGeneTSVInit(self):
        """ Make sure that we can initialize a simple tsv data source """

        geneDS = DatasourceFactory.createDatasource(
            "testdata/small_tsv_ds/small_tsv_ds.config",
            "testdata/small_tsv_ds/")
        # Identity check instead of ``<> None``: the ``<>`` operator does not
        # exist in Python 3 and an equality test can be hijacked by __ne__.
        self.assertIsNotNone(geneDS, "gene indexed datasource was None.")

        m = MutationDataFactory.default_create()
        m.createAnnotation('gene', "ABL1")
        m = geneDS.annotate_mutation(m)
        self.assertEqual(
            m['CGC_Abridged_Name'],
            "v-abl Abelson murine leukemia viral oncogene homolog 1",
            "Test gene TSV datasource did not annotate properly.")

    @TestUtils.requiresDefaultDB()
    def testBasicDatasourceSorting(self):
        """Test that the GAF datasource is sorted before a gene-based datasource"""

        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            self.config)
        geneDS = DatasourceFactory.createDatasource(
            "testdata/small_tsv_ds/small_tsv_ds.config",
            "testdata/small_tsv_ds/")

        # Deliberately hand the datasources over in the wrong order.
        incorrectSortList = [geneDS, gafDatasource]
        guessSortList = DatasourceFactory.sortDatasources(incorrectSortList)
        self.assertEqual(guessSortList[1], geneDS, "Sorting is incorrect.")
        self.assertEqual(
            len(guessSortList), 2,
            "Sorting altered number of datasources (gt: 2): " +
            str(len(guessSortList)))

    @unittest.skipIf(not os.path.exists(globalConfig.get("DEFAULT", "dbDir")),
                     "Default Datasource corpus is needed to run this test")
    def testInitializingDatasources(self):
        """ Test initializing a database dir, both single and multicore.  This test is RAM intensive and requires default data corpus."""

        multiDS = DatasourceFactory.createDatasources(
            self.config.get("DEFAULT", "dbDir"), "hg19", isMulticore=True)
        self.assertIsNotNone(multiDS, "Datasource list was None")
        self.assertNotEqual(len(multiDS), 0, "Datasource list was empty")
        # Index-based loop on purpose: no loop variable keeps a datasource
        # alive after the list is deleted below.
        for i in range(len(multiDS)):
            self.assertIsNotNone(multiDS[i],
                                 "multi core datasource was None:  " + str(i))
            self.assertIsInstance(multiDS[i], Datasource)

        # This test can be memory intensive, so get rid of the multiDS, but record how many datasources were created.
        numMultiDS = len(multiDS)
        del multiDS

        singleCoreDS = DatasourceFactory.createDatasources(
            self.config.get("DEFAULT", "dbDir"), "hg19", isMulticore=False)
        self.assertIsNotNone(singleCoreDS, "Datasource list was None")
        self.assertNotEqual(len(singleCoreDS), 0, "Datasource list was empty")
        for i in range(len(singleCoreDS)):
            self.assertIsNotNone(singleCoreDS[i],
                                 "single core datasource was None:  " + str(i))
            self.assertIsInstance(singleCoreDS[i], Datasource)

        self.assertEqual(
            numMultiDS, len(singleCoreDS),
            "Length of single core datasource list was not the same as multicore"
        )
        del singleCoreDS

    def testMulticoreExceptionCatching(self):
        """ Test that a datasource throws an exception during initialization, the DatasourceCreator does not freeze. """
        datasourceTuples = [(
            "testdata/mock_exception_throwing_ds/mock_exception_throwing_ds.config",
            "testdata/mock_exception_throwing_ds/")]

        #DatasourceCreator._createDatasourcesMulticore(4, datasourceTuples)
        # assertRaisesRegexp is the Python 2.7 spelling; kept because this
        # file targets Python 2.
        self.assertRaisesRegexp(NotImplementedError,
                                "This class throws exception",
                                DatasourceFactory._createDatasourcesMulticore,
                                4, datasourceTuples)

    def testMulticoreNoDatasources(self):
        """ If using multicore, does not hang when no datasources are in the db dir"""
        multiDS = DatasourceFactory.createDatasources('testdata/maflite/',
                                                      "hg19", True)
        self.assertEqual(
            len(multiDS), 0,
            "Length of multiDS when there were no datasources was not zero.")

    def test_hashcode_generation(self):
        """Test that we can read a hashcode for a datasource, if available."""
        geneDS = DatasourceFactory.createDatasource(
            "testdata/thaga_janakari_gene_ds/hg19/tj_data.config",
            "testdata/thaga_janakari_gene_ds/hg19/")
        self.assertIsNotNone(geneDS, "gene indexed datasource was None.")

        self.assertEqual(geneDS.get_hashcode(),
                         "7120edfdc7b29e45191c81c99894afd5")
コード例 #47
0
@author: lichtens
"""
import unittest

from oncotator.input.MafliteInputMutationCreator import MafliteInputMutationCreator
from oncotator.input.MafliteMissingRequiredHeaderException import MafliteMissingRequiredHeaderException
from oncotator.output.TcgaMafOutputRenderer import TcgaMafOutputRenderer
from oncotator.utils.MutUtils import MutUtils
from oncotator.Annotator import Annotator
from oncotator.DatasourceFactory import DatasourceFactory
from oncotator.utils.GenericTsvReader import GenericTsvReader
import os
from TestUtils import TestUtils

# NOTE(review): presumably configures logging for this test module; setupLogging
# is defined in TestUtils, which is not shown here -- confirm.
TestUtils.setupLogging(__file__, __name__)


class MafliteInputMutationCreatorTest(unittest.TestCase):
    _multiprocess_can_split_ = True

    def setUp(self):
        self.config = TestUtils.createUnitTestConfig()
        pass

    def tearDown(self):
        pass

    def testMissingRequiredHeaders(self):
        try:
            tmp = MafliteInputMutationCreator(
コード例 #48
0
 def setUp(self):
     """Store the unit-test config on the test case."""
     unit_test_config = TestUtils.createUnitTestConfig()
     self.config = unit_test_config
コード例 #49
0
ファイル: TestRunner.py プロジェクト: dbm0204/Shipt-Challenge
 def set_test_utils(self,browser):
     """Wrap ``browser`` in a ``TestUtils`` helper and keep it as ``self.testUtils``."""
     helper = TestUtils(browser)
     self.testUtils = helper