コード例 #1
0
    def test_R(self):
        """Check R package version lookup for a missing and an installed package."""
        interpreter = environment.R('R')
        interpreter_version = interpreter.version
        # asking for a package that is not installed must raise ValueError
        with self.assertRaises(ValueError):
            interpreter.packageversion('foobarbaz')

        # the version reported for 'stats' must be a prefix of R's own version
        stats_version = interpreter.packageversion('stats')
        self.assertTrue(interpreter_version.startswith(stats_version))
コード例 #2
0
ファイル: diffexp.py プロジェクト: RickeyEstes/railroadtracks
 def __init__(self, executable=None, rsource_template=None):
     """
     Bind the R interpreter and the R script used by this step.

     :param executable: the R executable. If None, the class-level
     attribute :attr:`_default_execpath` is used.
     :type executable: a :class:`str` or a :class:`environment.R`
     :param rsource_template: path to the R script. If None, the file named
     by :attr:`_rscript_name` inside ``RSOURCES_DIR`` is used.
     """
     r_exec = type(self)._default_execpath if executable is None else executable
     # anything that is not already an environment.R gets wrapped in one
     self.r = r_exec if isinstance(r_exec, environment.R) else environment.R(r_exec)
     template_path = rsource_template
     if template_path is None:
         template_path = os.path.join(RSOURCES_DIR, self._rscript_name)
     assert os.path.isfile(template_path), \
         'The needed R script "%s" is not a file' % template_path
     self.rsource_template = template_path
     # both start unset; nothing in this constructor computes them
     self._run_cmd = None
     self._version = None
コード例 #3
0
 def __init__(self, executable=None):
     """
     Bind the R interpreter and locate the R script for this class.

     :param executable: the R executable. If None, the class-level
     attribute :attr:`_default_execpath` will be used.
     :type executable: a :class:`str` or a :class:`environment.R`
     """
     r_exec = executable
     if r_exec is None:
         r_exec = type(self)._default_execpath
     # anything that is not already an environment.R gets wrapped in one
     self.r = r_exec if isinstance(r_exec, environment.R) else environment.R(r_exec)
     # the script is always the one named by _rscript_name in RSOURCES_DIR
     script_path = os.path.join(RSOURCES_DIR, self._rscript_name)
     assert os.path.isfile(script_path), \
         'The needed R script "%s" is not a file' % script_path
     self.rsource_template = script_path
     self._run_cmd = None
     self._version = None
コード例 #4
0
 def test_R_run_snippet(self):
     """Run an R snippet that writes 1 + x to a file and check the value."""
     interpreter = environment.R('R')
     # Name substituted into the `run(...)` call of the snippet; presumably
     # the R variable under which run_snippet exposes the dict - verify.
     variable_name = 'railroadtracks_import'
     snippet = """        
     run <- function(p) {
       res <- 1 + p$x
       # output file
       fh_out <- file(p$output_fn, "w")
       write(res, file = fh_out)
       flush(fh_out)
     }
     run(%s)
     """ % variable_name
     with tempfile.NamedTemporaryFile() as out_fh:
         interpreter.run_snippet(snippet, {'x': 3, 'output_fn': out_fh.name})
         # read back what R wrote before the temporary file is removed
         written = out_fh.read()
     self.assertEqual(b'4', written.rstrip())
コード例 #5
0
class EnvironmentTestCase(unittest.TestCase):
    """Tests for the executable wrappers exposed by ``environment``."""

    def test_executable(self):
        """Instantiate a wrapper around ``ls``; nothing is asserted."""
        #FIXME: technically no test here
        environment.Executable('ls')

    @unittest.skipIf(not has_R, 'R is missing')
    def test_R(self):
        """Check R package version lookup for a missing and an installed package."""
        interpreter = environment.R('R')
        interpreter_version = interpreter.version
        # asking for a package that is not installed must raise ValueError
        with self.assertRaises(ValueError):
            interpreter.packageversion('foobarbaz')

        # the version reported for 'stats' must be a prefix of R's own version
        stats_version = interpreter.packageversion('stats')
        self.assertTrue(interpreter_version.startswith(stats_version))

    @unittest.skipIf(not has_R or
                     environment.R('R').packageversion_or_none('rjson') is None,
                     'R and its package "rjson" must be present.')
    def test_R_run_snippet(self):
        """Run an R snippet that writes 1 + x to a file and check the value."""
        interpreter = environment.R('R')
        # Name substituted into the `run(...)` call of the snippet; presumably
        # the R variable under which run_snippet exposes the dict - verify.
        variable_name = 'railroadtracks_import'
        snippet = """        
        run <- function(p) {
          res <- 1 + p$x
          # output file
          fh_out <- file(p$output_fn, "w")
          write(res, file = fh_out)
          flush(fh_out)
        }
        run(%s)
        """ % variable_name
        with tempfile.NamedTemporaryFile() as out_fh:
            interpreter.run_snippet(snippet, {'x': 3, 'output_fn': out_fh.name})
            # read back what R wrote before the temporary file is removed
            written = out_fh.read()
        self.assertEqual(b'4', written.rstrip())
コード例 #6
0
class RecipeTestCase(unittest.TestCase):

    def setUp(self):
        """Build the fixtures shared by the recipe tests.

        Creates a project in a fresh temporary directory, simulates
        paired-end reads for ``nsamples`` samples from the phage reference
        shipped with the package, and writes a sample-information CSV file
        with one row per sample, alternating between groups 'A' and 'B'.
        The temporary files stay open (their names are used by the tests)
        and are stored on the instance together with the project.
        """
        # -- recipe-init-begin
        # -- initialization boiler plate code
        wd = tempfile.mkdtemp()
        project = easy.Project(rnaseq, wd=wd)

        # declare the 3rd-party command-line tools we will use
        env = easy.Environment(rnaseq)
        # -- recipe-init-end

        # -- recipe-data-begin
        # Phage genome shipped with the package for testing purposes

        PHAGEFASTA = railroadtracks.model.simulate.PHAGEFASTA
        PHAGEGFF = railroadtracks.model.simulate.PHAGEGFF

        # create random data for 6 samples (just testing here)
        nsamples = 6
        samplereads = list()
        with open(PHAGEFASTA) as fasta_fh:
            # only the first entry of the FASTA file is used as the reference
            reference = next(railroadtracks.model.simulate.readfasta_iter(fasta_fh))
        for sample_i in range(nsamples):
            read1_fh = tempfile.NamedTemporaryFile(prefix='read1', suffix='.fq')
            read2_fh = tempfile.NamedTemporaryFile(prefix='read2', suffix='.fq')
            read1_fh, read2_fh = railroadtracks.model.simulate.randomPEreads(read1_fh,
                                                                             read2_fh,
                                                                             reference)
            samplereads.append((read1_fh, read2_fh))

        sampleinfo_fh = tempfile.NamedTemporaryFile(suffix='.csv', mode='w+')
        csv_w = csv.writer(sampleinfo_fh)
        csv_w.writerow(['sample_id', 'group'])
        # one row per simulated sample (tied to nsamples rather than a
        # second hard-coded 6, so the sheet always matches the reads)
        for sample_i in range(nsamples):
            csv_w.writerow([str(sample_i), ('A','B')[sample_i%2]])
        # make the rows visible to tools that open the file by name
        sampleinfo_fh.flush()
        referenceannotation = rnaseq.GFFFile(PHAGEGFF)
        # -- recipe-data-end

        self._wd = wd
        self.project = project
        self.reference_fn = PHAGEFASTA
        self.env = env
        self.nsamples = nsamples
        self.samplereads = samplereads
        self.sampleinfo_fh = sampleinfo_fh
        self.referenceannotation = referenceannotation
        self._PHAGEFASTA = PHAGEFASTA
        self._PHAGEGFF = PHAGEGFF

    def tearDown(self):
        """Close the temporary files opened by setUp and remove the project directory.

        The sample-information file is closed along with the read files
        (closing a NamedTemporaryFile also deletes it). The project
        directory is removed even if closing one of the files fails.
        """
        # -- recipe-teardown-begin
        try:
            for read1_fh, read2_fh in self.samplereads:
                read1_fh.close()
                read2_fh.close()
            self.sampleinfo_fh.close()
        finally:
            # delete the temporary directory
            shutil.rmtree(self.project.wd)
        # -- recipe-teardown-end

    def test_File(self):
        """Smoke test: building a core.File for the reference must not raise."""
        #FIXME: rather test it in the model ?
        core.File(self.reference_fn)

    @unittest.skipIf(not (environment.Executable.ispresent('bowtie2-build') and \
                          environment.Executable.ispresent('htseq-count') and \
                          environment.Executable.ispresent('R') and \
                          environment.R('R').packageversion_or_none('edgeR') is not None),
                     'bowtie2, htseq-count, R (with package "edgeR") must be in the PATH')
    def test_RecipeSimpleIncremental(self):
        """Check that a task run in one project is seen as done when the project is reopened.

        An indexing task is added and executed, then a second project is
        opened on the same working directory and the same task is added
        again: it must be a distinct object describing the same assets,
        already flagged as done, without adding a concrete step.
        """
        project = self.project
        env = self.env
        reference_fn = self.reference_fn

        # step used
        bowtie2index = env.activities.INDEX.bowtie2build

        from railroadtracks import easy

        # sequence of tasks to run
        torun = list()

        # index for alignment
        Assets = bowtie2index.Assets
        assets = Assets(Assets.Source(rnaseq.FASTAFile(reference_fn)),
                        Assets.Target.createundefined())
        task_index = project.add_task(bowtie2index,
                                      assets)
        # the step is not done
        self.assertEqual(hortator._TASK_TODO, task_index.info[1])
        torun.append(task_index)
        # run the tasks
        for task in torun:
            # run only if not done
            if task.info[1] != hortator._TASK_DONE:
                task.execute()
                task.status = hortator._TASK_DONE

        self.assertEqual(1, project.persistent_graph.nconcrete_steps)
        # now that the tasks have run let's open the same project
        project_same = easy.Project(project.model, wd=project.wd)

        # index for alignment
        Assets = bowtie2index.Assets
        assets = Assets(Assets.Source(rnaseq.FASTAFile(reference_fn)),
                        Assets.Target.createundefined())
        task_index_same = project_same.add_task(bowtie2index,
                                                assets)

        # distinct objects...
        self.assertNotEqual(task_index, task_index_same)
        self.assertNotEqual(task_index.call.assets, task_index_same.call.assets)
        # ...that nevertheless describe the same source and target
        self.assertListEqual(list(task_index.call.assets.source.reference),
                             list(task_index_same.call.assets.source.reference))
        self.assertListEqual(list(task_index.call.assets.target.indexfilepattern),
                             list(task_index_same.call.assets.target.indexfilepattern))
        # the reopened project knows the task is already done
        self.assertEqual(hortator._TASK_DONE, task_index_same.info[1])
        # adding the same task again did not create a new concrete step
        self.assertEqual(1, project.persistent_graph.nconcrete_steps)


    def _recipesimpleincremental(self, runtasks):
        """Rebuild the simple recipe five times, going one stage further each time.

        Each iteration re-adds every task from the start (index, then per
        sample align and quantify, then merge, then differential expression)
        and stops at a stage that depends on the iteration number. Before
        stopping, ``runtasks`` is called with the tasks collected so far and
        the number of concrete steps in the project's persistent graph is
        checked.

        :param runtasks: callable taking the list of tasks collected so far
        """
        project = self.project
        env = self.env
        nsamples = self.nsamples
        samplereads = self.samplereads
        sampleinfo_fh = self.sampleinfo_fh
        reference_fn = self.reference_fn
        referenceannotation = self.referenceannotation
        PHAGEFASTA = self._PHAGEFASTA
        PHAGEGFF = self._PHAGEGFF
        
        # steps used
        bowtie2index = env.activities.INDEX.bowtie2build
        bowtie2align = env.activities.ALIGN.bowtie2
        htseqcount = env.activities.QUANTIFY.htseqcount
        merge = env.activities.UTILITY.columnmerger
        edger = env.activities.DIFFEXP.edger

        for iteration in range(5):
            # set whenever a stage stopped early, so that the code after
            # the per-sample loop is skipped for this iteration
            nextiteration = False
            # sequence of tasks to run
            torun = list()

            # index for alignment
            Assets = bowtie2index.Assets
            assets = Assets(Assets.Source(rnaseq.FASTAFile(reference_fn)),
                            Assets.Target.createundefined())
            task_index = project.add_task(bowtie2index, assets)
            torun.append(task_index)
            if iteration < 1:
                # iteration 0 stops after the index
                nextiteration = True
                runtasks(torun)
                self.assertEqual(1, project.persistent_graph.nconcrete_steps)
                continue
            # process all samples
            sample_counts = list()
            for sample_i, (read1_fh, read2_fh) in enumerate(samplereads):
                # align
                Assets = bowtie2align.Assets
                assets = Assets(Assets.Source(task_index.call.assets.target.indexfilepattern, 
                                              rnaseq.FASTQPossiblyGzipCompressed(read1_fh.name),
                                              rnaseq.FASTQPossiblyGzipCompressed(read2_fh.name)),
                                Assets.Target.createundefined())
                task_align = project.add_task(bowtie2align, assets)
                torun.append(task_align)
                if iteration < 2:
                    # iteration 1 stops after the alignment of each sample;
                    # this `continue` moves on to the next sample
                    nextiteration = True
                    runtasks(torun)
                    # 1 index + 1 alignment per sample seen so far
                    self.assertEqual(1+(sample_i+1), project.persistent_graph.nconcrete_steps)
                    continue

                # quantify
                # (non-default parameters to fit our demo GFF)
                params = rnaseq.HTSeqCount._noexons_parameters
                Assets = htseqcount.Assets
                assets = Assets(Assets.Source(task_align.call.assets.target.alignment,
                                              rnaseq.GFFFile(referenceannotation)),
                                Assets.Target.createundefined())
                task_quantify = project.add_task(htseqcount,
                                                 assets,
                                                 parameters=params)
                torun.append(task_quantify)
                if iteration < 3:
                    # iteration 2 stops after the quantification of each sample
                    nextiteration = True
                    runtasks(torun)
                    # 1 index + all alignments + 1 quantification per sample seen so far
                    self.assertEqual(1+len(samplereads)+(sample_i+1), 
                                     project.persistent_graph.nconcrete_steps)
                    continue

                # keep a pointer to the counts, as we will use it in the merge step
                sample_counts.append(task_quantify.call.assets)

            if nextiteration:
                continue
            # merge the sample data into a table (so differential expression can be computed)
            Assets = merge.Assets
            counts = tuple(x.target.counts for x in sample_counts)
            assets = Assets(Assets.Source(rnaseq.CSVFileSequence(counts)),
                            merge.Assets.Target.createundefined())

            task_merge = project.add_task(merge,
                                          assets,
                                          parameters=("0", "1"))
            torun.append(task_merge)
            if iteration < 4:
                # iteration 3 stops after the merge
                nextiteration = True
                runtasks(torun)
                self.assertEqual(1+2*len(samplereads)+1, 
                                 project.persistent_graph.nconcrete_steps)
                continue

            # differential expression with edgeR
            Assets = edger.Assets
            assets = Assets(Assets.Source(task_merge.call.assets.target.counts,
                                          rnaseq.CSVFile(sampleinfo_fh.name)),
                            Assets.Target.createundefined())
            task_de = project.add_task(edger,
                                       assets)
            # NOTE(review): task_de is not appended to torun, so runtasks()
            # never receives the differential expression task - confirm
            # whether that is intended.
            # NOTE(review): with range(5) this condition is always true.
            if iteration < 5:
                nextiteration = True
                runtasks(torun)
                self.assertEqual(1+2*len(samplereads)+2, # 1 index + 2 steps (align, quantify) per sample + 1 merge + 1 differential expression
                                 project.persistent_graph.nconcrete_steps)
                continue

    @unittest.skipIf(not (environment.Executable.ispresent('bowtie2-build') and \
                          environment.Executable.ispresent('htseq-count') and \
                          environment.Executable.ispresent('R') and \
                          environment.R('R').packageversion_or_none('edgeR') is not None),
                     'bowtie2, htseq-count, R (with package "edgeR") must be in the PATH')
    def test_RecipeSimpleIncrementalComplete(self):
        """Build the recipe incrementally, executing the pending tasks at each stage."""
        def runtasks(torun):
            # run the tasks
            for task in torun:
                # run only if not done
                if task.info[1] != hortator._TASK_DONE:
                    task.execute()
        self._recipesimpleincremental(runtasks)

    @unittest.skipIf(not (environment.Executable.ispresent('bowtie2-build') and
                          environment.Executable.ispresent('htseq-count') and
                          environment.Executable.ispresent('R') and
                          environment.R('R').packageversion_or_none('edgeR') is not None),
                     'bowtie2, htseq-count, R (with package "edgeR") must be in the PATH')
    def test_RecipeSimpleIncrementalCompleteNoRun(self):
        """Build the recipe incrementally without executing any task."""
        # the runner ignores the tasks it is given and returns None
        self._recipesimpleincremental(lambda torun: None)


    @unittest.skipIf(not (environment.Executable.ispresent('bowtie2-build') and \
                          environment.Executable.ispresent('htseq-count') and \
                          environment.Executable.ispresent('R') and \
                          environment.R('R').packageversion_or_none('edgeR') is not None),
                     'bowtie2, htseq-count, R (with package "edgeR") must be in the PATH')
    def test_RecipeSimple(self):
        """Run the simple recipe end to end and check the differential expression result.

        The recipe indexes the reference, aligns and quantifies every
        sample, merges the counts into one table and computes differential
        expression with edgeR. All of these tasks, including the edgeR one,
        are executed; the test then checks that exactly one stored entity
        is a target of the DIFFEXP activity and that the EdgeR step
        produced it.
        """
        project = self.project
        env = self.env
        samplereads = self.samplereads
        sampleinfo_fh = self.sampleinfo_fh
        reference_fn = self.reference_fn
        referenceannotation = self.referenceannotation

        # -- recipesimple-test-begin

        # steps used
        bowtie2index = env.activities.INDEX.bowtie2build
        bowtie2align = env.activities.ALIGN.bowtie2
        htseqcount = env.activities.QUANTIFY.htseqcount
        merge = env.activities.UTILITY.columnmerger
        edger = env.activities.DIFFEXP.edger

        from railroadtracks import easy

        # sequence of tasks to run
        torun = list()

        # index for alignment
        Assets = bowtie2index.Assets
        assets = Assets(Assets.Source(rnaseq.FASTAFile(reference_fn)),
                        Assets.Target.createundefined())
        task_index = project.add_task(bowtie2index, assets)
        torun.append(task_index)

        # process all samples
        sample_counts = list()
        for read1_fh, read2_fh in samplereads:
            # align
            Assets = bowtie2align.Assets
            assets = Assets(Assets.Source(task_index.call.assets.target.indexfilepattern,
                                          rnaseq.FASTQPossiblyGzipCompressed(read1_fh.name),
                                          rnaseq.FASTQPossiblyGzipCompressed(read2_fh.name)),
                            Assets.Target.createundefined())
            task_align = project.add_task(bowtie2align, assets)
            torun.append(task_align)

            # quantify
            # (non-default parameters to fit our demo GFF)
            params = rnaseq.HTSeqCount._noexons_parameters
            Assets = htseqcount.Assets
            assets = Assets(Assets.Source(task_align.call.assets.target.alignment,
                                          rnaseq.GFFFile(referenceannotation)),
                            Assets.Target.createundefined())
            task_quantify = project.add_task(htseqcount,
                                             assets,
                                             parameters=params)
            torun.append(task_quantify)
            # keep a pointer to the counts,
            # as we will use them in the merge step
            sample_counts.append(task_quantify.call.assets)

        # merge the sample data into a table
        # (so differential expression can be computed)
        Assets = merge.Assets
        counts = tuple(x.target.counts for x in sample_counts)
        assets = Assets(Assets.Source(rnaseq.CSVFileSequence(counts)),
                        merge.Assets.Target.createundefined())
        task_merge = project.add_task(merge,
                                      assets,
                                      parameters=("0","1"))
        torun.append(task_merge)

        # differential expression with edgeR
        Assets = edger.Assets
        assets = Assets(Assets.Source(task_merge.call.assets.target.counts,
                                      rnaseq.CSVFile(sampleinfo_fh.name)),
                        Assets.Target.createundefined())
        task_de = project.add_task(edger,
                                   assets)
        # queue the task like every other one, otherwise it is never executed
        torun.append(task_de)

        # run the tasks
        for task in torun:
            # run only if not done
            if task.info[1] != hortator._TASK_DONE:
                task.execute()

        # get results
        final_storedentities = project.get_targetsofactivity(rnaseq.ACTIVITY.DIFFEXP)

        # get the step that created the results files
        final_steps = list()
        for stored_entity in final_storedentities:
            final_steps.append(project.persistent_graph.get_parenttask_of_storedentity(stored_entity))

        # -- recipesimple-test-end

        self.assertEqual(1, len(final_storedentities))
        self.assertEqual(core.File.__name__, final_storedentities[0].clsname)
        self.assertEqual('railroadtracks.model.diffexp.EdgeR', final_steps[0].clsname)

        # FIXME: not yet implemented
        # now that we have all steps, we "only" have to run them
        #steps = todo.stepcrawler()
        #for s in steps:
        #    print('%s' % (s.unifiedname))
        #    s.run()

    @unittest.skipIf(not (environment.Executable.ispresent('bowtie2-build') and \
                          environment.Executable.ispresent('bowtie-build') and \
                          environment.Executable.ispresent('STAR') and \
                          environment.Executable.ispresent('R') and \
                          environment.R('R').packageversion_or_none('edgeR') is not None and \
                          environment.R('R').packageversion_or_none('DESeq') is not None and \
                          environment.R('R').packageversion_or_none('DESeq2') is not None and \
                          environment.R('R').packageversion_or_none('limma') is not None),
                     'bowtie2, bowtie, STAR, and R (with packages "edgeR", "DESeq", "DESeq2", "limma") must be in the PATH')
    def test_RecipeLoop(self):
        """Run a recipe looping over several aligners and differential expression methods.

        For each alignment option (four bowtie2 presets, bowtie, STAR) every
        sample is aligned and quantified, the counts are merged, and each of
        edgeR, DESeq, DESeq2 and limma-voom is applied to the merged table.
        All tasks are then executed; a task that raises is recorded as
        failed instead of aborting the test, and tasks already done are
        left untouched.
        """
        project = self.project
        env = self.env
        samplereads = self.samplereads
        sampleinfo_fh = self.sampleinfo_fh
        reference_fn = self.reference_fn
        referenceannotation = self.referenceannotation

        # -- recipeloop-test-begin
        from railroadtracks import easy

        torun = list()

        # bowtie
        bowtie1index = env.activities.INDEX.bowtiebuild
        bowtie1align = env.activities.ALIGN.bowtie
        Assets = bowtie1index.Assets
        fa_file = rnaseq.FASTAFile(reference_fn)
        task_index_bowtie1 = project.add_task(bowtie1index,
                                              Assets(Assets.Source(fa_file),
                                                     None))
        torun.append(task_index_bowtie1)

        # bowtie2
        bowtie2index = env.activities.INDEX.bowtie2build
        bowtie2align = env.activities.ALIGN.bowtie2
        Assets = bowtie2index.Assets
        fa_file = rnaseq.FASTAFile(reference_fn)
        task_index_bowtie2 = project.add_task(bowtie2index,
                                              Assets(Assets.Source(fa_file),
                                                     None))
        torun.append(task_index_bowtie2)

        # STAR
        starindex = env.activities.INDEX.starindex
        staralign = env.activities.ALIGN.staralign
        Assets = starindex.Assets
        fa_file = rnaseq.FASTAFile(reference_fn)
        task_index_star = project.add_task(starindex,
                                           Assets(Assets.Source(fa_file),
                                                  None))
        torun.append(task_index_star)

        # TopHat2
        # (index from bowtie2 used)
        #tophat2 = env.activities.ALIGN.tophat2

        # featureCount
        featurecount = env.activities.QUANTIFY.featurecount

        # Merge columns (obtained from counting)
        merge = env.activities.UTILITY.columnmerger

        # EdgeR, DESeq, DESeq2, and LIMMA voom
        edger = env.activities.DIFFEXP.edger
        deseq = env.activities.DIFFEXP.deseq
        deseq2 = env.activities.DIFFEXP.deseq2
        voom = env.activities.DIFFEXP.limmavoom


        # Now explore the different alignment presets in bowtie2, and vanilla star
        from itertools import cycle
        from collections import namedtuple
        Options = namedtuple('Options', 'aligner assets_index parameters')
        # Try various presets for bowtie2
        bowtie2_parameters = (('--very-fast', ), ('--fast', ),
                              ('--sensitive', ), ('--very-sensitive', ))
        options = [Options(*x) for x in zip(cycle((bowtie2align,)),
                                            cycle((task_index_bowtie2.call.assets.target,)),
                                            bowtie2_parameters)]

        # add bowtie
        options.append(Options(bowtie1align, task_index_bowtie1.call.assets.target, tuple()))
        # add STAR (vanilla, no specific options beside the size of index k-mers)
        options.append(Options(staralign,
                               task_index_star.call.assets.target,
                               ('--genomeChrBinNbits', '12')))
        # add TopHat2
        #options.append(Options(tophat2, task_index_bowtie2.call.assets.target, tuple()))

        # loop over the options
        for option in options:
            sample_counts = list()
            # loop over the samples
            for read1_fh, read2_fh in samplereads:
                # align
                Assets = option.aligner.Assets
                assets = Assets(Assets.Source(option.assets_index.indexfilepattern,
                                              rnaseq.FASTQPossiblyGzipCompressed(read1_fh.name),
                                              rnaseq.FASTQPossiblyGzipCompressed(read2_fh.name)),
                                Assets.Target.createundefined())
                task_align = project.add_task(option.aligner,
                                              assets,
                                              parameters=option.parameters)
                torun.append(task_align)

                # quantify
                # (non-default parameters to fit our demo GFF)
                Assets = featurecount.Assets
                assets = Assets(Assets.Source(task_align.call.assets.target.alignment,
                                              rnaseq.GFFFile(referenceannotation)),
                                Assets.Target.createundefined())
                task_quantify = project.add_task(featurecount,
                                                 assets,
                                                 parameters = ('--gtf-featuretype', 'CDS',
                                                               '--gtf-attrtype', 'ID'))
                torun.append(task_quantify)

                # keep a pointer to the counts, as we will use it in the merge step
                sample_counts.append(task_quantify.call.assets)

            # merge the sample data into a table (so differential expression can be computed)
            Assets = merge.Assets
            source = Assets.Source(rnaseq.CSVFileSequence(tuple(x.target.counts\
                                                                for x in sample_counts)))
            assets_merge = Assets(source,
                                  Assets.Target.createundefined())
            task_merge = project.add_task(merge,
                                          assets_merge,
                                          parameters=("0","1"))
            torun.append(task_merge)

            # differential expression with edgeR, deseq, deseq2, and voom
            # (deseq gets a non-default dispersion fit type)
            for diffexp, params in ((edger, ()),
                                    (deseq, ('--dispersion-fittype=local', )),
                                    (deseq2, ()),
                                    (voom, ())):
                Assets = diffexp.Assets
                assets = Assets(Assets.Source(task_merge.call.assets.target.counts,
                                              core.File(sampleinfo_fh.name)),
                                Assets.Target.createundefined())
                # hand over the per-method parameters declared above
                task_de = project.add_task(diffexp,
                                           assets,
                                           parameters=params)
                torun.append(task_de)

        # run the tasks
        # (this is an integration test rather than a unit test - the
        # 3rd-party tools are often brittle and we want to keep the noise level down)
        env_log_level = environment.logger.level
        environment.logger.setLevel(logging.ERROR)
        try:
            for task in torun:
                if task.info[1] == hortator._TASK_DONE:
                    # already done: nothing to run and no new status to record
                    continue
                try:
                    task.execute()
                    status = easy.hortator._TASK_DONE
                except Exception:
                    # a failing 3rd-party tool must not abort the whole loop
                    status = easy.hortator._TASK_FAILED
                project.persistent_graph.step_concrete_state(hortator.DbID(task.task_id, False),
                                                             easy.hortator._TASK_STATUS_LIST[status])
        finally:
            # restore the logging level whatever happened above
            environment.logger.setLevel(env_log_level)