Example #1
 def test_assetfactory(self):
     Foo = core.assetfactory('Foo', [core.AssetAttr('bar', core.File, '')])
     # check that the asset bar is defined
     foo = Foo(core.File(''))
     self.assertTrue(foo.bar._defined)
     # check that an incorrect type raises an error
     self.assertRaises(AssertionError, Foo, 123)
     # check that incorrect parameters make it fail
     self.assertRaises(AssertionError, core.assetfactory, 'Foo',
                       [core.AssetAttr('bar', core.File, None)])
     # check that trying to modify an asset raises an error
     self.assertRaises(AttributeError, setattr, foo, 'bar', 123)
Example #2
 def test_assetfactory_allownone(self):
     # check that allownone=True allows unspecified assets
     Foo = core.assetfactory(
         'Foo', [core.AssetAttr('bar', core.File, '', allownone=True)])
     # check that the asset bar is defined
     foo = Foo(core.File(''))
     self.assertTrue(foo.bar._defined)
     # check that an incorrect type raises an error
     self.assertRaises(AssertionError, Foo, 123)
     foo = Foo(None)
     self.assertTrue(foo.bar is None)
     # check that trying to modify an asset raises an error
     self.assertRaises(AttributeError, setattr, foo, 'bar', 123)
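
Examples #1 and #2 together pin down the contract of core.assetfactory: attribute values are type-checked when the asset container is constructed, are read-only afterwards, and may only be left unspecified when the attribute is declared with allownone=True. A minimal sketch of that contract, assuming railroadtracks.core is importable as in the tests (the empty file path is only a placeholder):

    from railroadtracks import core

    # A typed, read-only asset container with a single 'bar' slot holding a core.File.
    Foo = core.assetfactory('Foo', [core.AssetAttr('bar', core.File, '')])
    foo = Foo(core.File(''))     # a value of the declared type is accepted
    assert foo.bar._defined      # ...and the slot is then reported as defined
    # Foo(123)       -> AssertionError: wrong type for 'bar'
    # foo.bar = 123  -> AttributeError: asset attributes cannot be reassigned

    # With allownone=True, the slot may instead be left unspecified.
    FooOpt = core.assetfactory(
        'Foo', [core.AssetAttr('bar', core.File, '', allownone=True)])
    assert FooOpt(None).bar is None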
Example #3
 def test_gettargetsofactivity(self):
     model = core.Model(
         tuple())  # getting away with an empty model for this step
     cache = self.cls_to_test(self.cache_file.name,
                              model,
                              force_create=True)
     split = Split(None)
     parameters = tuple()
     input_file = tempfile.NamedTemporaryFile(mode='w+')
     input_file.write('123')
     input_file.flush()
     head_file = tempfile.NamedTemporaryFile(mode='w+')
     tail_file = tempfile.NamedTemporaryFile(mode='w+')
     sources = split.Assets.Source(core.File(input_file.name))
     targets = split.Assets.Target(core.File(head_file.name),
                                   core.File(tail_file.name))
     self.assertRaises(ValueError, cache.get_targetsofactivity,
                       ActivitiesSplit.HEADTAIL)
     # ensure that step variant is tracked
     stepvariant_db_id = cache.id_step_variant(split, split.activities)
     self.assertRaises(ValueError, cache.get_targetsofactivity,
                       ActivitiesSplit.FOO)
     # get the targets of the activity (obviously there are not any yet)
     res = cache.get_targetsofactivity(ActivitiesSplit.HEADTAIL)
     self.assertEqual(0, len(res))
     # create a new task
     task_id = cache.id_stepconcrete(stepvariant_db_id.id, sources, targets,
                                     parameters)
     # the single task just created contributes two targets (head and tail)
     res = cache.get_targetsofactivity(ActivitiesSplit.HEADTAIL)
     self.assertEqual(2, len(res))
     #
     head_file = tempfile.NamedTemporaryFile(mode='w+')
     tail_file = tempfile.NamedTemporaryFile(mode='w+')
     targets_other = split.Assets.Target(core.File(head_file.name),
                                         core.File(tail_file.name))
     task_id_other = cache.id_stepconcrete(stepvariant_db_id.id,
                                           sources,
                                           targets_other,
                                           parameters=(1, ))
     res = cache.get_targetsofactivity(ActivitiesSplit.HEADTAIL)
     self.assertEqual(4, len(res))
     # --
     cat = Cat(None)
     concat = tempfile.NamedTemporaryFile(mode='w+')
     sources = cat.Assets.Source(
         core.FileSequence((targets.head, targets_other.head)))
     targets = cat.Assets.Target(core.File(concat.name))
     # ensure that step variant is tracked
     stepvariant_db_id = cache.id_step_variant(cat, cat.activities)
     task_id_cat = cache.id_stepconcrete(stepvariant_db_id.id, sources,
                                         targets, parameters)
     res = cache.get_targetsofactivity(ActivitiesSplit.MERGE)
     self.assertEqual(1, len(res))
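
The cache bookkeeping exercised here (and in the next two examples) always follows the same sequence: register the step variant, register a concrete task with its source and target assets, then query the cache. As a hedged summary, that sequence could be wrapped in a helper like the one below, assuming cache, step, sources and targets are created the way this test's fixture creates them:

    def register_and_query(cache, step, sources, targets, activity, parameters=()):
        # Track the step variant (the step and its activities) in the cache.
        stepvariant = cache.id_step_variant(step, step.activities)
        # Register the concrete task defined by its sources, targets, and parameters.
        task = cache.id_stepconcrete(stepvariant.id, sources, targets, parameters)
        # Targets can then be retrieved through the activity that produces them.
        return task, cache.get_targetsofactivity(activity)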
Example #4
    def test_gettargetsoftype(self):
        model = core.Model(
            tuple())  # getting away with an empty model for this step
        cache = self.cls_to_test(self.cache_file.name,
                                 model,
                                 force_create=True)

        class SplitCSV(Split):
            _name = 'split'
            activities = (core.DEFAULT_ACTIVITY.MISC, )
            version = '0.1'
            _default_execpath = 'None'

            class Assets(core.AssetsStep):
                Source = core.assetfactory('Source', [
                    core.AssetAttr('file', railroadtracks.model.files.CSVFile,
                                   '')
                ])
                Target = core.assetfactory('Target', [
                    core.AssetAttr('head', railroadtracks.model.files.CSVFile,
                                   ''),
                    core.AssetAttr('tail', railroadtracks.model.files.CSVFile,
                                   '')
                ])

            def run(self, assets, parameters=()):
                # split the source CSV: the first row goes to 'head', the rest to 'tail'
                # (the csv module expects text-mode files with newline='' on Python 3)
                with open(assets.source.file, 'r', newline='') as fh_in:
                    csv_r = csv.reader(fh_in)
                    with open(assets.target.head, 'w', newline='') as fh_out:
                        csv_w = csv.writer(fh_out)
                        head = next(csv_r)
                        csv_w.writerow(head)
                    with open(assets.target.tail, 'w', newline='') as fh_out:
                        csv_w = csv.writer(fh_out)
                        for row in csv_r:
                            csv_w.writerow(row)
                cmd = None
                returncode = 1
                return cmd, returncode

        split = Split(None)
        parameters = tuple()
        input_file = tempfile.NamedTemporaryFile(mode='w+')
        input_file.write('123')
        input_file.flush()
        head_file = tempfile.NamedTemporaryFile(mode='w+')
        tail_file = tempfile.NamedTemporaryFile(mode='w+')
        sources = split.Assets.Source(core.File(input_file.name))
        targets = split.Assets.Target(core.File(head_file.name),
                                      core.File(tail_file.name))
        stepvariant_db_id = cache.id_step_variant(split, split.activities)
        res = cache.get_targetsoftype(core.File.__name__)
        self.assertEqual(0, len(res))
        task_id = cache.id_stepconcrete(stepvariant_db_id.id, sources, targets,
                                        parameters)
        res = cache.get_targetsoftype(core.File.__name__)
        self.assertEqual(2, len(res))

        splitcsv = SplitCSV(None)
        parameters = tuple()
        input_file = tempfile.NamedTemporaryFile(suffix=".csv", mode='w+')
        input_file.write('123')
        input_file.flush()
        head_file = tempfile.NamedTemporaryFile(suffix=".csv", mode='w+')
        tail_file = tempfile.NamedTemporaryFile(suffix=".csv", mode='w+')
        sources = splitcsv.Assets.Source(
            railroadtracks.model.files.CSVFile(input_file.name))
        targets = splitcsv.Assets.Target(
            railroadtracks.model.files.CSVFile(head_file.name),
            railroadtracks.model.files.CSVFile(tail_file.name))
        stepvariant_db_id = cache.id_step_variant(splitcsv,
                                                  splitcsv.activities)
        res = cache.get_targetsoftype(
            railroadtracks.model.files.CSVFile.__name__)
        self.assertEqual(0, len(res))
        task_id = cache.id_stepconcrete(stepvariant_db_id.id, sources, targets,
                                        parameters)
        res = cache.get_targetsoftype(
            railroadtracks.model.files.CSVFile.__name__)
        self.assertEqual(2, len(res))

        head_file = tempfile.NamedTemporaryFile(mode='w+')
        tail_file = tempfile.NamedTemporaryFile(mode='w+')
        targets = split.Assets.Target(core.File(head_file.name),
                                      core.File(tail_file.name))
        task_id_other = cache.id_stepconcrete(stepvariant_db_id.id,
                                              sources,
                                              targets,
                                              parameters=(1, ))
        res = cache.get_targetsoftype(core.File.__name__)
        self.assertEqual(4, len(res))
Example #5
    def test_get_parenttask_of_storedentity(self):
        model = core.Model(
            tuple())  # getting away with an empty model for this step
        cache = self.cls_to_test(self.cache_file.name,
                                 model,
                                 force_create=True)
        split = Split(None)
        parameters = tuple()
        input_file = tempfile.NamedTemporaryFile(mode='w+')
        input_file.write('123')
        input_file.flush()
        head_file = tempfile.NamedTemporaryFile(mode='w+')
        tail_file = tempfile.NamedTemporaryFile(mode='w+')
        sources = split.Assets.Source(core.File(input_file.name))
        targets = split.Assets.Target(core.File(head_file.name),
                                      core.File(tail_file.name))
        # ensure that step variant is tracked
        stepvariant_db_id = cache.id_step_variant(split, split.activities)
        # create a new task
        task_id = cache.id_stepconcrete(stepvariant_db_id.id, sources, targets,
                                        parameters)
        for sa in cache.get_srcassets(task_id):
            # the sources of the first task are root nodes (no parent task)
            res = cache.get_parenttask_of_storedentity(sa)
            self.assertTrue(res is None)
        for sa in cache.get_targetassets(task_id):
            res = cache.get_parenttask_of_storedentity(sa)
            self.assertEqual(task_id.id, res.id)
        #
        head_file_other = tempfile.NamedTemporaryFile(mode='w+')
        tail_file_other = tempfile.NamedTemporaryFile(mode='w+')
        targets_other = split.Assets.Target(core.File(head_file_other.name),
                                            core.File(tail_file_other.name))
        task_id_other = cache.id_stepconcrete(stepvariant_db_id.id,
                                              split.Assets.Source(
                                                  targets.tail),
                                              targets_other,
                                              parameters=(1, ))
        for sa in cache.get_srcassets(task_id_other):
            res = cache.get_parenttask_of_storedentity(sa)
            self.assertEqual(task_id.id, res.id)
        for sa in cache.get_targetassets(task_id_other):
            res = cache.get_parenttask_of_storedentity(sa)
            self.assertEqual(task_id_other.id, res.id)

        # --
        cat = Cat(None)
        sources_cat = cat.Assets.Source(
            core.FileSequence((targets.head, targets_other.head)))
        concat = tempfile.NamedTemporaryFile(mode='w+')
        targets_cat = cat.Assets.Target(core.File(concat.name))
        # ensure that step variant is tracked
        stepvariant_db_id_cat = cache.id_step_variant(cat, cat.activities)
        task_id_cat = cache.id_stepconcrete(stepvariant_db_id_cat.id,
                                            sources_cat, targets_cat,
                                            parameters)
        for sa in cache.get_srcassets(task_id_cat):
            if hasattr(sa, 'iter_storedentities'):
                for sa_sub, t in zip(sa.iter_storedentities(),
                                     (task_id, task_id_other)):
                    res = cache.get_parenttask_of_storedentity(sa_sub)
                    self.assertEqual(t.id, res.id)
            else:
                res = cache.get_parenttask_of_storedentity(sa)
                self.assertEqual(task_id_cat.id, res.id)

        head_file_other = tempfile.NamedTemporaryFile(mode='w+')
        tail_file_other = tempfile.NamedTemporaryFile(mode='w+')
        targets_other = split.Assets.Target(core.File(head_file_other.name),
                                            core.File(tail_file_other.name))
        task_id = cache.id_stepconcrete(
            stepvariant_db_id.id, split.Assets.Source(targets_cat.result),
            targets_other, tuple())
        for sa in cache.get_srcassets(task_id):
            res = cache.get_parenttask_of_storedentity(sa)
            self.assertEqual(task_id_cat.id, res.id)
Example #6
 def test_File(self):
     # FIXME: rather test this in the model?
     reference = core.File(self.reference_fn)
Example #7
    def test_RecipeLoop(self):
        project = self.project
        env = self.env
        nsamples = self.nsamples
        samplereads = self.samplereads
        sampleinfo_fh = self.sampleinfo_fh
        reference_fn = self.reference_fn
        referenceannotation = self.referenceannotation
        PHAGEFASTA = self._PHAGEFASTA
        PHAGEGFF = self._PHAGEGFF

        # -- recipeloop-test-begin
        from railroadtracks import easy

        torun = list()

        # bowtie
        bowtie1index = env.activities.INDEX.bowtiebuild
        bowtie1align = env.activities.ALIGN.bowtie
        Assets = bowtie1index.Assets
        fa_file = rnaseq.FASTAFile(reference_fn)
        task_index_bowtie1 = project.add_task(bowtie1index, 
                                              Assets(Assets.Source(fa_file),
                                                     None))
        torun.append(task_index_bowtie1)

        # bowtie2
        bowtie2index = env.activities.INDEX.bowtie2build
        bowtie2align = env.activities.ALIGN.bowtie2
        Assets = bowtie2index.Assets
        fa_file = rnaseq.FASTAFile(reference_fn)
        task_index_bowtie2 = project.add_task(bowtie2index,
                                              Assets(Assets.Source(fa_file),
                                                     None))
        torun.append(task_index_bowtie2)

        # STAR
        starindex = env.activities.INDEX.starindex
        staralign = env.activities.ALIGN.staralign
        Assets = starindex.Assets
        fa_file = rnaseq.FASTAFile(reference_fn)
        task_index_star = project.add_task(starindex, 
                                           Assets(Assets.Source(fa_file),
                                                  None))
        torun.append(task_index_star)

        # TopHat2
        # (index from bowtie2 used)
        #tophat2 = env.activities.ALIGN.tophat2

        # featureCount
        featurecount = env.activities.QUANTIFY.featurecount

        # Merge columns (obtained from counting)
        merge = env.activities.UTILITY.columnmerger

        # EdgeR, DESeq, DESeq2, and LIMMA voom
        edger = env.activities.DIFFEXP.edger
        deseq = env.activities.DIFFEXP.deseq
        deseq2 = env.activities.DIFFEXP.deseq2
        voom = env.activities.DIFFEXP.limmavoom
        

        # Now explore the different alignment presets in bowtie2, and vanilla star
        from itertools import cycle
        from collections import namedtuple
        Options = namedtuple('Options', 'aligner assets_index parameters')
        # Try various presets for bowtie2
        bowtie2_parameters = (('--very-fast', ), ('--fast', ), 
                              ('--sensitive', ), ('--very-sensitive', ))
        options = [Options(*x) for x in zip(cycle((bowtie2align,)),
                                            cycle((task_index_bowtie2.call.assets.target,)),
                                            bowtie2_parameters)]

        # add bowtie
        options.append(Options(bowtie1align, task_index_bowtie1.call.assets.target, tuple()))
        # add STAR (vanilla, no specific options beside the size of index k-mers)
        options.append(Options(staralign, 
                               task_index_star.call.assets.target, 
                               ('--genomeChrBinNbits', '12')))
        # add TopHat2
        #options.append(Options(tophat2, task_index_bowtie2.call.assets.target, tuple()))

        # loop over the options
        for option in options:
            sample_counts = list()
            # loop over the samples
            for sample_i in range(nsamples):
                read1_fh, read2_fh = samplereads[sample_i]
                # align
                Assets = option.aligner.Assets
                assets = Assets(Assets.Source(option.assets_index.indexfilepattern,
                                              rnaseq.FASTQPossiblyGzipCompressed(read1_fh.name), 
                                              rnaseq.FASTQPossiblyGzipCompressed(read2_fh.name)),
                                Assets.Target.createundefined())
                task_align = project.add_task(option.aligner,
                                              assets,
                                              parameters=option.parameters)
                torun.append(task_align)

                # quantify
                # (non-default parameters to fit our demo GFF)
                Assets = featurecount.Assets
                assets = Assets(Assets.Source(task_align.call.assets.target.alignment,
                                              rnaseq.GFFFile(referenceannotation)),
                                Assets.Target.createundefined())
                task_quantify = project.add_task(featurecount,
                                                 assets,
                                                 parameters = ('--gtf-featuretype', 'CDS',
                                                               '--gtf-attrtype', 'ID'))
                torun.append(task_quantify)

                # keep a pointer to the counts, as we will use it in the merge step
                sample_counts.append(task_quantify.call.assets)

            # merge the sample data into a table (so differential expression can be computed)
            Assets = merge.Assets
            source = Assets.Source(rnaseq.CSVFileSequence(
                tuple(x.target.counts for x in sample_counts)))
            assets_merge = Assets(source,
                                  Assets.Target.createundefined())
            task_merge = project.add_task(merge,
                                          assets_merge,
                                          parameters=("0","1"))
            torun.append(task_merge)

            # differential expression with edgeR, deseq2, and voom
            # (deseq is too whimsical for tests)
            for diffexp, params in ((edger, ()),
                                    (deseq, ('--dispersion-fittype=local', )), 
                                    (deseq2, ()),
                                    (voom, ())):
                Assets = diffexp.Assets
                assets = Assets(Assets.Source(task_merge.call.assets.target.counts,
                                              core.File(sampleinfo_fh.name)),
                                Assets.Target.createundefined())
                task_de = project.add_task(diffexp, assets)
                torun.append(task_de)

        # run the tasks
        # (this is an integration test rather than a unit test - the 
        # 3rd-party tools are often brittle and we want to keep the noise level down)
        env_log_level = environment.logger.level
        environment.logger.level = logging.ERROR
        try:
            for task in torun:
                if task.info[1] != hortator._TASK_DONE:
                    try:
                        task.execute()
                        status = easy.hortator._TASK_DONE
                    except Exception:
                        status = easy.hortator._TASK_FAILED
                    # record the outcome in the persistent graph
                    # (kept inside the 'if' so that 'status' is always bound here)
                    project.persistent_graph.step_concrete_state(
                        hortator.DbID(task.task_id, False),
                        easy.hortator._TASK_STATUS_LIST[status])
        finally:
            environment.logger.level = env_log_level
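
The closing loop above follows a general pattern: temporarily raise the logger threshold, execute each task, record a DONE or FAILED status, and restore the logger on the way out (the test additionally skips tasks already marked as done). A generic sketch of that pattern, with execute and record_status standing in for task.execute() and the persistent-graph status update used in the test:

    import logging

    def run_tasks(tasks, logger, execute, record_status):
        saved_level = logger.level
        logger.setLevel(logging.ERROR)    # keep 3rd-party tool noise down
        try:
            for task in tasks:
                try:
                    execute(task)
                    status = 'DONE'
                except Exception:
                    status = 'FAILED'
                record_status(task, status)
        finally:
            logger.setLevel(saved_level)  # always restore the previous level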