def test_assetfactory(self):
    """Exercise core.assetfactory: construction, type checking, immutability."""
    # build an asset class exposing a single File attribute named 'bar'
    Foo = core.assetfactory('Foo', [core.AssetAttr('bar', core.File, '')])
    instance = Foo(core.File(''))
    # a value of the wrong type is rejected at construction time
    self.assertRaises(AssertionError, Foo, 123)
    # the attribute 'bar' is present and reported as defined
    self.assertTrue(instance.bar._defined)
    # an invalid attribute specification makes the factory itself fail
    self.assertRaises(AssertionError, core.assetfactory, 'Foo',
                      [core.AssetAttr('bar', core.File, None)])
    # assets are read-only once created
    self.assertRaises(AttributeError, setattr, instance, 'bar', 123)
def test_assetfactory_allownone(self):
    """allownone=True lets an asset attribute be left unspecified (None)."""
    Foo = core.assetfactory(
        'Foo', [core.AssetAttr('bar', core.File, '', allownone=True)])
    # a regular (defined) value still works as before
    obj = Foo(core.File(''))
    self.assertTrue(obj.bar._defined)
    # a value of the wrong type is still rejected
    self.assertRaises(AssertionError, Foo, 123)
    # None is now accepted and stored as-is
    obj = Foo(None)
    self.assertTrue(obj.bar is None)
    # assets remain read-only
    self.assertRaises(AttributeError, setattr, obj, 'bar', 123)
def test_gettargetsofactivity(self):
    """Check cache.get_targetsofactivity(): target counts grow as tasks
    sharing the same activity are registered, and unknown activities or
    untracked step variants raise ValueError."""
    model = core.Model(
        tuple())  # getting away with an empty model for this step
    cache = self.cls_to_test(self.cache_file.name, model, force_create=True)
    split = Split(None)
    parameters = tuple()
    # a real input file is needed so the source asset points somewhere
    input_file = tempfile.NamedTemporaryFile(mode='w+')
    input_file.write('123')
    input_file.flush()
    head_file = tempfile.NamedTemporaryFile(mode='w+')
    tail_file = tempfile.NamedTemporaryFile(mode='w+')
    sources = split.Assets.Source(core.File(input_file.name))
    targets = split.Assets.Target(core.File(head_file.name),
                                  core.File(tail_file.name))
    # querying before the step variant is registered raises ValueError
    self.assertRaises(ValueError, cache.get_targetsofactivity,
                      ActivitiesSplit.HEADTAIL)
    # ensure that step variant is tracked
    stepvariant_db_id = cache.id_step_variant(split, split.activities)
    # an activity unknown to the cache still raises ValueError
    self.assertRaises(ValueError, cache.get_targetsofactivity,
                      ActivitiesSplit.FOO)
    # get the targets of the activity (obviously there are not any yet)
    res = cache.get_targetsofactivity(ActivitiesSplit.HEADTAIL)
    self.assertEqual(0, len(res))
    # create a new task
    task_id = cache.id_stepconcrete(stepvariant_db_id.id, sources, targets,
                                    parameters)
    # there is only one such task, contributing its 2 targets (head, tail)
    res = cache.get_targetsofactivity(ActivitiesSplit.HEADTAIL)
    self.assertEqual(2, len(res))
    #
    # a second task with different targets adds 2 more (4 in total)
    head_file = tempfile.NamedTemporaryFile(mode='w+')
    tail_file = tempfile.NamedTemporaryFile(mode='w+')
    targets_other = split.Assets.Target(core.File(head_file.name),
                                        core.File(tail_file.name))
    task_id_other = cache.id_stepconcrete(stepvariant_db_id.id, sources,
                                          targets_other, parameters=(1, ))
    res = cache.get_targetsofactivity(ActivitiesSplit.HEADTAIL)
    self.assertEqual(4, len(res))
    # --
    # a Cat step consuming the split outputs exercises a second activity
    cat = Cat(None)
    concat = tempfile.NamedTemporaryFile(mode='w+')
    sources = cat.Assets.Source(
        core.FileSequence((targets.head, targets_other.head)))
    targets = cat.Assets.Target(core.File(concat.name))
    # ensure that step variant is tracked
    stepvariant_db_id = cache.id_step_variant(cat, cat.activities)
    task_id_cat = cache.id_stepconcrete(stepvariant_db_id.id, sources,
                                        targets, parameters)
    # the MERGE activity only sees the single concatenation target
    res = cache.get_targetsofactivity(ActivitiesSplit.MERGE)
    self.assertEqual(1, len(res))
def test_gettargetsoftype(self):
    """Check cache.get_targetsoftype(): targets are retrievable by asset
    class name, and counts are kept separately per type (File vs CSVFile)."""
    model = core.Model(
        tuple())  # getting away with an empty model for this step
    cache = self.cls_to_test(self.cache_file.name, model, force_create=True)

    # a Split variant whose assets are CSVFile rather than plain File,
    # so the cache has two distinct target types to discriminate
    class SplitCSV(Split):
        _name = 'split'
        activities = (core.DEFAULT_ACTIVITY.MISC, )
        version = '0.1'
        _default_execpath = 'None'

        class Assets(core.AssetsStep):
            Source = core.assetfactory('Source', [
                core.AssetAttr('file', railroadtracks.model.files.CSVFile,
                               '')
            ])
            Target = core.assetfactory('Target', [
                core.AssetAttr('head', railroadtracks.model.files.CSVFile,
                               ''),
                core.AssetAttr('tail', railroadtracks.model.files.CSVFile,
                               '')
            ])

        def run(self, assets, parameters=()):
            # copy the CSV header to 'head' and remaining rows to 'tail'.
            # NOTE(review): never executed by this test (only the cache
            # bookkeeping below is exercised); the 'rb'/'wb' modes with the
            # csv module look Python-2-style — under Python 3 csv expects
            # text-mode files. TODO confirm if run() is exercised elsewhere.
            with open(assets.source.file, 'rb') as fh_in:
                csv_r = csv.reader(fh_in)
                with open(assets.target.head, 'wb') as fh_out:
                    csv_w = csv.writer(fh_out)
                    head = next(csv_r)
                    csv_w.writerow(head)
                with open(assets.target.tail, 'wb') as fh_out:
                    csv_w = csv.writer(fh_out)
                    for row in csv_r:
                        csv_w.writerow(row)
            cmd = None
            returncode = 1
            return cmd, returncode

    # -- first, a plain Split task with core.File targets
    split = Split(None)
    parameters = tuple()
    input_file = tempfile.NamedTemporaryFile(mode='w+')
    input_file.write('123')
    input_file.flush()
    head_file = tempfile.NamedTemporaryFile(mode='w+')
    tail_file = tempfile.NamedTemporaryFile(mode='w+')
    sources = split.Assets.Source(core.File(input_file.name))
    targets = split.Assets.Target(core.File(head_file.name),
                                  core.File(tail_file.name))
    stepvariant_db_id = cache.id_step_variant(split, split.activities)
    # no File targets stored yet
    res = cache.get_targetsoftype(core.File.__name__)
    self.assertEqual(0, len(res))
    task_id = cache.id_stepconcrete(stepvariant_db_id.id, sources, targets,
                                    parameters)
    # the task's 2 targets (head, tail) are now found by type name
    res = cache.get_targetsoftype(core.File.__name__)
    self.assertEqual(2, len(res))
    # -- second, a SplitCSV task with CSVFile targets
    splitcsv = SplitCSV(None)
    parameters = tuple()
    input_file = tempfile.NamedTemporaryFile(suffix=".csv", mode='w+')
    input_file.write('123')
    input_file.flush()
    head_file = tempfile.NamedTemporaryFile(suffix=".csv", mode='w+')
    tail_file = tempfile.NamedTemporaryFile(suffix=".csv", mode='w+')
    sources = splitcsv.Assets.Source(
        railroadtracks.model.files.CSVFile(input_file.name))
    targets = splitcsv.Assets.Target(
        railroadtracks.model.files.CSVFile(head_file.name),
        railroadtracks.model.files.CSVFile(tail_file.name))
    stepvariant_db_id = cache.id_step_variant(splitcsv, splitcsv.activities)
    # CSVFile targets are counted independently of File targets
    res = cache.get_targetsoftype(
        railroadtracks.model.files.CSVFile.__name__)
    self.assertEqual(0, len(res))
    task_id = cache.id_stepconcrete(stepvariant_db_id.id, sources, targets,
                                    parameters)
    res = cache.get_targetsoftype(
        railroadtracks.model.files.CSVFile.__name__)
    self.assertEqual(2, len(res))
    # -- one more task with core.File targets brings the File count to 4
    head_file = tempfile.NamedTemporaryFile(mode='w+')
    tail_file = tempfile.NamedTemporaryFile(mode='w+')
    targets = split.Assets.Target(core.File(head_file.name),
                                  core.File(tail_file.name))
    task_id_other = cache.id_stepconcrete(stepvariant_db_id.id, sources,
                                          targets, parameters=(1, ))
    res = cache.get_targetsoftype(core.File.__name__)
    self.assertEqual(4, len(res))
def test_get_parenttask_of_storedentity(self):
    """Check cache.get_parenttask_of_storedentity(): root source assets
    have no parent task, while every target asset reports the task that
    produced it, across a chain of Split and Cat tasks."""
    model = core.Model(
        tuple())  # getting away with an empty model for this step
    cache = self.cls_to_test(self.cache_file.name, model, force_create=True)
    split = Split(None)
    parameters = tuple()
    input_file = tempfile.NamedTemporaryFile(mode='w+')
    input_file.write('123')
    input_file.flush()
    head_file = tempfile.NamedTemporaryFile(mode='w+')
    tail_file = tempfile.NamedTemporaryFile(mode='w+')
    sources = split.Assets.Source(core.File(input_file.name))
    targets = split.Assets.Target(core.File(head_file.name),
                                  core.File(tail_file.name))
    # ensure that step variant is tracked
    stepvariant_db_id = cache.id_step_variant(split, split.activities)
    # create a new task
    task_id = cache.id_stepconcrete(stepvariant_db_id.id, sources, targets,
                                    parameters)
    for sa in cache.get_srcassets(task_id):
        # all are root nodes
        res = cache.get_parenttask_of_storedentity(sa)
        self.assertTrue(res is None)
    for sa in cache.get_targetassets(task_id):
        # targets of the first task are parented by it
        res = cache.get_parenttask_of_storedentity(sa)
        self.assertEqual(task_id.id, res.id)
    #
    # a second task consuming targets.tail: its sources are now parented
    # by the first task, its targets by itself
    head_file_other = tempfile.NamedTemporaryFile(mode='w+')
    tail_file_other = tempfile.NamedTemporaryFile(mode='w+')
    targets_other = split.Assets.Target(core.File(head_file_other.name),
                                        core.File(tail_file_other.name))
    task_id_other = cache.id_stepconcrete(stepvariant_db_id.id,
                                          split.Assets.Source(targets.tail),
                                          targets_other,
                                          parameters=(1, ))
    for sa in cache.get_srcassets(task_id_other):
        res = cache.get_parenttask_of_storedentity(sa)
        self.assertEqual(task_id.id, res.id)
    for sa in cache.get_targetassets(task_id_other):
        res = cache.get_parenttask_of_storedentity(sa)
        self.assertEqual(task_id_other.id, res.id)
    # --
    # a Cat task whose source is a sequence of two heads, one produced by
    # each of the tasks above
    cat = Cat(None)
    sources_cat = cat.Assets.Source(
        core.FileSequence((targets.head, targets_other.head)))
    concat = tempfile.NamedTemporaryFile(mode='w+')
    targets_cat = cat.Assets.Target(core.File(concat.name))
    # ensure that step variant is tracked
    stepvariant_db_id_cat = cache.id_step_variant(cat, cat.activities)
    task_id_cat = cache.id_stepconcrete(stepvariant_db_id_cat.id,
                                        sources_cat, targets_cat,
                                        parameters)
    for sa in cache.get_srcassets(task_id_cat):
        if hasattr(sa, 'iter_storedentities'):
            # sequence asset: each element maps back to the task that
            # produced it, in order
            for sa_sub, t in zip(sa.iter_storedentities(),
                                 (task_id, task_id_other)):
                res = cache.get_parenttask_of_storedentity(sa_sub)
                self.assertEqual(t.id, res.id)
        else:
            # NOTE(review): branch appears unreachable with the single
            # FileSequence source above — TODO confirm
            res = cache.get_parenttask_of_storedentity(sa)
            self.assertEqual(task_id_cat.id, res.id)
    # a final task consuming the Cat result: its source is parented by
    # the Cat task
    head_file_other = tempfile.NamedTemporaryFile(mode='w+')
    tail_file_other = tempfile.NamedTemporaryFile(mode='w+')
    targets_other = split.Assets.Target(core.File(head_file_other.name),
                                        core.File(tail_file_other.name))
    task_id = cache.id_stepconcrete(
        stepvariant_db_id.id, split.Assets.Source(targets_cat.result),
        targets_other, tuple())
    for sa in cache.get_srcassets(task_id):
        res = cache.get_parenttask_of_storedentity(sa)
        self.assertEqual(task_id_cat.id, res.id)
def test_File(self):
    #FIXME: rather test it in the model ?
    # smoke test: constructing a core.File from an existing path must not raise
    reference = core.File(self.reference_fn)
def test_RecipeLoop(self):
    """Integration test: build a full RNA-Seq recipe (index, align with
    several aligners/presets, quantify, merge, differential expression)
    and execute every task, recording DONE/FAILED status in the
    persistent graph.

    Fixes over the previous version:
    - the bare ``except:`` around ``task.execute()`` also swallowed
      ``KeyboardInterrupt``/``SystemExit``; narrowed to ``Exception``.
    - ``params`` was unpacked from the diffexp option tuples (including
      ``--dispersion-fittype=local`` for deseq) but never passed to
      ``project.add_task``; it is now forwarded as ``parameters``.
    """
    project = self.project
    env = self.env
    nsamples = self.nsamples
    samplereads = self.samplereads
    sampleinfo_fh = self.sampleinfo_fh
    reference_fn = self.reference_fn
    referenceannotation = self.referenceannotation
    PHAGEFASTA = self._PHAGEFASTA
    PHAGEGFF = self._PHAGEGFF
    # -- recipeloop-test-begin
    from railroadtracks import easy
    torun = list()
    # bowtie
    bowtie1index = env.activities.INDEX.bowtiebuild
    bowtie1align = env.activities.ALIGN.bowtie
    Assets = bowtie1index.Assets
    fa_file = rnaseq.FASTAFile(reference_fn)
    task_index_bowtie1 = project.add_task(bowtie1index,
                                          Assets(Assets.Source(fa_file),
                                                 None))
    torun.append(task_index_bowtie1)
    # bowtie2
    bowtie2index = env.activities.INDEX.bowtie2build
    bowtie2align = env.activities.ALIGN.bowtie2
    Assets = bowtie2index.Assets
    fa_file = rnaseq.FASTAFile(reference_fn)
    task_index_bowtie2 = project.add_task(bowtie2index,
                                          Assets(Assets.Source(fa_file),
                                                 None))
    torun.append(task_index_bowtie2)
    # STAR
    starindex = env.activities.INDEX.starindex
    staralign = env.activities.ALIGN.staralign
    Assets = starindex.Assets
    fa_file = rnaseq.FASTAFile(reference_fn)
    task_index_star = project.add_task(starindex,
                                       Assets(Assets.Source(fa_file), None))
    torun.append(task_index_star)
    # TopHat2
    # (index from bowtie2 used)
    #tophat2 = env.activities.ALIGN.tophat2
    # featureCount
    featurecount = env.activities.QUANTIFY.featurecount
    # Merge columns (obtained from counting)
    merge = env.activities.UTILITY.columnmerger
    # EdgeR, DESeq, DESeq2, and LIMMA voom
    edger = env.activities.DIFFEXP.edger
    deseq = env.activities.DIFFEXP.deseq
    deseq2 = env.activities.DIFFEXP.deseq2
    voom = env.activities.DIFFEXP.limmavoom
    # Now explore the different alignment presets in bowtie2, and vanilla star
    from itertools import cycle
    from collections import namedtuple
    Options = namedtuple('Options', 'aligner assets_index parameters')
    # Try various presets for bowtie2
    bowtie2_parameters = (('--very-fast', ), ('--fast', ),
                          ('--sensitive', ), ('--very-sensitive', ))
    options = [Options(*x) for x in
               zip(cycle((bowtie2align, )),
                   cycle((task_index_bowtie2.call.assets.target, )),
                   bowtie2_parameters)]
    # add bowtie
    options.append(Options(bowtie1align,
                           task_index_bowtie1.call.assets.target,
                           tuple()))
    # add STAR (vanilla, no specific options beside the size of index k-mers)
    options.append(Options(staralign,
                           task_index_star.call.assets.target,
                           ('--genomeChrBinNbits', '12')))
    # add TopHat2
    #options.append(Options(tophat2, task_index_bowtie2.call.assets.target, tuple()))
    # loop over the options
    for option in options:
        sample_counts = list()
        # loop over the samples
        for sample_i in range(nsamples):
            read1_fh, read2_fh = samplereads[sample_i]
            # align
            Assets = option.aligner.Assets
            assets = Assets(
                Assets.Source(
                    option.assets_index.indexfilepattern,
                    rnaseq.FASTQPossiblyGzipCompressed(read1_fh.name),
                    rnaseq.FASTQPossiblyGzipCompressed(read2_fh.name)),
                Assets.Target.createundefined())
            task_align = project.add_task(option.aligner, assets,
                                          parameters=option.parameters)
            torun.append(task_align)
            # quantify
            # (non-default parameters to fit our demo GFF)
            Assets = featurecount.Assets
            assets = Assets(
                Assets.Source(task_align.call.assets.target.alignment,
                              rnaseq.GFFFile(referenceannotation)),
                Assets.Target.createundefined())
            task_quantify = project.add_task(
                featurecount, assets,
                parameters=('--gtf-featuretype', 'CDS',
                            '--gtf-attrtype', 'ID'))
            torun.append(task_quantify)
            # keep a pointer to the counts, as we will use it in the merge step
            sample_counts.append(task_quantify.call.assets)
        # merge the sample data into a table
        # (so differential expression can be computed)
        Assets = merge.Assets
        source = Assets.Source(
            rnaseq.CSVFileSequence(tuple(x.target.counts
                                         for x in sample_counts)))
        assets_merge = Assets(source, Assets.Target.createundefined())
        task_merge = project.add_task(merge, assets_merge,
                                      parameters=("0", "1"))
        torun.append(task_merge)
        # differential expression with edgeR, deseq2, and voom
        # (deseq is too whimsical for tests)
        for diffexp, params in ((edger, ()),
                                (deseq, ('--dispersion-fittype=local', )),
                                (deseq2, ()),
                                (voom, ())):
            Assets = diffexp.Assets
            assets = Assets(
                Assets.Source(task_merge.call.assets.target.counts,
                              core.File(sampleinfo_fh.name)),
                Assets.Target.createundefined())
            # forward the per-method parameters (previously built but
            # silently dropped)
            task_de = project.add_task(diffexp, assets, parameters=params)
            torun.append(task_de)
    # run the tasks
    # (this is an integration test rather than a unit test - the
    # 3rd-party tools are often brittle and we want to keep the noise
    # level down)
    env_log_level = environment.logger.level
    environment.logger.level = logging.ERROR
    try:
        for task in torun:
            if task.info[1] != hortator._TASK_DONE:
                try:
                    task.execute()
                    status = easy.hortator._TASK_DONE
                except Exception:
                    # a failing 3rd-party tool marks the task FAILED but
                    # must not abort the whole loop; Exception (not a bare
                    # except) so Ctrl-C / SystemExit still propagate
                    status = easy.hortator._TASK_FAILED
                project.persistent_graph.step_concrete_state(
                    hortator.DbID(task.task_id, False),
                    easy.hortator._TASK_STATUS_LIST[status])
    finally:
        # always restore the logging level, even if the loop itself raises
        environment.logger.level = env_log_level