def testAddTaskSet(self):
    # A TaskSet added to the graph must be retrievable from the node stored
    # under its taskset ID, and a TaskSet of alignment tasks consuming the
    # index built by self.task must come after it in the execution list.
    index_taskset = easy.TaskSet()
    index_taskset.add(self.task)

    graph = tasksetgraph.TaskSetGraph()
    index_tsid = graph.add_taskset(index_taskset)

    # look the node up with an ID computed independently from the task itself
    node_key = tasksetgraph.get_tasksetid((self.task, ))
    node_attrs = graph.digraph.node[node_key]
    stored_taskset = node_attrs['taskset']
    self.assertEqual(len(index_taskset), len(stored_taskset))
    self.assertEqual(tuple(t.task_id for t in index_taskset),
                     tuple(t.task_id for t in stored_taskset))

    # add connected tasks: alignments that use the index as a source
    align_taskset = easy.TaskSet()
    bowtie2 = rnaseq.Bowtie2()
    for fastq_name in ('foo.fq', 'bar.fq'):
        Assets = bowtie2.Assets
        source = Assets.Source(
            self.task.call.assets.target.indexfilepattern,
            rnaseq.FASTQPossiblyGzipCompressed(fastq_name),
            None)
        align_task = self.project.add_task(bowtie2, Assets(source, None))
        align_taskset.add(align_task)
    align_tsid = graph.add_taskset(align_taskset)

    # the index taskset is expected first, the alignment taskset second
    execution_order = graph.execution_list()
    self.assertEqual(
        index_tsid,
        tasksetgraph.get_tasksetid(execution_order[0].taskset))
    self.assertEqual(
        align_tsid,
        tasksetgraph.get_tasksetid(execution_order[1].taskset))
def testAddTaskSetDuplicateTask(self):
    # Adding a second TaskSet that contains a task already present in the
    # graph must be rejected with a ValueError.
    first_taskset = easy.TaskSet()
    first_taskset.add(self.task)
    graph = tasksetgraph.TaskSetGraph()
    graph.add_taskset(first_taskset)

    duplicate_taskset = easy.TaskSet()
    duplicate_taskset.add(self.task)
    with self.assertRaises(ValueError):
        graph.add_taskset(duplicate_taskset)
def testExecutionList(self):
    # Build a graph holding an index-building task, then a TaskSet of
    # alignment tasks depending on it, and request the execution list.
    graph = tasksetgraph.TaskSetGraph()

    # initial task/taskset is building a bowtie2 index
    index_tsid = graph.add(self.task)
    execution_order = graph.execution_list()
    self.assertEqual(1, len(execution_order))

    # add a second task set that performs the alignment of several pairs of
    # FASTQ files
    n_fastq = 3
    fastq_pairs = self._create_fastq(n_fastq, self.wd2)
    index_task = self.task
    aligner = rnaseq.Bowtie2()
    Assets = aligner.Assets
    # Class to model FASTQ files that are optionally compressed
    FASTQ = rnaseq.FASTQPossiblyGzipCompressed

    align_taskset = easy.TaskSet()
    # note that we are including the pair of FASTQ already aligned above
    for read1_fn, read2_fn in fastq_pairs:
        source = Assets.Source(
            index_task.call.assets.target.indexfilepattern,
            FASTQ(read1_fn),
            FASTQ(read2_fn))
        align_task = self.project.add_task(aligner, Assets(source))
        align_taskset.add(align_task)
    align_tsid = graph.add(align_taskset)

    execution_order = graph.execution_list()
def add_taskset(self, taskset, label=None, mapper=None, filter=None):
    """
    Add a TaskSet to the graph.

    The dependency relationships with other :class:`TaskSet` objects
    are inferred from the assets in the individual :class:`Task` objects.

    A node keyed by the taskset ID is created in the underlying digraph,
    holding a private copy of the taskset together with its label, mapper
    and filter. Each task is then recorded in the task-ID-to-taskset-ID
    index, and edges are added towards/from the tasksets that already
    contain its ancestor/descendant tasks.

    :param taskset: a :class:`easy.tasks.TaskSet`
    :param label: a label for the `taskset`
    :param mapper: a task mapper to use with this taskset
                   (`self.defaultmapper` is used when `None`)
    :param filter: a task filter to use with this taskset
    :returns: the ID under which the taskset is stored in the graph
              (as computed by `get_tasksetid`)
    """
    # Validation helpers; presumably these raise when the taskset belongs to
    # another project or contains a task already in the graph -- TODO confirm.
    self._ensure_taskset_project(taskset)
    self._ensure_task_unique(taskset)
    tasksetid = get_tasksetid(taskset)
    # make a copy to prevent problems if further changes in the TaskSet performed later
    # in the enclosing frame
    taskset_copy = easy.TaskSet()
    if mapper is None:
        # note: the default mapper is resolved now, at insertion time
        mapper = self.defaultmapper
    self._digraph.add_node(tasksetid, attr_dict={'label': label, 'taskset': taskset_copy, 'mapper': mapper, 'filter': filter})
    for task in taskset:
        # index the task so that later lookups can find its taskset
        self._taskid2tasksetid[task.task_id] = tasksetid
        taskset_copy.add(task)
        # delegate to the parent class' add() for the individual task
        super(TaskSetGraph, self).add(task)
        # parent tasks
        # The deque is used as a FIFO (appendleft / pop): ancestors are
        # visited breadth-first, each paired with the taskset ID of the
        # child through which it was reached.
        taskqueue = deque((x, tasksetid) for x in task.parent_tasks())
        while len(taskqueue) > 0:
            parent_task, child_tasksetid = taskqueue.pop()
            if parent_task.task_id in self._taskid2tasksetid:
                # taskset in which the parent is present
                p_tasksetid = self._taskid2tasksetid[parent_task.task_id]
                # NOTE(review): the edge tested (p_tasksetid -> child_tasksetid)
                # is not the edge added (p_tasksetid -> tasksetid); the two
                # differ for ancestors beyond the direct parents. Confirm
                # which one is intended.
                if not self._digraph.has_edge(p_tasksetid, child_tasksetid):
                    self._digraph.add_edge(p_tasksetid, tasksetid)
                # add the parents to the queue to reconstruct the graph of tasksets
                for p in parent_task.parent_tasks():
                    taskqueue.appendleft((p, p_tasksetid))
            else:
                # parent not in any registered taskset: its own ancestors
                # are not explored
                pass
        # child tasks
        # (note: taskqueue is empty again when reaching here)
        taskqueue = deque((tasksetid, x) for x in task.child_tasks())
        while len(taskqueue) > 0:
            # NOTE(review): parent_tasksetid is unpacked but never used; every
            # descendant's taskset is linked directly from `tasksetid`.
            parent_tasksetid, child_task = taskqueue.pop()
            if child_task.task_id in self._taskid2tasksetid:
                # taskset in which the child is present
                c_tasksetid = self._taskid2tasksetid[child_task.task_id]
                if not self._digraph.has_edge(tasksetid, c_tasksetid):
                    self._digraph.add_edge(tasksetid, c_tasksetid)
                # add the children to the queue to reconstruct the graph of tasksets
                for c in child_task.child_tasks():
                    taskqueue.appendleft((tasksetid, c))
            else:
                # child not in any registered taskset: its own descendants
                # are not explored
                pass
    return tasksetid
def testExecution(self):
    # Execute a graph made of an index-building task followed by a TaskSet
    # of alignments, then check that a "TO DO"-only filter leaves nothing
    # to run once everything has been executed.
    graph = tasksetgraph.TaskSetGraph()
    graph.add(self.task)

    n_fastq = 3
    fastq_pairs = self._create_fastq(n_fastq, self.wd2)
    index_task = self.task
    aligner = rnaseq.Bowtie2()
    Assets = aligner.Assets
    FASTQ = rnaseq.FASTQPossiblyGzipCompressed

    align_taskset = easy.TaskSet()
    for read1_fn, read2_fn in fastq_pairs:
        source = Assets.Source(
            index_task.call.assets.target.indexfilepattern,
            FASTQ(read1_fn),
            FASTQ(read2_fn))
        align_task = self.project.add_task(aligner, Assets(source))
        align_taskset.add(align_task)
    graph.add(align_taskset)

    # no task mapper, check that an exception is raised
    with self.assertRaises(ValueError):
        graph.execute()

    # set the default task mapper to a multiprocessing based one
    n_processes = 1
    graph.defaultmapper = easy.MultiprocessingExecution(n_processes)
    graph.execute()

    # use a run-only-once filter (that is, only try to execute
    # tasks with the status "TO DO")
    def todo_only(taskset):
        return taskset.filter_on_status(easy._TASK_TODO)

    graph.defaulttasksetfilter = todo_only
    for element in graph.execution_list():
        remaining = graph._filtered_taskset(element)
        # check that nothing is left to be executed
        # (since everything has been run earlier)
        self.assertEqual(0, len(remaining))
def add(self, obj, label=None, mapper=None, filter=None):
    """
    Generic add method, allowing the addition of either a Task
    or TaskSet object.

    A single :class:`easy.Task` is wrapped into a one-element
    :class:`easy.TaskSet` before being handed to :meth:`add_taskset`.

    :param obj: a :class:`easy.Task` or a :class:`easy.TaskSet`
    :param label: a label for the resulting taskset
    :param mapper: a task mapper to use with this taskset
    :param filter: a task filter to use with this taskset
    :returns: the value returned by :meth:`add_taskset` (the taskset ID)
    """
    if isinstance(obj, easy.Task):
        obj = easy.TaskSet(iterable=(obj,))
    # kept as an assertion so the exception type seen by callers is unchanged
    assert isinstance(obj, easy.TaskSet)
    # forward `filter` (previously dropped) and hand the taskset ID back
    return self.add_taskset(obj, label=label, mapper=mapper, filter=filter)