def testAddTaskSet(self):
        # Check that a TaskSet added to a TaskSetGraph can be retrieved from
        # the node stored under its taskset id, and that a second TaskSet
        # using the first task's target comes after it in the execution list.
        index_taskset = easy.TaskSet()
        index_taskset.add(self.task)

        graph = tasksetgraph.TaskSetGraph()
        index_tsid = graph.add_taskset(index_taskset)

        # the node is looked up with an id computed from a plain tuple
        # holding the same task
        node_attributes = graph.digraph.node[
            tasksetgraph.get_tasksetid((self.task, ))]
        stored_taskset = node_attributes['taskset']
        self.assertEqual(len(index_taskset), len(stored_taskset))
        expected_ids = tuple(t.task_id for t in index_taskset)
        stored_ids = tuple(t.task_id for t in stored_taskset)
        self.assertEqual(expected_ids, stored_ids)

        # add connected tasks: their source is the index file pattern
        # targeted by self.task
        align_taskset = easy.TaskSet()
        aligner = rnaseq.Bowtie2()
        Assets = aligner.Assets
        for fastq_name in ('foo.fq', 'bar.fq'):
            source = Assets.Source(
                self.task.call.assets.target.indexfilepattern,
                rnaseq.FASTQPossiblyGzipCompressed(fastq_name),
                None)
            align_taskset.add(
                self.project.add_task(aligner, Assets(source, None)))
        align_tsid = graph.add_taskset(align_taskset)

        # the execution list is expected to start with the first taskset,
        # followed by the taskset depending on it
        ordered = graph.execution_list()
        for position, expected_id in enumerate((index_tsid, align_tsid)):
            self.assertEqual(
                expected_id,
                tasksetgraph.get_tasksetid(ordered[position].taskset))
    def testAddTaskSetDuplicateTask(self):
        # A task already added to the graph through one TaskSet must be
        # rejected with a ValueError when a second TaskSet holding it is added.
        first_taskset = easy.TaskSet()
        first_taskset.add(self.task)
        graph = tasksetgraph.TaskSetGraph()
        graph.add_taskset(first_taskset)

        duplicate_taskset = easy.TaskSet()
        duplicate_taskset.add(self.task)
        with self.assertRaises(ValueError):
            graph.add_taskset(duplicate_taskset)
    def testExecutionList(self):
        # Ask for the execution list after adding a single task, then again
        # after adding a TaskSet of alignment tasks that use its index.
        graph = tasksetgraph.TaskSetGraph()
        # the initial task/taskset is building a bowtie2 index
        graph.add(self.task)
        self.assertEqual(1, len(graph.execution_list()))

        # the second task set performs the alignment of several pairs of
        # FASTQ files
        n_fastq = 3
        fastq_pairs = self._create_fastq(n_fastq, self.wd2)

        aligner = rnaseq.Bowtie2()
        Assets = aligner.Assets
        # class modelling FASTQ files that are optionally compressed
        FASTQ = rnaseq.FASTQPossiblyGzipCompressed
        align_taskset = easy.TaskSet()
        for read1_fn, read2_fn in fastq_pairs:
            source = Assets.Source(
                self.task.call.assets.target.indexfilepattern,
                FASTQ(read1_fn), FASTQ(read2_fn))
            align_taskset.add(self.project.add_task(aligner, Assets(source)))
        graph.add(align_taskset)
        # NOTE(review): the refreshed execution list is computed here but
        # nothing is asserted about it in the code visible in this file.
        graph.execution_list()
Esempio n. 4
0
    def add_taskset(self, taskset, label=None, mapper=None, filter=None):
        """ Add a TaskSet to the graph.

        The dependency relationships with other :class:`TaskSet` objects
        already in the graph are inferred from the parent and child tasks
        of the individual :class:`Task` objects.

        :param taskset: a :class:`easy.tasks.TaskSet`
        :param label: a label for the `taskset`
        :param mapper: a task mapper to use with this taskset
                       (if `None`, `self.defaultmapper` is used)
        :param filter: a task filter to use with this taskset
        :return: the ID returned by :func:`get_tasksetid` for `taskset`,
                 which is also the key of the node added to the graph

        """
        # both checks run before anything is modified
        # (presumably raising when the taskset is not acceptable)
        self._ensure_taskset_project(taskset)
        self._ensure_task_unique(taskset)
        tasksetid = get_tasksetid(taskset)
        # store a copy, to prevent problems if the caller changes `taskset`
        # after it was added
        taskset_copy = easy.TaskSet()
        if mapper is None:
            mapper = self.defaultmapper
        # node attributes are given as keywords: this form is accepted by
        # networkx 1.x and 2.x+ alike (`attr_dict=` only exists in 1.x)
        self._digraph.add_node(tasksetid,
                               label=label,
                               taskset=taskset_copy,
                               mapper=mapper,
                               filter=filter)
        for task in taskset:
            self._taskid2tasksetid[task.task_id] = tasksetid
            taskset_copy.add(task)
            super(TaskSetGraph, self).add(task)

            # parent tasks: walk up the ancestors, each queue entry pairing
            # a task with the ID of the taskset of the child it was
            # reached from
            taskqueue = deque((x, tasksetid) for x in task.parent_tasks())
            while taskqueue:
                parent_task, child_tasksetid = taskqueue.pop()
                if parent_task.task_id not in self._taskid2tasksetid:
                    # the parent is in no taskset of this graph: no edge
                    continue
                # taskset in which the parent is present
                p_tasksetid = self._taskid2tasksetid[parent_task.task_id]
                if not self._digraph.has_edge(p_tasksetid, child_tasksetid):
                    # link the parent's taskset to the taskset it was
                    # reached from (the edge tested just above), not
                    # unconditionally to the taskset being added
                    self._digraph.add_edge(p_tasksetid, child_tasksetid)
                    # add the parents to the queue to reconstruct the
                    # graph of tasksets
                    taskqueue.extendleft(
                        (p, p_tasksetid) for p in parent_task.parent_tasks())

            # child tasks
            # NOTE(review): unlike the parent side, every descendant reached
            # here is linked directly to the taskset being added; that
            # behaviour is kept unchanged - confirm it is intended.
            taskqueue = deque(task.child_tasks())
            while taskqueue:
                child_task = taskqueue.pop()
                if child_task.task_id not in self._taskid2tasksetid:
                    # the child is in no taskset of this graph: no edge
                    continue
                # taskset in which the child is present
                c_tasksetid = self._taskid2tasksetid[child_task.task_id]
                if not self._digraph.has_edge(tasksetid, c_tasksetid):
                    self._digraph.add_edge(tasksetid, c_tasksetid)
                    # add the children to the queue to reconstruct the
                    # graph of tasksets
                    taskqueue.extendleft(child_task.child_tasks())
        return tasksetid
    def testExecution(self):
        # Execute a graph made of self.task and of alignment tasks using its
        # index file pattern, then check that no task is left to be run.
        graph = tasksetgraph.TaskSetGraph()
        graph.add(self.task)
        n_fastq = 3
        fastq_pairs = self._create_fastq(n_fastq, self.wd2)

        aligner = rnaseq.Bowtie2()
        Assets = aligner.Assets
        FASTQ = rnaseq.FASTQPossiblyGzipCompressed
        align_taskset = easy.TaskSet()
        for read1_fn, read2_fn in fastq_pairs:
            source = Assets.Source(
                self.task.call.assets.target.indexfilepattern,
                FASTQ(read1_fn), FASTQ(read2_fn))
            align_taskset.add(self.project.add_task(aligner, Assets(source)))
        graph.add(align_taskset)

        # no task mapper was set: executing must raise an exception
        with self.assertRaises(ValueError):
            graph.execute()

        # make a multiprocessing-based task mapper the default, and execute
        n_processes = 1
        graph.defaultmapper = easy.MultiprocessingExecution(n_processes)
        graph.execute()

        # run-only-once filter: only try to execute the tasks
        # with the status "TO DO"
        def todo_only(taskset):
            return taskset.filter_on_status(easy._TASK_TODO)
        graph.defaulttasksetfilter = todo_only

        # everything has been run earlier, so nothing should be left
        # to be executed in any of the filtered tasksets
        for element in graph.execution_list():
            remaining = graph._filtered_taskset(element)
            self.assertEqual(0, len(remaining))
Esempio n. 6
0
 def add(self, obj, label=None, mapper=None, filter=None):
     """ Generic add method, allowing the addition of either a Task or TaskSet object.

     A single :class:`Task` is wrapped into a one-element :class:`TaskSet`
     before being handed to :meth:`add_taskset`.

     :param obj: an :class:`easy.Task` or an :class:`easy.TaskSet`
     :param label: a label, passed on to :meth:`add_taskset`
     :param mapper: a task mapper, passed on to :meth:`add_taskset`
     :param filter: a task filter, passed on to :meth:`add_taskset`
     :return: the value returned by :meth:`add_taskset`
     """
     if isinstance(obj, easy.Task):
         obj = easy.TaskSet(iterable=(obj,))
     # kept as an assertion so the exception type seen by callers is unchanged
     assert isinstance(obj, easy.TaskSet), \
         "obj must be an easy.Task or an easy.TaskSet"
     # forward `filter` as well (it used to be silently dropped), and hand
     # the result of add_taskset() back to the caller
     return self.add_taskset(obj, label=label, mapper=mapper, filter=filter)