Exemple #1
0
 def spawn_tasks(self, parent_task_id, tasks):
     """Spawn every descriptor in `tasks` into this object's task graph.

     The task registered under `parent_task_id` is looked up once and handed
     to build_taskpool_task_from_descriptor as its third positional argument
     (the second is always None).  All spawns are collected in a single
     TaskGraphUpdate, which is committed at the end.
     """
     parent = self.task_graph.get_task(parent_task_id)
     update = TaskGraphUpdate()

     # The generator is lazy, so each task is built right before it is spawned.
     built_tasks = (
         build_taskpool_task_from_descriptor(descriptor, None, parent)
         for descriptor in tasks
     )
     for built in built_tasks:
         update.spawn(built)

     update.commit(self.task_graph)
Exemple #2
0
    def commit_task(self, task_id, bindings, saved_continuation_uri=None, replay_uuid_list=None):
        """Publish a task's bound references and mark the task as committed.

        Args:
            task_id: id used to look the task up in self.task_graph.
            bindings: mapping whose values are the references to publish on
                behalf of the task; the keys are not used here.
            saved_continuation_uri: accepted for interface compatibility;
                not used by this method.
            replay_uuid_list: accepted for interface compatibility; not used
                by this method.
        """
        task = self.task_graph.get_task(task_id)
        tx = TaskGraphUpdate()

        # Only the references are needed, so iterate the values directly
        # instead of binding an unused key to a name that shadows id().
        for ref in bindings.values():
            tx.publish(ref, task)

        tx.commit(self.task_graph)

        # The state changes only after the update has been committed.
        task.state = TASK_COMMITTED
Exemple #3
0
    def report_tasks(self, report, toplevel_task_id):
        """Apply a batch of task reports to the task graph.

        Args:
            report: iterable of (parent_id, success, payload) triples.  For a
                successful entry, payload unpacks as (spawned, published):
                descriptors to spawn as children of the parent task and
                references to publish on its behalf.  For a failed entry,
                payload is passed to investigate_task_failure.
            toplevel_task_id: id of the task the report was submitted for.
                Its task object supplies the worker handed to worker_idle
                when a failure is reported.

        On the first failed entry the failure is investigated, worker_idle is
        called, a 'schedule' event is published and the method returns
        without committing the accumulated update.  Children appended to a
        parent's `children` list by earlier entries are not removed.
        """
        task = self.task_graph.get_task(toplevel_task_id)

        tx = TaskGraphUpdate()

        for (parent_id, success, payload) in report:
            parent_task = self.task_graph.get_task(parent_id)
            if success:
                (spawned, published) = payload

                for child in spawned:
                    child_task = build_taskpool_task_from_descriptor(
                        child, parent_task)
                    tx.spawn(child_task)
                    parent_task.children.append(child_task)

                for ref in published:
                    tx.publish(ref, parent_task)

            else:
                # Only one failed task per-report, at the moment.
                self.investigate_task_failure(parent_task, payload)
                # The worker is read from the looked-up task object, not from
                # the id that was used as the lookup key.
                # NOTE(review): assumes the task object exposes `.worker` --
                # confirm against the task class.
                self.lazy_task_pool.worker_pool.worker_idle(task.worker)
                ciel.engine.publish('schedule')
                return

        tx.commit(self.task_graph)
Exemple #4
0
    def report_tasks(self, report, toplevel_task_id):
        """Fold a worker's report into the task graph.

        Args:
            report: iterable of (parent_id, success, payload) triples.  When
                success is true, payload unpacks as (spawned, published):
                child task descriptors to spawn under the parent and
                references to publish for it.  Otherwise payload is handed to
                investigate_task_failure.
            toplevel_task_id: id of the task this report belongs to; the
                corresponding task object provides the worker passed to
                worker_idle on failure.

        A failed entry stops processing: after the failure is investigated,
        worker_idle is called, 'schedule' is published and the method returns
        before the pending update is committed.  Entries in parents'
        `children` lists added by earlier successful entries stay in place.
        """
        task = self.task_graph.get_task(toplevel_task_id)

        tx = TaskGraphUpdate()

        for (parent_id, success, payload) in report:
            parent_task = self.task_graph.get_task(parent_id)
            if success:
                (spawned, published) = payload

                for child in spawned:
                    child_task = build_taskpool_task_from_descriptor(child, parent_task)
                    tx.spawn(child_task)
                    parent_task.children.append(child_task)

                for ref in published:
                    tx.publish(ref, parent_task)

            else:
                # Only one failed task per-report, at the moment.
                self.investigate_task_failure(parent_task, payload)
                # Use the task object fetched above; `toplevel_task_id` is the
                # lookup key, not the task itself.
                # NOTE(review): assumes the task object exposes `.worker` --
                # confirm against the task class.
                self.lazy_task_pool.worker_pool.worker_idle(task.worker)
                ciel.engine.publish('schedule')
                return

        tx.commit(self.task_graph)
Exemple #5
0
    def spawn_and_publish(self, spawns, refs, producer=None, taskset=None):
        """Spawn tasks and publish references in a single graph update.

        When `producer` is given, its "task_id" entry selects the producing
        task, and that task's `taskset` replaces the `taskset` argument.
        Without a producer, tasks are built and references published with a
        producer task of None.  The update is committed against `self`.
        """
        if producer is None:
            producing_task = None
        else:
            producing_task = self.get_task(producer["task_id"])
            taskset = producing_task.taskset

        update = TaskGraphUpdate()

        for descriptor in spawns:
            update.spawn(
                build_taskpool_task_from_descriptor(
                    descriptor, producing_task, taskset))

        for reference in refs:
            update.publish(reference, producing_task)

        update.commit(self)
Exemple #6
0
    def _report_tasks(self, report, toplevel_task, worker):
        """Apply a worker's task report to the task graph, then reschedule.

        `report` is indexed and iterated as a sequence of
        (parent_id, success, payload) triples; the id in the first entry
        names the root task whose worker assignments are released.  For a
        successful entry, payload unpacks as (spawned, published, profiling).
        For a failed entry, payload is passed to investigate_task_failure and
        the method returns without committing the accumulated update.

        NOTE(review): report[0][0] raises IndexError for an empty report --
        confirm callers never send one.
        NOTE(review): the failure path calls self.schedule() while still
        inside `with self._lock`, whereas the success path calls it after the
        block exits -- confirm this difference is intended.
        """
        with self._lock:

            tx = TaskGraphUpdate()

            # Release the root task from every worker it is assigned to; on
            # workers other than the reporting one the task is also aborted.
            root_task = self.task_graph.get_task(report[0][0])
            for assigned_worker in root_task.get_workers():
                if assigned_worker is worker:
                    self.workers[worker].deassign_task(root_task)
                else:
                    self.workers[assigned_worker].deassign_task(root_task)
                    assigned_worker.worker_pool.abort_task_on_worker(
                        root_task, assigned_worker)

                    # XXX: Need to abort the task running on other workers.
                    pass

            for (parent_id, success, payload) in report:

                parent_task = self.task_graph.get_task(parent_id)

                if success:
                    (spawned, published, profiling) = payload
                    # The parent is marked committed here, before the graph
                    # update itself is committed below.
                    parent_task.set_profiling(profiling)
                    parent_task.set_state(TASK_COMMITTED)
                    self.record_task_stats(parent_task, worker)
                    for child in spawned:
                        child_task = build_taskpool_task_from_descriptor(
                            child, parent_task)
                        tx.spawn(child_task)
                        parent_task.children.append(child_task)

                    for ref in published:
                        tx.publish(ref, parent_task)

                else:
                    # Only one failed task per-report, at the moment.
                    # Returning here discards `tx`: nothing spawned or
                    # published by earlier entries is committed.
                    self.investigate_task_failure(parent_task, payload)
                    self.schedule()
                    return

            tx.commit(self.task_graph)
            self.task_graph.reduce_graph_for_references(
                toplevel_task.expected_outputs)

        # XXX: Need to remove assigned task from worker(s).
        self.schedule()
Exemple #7
0
 def spawn_and_publish(self, spawns, refs, producer=None, taskset=None):
     """Commit a batch of spawned tasks and published references together.

     If `producer` is supplied, the task named by producer["task_id"] becomes
     the producer task and its `taskset` overrides the `taskset` argument;
     otherwise the producer task is None.  Everything is gathered in one
     TaskGraphUpdate that is committed against `self`.
     """
     has_producer = producer is not None
     producer_task = self.get_task(producer["task_id"]) if has_producer else None
     if has_producer:
         taskset = producer_task.taskset

     graph_update = TaskGraphUpdate()

     # Lazy generator: each task is built immediately before it is spawned.
     new_tasks = (
         build_taskpool_task_from_descriptor(descriptor, producer_task, taskset)
         for descriptor in spawns
     )
     for new_task in new_tasks:
         graph_update.spawn(new_task)

     for published_ref in refs:
         graph_update.publish(published_ref, producer_task)

     graph_update.commit(self)
Exemple #8
0
    def spawn_tasks(self, parent_task_id, tasks):
        """Build a task from each descriptor in `tasks` and spawn it.

        The task stored under `parent_task_id` is passed as the third
        positional argument to build_taskpool_task_from_descriptor (the
        second is None).  The spawns are committed to self.task_graph as a
        single TaskGraphUpdate.
        """
        spawning_parent = self.task_graph.get_task(parent_task_id)
        graph_update = TaskGraphUpdate()

        for descriptor in tasks:
            graph_update.spawn(
                build_taskpool_task_from_descriptor(
                    descriptor, None, spawning_parent))

        graph_update.commit(self.task_graph)
Exemple #9
0
 def _report_tasks(self, report, toplevel_task, worker):
     """Apply a worker's task report to the task graph, then reschedule.

     `report` is indexed and iterated as a sequence of
     (parent_id, success, payload) triples; the id in the first entry names
     the root task whose worker assignments are released.  For a successful
     entry, payload unpacks as (spawned, published, profiling).  For a
     failed entry, payload is passed to investigate_task_failure and the
     method returns without committing the accumulated update.

     NOTE(review): report[0][0] raises IndexError for an empty report --
     confirm callers never send one.
     NOTE(review): the failure path calls self.schedule() while still inside
     `with self._lock`, whereas the success path calls it after the block
     exits -- confirm this difference is intended.
     """
     with self._lock:
 
         tx = TaskGraphUpdate()
         
         # Release the root task from every worker it is assigned to; on
         # workers other than the reporting one the task is also aborted.
         root_task = self.task_graph.get_task(report[0][0])
         for assigned_worker in root_task.get_workers():
             if assigned_worker is worker:
                 self.workers[worker].deassign_task(root_task)
             else:
                 self.workers[assigned_worker].deassign_task(root_task)
                 assigned_worker.worker_pool.abort_task_on_worker(root_task, assigned_worker)
                 
                 # XXX: Need to abort the task running on other workers.
                 pass
         
         for (parent_id, success, payload) in report:
             
             parent_task = self.task_graph.get_task(parent_id)
             
             if success:
                 (spawned, published, profiling) = payload
                 # The parent is marked committed here, before the graph
                 # update itself is committed below.
                 parent_task.set_profiling(profiling)
                 parent_task.set_state(TASK_COMMITTED)
                 self.record_task_stats(parent_task, worker)
                 for child in spawned:
                     child_task = build_taskpool_task_from_descriptor(child, parent_task)
                     tx.spawn(child_task)
                     parent_task.children.append(child_task)
                 
                 for ref in published:
                     tx.publish(ref, parent_task)
             
             else:
                 # Only one failed task per-report, at the moment.
                 # Returning here discards `tx`: nothing spawned or published
                 # by earlier entries is committed.
                 self.investigate_task_failure(parent_task, payload)
                 self.schedule()
                 return
                 
         tx.commit(self.task_graph)
         self.task_graph.reduce_graph_for_references(toplevel_task.expected_outputs)
         
     # XXX: Need to remove assigned task from worker(s).
     self.schedule()
Exemple #10
0
    def commit_task(self,
                    task_id,
                    bindings,
                    saved_continuation_uri=None,
                    replay_uuid_list=None):
        """Publish a task's bound references and mark the task as committed.

        Args:
            task_id: id used to look the task up in self.task_graph.
            bindings: mapping whose values are the references to publish on
                behalf of the task; the keys are not used here.
            saved_continuation_uri: accepted for interface compatibility;
                not used by this method.
            replay_uuid_list: accepted for interface compatibility; not used
                by this method.
        """
        task = self.task_graph.get_task(task_id)
        tx = TaskGraphUpdate()

        # Iterate the values directly: the keys were never used, and binding
        # them to `id` shadowed the builtin of the same name.
        for ref in bindings.values():
            tx.publish(ref, task)

        tx.commit(self.task_graph)

        # The state changes only after the update has been committed.
        task.state = TASK_COMMITTED
Exemple #11
0
 def publish_refs(self, task_id, refs):
     """Publish each reference in `refs` for the task stored under `task_id`.

     All publications go into one TaskGraphUpdate, which is committed to
     self.task_graph once the loop finishes.
     """
     update = TaskGraphUpdate()
     publisher = self.task_graph.get_task(task_id)

     for reference in refs:
         update.publish(reference, publisher)

     update.commit(self.task_graph)
Exemple #12
0
 def publish_refs(self, task_id, refs):
     """Record `refs` as published by the task identified by `task_id`.

     The references are queued on a fresh TaskGraphUpdate and applied to
     self.task_graph with a single commit.
     """
     pending = TaskGraphUpdate()
     owning_task = self.task_graph.get_task(task_id)

     # Queue every reference first; nothing reaches the graph until commit.
     for published_ref in refs:
         pending.publish(published_ref, owning_task)

     pending.commit(self.task_graph)
Exemple #13
0
    def default(self, job_id, task_id, action=None):
        """Handle an HTTP request addressed to one task of one job.

        GET without an action returns the task's long descriptor as JSON.
        POST dispatches on `action`: 'report', 'failed', 'publish' or
        'abort'.  An unknown job or task, a POST without an action, or an
        unrecognised action raises HTTPError(404).  A GET that carries an
        action, or any method other than GET/POST, raises HTTPError(405).
        """
        try:
            job = self.job_pool.get_job_by_id(job_id)
        except KeyError:
            ciel.log('No such job: %s' % job_id, 'MASTER', logging.ERROR)
            raise HTTPError(404)

        try:
            task = job.task_graph.get_task(task_id)
        except KeyError:
            ciel.log('No such task: %s in job: %s' % (task_id, job_id),
                     'MASTER', logging.ERROR)
            raise HTTPError(404)

        http_method = cherrypy.request.method

        if http_method == 'GET':
            if action is not None:
                ciel.log('Invalid operation: cannot GET with an action',
                         'MASTER', logging.ERROR)
                raise HTTPError(405)
            descriptor = task.as_descriptor(long=True)
            return simplejson.dumps(descriptor, cls=SWReferenceJSONEncoder)

        if http_method != 'POST':
            ciel.log(
                'Invalid operation: only POST is supported for task operations',
                'MASTER', logging.ERROR)
            raise HTTPError(405)

        # From here on the request is a POST; every branch returns or raises.

        if action == 'report':
            # Multi-spawn-and-commit
            raw_body = cherrypy.request.body.read()
            decoded = simplejson.loads(raw_body,
                                       object_hook=json_decode_object_hook)
            reporting_worker = self.worker_pool.get_worker_by_id(
                decoded['worker'])
            job.report_tasks(decoded['report'], task, reporting_worker)
            return

        if action == 'failed':
            raw_body = cherrypy.request.body.read()
            failure = simplejson.loads(raw_body,
                                       object_hook=json_decode_object_hook)
            job.investigate_task_failure(task, failure)
            return simplejson.dumps(True)

        if action == 'publish':
            raw_body = cherrypy.request.body.read()
            published_refs = simplejson.loads(
                raw_body, object_hook=json_decode_object_hook)

            update = TaskGraphUpdate()
            for published_ref in published_refs:
                update.publish(published_ref, task)
            update.commit(job.task_graph)

            self.backup_sender.publish_refs(task_id, published_refs)
            ciel.engine.publish('schedule')
            return

        if action == 'abort':
            # FIXME (maybe): There is currently no abort method on Task.
            task.abort(task_id)
            return

        # Anything left is either a POST without an action or an unknown one.
        if action is None:
            ciel.log('Invalid operation: only GET is supported for tasks',
                     'MASTER', logging.ERROR)
        else:
            ciel.log('Unknown action (%s) on task (%s)' % (action, task_id),
                     'MASTER', logging.ERROR)
        raise HTTPError(404)
Exemple #14
0
    def default(self, job_id, task_id, action=None):
        """Serve an HTTP request for a single task inside a job.

        A GET with no action returns the task's long descriptor encoded as
        JSON.  A POST is dispatched on `action`: 'report', 'failed',
        'publish', 'log' or 'abort'.  HTTPError(404) is raised for an unknown
        job or task, for a POST without an action and for an unrecognised
        action; HTTPError(405) is raised for a GET that carries an action and
        for any method other than GET/POST.
        """
        try:
            job = self.job_pool.get_job_by_id(job_id)
        except KeyError:
            ciel.log('No such job: %s' % job_id, 'MASTER', logging.ERROR)
            raise HTTPError(404)

        try:
            task = job.task_graph.get_task(task_id)
        except KeyError:
            ciel.log('No such task: %s in job: %s' % (task_id, job_id), 'MASTER', logging.ERROR)
            raise HTTPError(404)

        request_method = cherrypy.request.method

        if request_method == 'GET':
            if action is not None:
                ciel.log('Invalid operation: cannot GET with an action', 'MASTER', logging.ERROR)
                raise HTTPError(405)
            return simplejson.dumps(task.as_descriptor(long=True), cls=SWReferenceJSONEncoder)

        if request_method != 'POST':
            ciel.log('Invalid operation: only POST is supported for task operations', 'MASTER', logging.ERROR)
            raise HTTPError(405)

        # POST handling: each recognised action returns from its own branch.

        if action == 'report':
            # Multi-spawn-and-commit
            body = cherrypy.request.body.read()
            parsed = simplejson.loads(body, object_hook=json_decode_object_hook)
            reporting_worker = self.worker_pool.get_worker_by_id(parsed['worker'])
            job.report_tasks(parsed['report'], task, reporting_worker)
            return

        if action == 'failed':
            body = cherrypy.request.body.read()
            failure = simplejson.loads(body, object_hook=json_decode_object_hook)
            job.investigate_task_failure(task, failure)
            return simplejson.dumps(True)

        if action == 'publish':
            body = cherrypy.request.body.read()
            published_refs = simplejson.loads(body, object_hook=json_decode_object_hook)

            update = TaskGraphUpdate()
            for published_ref in published_refs:
                update.publish(published_ref, task)
            update.commit(job.task_graph)
            job.schedule()

            self.backup_sender.publish_refs(task_id, published_refs)
            return

        if action == 'log':
            # Message body is a JSON list containing UNIX timestamp in seconds and a message string.
            body = cherrypy.request.body.read()
            timestamp, message = simplejson.loads(body, object_hook=json_decode_object_hook)
            ciel.log("%s %f %s" % (task_id, timestamp, message), 'TASK_LOG', logging.INFO)
            return

        if action == 'abort':
            # FIXME (maybe): There is currently no abort method on Task.
            task.abort(task_id)
            return

        # Remaining cases: a POST without an action, or an unknown action.
        if action is None:
            ciel.log('Invalid operation: only GET is supported for tasks', 'MASTER', logging.ERROR)
        else:
            ciel.log('Unknown action (%s) on task (%s)' % (action, task_id), 'MASTER', logging.ERROR)
        raise HTTPError(404)