def spawn_tasks(self, parent_task_id, tasks):
    parent_task = self.task_graph.get_task(parent_task_id)
    tx = TaskGraphUpdate()
    for task_descriptor in tasks:
        task_object = build_taskpool_task_from_descriptor(task_descriptor, None, parent_task)
        tx.spawn(task_object)
    tx.commit(self.task_graph)
def commit_task(self, task_id, bindings, saved_continuation_uri=None, replay_uuid_list=None):
    task = self.task_graph.get_task(task_id)
    tx = TaskGraphUpdate()
    for id, ref in bindings.items():
        tx.publish(ref, task)
    tx.commit(self.task_graph)
    task.state = TASK_COMMITTED
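# All of the snippets above and below share one pattern: mutations to the task
# graph (spawning children, publishing references) are buffered in a
# TaskGraphUpdate and applied in one step by commit(). The class below is a
# minimal sketch of that pattern, inferred from the call sites in this file;
# it is NOT the real CIEL TaskGraphUpdate, and the graph.spawn/graph.publish
# calls it makes are assumptions about the task-graph API.
class TaskGraphUpdateSketch(object):

    def __init__(self):
        self.spawns = []     # tasks to add to the graph on commit
        self.publishes = []  # (reference, producing_task) pairs to publish on commit

    def spawn(self, task):
        self.spawns.append(task)

    def publish(self, reference, producing_task=None):
        self.publishes.append((reference, producing_task))

    def commit(self, graph):
        # Apply every buffered operation together, so that a half-processed
        # report is never visible in the graph.
        for task in self.spawns:
            graph.spawn(task)  # assumed graph method
        for (reference, producing_task) in self.publishes:
            graph.publish(reference, producing_task)  # assumed graph method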
def report_tasks(self, report, toplevel_task_id):
    task = self.task_graph.get_task(toplevel_task_id)
    tx = TaskGraphUpdate()
    for (parent_id, success, payload) in report:
        parent_task = self.task_graph.get_task(parent_id)
        if success:
            (spawned, published) = payload
            for child in spawned:
                child_task = build_taskpool_task_from_descriptor(child, parent_task)
                tx.spawn(child_task)
                parent_task.children.append(child_task)
            for ref in published:
                tx.publish(ref, parent_task)
        else:
            # Only one failed task per-report, at the moment.
            self.investigate_task_failure(parent_task, payload)
            self.lazy_task_pool.worker_pool.worker_idle(toplevel_task_id.worker)
            ciel.engine.publish('schedule')
            return
    tx.commit(self.task_graph)
def spawn_and_publish(self, spawns, refs, producer=None, taskset=None):
    producer_task = None
    if producer is not None:
        producer_task = self.get_task(producer["task_id"])
        taskset = producer_task.taskset
    upd = TaskGraphUpdate()
    for spawn in spawns:
        task_object = build_taskpool_task_from_descriptor(spawn, producer_task, taskset)
        upd.spawn(task_object)
    for ref in refs:
        upd.publish(ref, producer_task)
    upd.commit(self)
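# A hedged usage sketch for spawn_and_publish() above: producer is a task
# descriptor carrying at least "task_id", spawns is a list of child task
# descriptors, and refs is a list of references to publish. graph and the
# descriptor contents here are hypothetical, for illustration only.
graph.spawn_and_publish(
    spawns=[{"task_id": "task:child-1", "handler": "swi"}],  # hypothetical descriptor
    refs=[],
    producer={"task_id": "task:parent-1"})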
def _report_tasks(self, report, toplevel_task, worker):
    with self._lock:
        tx = TaskGraphUpdate()
        root_task = self.task_graph.get_task(report[0][0])
        for assigned_worker in root_task.get_workers():
            if assigned_worker is worker:
                self.workers[worker].deassign_task(root_task)
            else:
                self.workers[assigned_worker].deassign_task(root_task)
                assigned_worker.worker_pool.abort_task_on_worker(root_task, assigned_worker)
                # XXX: Need to abort the task running on other workers.
                pass
        for (parent_id, success, payload) in report:
            parent_task = self.task_graph.get_task(parent_id)
            if success:
                (spawned, published, profiling) = payload
                parent_task.set_profiling(profiling)
                parent_task.set_state(TASK_COMMITTED)
                self.record_task_stats(parent_task, worker)
                for child in spawned:
                    child_task = build_taskpool_task_from_descriptor(child, parent_task)
                    tx.spawn(child_task)
                    parent_task.children.append(child_task)
                for ref in published:
                    tx.publish(ref, parent_task)
            else:
                # Only one failed task per-report, at the moment.
                self.investigate_task_failure(parent_task, payload)
                self.schedule()
                return
        tx.commit(self.task_graph)
        self.task_graph.reduce_graph_for_references(toplevel_task.expected_outputs)
    # XXX: Need to remove assigned task from worker(s).
    self.schedule()
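# For orientation, the report that _report_tasks() unpacks above is a list of
# (parent_id, success, payload) tuples; on success the payload is
# (spawned_descriptors, published_refs, profiling). The literal below is an
# illustrative shape only -- the IDs, handler name, and profiling keys are
# hypothetical, and the failure payload (left commented out) is whatever
# investigate_task_failure() expects.
example_report = [
    ("task:parent-1", True,
     ([{"task_id": "task:child-1", "handler": "swi"}],  # spawned child descriptors
      [],                                               # published references
      {"STARTED": 1.0, "FINISHED": 2.5})),              # profiling timestamps
    # ("task:parent-2", False, failure_payload),
]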
def publish_refs(self, task_id, refs):
    tx = TaskGraphUpdate()
    task = self.task_graph.get_task(task_id)
    for ref in refs:
        tx.publish(ref, task)
    tx.commit(self.task_graph)
def default(self, job_id, task_id, action=None):
    try:
        job = self.job_pool.get_job_by_id(job_id)
    except KeyError:
        ciel.log('No such job: %s' % job_id, 'MASTER', logging.ERROR)
        raise HTTPError(404)
    try:
        task = job.task_graph.get_task(task_id)
    except KeyError:
        ciel.log('No such task: %s in job: %s' % (task_id, job_id), 'MASTER', logging.ERROR)
        raise HTTPError(404)
    if cherrypy.request.method == 'GET':
        if action is None:
            return simplejson.dumps(task.as_descriptor(long=True), cls=SWReferenceJSONEncoder)
        else:
            ciel.log('Invalid operation: cannot GET with an action', 'MASTER', logging.ERROR)
            raise HTTPError(405)
    elif cherrypy.request.method != 'POST':
        ciel.log('Invalid operation: only POST is supported for task operations', 'MASTER', logging.ERROR)
        raise HTTPError(405)

    # Action-handling starts here.
    if action == 'report':
        # Multi-spawn-and-commit
        report_payload = simplejson.loads(cherrypy.request.body.read(), object_hook=json_decode_object_hook)
        worker = self.worker_pool.get_worker_by_id(report_payload['worker'])
        report = report_payload['report']
        job.report_tasks(report, task, worker)
        return
    elif action == 'failed':
        failure_payload = simplejson.loads(cherrypy.request.body.read(), object_hook=json_decode_object_hook)
        job.investigate_task_failure(task, failure_payload)
        return simplejson.dumps(True)
    elif action == 'publish':
        request_body = cherrypy.request.body.read()
        refs = simplejson.loads(request_body, object_hook=json_decode_object_hook)
        tx = TaskGraphUpdate()
        for ref in refs:
            tx.publish(ref, task)
        tx.commit(job.task_graph)
        self.backup_sender.publish_refs(task_id, refs)
        ciel.engine.publish('schedule')
        return
    elif action == 'abort':
        # FIXME (maybe): There is currently no abort method on Task.
        task.abort(task_id)
        return
    elif action is None:
        ciel.log('Invalid operation: only GET is supported for tasks', 'MASTER', logging.ERROR)
        raise HTTPError(404)
    else:
        ciel.log('Unknown action (%s) on task (%s)' % (action, task_id), 'MASTER', logging.ERROR)
        raise HTTPError(404)
def default(self, job_id, task_id, action=None):
    try:
        job = self.job_pool.get_job_by_id(job_id)
    except KeyError:
        ciel.log('No such job: %s' % job_id, 'MASTER', logging.ERROR)
        raise HTTPError(404)
    try:
        task = job.task_graph.get_task(task_id)
    except KeyError:
        ciel.log('No such task: %s in job: %s' % (task_id, job_id), 'MASTER', logging.ERROR)
        raise HTTPError(404)
    if cherrypy.request.method == 'GET':
        if action is None:
            return simplejson.dumps(task.as_descriptor(long=True), cls=SWReferenceJSONEncoder)
        else:
            ciel.log('Invalid operation: cannot GET with an action', 'MASTER', logging.ERROR)
            raise HTTPError(405)
    elif cherrypy.request.method != 'POST':
        ciel.log('Invalid operation: only POST is supported for task operations', 'MASTER', logging.ERROR)
        raise HTTPError(405)

    # Action-handling starts here.
    if action == 'report':
        # Multi-spawn-and-commit
        report_payload = simplejson.loads(cherrypy.request.body.read(), object_hook=json_decode_object_hook)
        worker = self.worker_pool.get_worker_by_id(report_payload['worker'])
        report = report_payload['report']
        job.report_tasks(report, task, worker)
        return
    elif action == 'failed':
        failure_payload = simplejson.loads(cherrypy.request.body.read(), object_hook=json_decode_object_hook)
        job.investigate_task_failure(task, failure_payload)
        return simplejson.dumps(True)
    elif action == 'publish':
        request_body = cherrypy.request.body.read()
        refs = simplejson.loads(request_body, object_hook=json_decode_object_hook)
        tx = TaskGraphUpdate()
        for ref in refs:
            tx.publish(ref, task)
        tx.commit(job.task_graph)
        job.schedule()
        self.backup_sender.publish_refs(task_id, refs)
        return
    elif action == 'log':
        # Message body is a JSON list containing UNIX timestamp in seconds and a message string.
        request_body = cherrypy.request.body.read()
        timestamp, message = simplejson.loads(request_body, object_hook=json_decode_object_hook)
        ciel.log("%s %f %s" % (task_id, timestamp, message), 'TASK_LOG', logging.INFO)
    elif action == 'abort':
        # FIXME (maybe): There is currently no abort method on Task.
        task.abort(task_id)
        return
    elif action is None:
        ciel.log('Invalid operation: only GET is supported for tasks', 'MASTER', logging.ERROR)
        raise HTTPError(404)
    else:
        ciel.log('Unknown action (%s) on task (%s)' % (action, task_id), 'MASTER', logging.ERROR)
        raise HTTPError(404)
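# A hedged client-side sketch for the handler above, POSTing a 'log' action to
# the master. The mount point /control/job/<job_id>/task/<task_id>/<action> is
# an assumption about how this CherryPy tree is exposed, and master_url is a
# placeholder; adjust both to the real deployment.
import urllib2
import simplejson

def post_task_log(master_url, job_id, task_id, timestamp, message):
    url = '%s/control/job/%s/task/%s/log' % (master_url, job_id, task_id)  # assumed URL layout
    body = simplejson.dumps([timestamp, message])  # [unix_seconds, message], as the handler expects
    request = urllib2.Request(url, body, {'Content-Type': 'application/json'})
    return urllib2.urlopen(request).read()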