def spawn_and_publish(self, spawns, refs, producer=None, taskset=None):
    """Spawn tasks and publish references in a single graph update.

    spawns   -- iterable of task descriptors; each is turned into a task
                object with build_taskpool_task_from_descriptor().
    refs     -- iterable of references to publish.
    producer -- optional mapping with a "task_id" key. When given, the
                task it names is looked up via self.get_task() and used
                as the producer of every spawn and publish, and that
                task's taskset replaces the ``taskset`` argument.
    taskset  -- taskset handed to the task builder; only used as passed
                when no producer is supplied.

    The accumulated update is committed against ``self``. Nothing is
    returned.
    """
    if producer is None:
        producer_task = None
    else:
        producer_task = self.get_task(producer["task_id"])
        # The producer's own taskset always wins over the caller's argument.
        taskset = producer_task.taskset

    update = TaskGraphUpdate()

    for descriptor in spawns:
        update.spawn(
            build_taskpool_task_from_descriptor(descriptor, producer_task, taskset))

    for published_ref in refs:
        update.publish(published_ref, producer_task)

    update.commit(self)
def _report_tasks(self, report, toplevel_task, worker):
    """Apply a worker's task report to the task graph, then reschedule.

    XXX: despite the name, this does far more than report tasks. It
    deassigns the reporting task from the worker, marks each successful
    task as committed, records its statistics, collects the tasks it
    spawned and the references it published into one TaskGraphUpdate,
    commits that update, calls reduce_graph_for_references() and finally
    calls schedule().

    report        -- sequence of (task_id, success, payload) records. The
                     task id of the first record names the task that is
                     deassigned from the worker. A successful record's
                     payload unpacks as (spawned, published, profiling);
                     a failed record's payload is passed unchanged to
                     investigate_task_failure().
    toplevel_task -- task whose expected_outputs are passed to
                     reduce_graph_for_references().
    worker        -- key into self.workers for the reporting worker.

    The first failed record stops processing: the failure is
    investigated, schedule() is called and the method returns without
    committing the update.
    """
    with self._lock:
        update = TaskGraphUpdate()

        root_task = self.task_graph.get_task(report[0][0])
        received_msg = ('Received report from task %s with %d entries'
                        % (root_task.task_id, len(report)))
        ciel.log(received_msg, 'SCHED', logging.DEBUG)

        try:
            self.workers[worker].deassign_task(root_task)
        except KeyError:
            # The report can arrive after the worker has been deemed failed.
            # Accept the report anyway and ignore the missing worker.
            pass

        for record in report:
            parent_id, success, payload = record
            ciel.log('Processing report record from task %s' % parent_id,
                     'SCHED', logging.DEBUG)
            parent_task = self.task_graph.get_task(parent_id)

            if not success:
                ciel.log('Task %s failed' % parent_id, 'SCHED', logging.WARN)
                # Only one failed task per report, at the moment.
                # NOTE(review): records handled before this one were already
                # marked TASK_COMMITTED, but the update is never committed on
                # this path -- confirm that is intended.
                self.investigate_task_failure(parent_task, payload)
                self.schedule()
                return

            ciel.log('Task %s was successful' % parent_id,
                     'SCHED', logging.DEBUG)
            spawned, published, profiling = payload
            parent_task.set_profiling(profiling)
            parent_task.set_state(TASK_COMMITTED)
            self.record_task_stats(parent_task, worker)

            for descriptor in spawned:
                child_task = build_taskpool_task_from_descriptor(
                    descriptor, parent_task)
                ciel.log('Task %s spawned task %s'
                         % (parent_id, child_task.task_id),
                         'SCHED', logging.DEBUG)
                update.spawn(child_task)

            for ref in published:
                ciel.log('Task %s published reference %s'
                         % (parent_id, str(ref)),
                         'SCHED', logging.DEBUG)
                update.publish(ref, parent_task)

        update.commit(self.task_graph)
        self.task_graph.reduce_graph_for_references(
            toplevel_task.expected_outputs)

    # XXX: Need to remove assigned task from worker(s).
    # NOTE(review): this call is made after the lock is released -- confirm
    # that matches the intended locking.
    self.schedule()
def spawn_and_publish(self, spawns, refs, producer=None, taskset=None):
    """Spawn tasks and publish references in a single graph update.

    spawns   -- iterable of task descriptors; each is turned into a task
                object with build_taskpool_task_from_descriptor().
    refs     -- iterable of references to publish.
    producer -- optional mapping with a "task_id" key. When given, the
                task it names is looked up via self.get_task() and used
                as the producer of every spawn and publish, and that
                task's taskset replaces the ``taskset`` argument.
    taskset  -- taskset handed to the task builder; only used as passed
                when no producer is supplied.

    The accumulated update is committed against ``self``. Nothing is
    returned.
    """
    if producer is None:
        producer_task = None
    else:
        producer_task = self.get_task(producer["task_id"])
        # The producer's own taskset always wins over the caller's argument.
        taskset = producer_task.taskset

    update = TaskGraphUpdate()

    for descriptor in spawns:
        update.spawn(
            build_taskpool_task_from_descriptor(descriptor, producer_task, taskset))

    for published_ref in refs:
        update.publish(published_ref, producer_task)

    update.commit(self)
def default(self, job_id, task_id, action=None):
    """Handle an HTTP request addressed to one task of one job.

    job_id  -- id used to look the job up in self.job_pool.
    task_id -- id used to look the task up in the job's task graph.
    action  -- optional operation name: 'report', 'failed', 'publish',
               'log' or 'abort'. None selects the plain task view.

    GET without an action returns the task's long descriptor as JSON.
    Every action requires POST, with a JSON body where the action reads
    one. 'failed' returns the JSON encoding of True; the other actions
    return None.

    Raises HTTPError(404) for an unknown job or task, for a POST with no
    action and for an unrecognised action. Raises HTTPError(405) for a
    GET that names an action and for any method other than GET or POST.
    """

    def read_json_body():
        # Decode the request body with the project's JSON object hook.
        raw_body = cherrypy.request.body.read()
        return simplejson.loads(raw_body, object_hook=json_decode_object_hook)

    if action == 'report':
        ciel.stopwatch.multi(starts=["master_task"], laps=["end_to_end"])

    try:
        job = self.job_pool.get_job_by_id(job_id)
    except KeyError:
        ciel.log('No such job: %s' % job_id, 'MASTER', logging.ERROR)
        raise HTTPError(404)

    try:
        task = job.task_graph.get_task(task_id)
    except KeyError:
        ciel.log('No such task: %s in job: %s' % (task_id, job_id),
                 'MASTER', logging.ERROR)
        raise HTTPError(404)

    http_method = cherrypy.request.method
    if http_method == 'GET':
        if action is not None:
            ciel.log('Invalid operation: cannot GET with an action',
                     'MASTER', logging.ERROR)
            raise HTTPError(405)
        return simplejson.dumps(task.as_descriptor(long=True),
                                cls=SWReferenceJSONEncoder)
    if http_method != 'POST':
        ciel.log('Invalid operation: only POST is supported for task operations',
                 'MASTER', logging.ERROR)
        raise HTTPError(405)

    # Action handling starts here; from this point the request is a POST.

    if action == 'report':
        # Multi-spawn-and-commit.
        report_payload = read_json_body()
        worker = self.worker_pool.get_worker_by_id(report_payload['worker'])
        job.report_tasks(report_payload['report'], task, worker)
        return

    if action == 'failed':
        job.investigate_task_failure(task, read_json_body())
        return simplejson.dumps(True)

    if action == 'publish':
        refs = read_json_body()
        update = TaskGraphUpdate()
        for ref in refs:
            update.publish(ref, task)
        update.commit(job.task_graph)
        job.schedule()
        self.backup_sender.publish_refs(task_id, refs)
        return

    if action == 'log':
        # The body is a JSON list holding a UNIX timestamp in seconds and a
        # message string.
        timestamp, message = read_json_body()
        ciel.log("%s %f %s" % (task_id, timestamp, message),
                 'TASK_LOG', logging.INFO)
        return

    if action == 'abort':
        # FIXME (maybe): There is currently no abort method on Task.
        task.abort(task_id)
        return

    if action is None:
        ciel.log('Invalid operation: only GET is supported for tasks',
                 'MASTER', logging.ERROR)
        raise HTTPError(404)

    ciel.log('Unknown action (%s) on task (%s)' % (action, task_id),
             'MASTER', logging.ERROR)
    raise HTTPError(404)
def _report_tasks(self, report, toplevel_task, worker):
    """Apply a worker's task report to the task graph, then reschedule.

    Deassigns the reporting task from the worker, marks each successful
    task as committed, records its statistics, collects the tasks it
    spawned and the references it published into one TaskGraphUpdate,
    commits that update, calls reduce_graph_for_references() and finally
    calls schedule().

    report        -- sequence of (task_id, success, payload) records. The
                     task id of the first record names the task that is
                     deassigned from the worker. A successful record's
                     payload unpacks as (spawned, published, profiling);
                     a failed record's payload is passed unchanged to
                     investigate_task_failure().
    toplevel_task -- task whose expected_outputs are passed to
                     reduce_graph_for_references().
    worker        -- key into self.workers for the reporting worker.

    The first failed record stops processing: the failure is
    investigated, schedule() is called and the method returns without
    committing the update.
    """
    with self._lock:
        update = TaskGraphUpdate()

        root_task = self.task_graph.get_task(report[0][0])
        received_msg = ('Received report from task %s with %d entries'
                        % (root_task.task_id, len(report)))
        ciel.log(received_msg, 'SCHED', logging.DEBUG)

        try:
            self.workers[worker].deassign_task(root_task)
        except KeyError:
            # The report can arrive after the worker has been deemed failed.
            # Accept the report anyway and ignore the missing worker.
            pass

        for record in report:
            parent_id, success, payload = record
            ciel.log('Processing report record from task %s' % parent_id,
                     'SCHED', logging.DEBUG)
            parent_task = self.task_graph.get_task(parent_id)

            if not success:
                ciel.log('Task %s failed' % parent_id, 'SCHED', logging.WARN)
                # Only one failed task per report, at the moment.
                # NOTE(review): records handled before this one were already
                # marked TASK_COMMITTED, but the update is never committed on
                # this path -- confirm that is intended.
                self.investigate_task_failure(parent_task, payload)
                self.schedule()
                return

            ciel.log('Task %s was successful' % parent_id,
                     'SCHED', logging.DEBUG)
            spawned, published, profiling = payload
            parent_task.set_profiling(profiling)
            parent_task.set_state(TASK_COMMITTED)
            self.record_task_stats(parent_task, worker)

            for descriptor in spawned:
                child_task = build_taskpool_task_from_descriptor(
                    descriptor, parent_task)
                ciel.log('Task %s spawned task %s'
                         % (parent_id, child_task.task_id),
                         'SCHED', logging.DEBUG)
                update.spawn(child_task)

            for ref in published:
                ciel.log('Task %s published reference %s'
                         % (parent_id, str(ref)),
                         'SCHED', logging.DEBUG)
                update.publish(ref, parent_task)

        update.commit(self.task_graph)
        self.task_graph.reduce_graph_for_references(
            toplevel_task.expected_outputs)

    # XXX: Need to remove assigned task from worker(s).
    # NOTE(review): this call is made after the lock is released -- confirm
    # that matches the intended locking.
    self.schedule()