Example #1
0
 def spawn_and_publish(self, spawns, refs, producer=None, taskset=None):
     """Spawn a batch of tasks and publish a batch of references in one update.

     Each descriptor in ``spawns`` is turned into a task object with
     ``build_taskpool_task_from_descriptor`` and each reference in ``refs``
     is published; all of these changes are collected in a single
     ``TaskGraphUpdate`` which is finally committed against ``self``.

     When ``producer`` is given, the task named by ``producer["task_id"]``
     is looked up through ``self.get_task``. That task is handed to the
     descriptor builder and to every ``publish`` call, and its ``taskset``
     overrides the ``taskset`` argument. Without a producer, ``None`` is
     used in its place and the caller's ``taskset`` is passed through.
     """
     if producer is None:
         origin_task = None
     else:
         origin_task = self.get_task(producer["task_id"])
         # The producer's own task set wins over whatever the caller passed.
         taskset = origin_task.taskset

     update = TaskGraphUpdate()
     for descriptor in spawns:
         update.spawn(build_taskpool_task_from_descriptor(descriptor, origin_task, taskset))
     for published_ref in refs:
         update.publish(published_ref, origin_task)
     update.commit(self)
Example #2
0
    def _report_tasks(self, report, toplevel_task, worker):
        """Apply a worker's task report to the task graph, then reschedule.

        ``report`` is a sequence of ``(task_id, success, payload)`` records.
        The task named by the first record is deassigned from ``worker``.
        For a successful record the payload is unpacked as
        ``(spawned, published, profiling)``: the task is marked
        ``TASK_COMMITTED``, its stats are recorded, and its spawned tasks and
        published references are queued in a single ``TaskGraphUpdate``.

        The update is committed, and the graph reduced for
        ``toplevel_task.expected_outputs``, only when every record succeeded.
        On the first failed record the payload is handed to
        ``investigate_task_failure``, ``schedule`` is called while the lock is
        still held, and the method returns without committing.

        NOTE: despite its name this method does considerably more than
        reporting - it mutates task state and the task graph.
        """
        with self._lock:
            update = TaskGraphUpdate()

            first_task = self.task_graph.get_task(report[0][0])
            ciel.log('Received report from task %s with %d entries' % (first_task.task_id, len(report)),
                     'SCHED', logging.DEBUG)

            try:
                self.workers[worker].deassign_task(first_task)
            except KeyError:
                # The report can arrive after the worker has been deemed
                # failed; the report is still accepted and the failed worker
                # is simply ignored.
                pass

            for task_id, succeeded, payload in report:
                ciel.log('Processing report record from task %s' % task_id, 'SCHED', logging.DEBUG)
                reporting_task = self.task_graph.get_task(task_id)

                if not succeeded:
                    ciel.log('Task %s failed' % task_id, 'SCHED', logging.WARN)
                    # Only one failed task per report is handled at the
                    # moment, so processing stops at the first failure.
                    self.investigate_task_failure(reporting_task, payload)
                    self.schedule()
                    return

                ciel.log('Task %s was successful' % task_id, 'SCHED', logging.DEBUG)
                spawned, published, profiling = payload
                reporting_task.set_profiling(profiling)
                reporting_task.set_state(TASK_COMMITTED)
                self.record_task_stats(reporting_task, worker)

                for descriptor in spawned:
                    new_task = build_taskpool_task_from_descriptor(descriptor, reporting_task)
                    ciel.log('Task %s spawned task %s' % (task_id, new_task.task_id), 'SCHED', logging.DEBUG)
                    update.spawn(new_task)

                for published_ref in published:
                    ciel.log('Task %s published reference %s' % (task_id, str(published_ref)),
                             'SCHED', logging.DEBUG)
                    update.publish(published_ref, reporting_task)

            update.commit(self.task_graph)
            self.task_graph.reduce_graph_for_references(toplevel_task.expected_outputs)

        # XXX: Need to remove assigned task from worker(s).
        self.schedule()
Example #3
0
    def spawn_and_publish(self, spawns, refs, producer=None, taskset=None):
        """Commit a set of new tasks and published references as one update.

        ``spawns`` holds task descriptors, each converted by
        ``build_taskpool_task_from_descriptor``; ``refs`` holds references to
        publish. Both are queued on one ``TaskGraphUpdate`` that is committed
        against ``self`` at the end.

        If ``producer`` is supplied, ``self.get_task(producer["task_id"])``
        provides the task that is passed to the descriptor builder and to
        each ``publish`` call, and that task's ``taskset`` replaces the
        ``taskset`` argument. Otherwise ``None`` stands in for the producer
        task and ``taskset`` is used as given.
        """
        if producer is None:
            source_task = None
        else:
            source_task = self.get_task(producer["task_id"])
            # A known producer dictates the task set for everything it spawns.
            taskset = source_task.taskset

        pending = TaskGraphUpdate()
        for descriptor in spawns:
            pending.spawn(
                build_taskpool_task_from_descriptor(descriptor, source_task, taskset))
        for published_ref in refs:
            pending.publish(published_ref, source_task)
        pending.commit(self)
Example #4
0
    def default(self, job_id, task_id, action=None):
        """Serve a request addressed to a single task of a job.

        A GET without an action returns the task's long descriptor as JSON.
        A POST is dispatched on ``action``:

        * ``'report'``  - hand the decoded report to ``job.report_tasks``.
        * ``'failed'``  - hand the decoded payload to
          ``job.investigate_task_failure`` and return JSON ``true``.
        * ``'publish'`` - publish the decoded references for the task, ask
          the job to schedule, and pass the refs to ``self.backup_sender``.
        * ``'log'``     - log the decoded ``[timestamp, message]`` pair under
          the ``TASK_LOG`` tag.
        * ``'abort'``   - call ``task.abort``.

        Raises ``HTTPError(404)`` when the job or the task is unknown, or
        when a POST has no action or an unrecognised one; raises
        ``HTTPError(405)`` for a GET that carries an action and for any
        method other than GET or POST.
        """
        if action == 'report':
            # Timing starts before any lookup work is done for the report.
            ciel.stopwatch.multi(starts=["master_task"], laps=["end_to_end"])

        try:
            job = self.job_pool.get_job_by_id(job_id)
        except KeyError:
            ciel.log('No such job: %s' % job_id, 'MASTER', logging.ERROR)
            raise HTTPError(404)

        try:
            task = job.task_graph.get_task(task_id)
        except KeyError:
            ciel.log('No such task: %s in job: %s' % (task_id, job_id), 'MASTER', logging.ERROR)
            raise HTTPError(404)

        http_method = cherrypy.request.method
        if http_method == 'GET':
            if action is not None:
                ciel.log('Invalid operation: cannot GET with an action', 'MASTER', logging.ERROR)
                raise HTTPError(405)
            return simplejson.dumps(task.as_descriptor(long=True), cls=SWReferenceJSONEncoder)
        if http_method != 'POST':
            ciel.log('Invalid operation: only POST is supported for task operations', 'MASTER', logging.ERROR)
            raise HTTPError(405)

        def decode_body():
            # Read the request body and parse it as JSON with the shared object hook.
            return simplejson.loads(cherrypy.request.body.read(), object_hook=json_decode_object_hook)

        # Everything below handles a POST with a specific action.

        if action == 'report':
            # Multi-spawn-and-commit.
            envelope = decode_body()
            reporting_worker = self.worker_pool.get_worker_by_id(envelope['worker'])
            task_report = envelope['report']
            job.report_tasks(task_report, task, reporting_worker)
            return

        if action == 'failed':
            failure_details = decode_body()
            job.investigate_task_failure(task, failure_details)
            return simplejson.dumps(True)

        if action == 'publish':
            published_refs = decode_body()

            update = TaskGraphUpdate()
            for published_ref in published_refs:
                update.publish(published_ref, task)
            update.commit(job.task_graph)
            job.schedule()

            self.backup_sender.publish_refs(task_id, published_refs)
            return

        if action == 'log':
            # The body is a JSON list: a UNIX timestamp in seconds followed by a message string.
            timestamp, message = decode_body()
            ciel.log("%s %f %s" % (task_id, timestamp, message), 'TASK_LOG', logging.INFO)
            return

        if action == 'abort':
            # FIXME (maybe): There is currently no abort method on Task.
            task.abort(task_id)
            return

        # No handler matched: distinguish a missing action from an unknown one in the log.
        if action is None:
            ciel.log('Invalid operation: only GET is supported for tasks', 'MASTER', logging.ERROR)
        else:
            ciel.log('Unknown action (%s) on task (%s)' % (action, task_id), 'MASTER', logging.ERROR)
        raise HTTPError(404)
Example #5
0
    def default(self, job_id, task_id, action=None):
        """Handle an HTTP request for one task, identified by job and task id.

        GET with no action returns the JSON-encoded long descriptor of the
        task. POST selects behaviour from ``action``:

        * ``'report'``  - decode the body and call ``job.report_tasks`` with
          its ``'report'`` entry and the worker named by its ``'worker'``
          entry.
        * ``'failed'``  - decode the body, call
          ``job.investigate_task_failure`` and return JSON ``true``.
        * ``'publish'`` - decode a list of references, publish each for the
          task in one ``TaskGraphUpdate``, call ``job.schedule`` and then
          ``self.backup_sender.publish_refs``.
        * ``'log'``     - decode a ``[timestamp, message]`` pair and log it
          with the ``TASK_LOG`` tag.
        * ``'abort'``   - call ``task.abort``.

        Raises ``HTTPError(404)`` for an unknown job or task and for a POST
        whose action is missing or unrecognised; raises ``HTTPError(405)``
        for a GET with an action and for methods other than GET and POST.
        """
        if action == 'report':
            # Start the stopwatch ahead of the job and task lookups.
            ciel.stopwatch.multi(starts=["master_task"], laps=["end_to_end"])

        try:
            job = self.job_pool.get_job_by_id(job_id)
        except KeyError:
            ciel.log('No such job: %s' % job_id, 'MASTER', logging.ERROR)
            raise HTTPError(404)

        try:
            task = job.task_graph.get_task(task_id)
        except KeyError:
            ciel.log('No such task: %s in job: %s' % (task_id, job_id), 'MASTER', logging.ERROR)
            raise HTTPError(404)

        request_method = cherrypy.request.method
        if request_method == 'GET':
            if action is not None:
                ciel.log('Invalid operation: cannot GET with an action', 'MASTER', logging.ERROR)
                raise HTTPError(405)
            return simplejson.dumps(task.as_descriptor(long=True), cls=SWReferenceJSONEncoder)
        if request_method != 'POST':
            ciel.log('Invalid operation: only POST is supported for task operations', 'MASTER', logging.ERROR)
            raise HTTPError(405)

        def read_json_body():
            # Parse the raw request body as JSON using the shared object hook.
            return simplejson.loads(cherrypy.request.body.read(), object_hook=json_decode_object_hook)

        # From here on the request is a POST; dispatch on the action.

        if action == 'report':
            # Multi-spawn-and-commit.
            report_envelope = read_json_body()
            source_worker = self.worker_pool.get_worker_by_id(report_envelope['worker'])
            records = report_envelope['report']
            job.report_tasks(records, task, source_worker)
            return

        if action == 'failed':
            failure_info = read_json_body()
            job.investigate_task_failure(task, failure_info)
            return simplejson.dumps(True)

        if action == 'publish':
            refs_to_publish = read_json_body()

            graph_update = TaskGraphUpdate()
            for one_ref in refs_to_publish:
                graph_update.publish(one_ref, task)
            graph_update.commit(job.task_graph)
            job.schedule()

            self.backup_sender.publish_refs(task_id, refs_to_publish)
            return

        if action == 'log':
            # The body is a JSON list: a UNIX timestamp in seconds followed by a message string.
            timestamp, message = read_json_body()
            ciel.log("%s %f %s" % (task_id, timestamp, message), 'TASK_LOG', logging.INFO)
            return

        if action == 'abort':
            # FIXME (maybe): There is currently no abort method on Task.
            task.abort(task_id)
            return

        # Nothing matched: log whether the action was absent or unknown, then reject.
        if action is None:
            ciel.log('Invalid operation: only GET is supported for tasks', 'MASTER', logging.ERROR)
        else:
            ciel.log('Unknown action (%s) on task (%s)' % (action, task_id), 'MASTER', logging.ERROR)
        raise HTTPError(404)
Example #6
0
    def _report_tasks(self, report, toplevel_task, worker):
        """Process a worker's report of task outcomes and update the graph.

        ``report`` is iterated as ``(task_id, success, payload)`` records; the
        task named by the first record is deassigned from ``worker``. Each
        successful record has its payload unpacked as
        ``(spawned, published, profiling)``; the task is set to
        ``TASK_COMMITTED``, its stats are recorded, and its spawned tasks and
        published references are added to one shared ``TaskGraphUpdate``.

        If every record succeeded, the update is committed to
        ``self.task_graph``, the graph is reduced for
        ``toplevel_task.expected_outputs``, and ``schedule`` is called after
        the lock is released. If a record failed, its payload goes to
        ``investigate_task_failure``, ``schedule`` is called with the lock
        still held, and the method returns without committing the update.
        """
        with self._lock:
            graph_update = TaskGraphUpdate()

            head_task = self.task_graph.get_task(report[0][0])
            entry_count = len(report)
            ciel.log(
                'Received report from task %s with %d entries' % (head_task.task_id, entry_count),
                'SCHED', logging.DEBUG)

            try:
                self.workers[worker].deassign_task(head_task)
            except KeyError:
                # A report may be received after its worker was deemed to
                # have failed. The report is accepted anyway and the failed
                # worker is ignored.
                pass

            for record_id, ok, payload in report:
                ciel.log('Processing report record from task %s' % record_id,
                         'SCHED', logging.DEBUG)
                owner_task = self.task_graph.get_task(record_id)

                if not ok:
                    ciel.log('Task %s failed' % record_id, 'SCHED', logging.WARN)
                    # Only one failed task per report is supported for now,
                    # so the remaining records are not examined.
                    self.investigate_task_failure(owner_task, payload)
                    self.schedule()
                    return

                ciel.log('Task %s was successful' % record_id, 'SCHED', logging.DEBUG)
                spawned, published, profiling = payload
                owner_task.set_profiling(profiling)
                owner_task.set_state(TASK_COMMITTED)
                self.record_task_stats(owner_task, worker)

                for child_descriptor in spawned:
                    spawned_task = build_taskpool_task_from_descriptor(child_descriptor, owner_task)
                    ciel.log(
                        'Task %s spawned task %s' % (record_id, spawned_task.task_id),
                        'SCHED', logging.DEBUG)
                    graph_update.spawn(spawned_task)

                for output_ref in published:
                    ciel.log(
                        'Task %s published reference %s' % (record_id, str(output_ref)),
                        'SCHED', logging.DEBUG)
                    graph_update.publish(output_ref, owner_task)

            graph_update.commit(self.task_graph)
            self.task_graph.reduce_graph_for_references(toplevel_task.expected_outputs)

        # XXX: Need to remove assigned task from worker(s).
        self.schedule()