def __init__(self, root_task_descriptor, block_store, master_proxy, execution_features, worker, job, job_manager):
    """Set up execution state for the task set rooted at root_task_descriptor.

    Shared services (block store, master proxy, execution features) are
    taken from *worker*; the reference cache and task graph are shared
    with *job* rather than built locally.
    """
    self.id = root_task_descriptor['task_id']
    self._record_list_lock = threading.Lock()
    self.task_records = []
    # Service handles all come off the worker, not the positional args.
    self.block_store = worker.block_store
    self.master_proxy = worker.master_proxy
    self.execution_features = worker.execution_features
    self.worker = worker
    self.reference_cache = job.reference_cache
    # XXX: Should possibly combine_with()?
    # Seed the (shared) cache with the root task's concrete input refs.
    self.reference_cache.update((input_ref.id, input_ref) for input_ref in root_task_descriptor['inputs'])
    self.initial_td = root_task_descriptor
    self.task_graph = job.task_graph
    self._refcount = 0
    self.job = job
    self.job_manager = job_manager
    self.aborted = False
    # LocalJobOutput gets self so that it can notify us when done.
    self.job_output = LocalJobOutput(self.initial_td["expected_outputs"], self)
def __init__(self, root_task_descriptor, block_store, master_proxy, execution_features, worker):
    """Set up execution state for the task set rooted at root_task_descriptor.

    Builds a local task graph seeded with the root task, subscribes the
    job output to the root task's expected outputs, and publishes the
    root task's inputs so dependent tasks can become runnable.
    """
    self._lock = Lock()
    self.task_records = []
    # Currently-executing task (guarded by self._lock); None when idle.
    self.current_task = None
    self.current_td = None
    self.block_store = block_store
    self.master_proxy = master_proxy
    self.execution_features = execution_features
    self.worker = worker
    # Map ref id -> concrete reference.  Feed dict() a generator rather
    # than a list comprehension: same result, no throwaway list (C404).
    self.reference_cache = dict((ref.id, ref) for ref in root_task_descriptor["inputs"])
    self.initial_td = root_task_descriptor
    self.task_graph = LocalTaskGraph(execution_features, [self.initial_td["task_id"]])
    self.job_output = LocalJobOutput(self.initial_td["expected_outputs"])
    for ref in self.initial_td["expected_outputs"]:
        self.task_graph.subscribe(ref, self.job_output)
    self.task_graph.spawn_and_publish([self.initial_td], self.initial_td["inputs"])
def __init__(self, root_task_descriptor, block_store, master_proxy, execution_features, worker):
    """Prepare to execute the task set whose root is root_task_descriptor.

    Wires the root task into a fresh local task graph, registers the
    job output as a subscriber of each expected output, and publishes
    the root task's inputs.
    """
    self._lock = Lock()
    self.task_records = []
    self.current_task = None
    self.current_td = None
    self.block_store = block_store
    self.master_proxy = master_proxy
    self.execution_features = execution_features
    self.worker = worker
    # Cache of ref id -> concrete reference, seeded from the root inputs.
    self.reference_cache = {}
    for input_ref in root_task_descriptor["inputs"]:
        self.reference_cache[input_ref.id] = input_ref
    self.initial_td = root_task_descriptor
    root_id = self.initial_td["task_id"]
    self.task_graph = LocalTaskGraph(execution_features, [root_id])
    self.job_output = LocalJobOutput(self.initial_td["expected_outputs"])
    for expected_ref in self.initial_td["expected_outputs"]:
        self.task_graph.subscribe(expected_ref, self.job_output)
    self.task_graph.spawn_and_publish([self.initial_td], self.initial_td["inputs"])
class TaskSetExecutionRecord:
    """Executes a whole set of tasks locally, starting from one root task.

    Tasks are pulled from a LocalTaskGraph one at a time and run to
    completion; spawned tasks and published refs feed back into the
    graph until the job output is complete or a task fails.
    """

    def __init__(self, root_task_descriptor, block_store, master_proxy, execution_features, worker):
        """Set up execution state for the task set rooted at root_task_descriptor."""
        self._lock = Lock()
        self.task_records = []
        # Currently-executing task (guarded by self._lock); None when idle.
        self.current_task = None
        self.current_td = None
        self.block_store = block_store
        self.master_proxy = master_proxy
        self.execution_features = execution_features
        self.worker = worker
        # Map ref id -> concrete reference.  Feed dict() a generator rather
        # than a list comprehension: same result, no throwaway list (C404).
        self.reference_cache = dict((ref.id, ref) for ref in root_task_descriptor["inputs"])
        self.initial_td = root_task_descriptor
        self.task_graph = LocalTaskGraph(execution_features, [self.initial_td["task_id"]])
        self.job_output = LocalJobOutput(self.initial_td["expected_outputs"])
        for ref in self.initial_td["expected_outputs"]:
            self.task_graph.subscribe(ref, self.job_output)
        self.task_graph.spawn_and_publish([self.initial_td], self.initial_td["inputs"])

    def run(self):
        """Drive the task set to completion.

        Loops until the job output is complete, no runnable task remains,
        or a task fails.  On success each task's spawned tasks and
        published refs are pushed back into the task graph.
        """
        ciel.log.error("Running taskset starting at %s" % self.initial_td["task_id"], "TASKEXEC", logging.INFO)
        while not self.job_output.is_complete():
            next_td = self.task_graph.get_runnable_task()
            if next_td is None:
                ciel.log.error("No more runnable tasks", "TASKEXEC", logging.INFO)
                break
            # Resolve each dependency to a concrete, consumable reference.
            next_td["inputs"] = [self.retrieve_ref(ref) for ref in next_td["dependencies"]]
            task_record = TaskExecutionRecord(next_td, self, self.execution_features, self.block_store, self.master_proxy, self.worker)
            with self._lock:
                self.current_task = task_record
                self.current_td = next_td
            try:
                task_record.run()
            except Exception:
                # Was a bare `except:`, which also swallowed SystemExit and
                # KeyboardInterrupt; narrowed so interpreter-exit requests
                # still propagate.  The True arg logs the traceback.
                ciel.log.error('Error during executor task execution', 'TASKEXEC', logging.ERROR, True)
            with self._lock:
                self.current_task.cleanup()
                self.current_task = None
                self.current_td = None
            self.task_records.append(task_record)
            if task_record.success:
                self.task_graph.spawn_and_publish(task_record.spawned_tasks, task_record.published_refs, next_td)
            else:
                break
        ciel.log.error("Taskset complete", "TASKEXEC", logging.INFO)

    def retrieve_ref(self, ref):
        """Return a consumable version of *ref*.

        Consumable refs pass straight through; otherwise the reference
        cache is consulted.  Raises ReferenceUnavailableException when
        the ref is neither consumable nor cached.
        """
        if ref.is_consumable():
            return ref
        try:
            return self.reference_cache[ref.id]
        except KeyError:
            raise ReferenceUnavailableException(ref.id)

    def publish_ref(self, ref):
        """Record a newly-produced reference so later tasks can retrieve it."""
        self.reference_cache[ref.id] = ref

    def abort_task(self, task_id):
        """Abort the currently-executing task if its id matches *task_id*.

        No-op when idle or when a different task is current.  The
        `is not None` guard fixes a TypeError that the original raised
        when abort arrived between tasks (current_td was None).
        """
        with self._lock:
            if self.current_td is not None and self.current_td["task_id"] == task_id:
                self.current_task.executor.abort()
class TaskSetExecutionRecord:
    """Runs a complete task set locally, rooted at a single task descriptor.

    Runnable tasks are drawn from a LocalTaskGraph and executed one at a
    time; their spawned tasks and published refs are fed back into the
    graph until the job output completes or a task fails.
    """

    def __init__(self, root_task_descriptor, block_store, master_proxy, execution_features, worker):
        """Prepare local state, task graph, and job-output subscriptions."""
        self._lock = Lock()
        self.task_records = []
        # Task currently running (protected by self._lock); None between tasks.
        self.current_task = None
        self.current_td = None
        self.block_store = block_store
        self.master_proxy = master_proxy
        self.execution_features = execution_features
        self.worker = worker
        # ref id -> concrete reference.  Generator into dict() avoids the
        # intermediate list that dict([ ... ]) built (C404).
        self.reference_cache = dict((ref.id, ref) for ref in root_task_descriptor["inputs"])
        self.initial_td = root_task_descriptor
        self.task_graph = LocalTaskGraph(execution_features, [self.initial_td["task_id"]])
        self.job_output = LocalJobOutput(self.initial_td["expected_outputs"])
        for ref in self.initial_td["expected_outputs"]:
            self.task_graph.subscribe(ref, self.job_output)
        self.task_graph.spawn_and_publish([self.initial_td], self.initial_td["inputs"])

    def run(self):
        """Execute tasks until completion, exhaustion, or first failure."""
        ciel.log.error("Running taskset starting at %s" % self.initial_td["task_id"], "TASKEXEC", logging.INFO)
        while not self.job_output.is_complete():
            next_td = self.task_graph.get_runnable_task()
            if next_td is None:
                ciel.log.error("No more runnable tasks", "TASKEXEC", logging.INFO)
                break
            # Materialize each dependency as a consumable reference.
            next_td["inputs"] = [self.retrieve_ref(ref) for ref in next_td["dependencies"]]
            task_record = TaskExecutionRecord(next_td, self, self.execution_features, self.block_store, self.master_proxy, self.worker)
            with self._lock:
                self.current_task = task_record
                self.current_td = next_td
            try:
                task_record.run()
            except Exception:
                # Narrowed from a bare `except:` so SystemExit and
                # KeyboardInterrupt propagate; traceback is logged (True).
                ciel.log.error('Error during executor task execution', 'TASKEXEC', logging.ERROR, True)
            with self._lock:
                self.current_task.cleanup()
                self.current_task = None
                self.current_td = None
            self.task_records.append(task_record)
            if task_record.success:
                self.task_graph.spawn_and_publish(task_record.spawned_tasks, task_record.published_refs, next_td)
            else:
                break
        ciel.log.error("Taskset complete", "TASKEXEC", logging.INFO)

    def retrieve_ref(self, ref):
        """Return *ref* if consumable, else its cached concrete version.

        Raises ReferenceUnavailableException if the ref is not cached.
        """
        if ref.is_consumable():
            return ref
        try:
            return self.reference_cache[ref.id]
        except KeyError:
            raise ReferenceUnavailableException(ref.id)

    def publish_ref(self, ref):
        """Make a newly-produced reference available to subsequent tasks."""
        self.reference_cache[ref.id] = ref

    def abort_task(self, task_id):
        """Abort the running task if its id matches *task_id*; else no-op.

        Guarding on current_td fixes a TypeError the original raised when
        an abort arrived while no task was executing (current_td is None).
        """
        with self._lock:
            if self.current_td is not None and self.current_td["task_id"] == task_id:
                self.current_task.executor.abort()