def __init__(self, root_task_descriptor, block_store, master_proxy, execution_features, worker):
    """Set up execution state for a task set rooted at root_task_descriptor.

    Seeds the reference cache from the root task's inputs, builds a local
    task graph rooted at the task, subscribes the job output to every
    expected output reference, and publishes the root task so that it
    becomes runnable.
    """
    self._lock = Lock()
    self.task_records = []
    # Task currently executing; both fields are guarded by _lock.
    self.current_task = None
    self.current_td = None
    self.block_store = block_store
    self.master_proxy = master_proxy
    self.execution_features = execution_features
    self.worker = worker
    # Map ref.id -> concrete reference. A generator avoids building an
    # intermediate list just to feed dict().
    self.reference_cache = dict((ref.id, ref) for ref in root_task_descriptor["inputs"])
    self.initial_td = root_task_descriptor
    self.task_graph = LocalTaskGraph(execution_features, [self.initial_td["task_id"]])
    self.job_output = LocalJobOutput(self.initial_td["expected_outputs"])
    # The job is complete once every expected output has been produced.
    for ref in self.initial_td["expected_outputs"]:
        self.task_graph.subscribe(ref, self.job_output)
    self.task_graph.spawn_and_publish([self.initial_td], self.initial_td["inputs"])
def __init__(self, root_task_descriptor, block_store, master_proxy, execution_features, worker):
    """Initialise bookkeeping for executing a task set locally.

    The reference cache starts out holding the root task's input
    references; a LocalTaskGraph is created for the root task and the
    job output is subscribed to each expected output so completion can
    be detected. Finally the root task is published into the graph.
    """
    self._lock = Lock()
    self.task_records = []
    # current_task / current_td track the in-flight task under _lock.
    self.current_task = None
    self.current_td = None
    self.block_store = block_store
    self.master_proxy = master_proxy
    self.execution_features = execution_features
    self.worker = worker
    # Generator form: no throwaway list, same resulting dict.
    self.reference_cache = dict((ref.id, ref) for ref in root_task_descriptor["inputs"])
    self.initial_td = root_task_descriptor
    self.task_graph = LocalTaskGraph(execution_features, [self.initial_td["task_id"]])
    self.job_output = LocalJobOutput(self.initial_td["expected_outputs"])
    # Subscribe once per expected output; all must arrive for completion.
    for ref in self.initial_td["expected_outputs"]:
        self.task_graph.subscribe(ref, self.job_output)
    self.task_graph.spawn_and_publish([self.initial_td], self.initial_td["inputs"])
def __init__(self, id, worker, tickets=1000000):
    """Initialise a per-job execution record on this worker.

    Creates one incoming queue and one runnable queue for each of the
    worker's scheduling classes, plus the shared task graph and the
    lottery-scheduling state (tickets, task cost).

    Note: the ``id`` parameter shadows the builtin, but the name is part
    of the existing call interface and is kept for compatibility.
    """
    self.id = id
    self.incoming_queues = {}
    self.runnable_queues = {}
    # One queue pair per scheduling class; iterating the dict directly
    # yields its keys, so .keys() was redundant.
    for scheduling_class in worker.scheduling_classes:
        self.incoming_queues[scheduling_class] = Queue.Queue()
        self.runnable_queues[scheduling_class] = Queue.Queue()
    self.reference_cache = {}
    self.task_graph = LocalTaskGraph(worker.execution_features, runnable_queues=self.runnable_queues)
    self.active_or_queued_tasksets = 0
    self.running_tasks = 0
    self.active_tasksets = {}
    # Lottery-scheduling tickets for this job (default one million).
    self.tickets = tickets
    self.job_aborted = False
    self._tasksets_lock = threading.Lock()
    # Per-task cost estimate, seeded from the module-level initial value.
    self.task_cost = INITIAL_TASK_COST
class TaskSetExecutionRecord:
    """Runs an entire task set locally on one worker.

    Starting from a root task descriptor, repeatedly pulls runnable tasks
    from a LocalTaskGraph, resolves their dependencies through a local
    reference cache, executes them, and publishes their spawned tasks and
    produced references back into the graph until the job output is
    complete (or a task fails).
    """

    def __init__(self, root_task_descriptor, block_store, master_proxy, execution_features, worker):
        """Seed the reference cache, build the task graph, and publish the root task."""
        self._lock = Lock()
        self.task_records = []
        # Task currently executing; both fields are guarded by _lock.
        self.current_task = None
        self.current_td = None
        self.block_store = block_store
        self.master_proxy = master_proxy
        self.execution_features = execution_features
        self.worker = worker
        # Generator form avoids building an intermediate list for dict().
        self.reference_cache = dict((ref.id, ref) for ref in root_task_descriptor["inputs"])
        self.initial_td = root_task_descriptor
        self.task_graph = LocalTaskGraph(execution_features, [self.initial_td["task_id"]])
        self.job_output = LocalJobOutput(self.initial_td["expected_outputs"])
        # The job is complete once every expected output has been produced.
        for ref in self.initial_td["expected_outputs"]:
            self.task_graph.subscribe(ref, self.job_output)
        self.task_graph.spawn_and_publish([self.initial_td], self.initial_td["inputs"])

    def run(self):
        """Execute runnable tasks until the job output is complete or a task fails."""
        ciel.log.error("Running taskset starting at %s" % self.initial_td["task_id"], "TASKEXEC", logging.INFO)
        while not self.job_output.is_complete():
            next_td = self.task_graph.get_runnable_task()
            if next_td is None:
                ciel.log.error("No more runnable tasks", "TASKEXEC", logging.INFO)
                break
            # Resolve each dependency to a concrete (consumable) reference.
            next_td["inputs"] = [self.retrieve_ref(ref) for ref in next_td["dependencies"]]
            task_record = TaskExecutionRecord(next_td, self, self.execution_features, self.block_store, self.master_proxy, self.worker)
            with self._lock:
                self.current_task = task_record
                self.current_td = next_td
            try:
                task_record.run()
            except Exception:
                # Narrowed from a bare except: so that SystemExit and
                # KeyboardInterrupt still propagate during shutdown.
                ciel.log.error('Error during executor task execution', 'TASKEXEC', logging.ERROR, True)
            with self._lock:
                self.current_task.cleanup()
                self.current_task = None
                self.current_td = None
            self.task_records.append(task_record)
            if task_record.success:
                # Feed spawned tasks and published refs back into the graph.
                self.task_graph.spawn_and_publish(task_record.spawned_tasks, task_record.published_refs, next_td)
            else:
                break
        ciel.log.error("Taskset complete", "TASKEXEC", logging.INFO)

    def retrieve_ref(self, ref):
        """Return a consumable reference for ref.

        A ref that is already consumable is returned as-is; otherwise the
        cached concrete reference with the same id is returned. Raises
        ReferenceUnavailableException if no cached reference exists.
        """
        if ref.is_consumable():
            return ref
        try:
            return self.reference_cache[ref.id]
        except KeyError:
            raise ReferenceUnavailableException(ref.id)

    def publish_ref(self, ref):
        """Record a newly produced reference in the local cache."""
        self.reference_cache[ref.id] = ref

    def abort_task(self, task_id):
        """Abort the currently executing task if it matches task_id."""
        with self._lock:
            if self.current_td["task_id"] == task_id:
                self.current_task.executor.abort()
class TaskSetExecutionRecord:
    """Drives local execution of a task set on a single worker.

    Maintains a local task graph and reference cache for one job: runnable
    tasks are dequeued, their dependencies resolved, each task executed,
    and results fed back into the graph until the job output completes or
    a task fails.
    """

    def __init__(self, root_task_descriptor, block_store, master_proxy, execution_features, worker):
        """Seed the reference cache, build the task graph, and publish the root task."""
        self._lock = Lock()
        self.task_records = []
        # In-flight task bookkeeping, guarded by _lock.
        self.current_task = None
        self.current_td = None
        self.block_store = block_store
        self.master_proxy = master_proxy
        self.execution_features = execution_features
        self.worker = worker
        # Generator form avoids an intermediate list for dict().
        self.reference_cache = dict((ref.id, ref) for ref in root_task_descriptor["inputs"])
        self.initial_td = root_task_descriptor
        self.task_graph = LocalTaskGraph(execution_features, [self.initial_td["task_id"]])
        self.job_output = LocalJobOutput(self.initial_td["expected_outputs"])
        # All expected outputs must be produced for the job to complete.
        for ref in self.initial_td["expected_outputs"]:
            self.task_graph.subscribe(ref, self.job_output)
        self.task_graph.spawn_and_publish([self.initial_td], self.initial_td["inputs"])

    def run(self):
        """Execute runnable tasks until the job output is complete or a task fails."""
        ciel.log.error("Running taskset starting at %s" % self.initial_td["task_id"], "TASKEXEC", logging.INFO)
        while not self.job_output.is_complete():
            next_td = self.task_graph.get_runnable_task()
            if next_td is None:
                ciel.log.error("No more runnable tasks", "TASKEXEC", logging.INFO)
                break
            # Resolve dependencies to concrete (consumable) references.
            next_td["inputs"] = [self.retrieve_ref(ref) for ref in next_td["dependencies"]]
            task_record = TaskExecutionRecord(next_td, self, self.execution_features, self.block_store, self.master_proxy, self.worker)
            with self._lock:
                self.current_task = task_record
                self.current_td = next_td
            try:
                task_record.run()
            except Exception:
                # Narrowed from a bare except: so interpreter-exit
                # exceptions are not swallowed.
                ciel.log.error('Error during executor task execution', 'TASKEXEC', logging.ERROR, True)
            with self._lock:
                self.current_task.cleanup()
                self.current_task = None
                self.current_td = None
            self.task_records.append(task_record)
            if task_record.success:
                self.task_graph.spawn_and_publish(task_record.spawned_tasks, task_record.published_refs, next_td)
            else:
                break
        ciel.log.error("Taskset complete", "TASKEXEC", logging.INFO)

    def retrieve_ref(self, ref):
        """Return a consumable reference for ref, consulting the local cache.

        Repairs the original's line-wrap damage: the bare ``raise`` and the
        orphan ``ReferenceUnavailableException(ref.id)`` expression are
        rejoined into a single raise statement (matching the intact copy of
        this class elsewhere in the file).
        """
        if ref.is_consumable():
            return ref
        try:
            return self.reference_cache[ref.id]
        except KeyError:
            raise ReferenceUnavailableException(ref.id)

    def publish_ref(self, ref):
        """Record a newly produced reference in the local cache."""
        self.reference_cache[ref.id] = ref

    def abort_task(self, task_id):
        """Abort the currently executing task if it matches task_id."""
        with self._lock:
            if self.current_td["task_id"] == task_id:
                self.current_task.executor.abort()