import json
import threading
from datetime import datetime

from croniter import croniter

# DAG, Task, JobState, DagobahError, and StrictJSONEncoder are assumed to be
# importable from elsewhere in this package; their module paths are not shown
# in this file.


class Job(DAG):
    """ Controller for a collection and graph of Task objects.

    Emitted events:

    job_complete: On successful completion of the job. Returns
    the current serialization of the job with run logs.
    job_failed: On failed completion of the job. Returns
    the current serialization of the job with run logs.
    """

    def __init__(self, parent, backend, job_id, name):
        super(Job, self).__init__()

        self.parent = parent
        self.backend = backend
        self.event_handler = self.parent.event_handler
        self.job_id = job_id
        self.name = name
        self.state = JobState()

        # tasks themselves aren't hashable, so we need a secondary lookup
        self.tasks = {}

        self.next_run = None
        self.cron_schedule = None
        self.cron_iter = None
        self.run_log = None
        self.completion_lock = threading.Lock()
        self.notes = None

        self._set_status("waiting")

        self.commit()

    def commit(self):
        """ Store metadata on this Job to the backend. """
        self.backend.commit_job(self._serialize())
        self.parent.commit()

    def add_task(self, command, name=None, **kwargs):
        """ Adds a new Task to the graph with no edges. """
        if not self.state.allow_change_graph:
            raise DagobahError("job's graph is immutable in its current state: %s"
                               % self.state.status)

        if name is None:
            name = command
        new_task = Task(self, command, name, **kwargs)
        self.tasks[name] = new_task
        self.add_node(name)
        self.commit()

    def add_dependency(self, from_task_name, to_task_name):
        """ Add a dependency between two tasks. """
        if not self.state.allow_change_graph:
            raise DagobahError("job's graph is immutable in its current state: %s"
                               % self.state.status)

        self.add_edge(from_task_name, to_task_name)
        self.commit()

    def delete_task(self, task_name):
        """ Deletes the named Task in this Job. """
        if not self.state.allow_change_graph:
            raise DagobahError("job's graph is immutable in its current state: %s"
                               % self.state.status)

        if task_name not in self.tasks:
            raise DagobahError("task %s does not exist" % task_name)

        self.tasks.pop(task_name)
        self.delete_node(task_name)
        self.commit()

    def delete_dependency(self, from_task_name, to_task_name):
        """ Delete a dependency between two tasks. """
        if not self.state.allow_change_graph:
            raise DagobahError("job's graph is immutable in its current state: %s"
                               % self.state.status)

        self.delete_edge(from_task_name, to_task_name)
        self.commit()

    def schedule(self, cron_schedule, base_datetime=None):
        """ Schedules the job to run periodically using Cron syntax. """
        if not self.state.allow_change_schedule:
            raise DagobahError("job's schedule cannot be changed in state: %s"
                               % self.state.status)

        if cron_schedule is None:
            self.cron_schedule = None
            self.cron_iter = None
            self.next_run = None
        else:
            if base_datetime is None:
                base_datetime = datetime.utcnow()
            self.cron_schedule = cron_schedule
            self.cron_iter = croniter(cron_schedule, base_datetime)
            self.next_run = self.cron_iter.get_next(datetime)

        self.commit()
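    # Illustrative note on how next_run is derived above (the cron expression
    # and dates here are an assumed example, not from this codebase): croniter
    # yields fire times strictly after its base datetime, e.g.
    #
    #     croniter("0 6 * * *", datetime(2014, 1, 1)).get_next(datetime)
    #     # -> datetime(2014, 1, 1, 6, 0)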
""" if not self.state.allow_start: raise DagobahError("job cannot be started in its current state; " + "it is probably already running") is_valid, reason = self.validate() if not is_valid: raise DagobahError(reason) # don't increment if the job was run manually if self.cron_iter and datetime.utcnow() > self.next_run: self.next_run = self.cron_iter.get_next(datetime) self.run_log = { "job_id": self.job_id, "name": self.name, "parent_id": self.parent.dagobah_id, "log_id": self.backend.get_new_log_id(), "start_time": datetime.utcnow(), "tasks": {}, } self._set_status("running") for task in self.tasks.itervalues(): task.reset() for task_name in self.ind_nodes(): self._put_task_in_run_log(task_name) self.tasks[task_name].start() self._commit_run_log() def retry(self): """ Restarts failed tasks of a job. """ failed_task_names = [] for task_name, log in self.run_log["tasks"].items(): if log.get("success", True) == False: failed_task_names.append(task_name) if len(failed_task_names) == 0: raise DagobahError("no failed tasks to retry") self._set_status("running") self.run_log["last_retry_time"] = datetime.utcnow() for task_name in failed_task_names: self._put_task_in_run_log(task_name) self.tasks[task_name].start() self._commit_run_log() def terminate_all(self): """ Terminate all currently running jobs. """ for task in self.tasks.itervalues(): if task.started_at and not task.completed_at: task.terminate() def kill_all(self): """ Kill all currently running jobs. """ for task in self.tasks.itervalues(): if task.started_at and not task.completed_at: task.kill() def edit(self, **kwargs): """ Change this Job's name. This will affect the historical data available for this Job, e.g. past run logs will no longer be accessible. """ if not self.state.allow_edit_job: raise DagobahError("job cannot be edited in its current state") if "name" in kwargs and isinstance(kwargs["name"], str): if not self.parent._name_is_available(kwargs["name"]): raise DagobahError("new job name %s is not available" % kwargs["name"]) for key in ["name"]: if key in kwargs and isinstance(kwargs[key], str): setattr(self, key, kwargs[key]) self.parent.commit(cascade=True) def update_job_notes(self, job_name, notes): if not self.state.allow_edit_job: raise DagobahError("job cannot be edited in its current state") setattr(self, "notes", notes) self.parent.commit(cascade=True) def edit_task(self, task_name, **kwargs): """ Change the name of a Task owned by this Job. This will affect the historical data available for this Task, e.g. past run logs will no longer be accessible. 
""" if not self.state.allow_edit_task: raise DagobahError("tasks cannot be edited in this job's " + "current state") if task_name not in self.tasks: raise DagobahError("task %s not found" % task_name) if "name" in kwargs and isinstance(kwargs["name"], str): if kwargs["name"] in self.tasks: raise DagobahError("task name %s is unavailable" % kwargs["name"]) task = self.tasks[task_name] for key in ["name", "command"]: if key in kwargs and isinstance(kwargs[key], str): setattr(task, key, kwargs[key]) if "soft_timeout" in kwargs: task.set_soft_timeout(kwargs["soft_timeout"]) if "hard_timeout" in kwargs: task.set_hard_timeout(kwargs["hard_timeout"]) if "host_id" in kwargs: task.set_host_id(kwargs["host_id"]) if "name" in kwargs and isinstance(kwargs["name"], str): self.rename_edges(task_name, kwargs["name"]) self.tasks[kwargs["name"]] = task del self.tasks[task_name] self.parent.commit(cascade=True) def _complete_task(self, task_name, **kwargs): """ Marks this task as completed. Kwargs are stored in the run log. """ self.run_log["tasks"][task_name] = kwargs for node in self.downstream(task_name): self._start_if_ready(node) try: self.backend.acquire_lock() self._commit_run_log() except: raise finally: self.backend.release_lock() if kwargs.get("success", None) == False: task = self.tasks[task_name] try: self.backend.acquire_lock() if self.event_handler: self.event_handler.emit("task_failed", task._serialize(include_run_logs=True)) except: raise finally: self.backend.release_lock() self._on_completion() def _put_task_in_run_log(self, task_name): """ Initializes the run log task entry for this task. """ data = {"start_time": datetime.utcnow(), "command": self.tasks[task_name].command} self.run_log["tasks"][task_name] = data def _is_complete(self): """ Returns Boolean of whether the Job has completed. """ for log in self.run_log["tasks"].itervalues(): if "success" not in log: # job has not returned yet return False return True def _on_completion(self): """ Checks to see if the Job has completed, and cleans up if it has. """ if self.state.status != "running" or (not self._is_complete()): self.completion_lock.release() return for job, results in self.run_log["tasks"].iteritems(): if results.get("success", False) == False: self._set_status("failed") try: self.backend.acquire_lock() if self.event_handler: self.event_handler.emit("job_failed", self._serialize(include_run_logs=True)) except: raise finally: self.backend.release_lock() break if self.state.status != "failed": self._set_status("waiting") self.run_log = {} try: self.backend.acquire_lock() if self.event_handler: self.event_handler.emit("job_complete", self._serialize(include_run_logs=True)) except: raise finally: self.backend.release_lock() self.completion_lock.release() def _start_if_ready(self, task_name): """ Start this task if all its dependencies finished successfully. """ task = self.tasks[task_name] dependencies = self._dependencies(task_name) for dependency in dependencies: if self.run_log["tasks"].get(dependency, {}).get("success", False) == True: continue return self._put_task_in_run_log(task_name) task.start() def _set_status(self, status): """ Enforces enum-like behavior on the status field. """ try: self.state.set_status(status) except: raise DagobahError("could not set status %s" % status) def _commit_run_log(self): """" Commit the current run log to the backend. 
""" self.backend.commit_log(self.run_log) def _serialize(self, include_run_logs=False, strict_json=False): """ Serialize a representation of this Job to a Python dict object. """ # return tasks in sorted order if graph is in a valid state try: topo_sorted = self._topological_sort() t = [ self.tasks[task]._serialize(include_run_logs=include_run_logs, strict_json=strict_json) for task in topo_sorted ] except: t = [ task._serialize(include_run_logs=include_run_logs, strict_json=strict_json) for task in self.tasks.itervalues() ] result = { "job_id": self.job_id, "name": self.name, "parent_id": self.parent.dagobah_id, "tasks": t, "dependencies": {k: list(v) for k, v in self.graph.iteritems()}, "status": self.state.status, "cron_schedule": self.cron_schedule, "next_run": self.next_run, "notes": self.notes, } if strict_json: result = json.loads(json.dumps(result, cls=StrictJSONEncoder)) return result