def update_downstream(self):
    """
    Enqueue downstream tasks that can run since this one has completed.

    If this runs in a separate transaction from saving the task status,
    then we cannot miss a task because of concurrency.
    """
    downstream = Task.objects.filter(
        run=self.run, template__upstream=self.template,
        status__in=[Task.WAITING, Task.UPSTREAM_FAILED]).select_for_update()
    for task in downstream:
        if self.status == Task.SUCCEEDED:
            waiting_for = Task.objects.filter(
                run=self.run, template__downstream=task.template_id).exclude(
                status=Task.SUCCEEDED).exists()
            if not waiting_for:
                logger.info(
                    'Submitting %s because all dependencies are complete', task)
                task.enqueue()
        elif task.status == Task.WAITING:
            # my status is failed or upstream_failed, fail anything waiting downstream
            logger.info('Setting status of %s to upstream_failed', task)
            task.status = Task.UPSTREAM_FAILED
            task.save()
            task.update_downstream()
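The concurrency claim in that docstring is easiest to see from the caller's side. Here is a minimal sketch, using a hypothetical `finish_task` helper (not part of the code above): the status change commits first, then `update_downstream` runs in its own transaction, where `select_for_update` serializes concurrent downstream passes so the later pass sees every committed upstream status.

from django.db import transaction

def finish_task(task, status):
    # hypothetical helper illustrating the two-transaction contract
    with transaction.atomic():
        task.status = status  # e.g. Task.SUCCEEDED or Task.FAILED
        task.save()
    # separate transaction: the status above is already committed, so two
    # sibling tasks finishing at the same time cannot both fail to see
    # each other and leave the downstream task unscheduled
    with transaction.atomic():
        task.update_downstream()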
def mark_terminated(self, execution_ids: typing.Iterable[int]):
    """
    Set the deadline to now for the given execution_ids so they are
    terminated in the next loop.
    """
    now = time.monotonic()
    for execution_id in execution_ids:
        if execution_id in self.running:
            self.running[execution_id].deadline = now
        else:
            logger.info('Execution #%s is not running, ignoring termination request',
                        execution_id)
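`mark_terminated` only stamps a deadline; the actual kill happens in `read_output` below. The entries in `self.running` therefore need at least the fields used there. A sketch of that bookkeeping record, with the class name assumed and the fields inferred from usage:

import dataclasses
import subprocess
import typing

@dataclasses.dataclass
class RunningExecution:  # the name is an assumption
    id: int
    process: subprocess.Popen
    deadline: typing.Optional[float] = None  # time.monotonic() at which to kill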
def save_results(self):
    """Persist the subprocess results to the database"""
    for _ in range(len(self.results)):
        with transaction.atomic():
            result = self.results[0]
            if result.stdout or result.stderr:
                Execution.update_output(result.execution_id, result.stdout, result.stderr)
            if result.returncode is not None:
                execution = Execution.objects.get(id=result.execution_id)
                logger.info('Task %s execution %s exited with code %s',
                            execution.task.id, execution.id, result.returncode)
                execution.mark_finished(result.returncode)
            # pop only after the updates have completed successfully;
            # if an exception is raised before this point, the updates are retried
            self.results.popleft()
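`Result` itself is defined elsewhere; this sketch is inferred purely from how its fields are used here and in `read_output` below. `self.results` must be a `collections.deque`, since `popleft()` is what persists results in arrival order:

import collections

class Result:
    # inferred from usage; the real definition may differ
    def __init__(self, execution_id):
        self.execution_id = execution_id
        self.stdout = ''
        self.stderr = ''
        self.returncode = None

results = collections.deque()  # the worker's self.results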
def start_tasks(self):
    """
    Check for queued tasks that we can run here and start them.
    """
    if len(self.executor.get_running_ids()) >= self.concurrency:
        return  # this worker is already running enough tasks
    with transaction.atomic():
        task = Task.first_queued(self.queue_ids)
        if not task:
            return  # there are no tasks ready to run
        execution = task.start_execution(self.worker)
        logger.info('Starting task %s execution %s', task.id, execution.id)
        self.executor.start_subprocess(
            execution_id=execution.id,
            command=execution.task.template.command,
            environment=execution.task.run.parameters if execution.task.run else {},
            timeout=execution.task.template.timeout)
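`Task.first_queued` is not shown in this section. One plausible implementation (an assumption; the `status` and `queue_id` field names are guesses) locks the oldest queued task while skipping rows another worker's transaction has already locked, so two workers never claim the same task:

@classmethod
def first_queued(cls, queue_ids):
    # hypothetical sketch; skip_locked requires Django >= 1.11
    return (cls.objects
            .filter(status=cls.QUEUED, queue_id__in=queue_ids)
            .order_by('id')
            .select_for_update(skip_locked=True)
            .first())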
def read_output(self, timeout=0.1) -> typing.List[Result]:
    """
    Read from any subprocess pipes that have data ready.

    Output is decoded as UTF-8; if a read ends partway through a
    multi-byte character, up to 3 more bytes are read to complete it.

    :param timeout: in seconds, provided for testing
    """
    all_results = {}
    read_set = self.pipes.keys()
    try:
        # 1/10 second timeout so we return control to the calling event loop
        # if no file descriptors are ready to read
        read_ready, _, _ = select.select(read_set, [], [], timeout)
    except select.error as exc:
        # a signal could interrupt; fixed in python 3.5:
        # https://www.python.org/dev/peps/pep-0475/
        if exc.args[0] != errno.EINTR:
            raise
        logger.warning('Received select.error: %s', exc)
        read_ready = []

    # read from each ready file descriptor
    for fd in read_ready:
        execution = self.pipes[fd]
        data = fd.read(1024)
        if data == b'':
            # the pipe is closed
            fd.close()
            del self.pipes[fd]
            continue

        # keep reading up to 3 more bytes until we get a full UTF-8 character
        for _ in range(3):
            try:
                data = data.decode('utf-8')
                break
            except UnicodeDecodeError:
                # raw pipe objects have no read1(), so read one byte directly
                data += fd.read(1)
        else:
            logger.error('Unable to decode byte data! Throwing it away.')
            data = ''  # discard the bytes we could not decode

        result = all_results.setdefault(execution.id, Result(execution.id))
        if fd == execution.process.stdout.raw:
            result.stdout = data
        else:
            result.stderr = data

    # check if each running process needs cleanup
    for execution in list(self.running.values()):
        # check if the process should be killed
        if execution.deadline and execution.deadline < time.monotonic():
            # kill the process group, in an attempt to kill any children it has spawned
            try:
                # setpgrp above sets the PGID to the subprocess' PID
                os.killpg(execution.process.pid, signal.SIGKILL)
                logger.info('Terminated execution #%s', execution.id)
            except (ProcessLookupError, PermissionError) as exc:
                logger.info('Execution #%s was marked to kill but has already exited (%s)',
                            execution.id, exc.__class__.__name__)

        # check if the process has exited
        exit_code = execution.process.poll()
        if exit_code is None:
            continue  # we'll check again later

        # we may not have read everything available, so only clean up
        # after all of this execution's pipes are closed
        open_pipes = ({execution.process.stdout.raw, execution.process.stderr.raw} &
                      set(self.pipes.keys()))
        if not open_pipes:
            result = all_results.setdefault(execution.id, Result(execution.id))
            result.returncode = execution.process.returncode
            del self.running[execution.id]

    return list(all_results.values())
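The "setpgrp above" comment refers to how the subprocess was started, which is outside this section. A minimal sketch of that setup: `preexec_fn=os.setpgrp` puts the child in its own process group, whose PGID equals its PID, which is what lets the `os.killpg` call above take out the command and any children it spawned. The `shell=True` here is an assumption.

import os
import subprocess

def start_subprocess_sketch(command):
    # hypothetical; the real start_subprocess also records the execution
    # id, passes the environment, and applies the timeout
    return subprocess.Popen(
        command, shell=True,
        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        preexec_fn=os.setpgrp)  # child PGID == child PID, so killpg works

With the default bufsize, `process.stdout` is a buffered reader, which is why `read_output` keys its pipes on, and compares against, the underlying `.raw` file objects.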