Example #1
    def update_downstream(self):
        """
        Enqueue downstream tasks that can run since this one has completed.

        As long as this runs in a separate transaction from saving the
        task status, we cannot miss a task because of concurrency: the
        select_for_update() below serializes concurrent checks, and each
        check sees the statuses other workers have already committed.
        """
        downstream = Task.objects.filter(
            run=self.run,
            template__upstream=self.template,
            status__in=[Task.WAITING,
                        Task.UPSTREAM_FAILED]).select_for_update()

        for task in downstream:
            if self.status == Task.SUCCEEDED:
                waiting_for = Task.objects.filter(
                    run=self.run,
                    template__downstream=task.template_id).exclude(
                        status=Task.SUCCEEDED).exists()
                if not waiting_for:
                    logger.info(
                        'Submitting %s because all dependencies are complete',
                        task)
                    task.enqueue()

            elif task.status == Task.WAITING:
                # my status is failed or upstream_failed; fail anything
                # still waiting downstream
                logger.info('Setting status of %s to upstream_failed', task)
                task.status = Task.UPSTREAM_FAILED
                task.save()
                task.update_downstream()
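
A minimal sketch of a caller, assuming the final status is committed before the propagation runs (finish_task and Task.FAILED are illustrative, not taken from this example):

    from django.db import transaction

    def finish_task(task, returncode):
        # hypothetical caller: commit the final status first...
        with transaction.atomic():
            task.status = Task.SUCCEEDED if returncode == 0 else Task.FAILED
            task.save()
        # ...then propagate in a separate transaction, as the docstring
        # above requires; select_for_update() needs an open transaction
        with transaction.atomic():
            task.update_downstream()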
Example #2
    def mark_terminated(self, execution_ids: typing.Iterable[int]):
        """
        Set the deadline to now for the given execution_ids so they
        are terminated in the next loop.
        """
        now = time.monotonic()
        for execution_id in execution_ids:
            if execution_id in self.running:
                self.running[execution_id].deadline = now
            else:
                logger.info('Execution #%s is not running, '
                            'ignoring termination request', execution_id)
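
A short usage sketch (the executor instance and ids are hypothetical). time.monotonic() is used because it never jumps backwards with system clock adjustments, so a deadline set to "now" is guaranteed to be in the past on the next loop:

    executor.mark_terminated([42, 43])  # hypothetical execution ids
    # nothing is killed here; the next read_output() pass notices that
    # deadline < time.monotonic() and SIGKILLs the process group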
Example #3
    def save_results(self):
        """Persist the subprocess results to the database."""
        for _ in range(len(self.results)):
            with transaction.atomic():
                result = self.results[0]
                if result.stdout or result.stderr:
                    Execution.update_output(result.execution_id, result.stdout,
                                            result.stderr)
                if result.returncode is not None:
                    execution = Execution.objects.get(id=result.execution_id)
                    logger.info('Task %s execution %s exited with code %s',
                                execution.task.id, execution.id,
                                result.returncode)
                    execution.mark_finished(result.returncode)
                # pop only after the updates have completed successfully;
                # if an exception is raised before this point, the result
                # stays in the deque and the updates are retried
                self.results.popleft()
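
The same at-least-once pattern, sketched without Django: the head item is removed only after processing succeeds, so an exception before popleft() leaves it queued for the next attempt. This also means the updates above must be safe to repeat:

    from collections import deque

    results = deque(['a', 'b', 'c'])

    def process(item):
        print('processed', item)  # stand-in for the database updates

    while results:
        process(results[0])  # may raise; the item then stays queued
        results.popleft()    # remove only after success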
Example #4
    def start_tasks(self):
        """
        Check for queued tasks that we can run here and start them.
        """
        if len(self.executor.get_running_ids()) >= self.concurrency:
            return  # this worker is already running enough tasks

        with transaction.atomic():
            task = Task.first_queued(self.queue_ids)
            if not task:
                return  # there are no tasks ready to run

            execution = task.start_execution(self.worker)

        logger.info('Starting task %s execution %s', task.id, execution.id)
        self.executor.start_subprocess(
            execution_id=execution.id,
            command=execution.task.template.command,
            environment=execution.task.run.parameters
            if execution.task.run else {},
            timeout=execution.task.template.timeout)
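
Task.first_queued is not shown in this example. A plausible sketch, assuming it uses Django's row locking (the QUEUED status, queue_id field, and created ordering are guesses based on the surrounding code):

    @classmethod
    def first_queued(cls, queue_ids):
        # skip_locked lets concurrent workers pass over rows that another
        # worker has already locked, instead of blocking behind it
        return cls.objects.filter(
            status=cls.QUEUED,
            queue_id__in=queue_ids,
        ).select_for_update(skip_locked=True).order_by('created').first()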
Example #5
    def read_output(self, timeout=0.1) -> typing.List[Result]:
        """
        Read from all ready subprocess file descriptors.

        Output is decoded as UTF-8 before being returned; if a read ends
        in the middle of a multi-byte character, up to 3 more bytes are
        read to complete it.

        :param timeout: in seconds, provided for testing
        """
        all_results = {}
        read_set = self.pipes.keys()

        try:
            # 1/10 second timeout so we return control to the calling event loop
            # if no file descriptors are ready to read
            read_ready, _, _ = select.select(read_set, [], [], timeout)
        except select.error as exc:
            # a signal could interrupt. fixed in python 3.5:
            # https://www.python.org/dev/peps/pep-0475/
            if exc.args[0] != errno.EINTR:
                raise
            logger.warning('Received select.error: %s', exc)
            read_ready = []

        # read from each ready file descriptor
        for fd in read_ready:
            execution = self.pipes[fd]
            data = fd.read(1024)
            if data == b'':
                # the pipe is closed
                fd.close()
                del self.pipes[fd]
                continue

            # a UTF-8 character is at most 4 bytes, so if the read ended
            # mid-character, up to 3 more bytes will complete it
            for extra in range(4):
                try:
                    data = data.decode('utf-8')
                    break
                except UnicodeDecodeError:
                    if extra < 3:
                        data += fd.read(1)
            else:
                logger.error('Unable to decode byte data! Throwing it away.')
                continue  # don't record the undecodable bytes as output

            result = all_results.setdefault(execution.id, Result(execution.id))
            if fd == execution.process.stdout.raw:
                result.stdout = data
            else:
                result.stderr = data

        # check if each running process needs cleanup
        for execution in list(self.running.values()):
            # check if the process should be killed
            if execution.deadline and execution.deadline < time.monotonic():
                # kill the process group, in an attempt to kill any children it has spawned
                try:
                    # setpgrp above sets the PGID to the subprocess' PID
                    os.killpg(execution.process.pid, signal.SIGKILL)
                    logger.info('Terminated execution #%s', execution.id)
                except (ProcessLookupError, PermissionError) as exc:
                    logger.info('Execution #%s was marked to kill but has already exited (%s)',
                                execution.id, exc.__class__.__name__)

            # check if the process has exited
            exit_code = execution.process.poll()
            if exit_code is None:
                continue  # we'll check again later

            # we may not have read everything available, so only cleanup after all pipes are closed
            open_pipes = (
                {execution.process.stdout.raw, execution.process.stderr.raw}
                & set(self.pipes.keys())
            )
            if not open_pipes:
                result = all_results.setdefault(execution.id, Result(execution.id))
                result.returncode = execution.process.returncode
                del self.running[execution.id]

        return list(all_results.values())
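
The UTF-8 boundary handling can be lifted out and shown on its own. A self-contained sketch of the same technique (decode_completing is illustrative, not part of the class above):

    def decode_completing(data: bytes, read_more) -> str:
        """Decode UTF-8 bytes, pulling up to 3 extra bytes from
        read_more() to finish a trailing multi-byte character."""
        for extra in range(4):  # a UTF-8 character is at most 4 bytes
            try:
                return data.decode('utf-8')
            except UnicodeDecodeError:
                if extra < 3:
                    data += read_more()
        raise ValueError('data is not valid UTF-8')

    # '€' is 3 bytes in UTF-8; drop the last byte to simulate a read()
    # that stopped mid-character
    tail = iter([b'\xac'])
    print(decode_completing('15€'.encode('utf-8')[:-1],
                            lambda: next(tail)))  # prints 15€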