예제 #1
0
    def run(self):
        # redirect stdout and stderr to queue
        logger.redirect_output(self.event_queue, self.task.path())

        succeeded = True
        attempt = 0
        try:
            while True:
                if not self.task.run():
                    if attempt < self.task.max_retries:
                        attempt += 1
                        delay = pow(2, attempt + 2)
                        logger.log(
                            message=
                            f'Retry {attempt}/{self.task.max_retries} in {delay} seconds',
                            is_error=True,
                            format=logger.Format.ITALICS)
                        time.sleep(delay)
                    else:
                        succeeded = False
                        break
                else:
                    break
        except Exception as e:
            logger.log(message=traceback.format_exc(),
                       format=logger.Format.VERBATIM,
                       is_error=True)
            succeeded = False

        self.status_queue.put(succeeded)
예제 #2
0
    def run(self):
        # redirect stdout and stderr to queue
        logger.redirect_output(self.event_queue, self.task.path())

        succeeded = True
        try:

            if not self.task.run():
                succeeded = False
        except Exception as e:
            logger.log(message=traceback.format_exc(),
                       format=logger.Format.VERBATIM,
                       is_error=True)
            succeeded = False

        self.status_queue.put(succeeded)
예제 #3
0
    def run():

        # collect system stats in a separate Process
        statistics_process = multiprocessing.Process(
            target=lambda: system_statistics.generate_system_statistics(
                event_queue),
            name='system_statistics')
        statistics_process.start()

        try:
            # capture output of print statements and other unplanned output
            logger.redirect_output(event_queue, pipeline.path())

            # all nodes that have not run yet, ordered by priority
            node_queue: [pipelines.Node] = []

            # data needed for computing cost
            node_durations_and_run_times = node_cost.node_durations_and_run_times(
                pipeline.path())

            # Putting nodes into the node queue
            def queue(nodes: [pipelines.Node]):
                for node in nodes:
                    node_cost.compute_cost(node, node_durations_and_run_times)
                    node_queue.append(node)
                node_queue.sort(key=lambda node: node.cost, reverse=True)

            if nodes:  # only run a set of child nodes

                def with_all_upstreams(nodes: {pipelines.Node}):
                    """recursively find all upstreams of a list of nodes"""
                    return functools.reduce(
                        set.union,
                        [with_all_upstreams(node.upstreams)
                         for node in nodes], nodes)

                # when requested, include all upstreams of nodes, otherwise just use provided nodes
                nodes_to_run = with_all_upstreams(
                    set(nodes)) if with_upstreams else set(nodes)

                # remove everything from pipeline that should not be run
                # (that's makes updating dependencies between nodes easier)
                for node in set(pipeline.nodes.values()) - nodes_to_run:
                    pipeline.remove(node)

                # queue remaining nodes
                queue(list((pipeline.nodes).values()))

            else:
                # remove dependencies to siblings
                pipeline.upstreams = set()
                pipeline.downstreams = set()
                # queue whole pipeline
                queue([pipeline])

            # book keeping
            run_start_time = datetime.datetime.now()
            # all nodes that already ran or that won't be run anymore
            processed_nodes: {pipelines.Node} = set()
            # running pipelines with start times and number of running children
            running_pipelines: {
                pipelines.Pipeline: [datetime.datetime, int]
            } = {}
            failed_pipelines: {pipelines.Pipeline
                               } = set()  # pipelines with failed tasks
            running_task_processes: {pipelines.Task: TaskProcess} = {}

            def dequeue() -> pipelines.Node:
                """
                Finds the next task in the queue
                - without upstreams or where all upstreams have been run already
                - where the pipeline specific maximum number of parallel tasks per pipeline is not reached
                """
                for node in node_queue:  # type: pipelines.Node
                    if ((not node.upstreams
                         or len(node.upstreams & processed_nodes) == len(
                             node.upstreams)) and
                        (not isinstance(node.parent, pipelines.Pipeline) or
                         (not node.parent.max_number_of_parallel_tasks) or
                         (not node.parent in running_pipelines) or
                         (running_pipelines[node.parent][1] <
                          node.parent.max_number_of_parallel_tasks))):
                        node_queue.remove(node)
                        if node.parent in failed_pipelines:
                            processed_nodes.add(
                                node
                            )  # if the parent pipeline failed, don't launch new nodes
                        else:
                            return node

            def track_finished_pipelines():
                """when all nodes of a pipeline have been processed, then emit events"""
                for running_pipeline, (start_time, running_children) \
                        in dict(running_pipelines).items():  # type: pipelines.Pipeline
                    if len(
                            set(running_pipeline.nodes.values())
                            & processed_nodes) == len(running_pipeline.nodes):
                        succeeded = running_pipeline not in failed_pipelines
                        event_queue.put(
                            events.Output(
                                node_path=running_pipeline.path(),
                                format=logger.Format.ITALICS,
                                is_error=not succeeded,
                                message=
                                f'{"succeeded" if succeeded else "failed"}, {logger.format_time_difference(run_start_time, datetime.datetime.now())}'
                            ))
                        event_queue.put(
                            events.NodeFinished(
                                node_path=running_pipeline.path(),
                                start_time=start_time,
                                end_time=datetime.datetime.now(),
                                is_pipeline=True,
                                succeeded=succeeded))
                        del running_pipelines[running_pipeline]
                        processed_nodes.add(running_pipeline)

            # announce run start
            event_queue.put(
                events.RunStarted(node_path=pipeline.path(),
                                  start_time=run_start_time,
                                  pid=os.getpid()))

            # run as long
            # - as task processes are still running
            # - as there is still stuff in the node queue
            while running_task_processes or node_queue:
                # don't do anything if the maximum number of parallel tasks is currently running
                if len(running_task_processes
                       ) < config.max_number_of_parallel_tasks():

                    next_node = dequeue(
                    )  # get the next runnable node from the queue

                    if next_node:
                        if isinstance(next_node, pipelines.Pipeline):
                            # connect pipeline nodes without upstreams to upstreams of pipeline
                            for upstream in next_node.upstreams:
                                for pipeline_node in next_node.nodes.values():
                                    if not pipeline_node.upstreams:
                                        next_node.add_dependency(
                                            upstream, pipeline_node)

                            # connect pipeline nodes without downstreams to downstream of pipeline
                            for downstream in next_node.downstreams:
                                for pipeline_node in next_node.nodes.values():
                                    if not pipeline_node.downstreams:
                                        next_node.add_dependency(
                                            pipeline_node, downstream)

                            # queue all child nodes
                            queue(list(next_node.nodes.values()))

                            # book keeping and event emission
                            pipeline_start_time = datetime.datetime.now()
                            running_pipelines[next_node] = [
                                pipeline_start_time, 0
                            ]
                            event_queue.put(
                                events.NodeStarted(next_node.path(),
                                                   pipeline_start_time, True))
                            event_queue.put(
                                events.Output(
                                    node_path=next_node.path(),
                                    format=logger.Format.ITALICS,
                                    message='★ ' + node_cost.format_duration(
                                        node_durations_and_run_times.get(
                                            tuple(next_node.path()),
                                            [0, 0])[0])))

                        elif isinstance(next_node, pipelines.ParallelTask):
                            # create sub tasks and queue them
                            try:
                                logger.redirect_output(event_queue,
                                                       next_node.path())
                                logger.log('☆ Launching tasks',
                                           format=logger.Format.ITALICS)
                                sub_pipeline = next_node.launch()
                                next_node.parent.replace(
                                    next_node, sub_pipeline)
                                queue([sub_pipeline])

                            except Exception as e:
                                logger.log(
                                    message=f'Could not launch parallel tasks',
                                    format=logger.Format.ITALICS,
                                    is_error=True)
                                logger.log(
                                    message=traceback.format_exc(),
                                    format=events.Output.Format.VERBATIM,
                                    is_error=True)
                                failed_pipelines.add(next_node.parent)
                            finally:
                                logger.redirect_output(event_queue,
                                                       pipeline.path())

                        else:
                            # run a task in a subprocess
                            if next_node.parent in running_pipelines:
                                running_pipelines[next_node.parent][1] += 1
                            event_queue.put(
                                events.NodeStarted(next_node.path(),
                                                   datetime.datetime.now(),
                                                   False))
                            event_queue.put(
                                events.Output(
                                    node_path=next_node.path(),
                                    format=logger.Format.ITALICS,
                                    message='★ ' + node_cost.format_duration(
                                        node_durations_and_run_times.get(
                                            tuple(next_node.path()),
                                            [0, 0])[0])))

                            status_queue = multiprocessing.Queue()
                            process = TaskProcess(next_node, event_queue,
                                                  status_queue)
                            process.start()
                            running_task_processes[next_node] = process

                # check whether some of the running processes finished
                for task_process in list(
                        running_task_processes.values()):  # type: TaskProcess
                    if task_process.is_alive():
                        pass
                    else:
                        del running_task_processes[task_process.task]
                        if task_process.task.parent in running_pipelines:
                            running_pipelines[task_process.task.parent][1] -= 1

                        processed_nodes.add(task_process.task)

                        succeeded = not (task_process.status_queue.get()
                                         == False
                                         or task_process.exitcode != 0)
                        if not succeeded:
                            for parent in task_process.task.parents()[:-1]:
                                failed_pipelines.add(parent)

                        end_time = datetime.datetime.now()
                        event_queue.put(
                            events.Output(
                                task_process.task.path(),
                                ('succeeded' if succeeded else 'failed') +
                                ',  ' + logger.format_time_difference(
                                    task_process.start_time, end_time),
                                format=logger.Format.ITALICS,
                                is_error=not succeeded))
                        event_queue.put(
                            events.NodeFinished(task_process.task.path(),
                                                task_process.start_time,
                                                end_time, False, succeeded))

                # check if some pipelines finished
                track_finished_pipelines()

                # don't busy-wait
                time.sleep(0.001)

        except:
            event_queue.put(
                events.Output(node_path=pipeline.path(),
                              message=traceback.format_exc(),
                              format=logger.Format.ITALICS,
                              is_error=True))

        # run again because `dequeue` might have moved more nodes to `finished_nodes`
        track_finished_pipelines()

        # kill the stats process (joining or terminating does not work in gunicorn)
        os.kill(statistics_process.pid, signal.SIGKILL)
        statistics_process.join()

        # run finished
        event_queue.put(
            events.RunFinished(node_path=pipeline.path(),
                               end_time=datetime.datetime.now(),
                               succeeded=not failed_pipelines))