예제 #1
0
    def check_task_ready_to_map(self, state: State,
                                upstream_states: Dict[Edge, State]) -> State:
        """
        Decides whether the parent task can be mapped and then ends the run.

        Args:
            - state (State): the current state of this task
            - upstream_states (Dict[Edge, State]): the upstream states, keyed by edge

        Raises:
            - ENDRUN: always; it carries the unchanged `state` when the task is
                already mapped, a `Failed` state when mapping is not possible, and
                a `Mapped` state otherwise
        """
        # an already-mapped task needs no further checks
        if state.is_mapped():
            raise ENDRUN(state)

        # one flag per upstream edge: is it a mapped edge whose state succeeded?
        can_map_over = [
            edge.mapped and upstream.is_successful()
            for edge, upstream in upstream_states.items()
        ]
        if upstream_states and not any(can_map_over):
            failure = Failed(
                "No upstream states can be mapped over.")  # type: State
            raise ENDRUN(failure)

        # every successful, not-yet-mapped upstream state on a mapped edge must
        # expose a result that supports indexing
        is_indexable = [
            hasattr(upstream.result, "__getitem__")
            for edge, upstream in upstream_states.items()
            if upstream.is_successful() and not upstream.is_mapped() and edge.mapped
        ]
        if not all(is_indexable):
            raise ENDRUN(
                Failed("At least one upstream state has an unmappable result."))

        raise ENDRUN(Mapped("Ready to proceed with mapping."))
예제 #2
0
    def check_task_ready_to_map(self, state: State,
                                upstream_states: Dict[Edge, State]) -> State:
        """
        Decides whether the parent task can be mapped and then ends the run.

        Args:
            - state (State): the current state of this task
            - upstream_states (Dict[Edge, State]): the upstream states, keyed by edge

        Raises:
            - ENDRUN: always; it carries the (updated) incoming `state` when the
                task is already mapped, a `Failed` state when mapping is not
                possible, and otherwise a `Mapped` state whose `n_map_states`
                has been computed from the upstream states
        """
        if state.is_mapped():
            # An already-mapped state indicates a re-run of a mapped pipeline.
            # Both `map_states` and `cached_inputs` are populated so the flow
            # runner can regenerate the child tasks, whether the mapping was
            # over exchanged data or over a non-data-exchanging dependency.
            lacks_children = len(state.map_states) == 0  # type: ignore
            if lacks_children and state.n_map_states > 0:  # type: ignore
                state.map_states = [None] * state.n_map_states  # type: ignore

            # only edges with a truthy key contribute a cached input
            inputs = {}
            for edge, upstream in upstream_states.items():
                if edge.key:
                    inputs[edge.key] = upstream._result  # type: ignore
            state.cached_inputs = inputs
            raise ENDRUN(state)

        # one flag per upstream edge: is it a mapped edge whose state succeeded?
        can_map_over = [
            edge.mapped and upstream.is_successful()
            for edge, upstream in upstream_states.items()
        ]
        if upstream_states and not any(can_map_over):
            failure = Failed(
                "No upstream states can be mapped over.")  # type: State
            raise ENDRUN(failure)

        # every successful, not-yet-mapped upstream state on a mapped edge must
        # expose a result that supports indexing
        is_indexable = [
            hasattr(upstream.result, "__getitem__")
            for edge, upstream in upstream_states.items()
            if upstream.is_successful() and not upstream.is_mapped() and edge.mapped
        ]
        if not all(is_indexable):
            raise ENDRUN(
                Failed("At least one upstream state has an unmappable result."))

        # n_map_states is the smallest size found on any mapped edge: the result
        # length for plain successful states, `n_map_states` for mapped ones
        # (0 when there is nothing to measure)
        result_lengths = [
            len(upstream.result)
            for edge, upstream in upstream_states.items()
            if edge.mapped and upstream.is_successful() and not upstream.is_mapped()
        ]
        child_counts = [
            upstream.n_map_states  # type: ignore
            for edge, upstream in upstream_states.items()
            if edge.mapped and upstream.is_mapped()
        ]
        n_map_states = min(result_lengths + child_counts, default=0)

        ready = Mapped("Ready to proceed with mapping.",
                       n_map_states=n_map_states)
        raise ENDRUN(ready)
예제 #3
0
    def check_task_is_ready(self, state: State) -> State:
        """
        Checks whether the task may proceed with its run.

        Pending, mapped and cached states are let through; every other state
        ends the run.

        Args:
            - state (State): the current state of this task

        Returns:
            - State: the unchanged `state`, when the task may proceed

        Raises:
            - ENDRUN: if the task is running, finished (and not cached), or in
                any other state
        """
        # a pending task can start right away
        if state.is_pending():
            return state

        def display_name():
            # resolved lazily, so it is only looked up when a message is built
            return prefect.context.get("task_full_name", self.task.name)

        # a mapped task still proceeds so that its children tasks are generated
        if state.is_mapped():
            self.logger.debug(
                "Task '{name}': task is mapped, but run will proceed so children are generated.".format(
                    name=display_name()
                )
            )
            return state

        # the remaining checks either let a cached state through or pick the
        # message that explains why the run stops here
        if state.is_running():
            reason = "Task '{name}': task is already running.".format(
                name=display_name()
            )
        elif state.is_cached():
            return state
        elif state.is_finished():
            reason = "Task '{name}': task is already finished.".format(
                name=display_name()
            )
        else:
            reason = "Task '{name}' is not ready to run or state was unrecognized ({state}).".format(
                name=display_name(),
                state=state,
            )

        self.logger.debug(reason)
        raise ENDRUN(state)
예제 #4
0
    def wait_for_mapped_task(
        self, state: State, executor: "prefect.engine.executors.Executor"
    ) -> State:
        """
        Waits on the children of a mapped state via the executor.

        Args:
            - state (State): the current state; only a mapped state is waited on
            - executor (Executor): the run's executor

        Returns:
            - State: the same `state` object; when it is mapped, its `map_states`
                have been replaced by whatever `executor.wait` returned
        """
        # nothing to wait for unless the state is mapped
        if not state.is_mapped():
            return state

        assert isinstance(state, Mapped)  # mypy assert
        finished_children = executor.wait(state.map_states)
        state.map_states = finished_children
        return state
예제 #5
0
    def load_results(
        self, state: State, upstream_states: Dict[Edge, State]
    ) -> Tuple[State, Dict[Edge, State]]:
        """
        Loads the results needed for this task run, both on the task's own state
        and on each of its upstream states.

        Args:
            - state (State): the task's current state
            - upstream_states (Dict[Edge, State]): the upstream states; the
                dictionary is updated in place with the loaded states

        Returns:
            - Tuple[State, dict]: a tuple of (state, upstream_states)

        Raises:
            - ENDRUN: if anything goes wrong while loading; it carries the state
                produced by handling the change to a `Failed` state
        """

        def result_for(edge):
            # prefer the upstream task's own result, else the flow-level result
            return edge.upstream_task.result or self.flow_result

        try:
            # ensures mapped children are only loaded once
            if state.is_mapped():
                state = state.load_result(self.result)

            keyed_results = {}
            for edge, upstream_state in upstream_states.items():
                upstream_states[edge] = upstream_state.load_result(result_for(edge))
                if edge.key is None:
                    continue
                # keyed edges also hand their result object to the cached results
                keyed_results[edge.key] = result_for(edge)

            state.load_cached_results(keyed_results)
            return state, upstream_states
        except Exception as exc:
            failure = Failed(
                message=f"Failed to retrieve task results: {exc}", result=exc
            )
            final_state = self.handle_state_change(old_state=state, new_state=failure)
            raise ENDRUN(final_state) from exc
예제 #6
0
    def run_mapped_task(
        self,
        state: State,
        upstream_states: Dict[Edge, State],
        context: Dict[str, Any],
        executor: "prefect.engine.executors.Executor",
    ) -> State:
        """
        If the task is being mapped, submits children tasks for execution. Returns a `Mapped` state.

        The method first builds one dictionary of upstream states per child task (indexing
        into every mapped upstream edge), then moves this task into a `Mapped` state and
        finally hands the children to the executor.

        Args:
            - state (State): the current task state
            - upstream_states (Dict[Edge, State]): the upstream states
            - context (dict): prefect Context to use for execution; each child receives
                a copy with `map_index` added
            - executor (Executor): executor to use when performing computation

        Returns:
            - State: whatever `handle_state_change` returns. If handling the first
                ("Preparing to submit") `Mapped` state yields a different state object,
                that state is returned and no children are submitted; otherwise the
                result of handling the final `Mapped` state (whose `map_states` are the
                values returned by `executor.map`) is returned.

        Raises:
            - ENDRUN: if the current state is not `Running`
                (NOTE(review): nothing in this method raises ENDRUN directly; presumably
                it can come from `handle_state_change` - confirm)
        """

        # one entry per child task: the upstream states that child will receive
        map_upstream_states = []

        # we don't know how long the iterables are, but we want to iterate until we reach
        # the end of the shortest one
        counter = itertools.count()

        # infinite loop, if upstream_states has any entries; the only exit is the
        # IndexError handler below (the `True and` part of the condition is redundant).
        # NOTE(review): if no edge is mapped, nothing in the body raises IndexError and
        # this loop never ends - presumably callers guarantee at least one mapped edge;
        # confirm.
        while True and upstream_states:
            i = next(counter)
            states = {}

            try:

                for edge, upstream_state in upstream_states.items():

                    # if the edge is not mapped over, then we take its state
                    if not edge.mapped:
                        states[edge] = upstream_state

                    # if the edge is mapped and the upstream state is Mapped, then we are mapping
                    # over a mapped task. In this case, we take the appropriately-indexed upstream
                    # state from the upstream tasks's `Mapped.map_states` array.
                    # Note that these "states" might actually be futures at this time; we aren't
                    # blocking until they finish.
                    elif edge.mapped and upstream_state.is_mapped():
                        states[edge] = upstream_state.map_states[i]  # type: ignore

                    # Otherwise, we are mapping over the result of a "vanilla" task. In this
                    # case, we create a copy of the upstream state but set the result to the
                    # appropriately-indexed item from the upstream task's `State.result`
                    # array.
                    else:
                        # shallow copy, so the original upstream state keeps its full result
                        states[edge] = copy.copy(upstream_state)

                        # if the current state is already Mapped, then we might be executing
                        # a re-run of the mapping pipeline. In that case, the upstream states
                        # might not have `result` attributes (as any required results could be
                        # in the `cached_inputs` attribute of one of the child states).
                        # Therefore, we only try to get a result if EITHER this task's
                        # state is not already mapped OR the upstream result is not `NoResult`.
                        if not state.is_mapped() or upstream_state._result != NoResult:
                            # wrap the i-th item, reusing the upstream result's handler
                            upstream_result = Result(
                                upstream_state.result[i],
                                result_handler=upstream_state._result.result_handler,  # type: ignore
                            )
                            states[edge].result = upstream_result
                        # reaching this branch already implies `state.is_mapped()` (the
                        # condition above was False); with no upstream result to index,
                        # the number of existing child states bounds the iteration
                        elif state.is_mapped():
                            if i >= len(state.map_states):  # type: ignore
                                raise IndexError()

                # only add this iteration if we made it through all iterables
                map_upstream_states.append(states)

            # index error means we reached the end of the shortest iterable
            except IndexError:
                break

        def run_fn(
            state: State, map_index: int, upstream_states: Dict[Edge, State]
        ) -> State:
            # runs a single child task: the shared context is copied and tagged with
            # this child's `map_index` before delegating to `self.run`
            map_context = context.copy()
            map_context.update(map_index=map_index)
            with prefect.context(self.context):
                return self.run(
                    upstream_states=upstream_states,
                    # if we set the state here, then it will not be processed by `initialize_run()`
                    state=state,
                    context=map_context,
                    executor=executor,
                )

        # generate initial states, if available
        if isinstance(state, Mapped):
            initial_states = list(state.map_states)  # type: List[Optional[State]]
        else:
            initial_states = []
        # pad with None so there is an initial state for every child; when there are
        # already more initial states than children the multiplier is negative and
        # nothing is added (the list is never truncated)
        initial_states.extend([None] * (len(map_upstream_states) - len(initial_states)))

        current_state = Mapped(
            message="Preparing to submit {} mapped tasks.".format(len(initial_states)),
            map_states=initial_states,  # type: ignore
        )
        state = self.handle_state_change(old_state=state, new_state=current_state)
        # a different object coming back means the state change produced some other
        # state; return it without submitting any children
        if state is not current_state:
            return state

        # map over the initial states, a counter representing the map_index, and also the mapped upstream states
        map_states = executor.map(
            run_fn, initial_states, range(len(map_upstream_states)), map_upstream_states
        )

        # NOTE(review): `len(map_states)` assumes `executor.map` returns a sized
        # collection rather than a lazy iterator - confirm for every executor
        self.logger.debug(
            "{} mapped tasks submitted for execution.".format(len(map_states))
        )
        new_state = Mapped(
            message="Mapped tasks submitted for execution.", map_states=map_states
        )
        return self.handle_state_change(old_state=state, new_state=new_state)