コード例 #1
0
    def check_task_ready_to_map(self, state: State,
                                upstream_states: Dict[Edge, State]) -> State:
        """
        Checks if the parent task is ready to proceed with mapping.

        Args:
            - state (State): the current state of this task
            - upstream_states (Dict[Edge, Union[State, List[State]]]): the upstream states

        Raises:
            - ENDRUN: either way, we dont continue past this point
        """
        if state.is_mapped():
            raise ENDRUN(state)

        # we can't map if there are no success states with iterables upstream
        if upstream_states and not any([
                edge.mapped and state.is_successful()
                for edge, state in upstream_states.items()
        ]):
            new_state = Failed(
                "No upstream states can be mapped over.")  # type: State
            raise ENDRUN(new_state)
        elif not all([
                hasattr(state.result, "__getitem__")
                for edge, state in upstream_states.items() if
                state.is_successful() and not state.is_mapped() and edge.mapped
        ]):
            new_state = Failed(
                "At least one upstream state has an unmappable result.")
            raise ENDRUN(new_state)
        else:
            new_state = Mapped("Ready to proceed with mapping.")
            raise ENDRUN(new_state)
コード例 #2
0
    def check_task_ready_to_map(self, state: State,
                                upstream_states: Dict[Edge, State]) -> State:
        """
        Checks if the parent task is ready to proceed with mapping.

        Args:
            - state (State): the current state of this task
            - upstream_states (Dict[Edge, Union[State, List[State]]]): the upstream states

        Raises:
            - ENDRUN: either way, we dont continue past this point
        """
        if state.is_mapped():
            # this indicates we are executing a re-run of a mapped pipeline;
            # in this case, we populate both `map_states` and `cached_inputs`
            # to ensure the flow runner can properly regenerate the child tasks,
            # regardless of whether we mapped over an exchanged piece of data
            # or a non-data-exchanging upstream dependency
            if len(state.map_states
                   ) == 0 and state.n_map_states > 0:  # type: ignore
                state.map_states = [None] * state.n_map_states  # type: ignore
            state.cached_inputs = {
                edge.key: state._result  # type: ignore
                for edge, state in upstream_states.items() if edge.key
            }
            raise ENDRUN(state)

        # we can't map if there are no success states with iterables upstream
        if upstream_states and not any([
                edge.mapped and state.is_successful()
                for edge, state in upstream_states.items()
        ]):
            new_state = Failed(
                "No upstream states can be mapped over.")  # type: State
            raise ENDRUN(new_state)
        elif not all([
                hasattr(state.result, "__getitem__")
                for edge, state in upstream_states.items() if
                state.is_successful() and not state.is_mapped() and edge.mapped
        ]):
            new_state = Failed(
                "At least one upstream state has an unmappable result.")
            raise ENDRUN(new_state)
        else:
            # compute and set n_map_states
            n_map_states = min(
                [
                    len(s.result) for e, s in upstream_states.items()
                    if e.mapped and s.is_successful() and not s.is_mapped()
                ] + [
                    s.n_map_states  # type: ignore
                    for e, s in upstream_states.items()
                    if e.mapped and s.is_mapped()
                ],
                default=0,
            )
            new_state = Mapped("Ready to proceed with mapping.",
                               n_map_states=n_map_states)
            raise ENDRUN(new_state)
コード例 #3
0
    def cache_result(self, state: State, inputs: Dict[str, Result]) -> State:
        """
        Caches the result of a successful task, if appropriate. Alternatively,
        if the task is failed, caches the inputs.

        Tasks are cached if:
            - task.cache_for is not None
            - the task state is Successful
            - the task state is not Skipped (which is a subclass of Successful)

        Args:
            - state (State): the current state of this task
            - inputs (Dict[str, Result], optional): a dictionary of inputs whose keys correspond
                to the task's `run()` arguments.

        Returns:
            - State: the state of the task after running the check

        """
        if (state.is_successful() and not state.is_skipped()
                and self.task.cache_for is not None):
            expiration = pendulum.now("utc") + self.task.cache_for
            cached_state = Cached(
                result=state._result,
                hashed_inputs={
                    key: tokenize(val.value)
                    for key, val in inputs.items()
                },
                cached_result_expiration=expiration,
                cached_parameters=prefect.context.get("parameters"),
                message=state.message,
            )
            return cached_state

        return state
コード例 #4
0
    def get_task_run_state(
        self,
        state: State,
        inputs: Dict[str, Result],
        timeout_handler: Optional[Callable],
    ) -> State:
        """
        Runs the task and traps any signals or errors it raises.
        Also checkpoints the result of a successful task, if `task.checkpoint` is `True`.

        Args:
            - state (State): the current state of this task
            - inputs (Dict[str, Result], optional): a dictionary of inputs whose keys correspond
                to the task's `run()` arguments.
            - timeout_handler (Callable, optional): function for timing out
                task execution, with call signature `handler(fn, *args, **kwargs)`. Defaults to
                `prefect.utilities.executors.main_thread_timeout`

        Returns:
            - State: the state of the task after running the check

        Raises:
            - signals.PAUSE: if the task raises PAUSE
            - ENDRUN: if the task is not ready to run
        """
        if not state.is_running():
            self.logger.debug(
                "Task '{name}': can't run task because it's not in a "
                "Running state; ending run.".format(name=prefect.context.get(
                    "task_full_name", self.task.name)))

            raise ENDRUN(state)

        try:
            self.logger.debug(
                "Task '{name}': Calling task.run() method...".format(
                    name=prefect.context.get("task_full_name",
                                             self.task.name)))
            timeout_handler = timeout_handler or main_thread_timeout
            raw_inputs = {k: r.value for k, r in inputs.items()}
            result = timeout_handler(self.task.run,
                                     timeout=self.task.timeout,
                                     **raw_inputs)

        # inform user of timeout
        except TimeoutError as exc:
            if prefect.context.get("raise_on_exception"):
                raise exc
            state = TimedOut("Task timed out during execution.",
                             result=exc,
                             cached_inputs=inputs)
            return state

        result = Result(value=result, result_handler=self.result_handler)
        state = Success(result=result, message="Task run succeeded.")

        if state.is_successful() and self.task.checkpoint is True:
            state._result.store_safe_value()

        return state
コード例 #5
0
def test_no_residential_etl_tasks_fail(etl_flow_state: State) -> None:
    """No etl tasks fail.

    Args:
        etl_flow_state (State): A Prefect State object containing flow run information
    """
    assert etl_flow_state.is_successful()
コード例 #6
0
def checkpoint_handler(task_runner: DSTaskRunner, old_state: State, new_state: State) -> State:
    """
    A handler designed to implement result caching by filename. If the result handler's ``read``
    method can be successfully run, this handler loads the result of that method as the task result
    and sets the task state to ``Success``. Similarly, on successful
    completion of the task, if the task was actually run and not loaded from cache, this handler
    will apply the result handler's ``write`` method to the task.

    Parameters
    ----------
    task_runner : instance of DSTaskRunner
        The task runner associated with the flow the handler is used in.
    old_state : instance of prefect.engine.state.State
        The current state of the task.
    new_state : instance of prefect.engine.state.State
        The expected new state of the task.

    Returns
    -------
    new_state : instance of prefect.engine.state.State
        The actual new state of the task.
    """
    if "PREFECT__FLOWS__CHECKPOINTING" in os.environ and os.environ["PREFECT__FLOWS__CHECKPOINTING"] == "true":
        raise AttributeError("Cannot use standard prefect checkpointing with this handler")

    if task_runner.result_handler is not None and old_state.is_pending() and new_state.is_running():
        if not hasattr(task_runner, "upstream_states"):
            raise TypeError(
                "upstream_states not found in task runner. Make sure to use "
                "prefect_ds.task_runner.DSTaskRunner."
            )
        input_mapping = _create_input_mapping(task_runner.upstream_states)
        try:
            data = task_runner.task.result_handler.read(input_mapping=input_mapping)
        except FileNotFoundError:
            return new_state
        except TypeError: # unexpected argument input_mapping
            raise TypeError(
                "Result handler could not accept input_mapping argument. "
                "Please ensure that you are using a handler from prefect_ds."
            )
        result = Result(value=data, result_handler=task_runner.task.result_handler)
        state = Success(result=result, message="Task loaded from disk.")
        return state

    if task_runner.result_handler is not None and old_state.is_running() and new_state.is_successful():
        input_mapping = _create_input_mapping(task_runner.upstream_states)
        task_runner.task.result_handler.write(new_state.result, input_mapping=input_mapping)

    return new_state
コード例 #7
0
    def get_task_run_state(
        self,
        state: State,
        inputs: Dict[str, Result],
        timeout_handler: Optional[Callable] = None,
    ) -> State:
        """
        Runs the task and traps any signals or errors it raises.
        Also checkpoints the result of a successful task, if `task.checkpoint` is `True`.

        Args:
            - state (State): the current state of this task
            - inputs (Dict[str, Result], optional): a dictionary of inputs whose keys correspond
                to the task's `run()` arguments.
            - timeout_handler (Callable, optional): function for timing out
                task execution, with call signature `handler(fn, *args, **kwargs)`. Defaults to
                `prefect.utilities.executors.timeout_handler`

        Returns:
            - State: the state of the task after running the check

        Raises:
            - signals.PAUSE: if the task raises PAUSE
            - ENDRUN: if the task is not ready to run
        """
        if not state.is_running():
            self.logger.debug(
                "Task '{name}': can't run task because it's not in a "
                "Running state; ending run.".format(name=prefect.context.get(
                    "task_full_name", self.task.name)))

            raise ENDRUN(state)

        try:
            self.logger.debug(
                "Task '{name}': Calling task.run() method...".format(
                    name=prefect.context.get("task_full_name",
                                             self.task.name)))
            timeout_handler = (timeout_handler
                               or prefect.utilities.executors.timeout_handler)
            raw_inputs = {k: r.value for k, r in inputs.items()}

            if getattr(self.task, "log_stdout", False):
                with redirect_stdout(
                        prefect.utilities.logging.RedirectToLog(
                            self.logger)):  # type: ignore
                    result = timeout_handler(self.task.run,
                                             timeout=self.task.timeout,
                                             **raw_inputs)
            else:
                result = timeout_handler(self.task.run,
                                         timeout=self.task.timeout,
                                         **raw_inputs)

        except KeyboardInterrupt:
            self.logger.debug("Interrupt signal raised, cancelling task run.")
            state = Cancelled(
                message="Interrupt signal raised, cancelling task run.")
            return state

        # inform user of timeout
        except TimeoutError as exc:
            if prefect.context.get("raise_on_exception"):
                raise exc
            state = TimedOut("Task timed out during execution.",
                             result=exc,
                             cached_inputs=inputs)
            return state

        except signals.LOOP as exc:
            new_state = exc.state
            assert isinstance(new_state, Looped)
            new_state.result = Result(value=new_state.result,
                                      result_handler=self.result_handler)
            new_state.cached_inputs = inputs
            new_state.message = exc.state.message or "Task is looping ({})".format(
                new_state.loop_count)
            return new_state

        result = Result(value=result, result_handler=self.result_handler)
        state = Success(result=result,
                        message="Task run succeeded.",
                        cached_inputs=inputs)

        ## checkpoint tasks if a result_handler is present, except for when the user has opted out by disabling checkpointing
        if (state.is_successful()
                and prefect.context.get("checkpointing") is True
                and self.task.checkpoint is not False
                and self.result_handler is not None):
            state._result.store_safe_value()

        return state