Example #1
0
def test_manual_only_with_resume_in_context():
    """manual only passes when resume = True in context"""
    with context(resume=True):
        assert triggers.manual_only(
            generate_states(success=1, failed=1, skipped=1))
Example #2
0
def test_manual_only_with_empty_set_and_resume_in_context():
    with context(resume=True):
        assert triggers.manual_only(set()) is True
Example #3
0
    def get_task_run_state(
        self,
        state: State,
        inputs: Dict[str, Result],
        timeout_handler: Optional[Callable],
    ) -> State:
        """
        Runs the task and traps any signals or errors it raises.
        Also checkpoints the result of a successful task, if `task.checkpoint` is `True`.

        Args:
            - state (State): the current state of this task
            - inputs (Dict[str, Result], optional): a dictionary of inputs whose keys correspond
                to the task's `run()` arguments.
            - timeout_handler (Callable, optional): function for timing out
                task execution, with call signature `handler(fn, *args, **kwargs)`. Defaults to
                `prefect.utilities.executors.main_thread_timeout`

        Returns:
            - State: the state of the task after running the check

        Raises:
            - signals.PAUSE: if the task raises PAUSE
            - ENDRUN: if the task is not ready to run
        """
        if not state.is_running():
            self.logger.debug(
                "Task '{name}': can't run task because it's not in a "
                "Running state; ending run.".format(
                    name=prefect.context.get("task_full_name", self.task.name)
                )
            )

            raise ENDRUN(state)

        try:
            self.logger.debug(
                "Task '{name}': Calling task.run() method...".format(
                    name=prefect.context.get("task_full_name", self.task.name)
                )
            )
            timeout_handler = timeout_handler or main_thread_timeout
            raw_inputs = {k: r.value for k, r in inputs.items()}
            with prefect.context(logger=self.task.logger):
                result = timeout_handler(
                    self.task.run, timeout=self.task.timeout, **raw_inputs
                )

        # inform user of timeout
        except TimeoutError as exc:
            if prefect.context.get("raise_on_exception"):
                raise exc
            state = TimedOut(
                "Task timed out during execution.", result=exc, cached_inputs=inputs
            )
            return state

        result = Result(value=result, result_handler=self.result_handler)
        state = Success(result=result, message="Task run succeeded.")

        ## only checkpoint tasks if running in cloud
        if (
            state.is_successful()
            and prefect.context.get("cloud") is True
            and self.task.checkpoint is True
        ):
            state._result.store_safe_value()

        return state
Example #4
0
    def run(
        self,
        state: State = None,
        task_states: Dict[Task, State] = None,
        return_tasks: Iterable[Task] = None,
        parameters: Dict[str, Any] = None,
        task_runner_state_handlers: Iterable[Callable] = None,
        executor: "prefect.executors.Executor" = None,
        context: Dict[str, Any] = None,
        task_contexts: Dict[Task, Dict[str, Any]] = None,
    ) -> State:
        """
        The main endpoint for FlowRunners.  Calling this method will perform all
        computations contained within the Flow and return the final state of the Flow.

        Args:
            - state (State, optional): starting state for the Flow. Defaults to
                `Pending`
            - task_states (dict, optional): dictionary of task states to begin
                computation with, with keys being Tasks and values their corresponding state
            - return_tasks ([Task], optional): list of Tasks to include in the
                final returned Flow state. Defaults to `None`
            - parameters (dict, optional): dictionary of any needed Parameter
                values, with keys being strings representing Parameter names and values being
                their corresponding values
            - task_runner_state_handlers (Iterable[Callable], optional): A list of state change
                handlers that will be provided to the task_runner, and called whenever a task
                changes state.
            - executor (Executor, optional): executor to use when performing
                computation; defaults to the executor specified in your prefect configuration
            - context (Dict[str, Any], optional): prefect.Context to use for execution
                to use for each Task run
            - task_contexts (Dict[Task, Dict[str, Any]], optional): contexts that will be
                provided to each task

        Returns:
            - State: `State` representing the final post-run state of the `Flow`.

        """
        self.logger.info("Beginning Flow run for '{}'".format(self.flow.name))

        # make copies to avoid modifying user inputs
        parameters = dict(parameters or {})
        task_states = dict(task_states or {})
        task_contexts = dict(task_contexts or {})
        # Default to global context, with provided context as override
        run_context = dict(prefect.context)
        run_context.update(context or {})

        if executor is None:
            # Use the executor on the flow, if configured
            executor = getattr(self.flow, "executor", None)
            if executor is None:
                executor = prefect.engine.get_default_executor_class()()

        self.logger.debug("Using executor type %s", type(executor).__name__)

        try:
            state, task_states, run_context, task_contexts = self.initialize_run(
                state=state,
                task_states=task_states,
                context=run_context,
                task_contexts=task_contexts,
                parameters=parameters,
            )

            with prefect.context(run_context):
                state = self.check_flow_is_pending_or_running(state)
                state = self.check_flow_reached_start_time(state)
                state = self.set_flow_to_running(state)
                state = self.get_flow_run_state(
                    state,
                    task_states=task_states,
                    task_contexts=task_contexts,
                    return_tasks=return_tasks,
                    task_runner_state_handlers=task_runner_state_handlers,
                    executor=executor,
                )

        except ENDRUN as exc:
            state = exc.state

        # All other exceptions are trapped and turned into Failed states
        except Exception as exc:
            self.logger.exception(
                "Unexpected error while running flow: {}".format(repr(exc)))
            if run_context.get("raise_on_exception"):
                raise exc
            new_state = Failed(
                message="Unexpected error while running flow: {}".format(
                    repr(exc)),
                result=exc,
            )
            state = self.handle_state_change(state or Pending(), new_state)

        return state
Example #5
0
def kube_secret():
    with set_temporary_config({"cloud.use_local_secrets": True}):
        with prefect.context(secrets=dict(KUBERNETES_API_KEY="test_key")):
            yield
Example #6
0
    def run(
        self,
        state: State = None,
        upstream_states: Dict[Edge, State] = None,
        context: Dict[str, Any] = None,
        executor: "prefect.engine.executors.Executor" = None,
    ) -> State:
        """
        The main endpoint for TaskRunners.  Calling this method will conditionally execute
        `self.task.run` with any provided inputs, assuming the upstream dependencies are in a
        state which allow this Task to run.

        Args:
            - state (State, optional): initial `State` to begin task run from;
                defaults to `Pending()`
            - upstream_states (Dict[Edge, State]): a dictionary
                representing the states of any tasks upstream of this one. The keys of the
                dictionary should correspond to the edges leading to the task.
            - context (dict, optional): prefect Context to use for execution
            - executor (Executor, optional): executor to use when performing
                computation; defaults to the executor specified in your prefect configuration

        Returns:
            - `State` object representing the final post-run state of the Task
        """
        upstream_states = upstream_states or {}
        context = context or {}
        map_index = context.setdefault("map_index", None)
        context["task_full_name"] = "{name}{index}".format(
            name=self.task.name,
            index=("" if map_index is None else "[{}]".format(map_index)),
        )

        if executor is None:
            executor = prefect.engine.get_default_executor_class()()

        # if mapped is true, this task run is going to generate a Mapped state. It won't
        # actually run, but rather spawn children tasks to map over its inputs. We
        # detect this case by checking for:
        #   - upstream edges that are `mapped`
        #   - no `map_index` (which indicates that this is the child task, not the parent)
        mapped = any([e.mapped for e in upstream_states]) and map_index is None
        task_inputs = {}  # type: Dict[str, Any]

        self.logger.info(
            "Task '{name}': Starting task run...".format(name=context["task_full_name"])
        )

        try:
            # initialize the run
            state, context = self.initialize_run(state, context)

            # run state transformation pipeline
            with prefect.context(context):

                # check to make sure the task is in a pending state
                state = self.check_task_is_ready(state)

                # check if the task has reached its scheduled time
                state = self.check_task_reached_start_time(state)

                # Tasks never run if the upstream tasks haven't finished
                state = self.check_upstream_finished(
                    state, upstream_states=upstream_states
                )

                # if the task is mapped, process the mapped children and exit
                if mapped:
                    state = self.run_mapped_task(
                        state=state,
                        upstream_states=upstream_states,
                        context=context,
                        executor=executor,
                    )

                    state = self.wait_for_mapped_task(state=state, executor=executor)

                    self.logger.debug(
                        "Task '{name}': task has been mapped; ending run.".format(
                            name=context["task_full_name"]
                        )
                    )
                    raise ENDRUN(state)

                # check if any upstream tasks skipped (and if we need to skip)
                state = self.check_upstream_skipped(
                    state, upstream_states=upstream_states
                )

                # retrieve task inputs from upstream and also explicitly passed inputs
                task_inputs = self.get_task_inputs(
                    state=state, upstream_states=upstream_states
                )

                # check to see if the task has a cached result
                state = self.check_task_is_cached(state, inputs=task_inputs)

                # check if the task's trigger passes
                # triggers can raise Pauses, which require task_inputs to be available for caching
                # so we run this after the previous step
                state = self.check_task_trigger(state, upstream_states=upstream_states)

                # set the task state to running
                state = self.set_task_to_running(state)

                # run the task
                state = self.get_task_run_state(
                    state, inputs=task_inputs, timeout_handler=executor.timeout_handler
                )

                # cache the output, if appropriate
                state = self.cache_result(state, inputs=task_inputs)

                # check if the task needs to be retried
                state = self.check_for_retry(state, inputs=task_inputs)

        # for pending signals, including retries and pauses we need to make sure the
        # task_inputs are set
        except (ENDRUN, signals.PrefectStateSignal) as exc:
            if exc.state.is_pending():
                exc.state.cached_inputs = task_inputs or {}  # type: ignore
            state = exc.state
            if not isinstance(exc, ENDRUN) and prefect.context.get(
                "raise_on_exception"
            ):
                raise exc

        except Exception as exc:
            msg = "Task '{name}': unexpected error while running task: {exc}".format(
                name=context["task_full_name"], exc=repr(exc)
            )
            self.logger.error(msg)
            state = Failed(message=msg, result=exc)
            if prefect.context.get("raise_on_exception"):
                raise exc

        self.logger.info(
            "Task '{name}': finished task run for task with final state: '{state}'".format(
                name=context["task_full_name"], state=type(state).__name__
            )
        )

        return state
Example #7
0
    def run(
        self,
        state: State = None,
        upstream_states: Dict[Edge, State] = None,
        context: Dict[str, Any] = None,
        is_mapped_parent: bool = False,
    ) -> State:
        """
        The main endpoint for TaskRunners.  Calling this method will conditionally execute
        `self.task.run` with any provided inputs, assuming the upstream dependencies are in a
        state which allow this Task to run.  Additionally, this method will wait and perform
        Task retries which are scheduled for <= 1 minute in the future.

        Args:
            - state (State, optional): initial `State` to begin task run from;
                defaults to `Pending()`
            - upstream_states (Dict[Edge, State]): a dictionary
                representing the states of any tasks upstream of this one. The keys of the
                dictionary should correspond to the edges leading to the task.
            - context (dict, optional): prefect Context to use for execution
            - is_mapped_parent (bool): a boolean indicating whether this task run is the run of
                a parent mapped task

        Returns:
            - `State` object representing the final post-run state of the Task
        """
        with prefect.context(context or {}):
            end_state = super().run(
                state=state,
                upstream_states=upstream_states,
                context=context,
                is_mapped_parent=is_mapped_parent,
            )
            while (end_state.is_retrying() or end_state.is_queued()) and (
                end_state.start_time <= pendulum.now("utc").add(minutes=10)  # type: ignore
            ):
                assert isinstance(end_state, (Retrying, Queued))
                naptime = max(
                    (end_state.start_time - pendulum.now("utc")).total_seconds(), 0
                )
                time.sleep(naptime)

                # send heartbeat on each iteration to let API know task run is still alive
                self.client.update_task_run_heartbeat(
                    task_run_id=prefect.context.get("task_run_id")
                )

                # mapped children will retrieve their latest info inside
                # initialize_run(), but we can load up-to-date versions
                # for all other task runs here
                if prefect.context.get("map_index") in [-1, None]:
                    task_run_info = self.client.get_task_run_info(
                        flow_run_id=prefect.context.get("flow_run_id"),
                        task_id=prefect.context.get("task_id"),
                        map_index=prefect.context.get("map_index"),
                    )

                    # if state was provided, keep it; otherwise use the one from db
                    context.update(task_run_version=task_run_info.version)  # type: ignore

                end_state = super().run(
                    state=end_state,
                    upstream_states=upstream_states,
                    context=context,
                    is_mapped_parent=is_mapped_parent,
                )

            return end_state
def test_deep_map_with_a_retry(monkeypatch):
    """
    Creates a situation in which a deeply-mapped Flow encounters a one-time error in one
    of the middle layers. Running the flow a second time should resolve the error.

    DOES NOT WORK WITH DASK EXECUTORS because of the need for shared state on second run
    """

    flow_run_id = str(uuid.uuid4())
    task_run_id_1 = str(uuid.uuid4())
    task_run_id_2 = str(uuid.uuid4())
    task_run_id_3 = str(uuid.uuid4())

    with prefect.Flow(name="test") as flow:
        t1 = plus_one.map([-1, 0, 1])
        t2 = invert_fail_once.map(t1)
        t3 = plus_one.map(t2)

    t2.max_retries = 1
    t2.retry_delay = datetime.timedelta(seconds=0)

    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=[
            TaskRun(id=task_run_id_1, task_id=t1.id, flow_run_id=flow_run_id),
            TaskRun(id=task_run_id_2, task_id=t2.id, flow_run_id=flow_run_id),
            TaskRun(id=task_run_id_3, task_id=t3.id, flow_run_id=flow_run_id),
        ] + [
            TaskRun(id=t.id, task_id=t.id, flow_run_id=flow_run_id)
            for t in flow.tasks if t not in [t1, t2, t3]
        ],
        monkeypatch=monkeypatch,
    )

    with prefect.context(flow_run_id=flow_run_id):
        CloudFlowRunner(flow=flow).run(executor=executor)

    assert client.flow_runs[flow_run_id].state.is_running()
    assert client.task_runs[task_run_id_1].state.is_mapped()
    assert client.task_runs[task_run_id_2].state.is_mapped()
    assert client.task_runs[task_run_id_3].state.is_mapped()

    # there should be a total of 4 task runs corresponding to each mapped task
    for t in [t1, t2, t3]:
        assert len([
            tr for tr in client.task_runs.values() if tr.task_id == t.id
        ]) == 4

    # t2's first child task should be retrying
    t2_0 = next(tr for tr in client.task_runs.values()
                if tr.task_id == t2.id and tr.map_index == 0)
    assert isinstance(t2_0.state, Retrying)

    # t3's first child task should be pending
    t3_0 = next(tr for tr in client.task_runs.values()
                if tr.task_id == t3.id and tr.map_index == 0)
    assert t3_0.state.is_pending()

    # RUN A SECOND TIME
    with prefect.context(flow_run_id=flow_run_id):
        CloudFlowRunner(flow=flow).run(executor=executor)

    # t2's first child task should be successful
    t2_0 = next(tr for tr in client.task_runs.values()
                if tr.task_id == t2.id and tr.map_index == 0)
    assert t2_0.state.is_successful()

    # t3's first child task should be successful
    t3_0 = next(tr for tr in client.task_runs.values()
                if tr.task_id == t3.id and tr.map_index == 0)
    assert t3_0.state.is_successful()
Example #9
0
 def run_with_ctx(*args: Any, _ctx_dict: dict, **kwargs: Any) -> Any:
     with prefect.context(_ctx_dict):
         return fn(*args, **kwargs)
Example #10
0
def test_retry_stores_default_run_count_in_context():
    with prefect.context(task_run_count=5):
        state = Retrying()
    assert state.run_count == 5
Example #11
0
def test_looped_stores_default_loop_count_in_context():
    with prefect.context(task_loop_count=5):
        state = Looped()
    assert state.loop_count == 5