Example #1
def test_graphql_repr_falls_back_to_dict_repr():
    gql = {"flow_run": Pending("test")}
    res = GraphQLResult(gql)
    assert repr(res) == """{'flow_run': <Pending: "test">}"""
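A quick illustration of the behavior under test: `GraphQLResult` is prefect's dot-accessible dict wrapper, so the same data is reachable by key or by attribute, and its `repr` falls back to the underlying dict form. A minimal sketch, assuming the usual `prefect.utilities.graphql` import path:

```python
# A minimal sketch, assuming GraphQLResult comes from prefect.utilities.graphql
# and wraps a plain dict with attribute-style access.
from prefect.engine.state import Pending
from prefect.utilities.graphql import GraphQLResult

res = GraphQLResult({"flow_run": Pending("test")})
assert res.flow_run is res["flow_run"]  # attribute and key access return the same object
```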
Example #2
    def get_flow_run_state(
        self,
        state: State,
        task_states: Dict[Task, State],
        task_contexts: Dict[Task, Dict[str, Any]],
        return_tasks: Set[Task],
        task_runner_state_handlers: Iterable[Callable],
        executor: "prefect.engine.executors.base.Executor",
    ) -> State:
        """
        Runs the flow.

        Args:
            - state (State): starting state for the Flow. Defaults to
                `Pending`
            - task_states (dict): dictionary of task states to begin
                computation with, with keys being Tasks and values their corresponding state
            - task_contexts (Dict[Task, Dict[str, Any]]): contexts that will be provided to
                each task
            - return_tasks ([Task], optional): list of Tasks to include in the
                final returned Flow state. Defaults to `None`
            - task_runner_state_handlers (Iterable[Callable]): A list of state change handlers
                that will be provided to the task_runner, and called whenever a task changes
                state.
            - executor (Executor): executor to use when performing computation; defaults to the
                executor provided in your prefect configuration

        Returns:
            - State: `State` representing the final post-run state of the `Flow`.

        """
        # this dictionary is used for tracking the states of "children" mapped tasks;
        # when running on Dask, we want to avoid serializing futures, so instead of
        # storing child task states in the `map_states` attribute, we store them in
        # this dictionary and attach them to the Mapped state only once they are resolved
        mapped_children = dict()  # type: Dict[Task, list]

        if not state.is_running():
            self.logger.info("Flow is not in a Running state.")
            raise ENDRUN(state)

        if return_tasks is None:
            return_tasks = set()
        if set(return_tasks).difference(self.flow.tasks):
            raise ValueError(
                "Some tasks in return_tasks were not found in the flow.")

        def extra_context(task: Task, task_index: int = None) -> dict:
            return {
                "task_name": task.name,
                "task_tags": task.tags,
                "task_index": task_index,
            }

        # -- process each task in order

        with executor.start():

            for task in self.flow.sorted_tasks():
                task_state = task_states.get(task)

                # if a task is a constant task, we already know its return value
                # no need to use up resources by running it through a task runner
                if task_state is None and isinstance(
                        task, prefect.tasks.core.constants.Constant):
                    task_states[task] = task_state = Success(result=task.value)

                # if the state is finished, don't run the task, just use the provided state;
                # if the state is cached / mapped, we still want to run the task runner
                # pipeline steps to either ensure the cache is still valid or to recreate
                # the mapped pipeline for possible retries
                if (isinstance(task_state, State) and task_state.is_finished()
                        and not task_state.is_cached()
                        and not task_state.is_mapped()):
                    continue

                upstream_states = {}  # type: Dict[Edge, State]

                # this dictionary is used exclusively for "reduce" tasks; in particular, we
                # store the states / futures corresponding to the upstream children and, if
                # running on Dask, let Dask resolve them at the appropriate time.
                # Note: this is an optimization that allows Dask to resolve the mapped
                # dependencies by "elevating" them to a function argument.
                upstream_mapped_states = {}  # type: Dict[Edge, list]

                # -- process each edge to the task
                for edge in self.flow.edges_to(task):

                    # load the upstream task states (supplying Pending as a default)
                    upstream_states[edge] = task_states.get(
                        edge.upstream_task,
                        Pending(message="Task state not available."))

                    # if the edge is flattened and not the result of a map, then we
                    # preprocess the upstream states. If it IS the result of a
                    # map, it will be handled in `prepare_upstream_states_for_mapping`
                    if edge.flattened:
                        if not isinstance(upstream_states[edge], Mapped):
                            upstream_states[edge] = executor.submit(
                                executors.flatten_upstream_state,
                                upstream_states[edge])

                    # this checks whether the task is a "reduce" task for a mapped pipeline
                    # and if so, collects the appropriate upstream children
                    if not edge.mapped and isinstance(upstream_states[edge],
                                                      Mapped):
                        children = mapped_children.get(edge.upstream_task, [])

                        # if the edge is flattened, then we need to wait for the mapped children
                        # to complete and then flatten them
                        if edge.flattened:
                            children = executors.flatten_mapped_children(
                                mapped_children=children,
                                executor=executor,
                            )

                        upstream_mapped_states[edge] = children

                # augment edges with upstream constants
                for key, val in self.flow.constants[task].items():
                    edge = Edge(
                        upstream_task=prefect.tasks.core.constants.Constant(val),
                        downstream_task=task,
                        key=key,
                    )
                    upstream_states[edge] = Success(
                        "Auto-generated constant value",
                        result=ConstantResult(value=val),
                    )

                # handle mapped tasks
                if any([edge.mapped for edge in upstream_states.keys()]):

                    # wait on upstream states to determine the width of the pipeline
                    # this is the key to depth-first execution
                    upstream_states = executor.wait(
                        {e: state for e, state in upstream_states.items()}
                    )
                    # we submit the task to the task runner to determine if
                    # we can proceed with mapping - if the new task state is not a Mapped
                    # state then we don't proceed
                    task_states[task] = executor.wait(
                        executor.submit(
                            run_task,
                            task=task,
                            state=task_state,  # original state
                            upstream_states=upstream_states,
                            context=dict(prefect.context,
                                         **task_contexts.get(task, {})),
                            flow_result=self.flow.result,
                            task_runner_cls=self.task_runner_cls,
                            task_runner_state_handlers=task_runner_state_handlers,
                            upstream_mapped_states=upstream_mapped_states,
                            is_mapped_parent=True,
                            extra_context=extra_context(task),
                        ))

                    # either way, we should now have enough resolved states to restructure
                    # the upstream states into a list of upstream state dictionaries to iterate over
                    list_of_upstream_states = executors.prepare_upstream_states_for_mapping(
                        task_states[task],
                        upstream_states,
                        mapped_children,
                        executor=executor,
                    )

                    submitted_states = []

                    for idx, states in enumerate(list_of_upstream_states):
                        # if we are on a future rerun of a partially complete flow run,
                        # there might be mapped children in a retrying state; this check
                        # looks into the current task state's map_states for such info
                        if (isinstance(task_state, Mapped)
                                and len(task_state.map_states) >= idx + 1):
                            current_state = task_state.map_states[idx]  # type: Optional[State]
                        elif isinstance(task_state, Mapped):
                            current_state = None
                        else:
                            current_state = task_state

                        # this is where each child is submitted for actual work
                        submitted_states.append(
                            executor.submit(
                                run_task,
                                task=task,
                                state=current_state,
                                upstream_states=states,
                                context=dict(
                                    prefect.context,
                                    **task_contexts.get(task, {}),
                                    map_index=idx,
                                ),
                                flow_result=self.flow.result,
                                task_runner_cls=self.task_runner_cls,
                                task_runner_state_handlers=task_runner_state_handlers,
                                upstream_mapped_states=upstream_mapped_states,
                                extra_context=extra_context(task,
                                                            task_index=idx),
                            ))
                    if isinstance(task_states.get(task), Mapped):
                        mapped_children[task] = submitted_states  # type: ignore

                else:
                    task_states[task] = executor.submit(
                        run_task,
                        task=task,
                        state=task_state,
                        upstream_states=upstream_states,
                        context=dict(prefect.context,
                                     **task_contexts.get(task, {})),
                        flow_result=self.flow.result,
                        task_runner_cls=self.task_runner_cls,
                        task_runner_state_handlers=task_runner_state_handlers,
                        upstream_mapped_states=upstream_mapped_states,
                        extra_context=extra_context(task),
                    )

            # ---------------------------------------------
            # Collect results
            # ---------------------------------------------

            # terminal tasks determine if the flow is finished
            terminal_tasks = self.flow.terminal_tasks()

            # reference tasks determine flow state
            reference_tasks = self.flow.reference_tasks()

            # wait until all terminal tasks are finished
            final_tasks = terminal_tasks.union(reference_tasks).union(
                return_tasks)
            final_states = executor.wait({
                t: task_states.get(t, Pending("Task not evaluated by FlowRunner."))
                for t in final_tasks
            })

            # also wait for any children of Mapped tasks to finish, and add them
            # to the dictionary to determine flow state
            all_final_states = final_states.copy()
            for t, s in list(final_states.items()):
                if s.is_mapped():
                    # ensure we wait for any mapped children to complete
                    if t in mapped_children:
                        s.map_states = executor.wait(mapped_children[t])
                    s.result = [ms.result for ms in s.map_states]
                    all_final_states[t] = s.map_states

            assert isinstance(final_states, dict)

        key_states = set(
            flatten_seq([all_final_states[t] for t in reference_tasks]))
        terminal_states = set(
            flatten_seq([all_final_states[t] for t in terminal_tasks]))
        return_states = {t: final_states[t] for t in return_tasks}

        state = self.determine_final_state(
            state=state,
            key_states=key_states,
            return_states=return_states,
            terminal_states=terminal_states,
        )

        return state
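As a usage illustration, here is a sketch mirroring the invocation pattern in Example #15 below, assuming a `LocalExecutor` and a trivial flow:

```python
# A minimal sketch: get_flow_run_state must receive a Running state,
# otherwise it raises ENDRUN, per the guard at the top of the method.
from prefect import Flow, Task
from prefect.engine.executors import LocalExecutor
from prefect.engine.flow_runner import FlowRunner
from prefect.engine.state import Running

flow = Flow(name="demo", tasks=[Task()])
final_state = FlowRunner(flow=flow).get_flow_run_state(
    state=Running(),
    task_states={},
    task_contexts={},
    return_tasks=set(),
    task_runner_state_handlers=[],
    executor=LocalExecutor(),
)
assert final_state.is_successful()
```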
Example #3
class FlowRunner(Runner):
    """
    FlowRunners handle the execution of Flows and determine the State of a Flow
    before, during and after the Flow is run.

    In particular, through the FlowRunner you can specify which tasks should be
    the first tasks to run, which tasks should be returned after the Flow is finished,
    and what states each task should be initialized with.

    Args:
        - flow (Flow): the `Flow` to be run
        - task_runner_cls (TaskRunner, optional): The class used for running
            individual Tasks. Defaults to [TaskRunner](task_runner.html)
        - state_handlers (Iterable[Callable], optional): A list of state change handlers
            that will be called whenever the flow changes state, providing an
            opportunity to inspect or modify the new state. The handler
            will be passed the flow runner instance, the old (prior) state, and the new
            (current) state, with the following signature:
            `state_handler(fr: FlowRunner, old_state: State, new_state: State) -> Optional[State]`
            If multiple functions are passed, then the `new_state` argument will be the
            result of the previous handler.

    Note: new FlowRunners are initialized within the call to `Flow.run()`; in general,
    that is the endpoint through which FlowRunners are most frequently used.

    Example:
    ```python
    @task
    def say_hello():
        print('hello')

    with Flow("My Flow") as f:
        say_hello()

    fr = FlowRunner(flow=f)
    flow_state = fr.run()
    ```
    """

    def __init__(
        self,
        flow: Flow,
        task_runner_cls: type = None,
        state_handlers: Iterable[Callable] = None,
    ):
        self.flow = flow
        if task_runner_cls is None:
            task_runner_cls = prefect.engine.get_default_task_runner_class()
        self.task_runner_cls = task_runner_cls
        super().__init__(state_handlers=state_handlers)

    def __repr__(self) -> str:
        return "<{}: {}>".format(type(self).__name__, self.flow.name)

    def call_runner_target_handlers(self, old_state: State, new_state: State) -> State:
        """
        A special state handler that the FlowRunner uses to call its flow's state handlers.
        This method is called as part of the base Runner's `handle_state_change()` method.

        Args:
            - old_state (State): the old (previous) state
            - new_state (State): the new (current) state

        Returns:
            - State: the new state
        """
        self.logger.debug(
            "Flow '{name}': Handling state change from {old} to {new}".format(
                name=self.flow.name,
                old=type(old_state).__name__,
                new=type(new_state).__name__,
            )
        )
        for handler in self.flow.state_handlers:
            new_state = handler(self.flow, old_state, new_state) or new_state

        return new_state

    def initialize_run(  # type: ignore
        self,
        state: Optional[State],
        task_states: Dict[Task, State],
        context: Dict[str, Any],
        task_contexts: Dict[Task, Dict[str, Any]],
        parameters: Dict[str, Any],
    ) -> FlowRunnerInitializeResult:
        """
        Initializes the Task run by initializing state and context appropriately.

        If the provided state is a Submitted state, the state it wraps is extracted.

        Args:
            - state (Optional[State]): the initial state of the run
            - task_states (Dict[Task, State]): a dictionary of any initial task states
            - context (Dict[str, Any], optional): prefect.Context to use for the
                execution of each Task run
            - task_contexts (Dict[Task, Dict[str, Any]], optional): contexts that will be
                provided to each task
            - parameters (dict): the parameter values for the run

        Returns:
            - NamedTuple: a tuple of initialized objects:
                `(state, task_states, context, task_contexts)`
        """

        # overwrite context parameters one-by-one
        context_params = context.setdefault("parameters", {})
        for p in self.flow.parameters():
            if not p.required:
                context_params.setdefault(p.name, p.default)
        for param, value in (parameters or {}).items():
            context_params[param] = value

        context.update(flow_name=self.flow.name)
        context.setdefault("scheduled_start_time", pendulum.now("utc"))

        # add various formatted dates to context
        now = pendulum.now("utc")
        dates = {
            "date": now,
            "today": now.strftime("%Y-%m-%d"),
            "yesterday": now.add(days=-1).strftime("%Y-%m-%d"),
            "tomorrow": now.add(days=1).strftime("%Y-%m-%d"),
            "today_nodash": now.strftime("%Y%m%d"),
            "yesterday_nodash": now.add(days=-1).strftime("%Y%m%d"),
            "tomorrow_nodash": now.add(days=1).strftime("%Y%m%d"),
        }
        for key, val in dates.items():
            context.setdefault(key, val)

        for task in self.flow.tasks:
            task_contexts.setdefault(task, {}).update(
                task_name=task.name, task_slug=self.flow.slugs[task]
            )

        state, context = super().initialize_run(state=state, context=context)
        return FlowRunnerInitializeResult(
            state=state,
            task_states=task_states,
            context=context,
            task_contexts=task_contexts,
        )

    def run(
        self,
        state: State = None,
        task_states: Dict[Task, State] = None,
        return_tasks: Iterable[Task] = None,
        parameters: Dict[str, Any] = None,
        task_runner_state_handlers: Iterable[Callable] = None,
        executor: "prefect.executors.Executor" = None,
        context: Dict[str, Any] = None,
        task_contexts: Dict[Task, Dict[str, Any]] = None,
    ) -> State:
        """
        The main endpoint for FlowRunners.  Calling this method will perform all
        computations contained within the Flow and return the final state of the Flow.

        Args:
            - state (State, optional): starting state for the Flow. Defaults to
                `Pending`
            - task_states (dict, optional): dictionary of task states to begin
                computation with, with keys being Tasks and values their corresponding state
            - return_tasks ([Task], optional): list of Tasks to include in the
                final returned Flow state. Defaults to `None`
            - parameters (dict, optional): dictionary of any needed Parameter
                values, with keys being strings representing Parameter names and values being
                their corresponding values
            - task_runner_state_handlers (Iterable[Callable], optional): A list of state change
                handlers that will be provided to the task_runner, and called whenever a task
                changes state.
            - executor (Executor, optional): executor to use when performing
                computation; defaults to the executor specified in your prefect configuration
            - context (Dict[str, Any], optional): prefect.Context to use for the
                execution of each Task run
            - task_contexts (Dict[Task, Dict[str, Any]], optional): contexts that will be
                provided to each task

        Returns:
            - State: `State` representing the final post-run state of the `Flow`.

        """
        self.logger.info("Beginning Flow run for '{}'".format(self.flow.name))

        # make copies to avoid modifying user inputs
        task_states = dict(task_states or {})
        context = dict(context or {})
        task_contexts = dict(task_contexts or {})
        parameters = dict(parameters or {})
        if executor is None:
            # Use the executor on the flow, if configured
            executor = getattr(self.flow, "executor", None)
            if executor is None:
                executor = prefect.engine.get_default_executor_class()()

        self.logger.debug("Using executor type %s", type(executor).__name__)

        try:
            state, task_states, context, task_contexts = self.initialize_run(
                state=state,
                task_states=task_states,
                context=context,
                task_contexts=task_contexts,
                parameters=parameters,
            )

            with prefect.context(context):
                state = self.check_flow_is_pending_or_running(state)
                state = self.check_flow_reached_start_time(state)
                state = self.set_flow_to_running(state)
                state = self.get_flow_run_state(
                    state,
                    task_states=task_states,
                    task_contexts=task_contexts,
                    return_tasks=return_tasks,
                    task_runner_state_handlers=task_runner_state_handlers,
                    executor=executor,
                )

        except ENDRUN as exc:
            state = exc.state

        # All other exceptions are trapped and turned into Failed states
        except Exception as exc:
            self.logger.exception(
                "Unexpected error while running flow: {}".format(repr(exc))
            )
            if prefect.context.get("raise_on_exception"):
                raise exc
            new_state = Failed(
                message="Unexpected error while running flow: {}".format(repr(exc)),
                result=exc,
            )
            state = self.handle_state_change(state or Pending(), new_state)

        return state
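A short end-to-end sketch of `run()` (the flow is hypothetical; the pattern follows the class docstring above and Example #18 below):

```python
# A minimal sketch: seed the runner, request a task's result back via
# return_tasks, and read it off the final state's result mapping.
import prefect
from prefect import Flow
from prefect.engine.flow_runner import FlowRunner

@prefect.task
def add_one(x):
    return x + 1

with Flow("sketch") as flow:
    total = add_one(1)

state = FlowRunner(flow=flow).run(return_tasks=[total])
assert state.is_successful()
assert state.result[total].result == 2
```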
Example #4
async def _create_flow_run(
    flow_id: str = None,
    parameters: dict = None,
    context: dict = None,
    scheduled_start_time: datetime.datetime = None,
    flow_run_name: str = None,
    version_group_id: str = None,
) -> Any:
    """
    Creates a new flow run for an existing flow.

    Args:
        - flow_id (str): A string representing the current flow id
        - parameters (dict, optional): A dictionary of parameters that were specified for the flow
        - context (dict, optional): A dictionary of context values
        - scheduled_start_time (datetime.datetime, optional): When the flow_run should be scheduled to run. If `None`,
            defaults to right now. Must be UTC.
        - flow_run_name (str, optional): An optional string representing this flow run
        - version_group_id (str, optional): An optional version group ID; if provided, will run the most
            recent unarchived version of the group
    """

    if flow_id is None and version_group_id is None:
        raise ValueError(
            "One of flow_id or version_group_id must be provided.")

    scheduled_start_time = scheduled_start_time or pendulum.now()

    if flow_id:
        where_clause = {"id": {"_eq": flow_id}}
    elif version_group_id:
        where_clause = {
            "version_group_id": {"_eq": version_group_id},
            "archived": {"_eq": False},
        }

    flow = await models.Flow.where(where=where_clause).first(
        {
            "id": True,
            "archived": True,
            "tenant_id": True,
            "parameters": True,
            "flow_group_id": True,
            "flow_group": {
                "default_parameters": True
            },
        },
        order_by={"version": EnumValue("desc")},
    )  # type: Any

    if not flow:
        msg = (f"Flow {flow_id} not found" if flow_id else
               f"Version group {version_group_id} has no unarchived flows.")
        raise exceptions.NotFound(msg)
    elif flow.archived:
        raise ValueError(f"Flow {flow.id} is archived.")

    # check parameters
    run_parameters = flow.flow_group.default_parameters
    run_parameters.update((parameters or {}))
    required_parameters = [p["name"] for p in flow.parameters if p["required"]]
    missing = set(required_parameters).difference(run_parameters)
    if missing:
        raise ValueError(f"Required parameters were not supplied: {missing}")
    state = Scheduled(message="Flow run scheduled.",
                      start_time=scheduled_start_time)

    run = models.FlowRun(
        tenant_id=flow.tenant_id,
        flow_id=flow_id or flow.id,
        parameters=run_parameters,
        context=context or {},
        scheduled_start_time=scheduled_start_time,
        name=flow_run_name or names.generate_slug(2),
        states=[
            models.FlowRunState(
                tenant_id=flow.tenant_id,
                **models.FlowRunState.fields_from_state(
                    Pending(message="Flow run created")),
            )
        ],
    )

    flow_run_id = await run.insert()

    # apply the flow run's initial state via `set_flow_run_state`
    await api.states.set_flow_run_state(flow_run_id=flow_run_id, state=state)

    return flow_run_id
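A hedged sketch of driving this coroutine (the caller and IDs are hypothetical; only the signature above is assumed):

```python
# A minimal sketch: schedule a run for an existing flow and get its id back.
import asyncio

async def schedule_demo_run(flow_id: str) -> str:
    flow_run_id = await _create_flow_run(
        flow_id=flow_id,
        parameters={"x": 1},        # hypothetical parameter values
        flow_run_name="demo-run",   # hypothetical run name
    )
    return flow_run_id

# asyncio.run(schedule_demo_run("<existing-flow-id>"))
```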
Example #5

@pytest.mark.parametrize(
    "state_check",
    [
        dict(state=Cancelled(), assert_true={"is_finished"}),
        dict(state=Cached(),
             assert_true={"is_cached", "is_finished", "is_successful"}),
        dict(state=ClientFailed(), assert_true={"is_meta_state"}),
        dict(state=Failed(), assert_true={"is_finished", "is_failed"}),
        dict(state=Finished(), assert_true={"is_finished"}),
        dict(state=Looped(), assert_true={"is_finished", "is_looped"}),
        dict(state=Mapped(),
             assert_true={"is_finished", "is_mapped", "is_successful"}),
        dict(state=Paused(), assert_true={"is_pending", "is_scheduled"}),
        dict(state=Pending(), assert_true={"is_pending"}),
        dict(state=Queued(), assert_true={"is_meta_state", "is_queued"}),
        dict(state=Resume(), assert_true={"is_pending", "is_scheduled"}),
        dict(state=Retrying(),
             assert_true={"is_pending", "is_scheduled", "is_retrying"}),
        dict(state=Running(), assert_true={"is_running"}),
        dict(state=Scheduled(), assert_true={"is_pending", "is_scheduled"}),
        dict(state=Skipped(),
             assert_true={"is_finished", "is_successful", "is_skipped"}),
        dict(state=Submitted(), assert_true={"is_meta_state", "is_submitted"}),
        dict(state=Success(), assert_true={"is_finished", "is_successful"}),
        dict(state=TimedOut(), assert_true={"is_finished", "is_failed"}),
        dict(state=TriggerFailed(), assert_true={"is_finished", "is_failed"}),
    ],
)
def test_state_is_methods(state_check):
    # reconstructed body (a sketch): every `is_*` predicate on the state should
    # be False unless its name appears in the parametrized `assert_true` set
    state = state_check["state"]
    assert_true = state_check["assert_true"]

    for attr in dir(state):
        if attr.startswith("is_") and callable(getattr(state, attr)):
            assert getattr(state, attr)() == (attr in assert_true)
Example #6
def test_states_with_mutable_attrs_are_hashable():
    assert {State(result=[1]), Pending(cached_inputs=dict(a=1))}
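Hashability matters because the flow runner collects states into sets (`key_states`, `terminal_states`) when determining the final flow state. A minimal sketch, assuming `State` hashing is identity-based rather than derived from the mutable attributes:

```python
# A minimal sketch: states with mutable payloads can still populate sets,
# which get_flow_run_state relies on for key_states / terminal_states.
from prefect.engine.state import Pending, State, Success

states = {Success(result=[1, 2]), Pending(cached_inputs=dict(a=1))}
assert len(states) == 2  # two distinct instances, despite mutable contents
assert all(isinstance(s, State) for s in states)
```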
Example #7
def initialize_run(self, *args, **kwargs):
    raise ENDRUN(state=Pending())

class TestTaskRunStates:
    async def test_set_task_run_state(self, task_run_id):
        result = await api.states.set_task_run_state(
            task_run_id=task_run_id, state=Failed()
        )

        assert result.task_run_id == task_run_id

        query = await models.TaskRun.where(id=task_run_id).first(
            {"version", "state", "serialized_state"}
        )

        assert query.version == 2
        assert query.state == "Failed"
        assert query.serialized_state["type"] == "Failed"

    @pytest.mark.parametrize("state", [Failed(), Success()])
    async def test_set_task_run_state_fails_with_wrong_task_run_id(self, state):
        with pytest.raises(ValueError, match="State update failed"):
            await api.states.set_task_run_state(
                task_run_id=str(uuid.uuid4()), state=state
            )

    @pytest.mark.parametrize(
        "state", [s() for s in State.children() if not s().is_running()]
    )
    async def test_state_does_not_set_heartbeat_unless_running(
        self, state, task_run_id
    ):
        task_run = await models.TaskRun.where(id=task_run_id).first({"heartbeat"})
        assert task_run.heartbeat is None

        await api.states.set_task_run_state(task_run_id=task_run_id, state=state)

        task_run = await models.TaskRun.where(id=task_run_id).first({"heartbeat"})
        assert task_run.heartbeat is None

    async def test_running_state_sets_heartbeat(self, task_run_id, running_flow_run_id):
        task_run = await models.TaskRun.where(id=task_run_id).first({"heartbeat"})
        assert task_run.heartbeat is None

        dt = pendulum.now("UTC")
        await api.states.set_task_run_state(task_run_id=task_run_id, state=Running())

        task_run = await models.TaskRun.where(id=task_run_id).first({"heartbeat"})
        assert task_run.heartbeat > dt

    async def test_trigger_failed_state_does_not_set_end_time(self, task_run_id):
        await api.states.set_task_run_state(
            task_run_id=task_run_id, state=TriggerFailed()
        )
        task_run_info = await models.TaskRun.where(id=task_run_id).first(
            {"id", "start_time", "end_time"}
        )
        assert not task_run_info.start_time
        assert not task_run_info.end_time

    @pytest.mark.parametrize(
        "state",
        [s() for s in State.children() if s not in _MetaState.children()],
        ids=[s.__name__ for s in State.children() if s not in _MetaState.children()],
    )
    async def test_setting_a_task_run_state_pulls_cached_inputs_if_possible(
        self, task_run_id, state, running_flow_run_id
    ):

        res1 = SafeResult(1, result_handler=JSONResultHandler())
        res2 = SafeResult({"z": 2}, result_handler=JSONResultHandler())
        complex_result = {"x": res1, "y": res2}
        cached_state = Failed(cached_inputs=complex_result)
        await models.TaskRun.where(id=task_run_id).update(
            set=dict(serialized_state=cached_state.serialize())
        )

        # attempt to transition the task run to the parametrized state
        await api.states.set_task_run_state(task_run_id=task_run_id, state=state)

        task_run = await models.TaskRun.where(id=task_run_id).first(
            {"serialized_state"}
        )

        # ensure the state change took place
        assert task_run.serialized_state["type"] == type(state).__name__
        assert task_run.serialized_state["cached_inputs"]["x"]["value"] == 1
        assert task_run.serialized_state["cached_inputs"]["y"]["value"] == {"z": 2}

    @pytest.mark.parametrize(
        "state",
        [
            s(cached_inputs=None)
            for s in State.children()
            if s not in _MetaState.children()
        ],
        ids=[s.__name__ for s in State.children() if s not in _MetaState.children()],
    )
    async def test_task_runs_with_null_cached_inputs_do_not_overwrite_cache(
        self, state, task_run_id, running_flow_run_id
    ):

        await api.states.set_task_run_state(task_run_id=task_run_id, state=state)
        # set up a Retrying state with non-null cached inputs
        res1 = SafeResult(1, result_handler=JSONResultHandler())
        res2 = SafeResult({"z": 2}, result_handler=JSONResultHandler())
        complex_result = {"x": res1, "y": res2}
        cached_state = Retrying(cached_inputs=complex_result)
        await api.states.set_task_run_state(task_run_id=task_run_id, state=cached_state)
        run = await models.TaskRun.where(id=task_run_id).first({"serialized_state"})

        assert run.serialized_state["cached_inputs"]["x"]["value"] == 1
        assert run.serialized_state["cached_inputs"]["y"]["value"] == {"z": 2}

    @pytest.mark.parametrize(
        "state_cls", [s for s in State.children() if s not in _MetaState.children()]
    )
    async def test_task_runs_cached_inputs_give_preference_to_new_cached_inputs(
        self, state_cls, task_run_id, running_flow_run_id
    ):

        # set up an initial state with (old) cached inputs
        res1 = SafeResult(1, result_handler=JSONResultHandler())
        res2 = SafeResult({"a": 2}, result_handler=JSONResultHandler())
        complex_result = {"b": res1, "c": res2}
        cached_state = state_cls(cached_inputs=complex_result)
        await api.states.set_task_run_state(task_run_id=task_run_id, state=cached_state)
        # set up a Retrying state with non-null cached inputs
        res1 = SafeResult(1, result_handler=JSONResultHandler())
        res2 = SafeResult({"z": 2}, result_handler=JSONResultHandler())
        complex_result = {"x": res1, "y": res2}
        cached_state = Retrying(cached_inputs=complex_result)
        await api.states.set_task_run_state(task_run_id=task_run_id, state=cached_state)
        run = Box(
            await models.TaskRun.where(id=task_run_id).first({"serialized_state"})
        )

        # verify that we have cached inputs, and that preference has been given to the new
        # cached inputs
        assert run.serialized_state.cached_inputs
        assert run.serialized_state.cached_inputs.x.value == 1
        assert run.serialized_state.cached_inputs.y.value == {"z": 2}

    @pytest.mark.parametrize(
        "flow_run_state", [Pending(), Running(), Failed(), Success()]
    )
    async def test_running_states_can_not_be_set_if_flow_run_is_not_running(
        self, flow_run_id, task_run_id, flow_run_state
    ):

        await api.states.set_flow_run_state(
            flow_run_id=flow_run_id, state=flow_run_state
        )

        set_running_coroutine = api.states.set_task_run_state(
            task_run_id=task_run_id, state=Running()
        )

        if flow_run_state.is_running():
            assert await set_running_coroutine
            assert (
                await models.TaskRun.where(id=task_run_id).first({"state"})
            ).state == "Running"
        else:

            with pytest.raises(ValueError, match="is not in a running state"):
                await set_running_coroutine
            assert (
                await models.TaskRun.where(id=task_run_id).first({"state"})
            ).state != "Running"
Example #9
def test_state_type_methods_with_pending_state(self):
    state = Pending()
    assert state.is_pending()
    assert not state.is_retrying()
    assert not state.is_cached()
    assert not state.is_running()
    assert not state.is_finished()
    assert not state.is_skipped()
    assert not state.is_scheduled()
    assert not state.is_successful()
    assert not state.is_failed()
    assert not state.is_mapped()
    assert not state.is_meta_state()
Example #10
def test_flow_run_handles_error_states_when_initial_state_is_provided():
    with Flow(name="test") as f:
        res = AddTask()("5", 5)
    state = f.run(state=Pending())
    assert state.is_failed()
Example #11
def test_graphql_repr_falls_back_to_dict_repr():
    gql = {"flow_run": Pending("test")}
    res = as_nested_dict(gql, GraphQLResult)
    assert repr(res) == """{'flow_run': Pending("test")}"""
Example #12
async def get_or_create_mapped_task_run_children(
    flow_run_id: str, task_id: str, max_map_index: int
) -> List[str]:
    """
    Creates and/or retrieves mapped child task runs for a given flow run and task.

    Args:
        - flow_run_id (str): the flow run associated with the parent task run
        - task_id (str): the task ID to create and/or retrieve
        - max_map_index (int): the largest map index to create; e.g., a value of 2 yields 3 mapped children
    """
    # grab task info
    task = await models.Task.where(id=task_id).first({"cache_key", "tenant_id"})
    # generate task runs to upsert
    task_runs = [
        models.TaskRun(
            tenant_id=task.tenant_id,
            flow_run_id=flow_run_id,
            task_id=task_id,
            map_index=i,
            cache_key=task.cache_key,
        )
        for i in range(max_map_index + 1)
    ]
    # upsert the mapped children
    task_runs = (
        await models.TaskRun().insert_many(
            objects=task_runs,
            on_conflict=dict(
                constraint="task_run_unique_identifier_key",
                update_columns=["cache_key"],
            ),
            selection_set={"returning": {"id", "map_index"}},
        )
    )["returning"]
    task_runs.sort(key=lambda task_run: task_run.map_index)
    # get task runs without states
    stateless_runs = await models.TaskRun.where(
        {
            "_and": [
                {"flow_run_id": {"_eq": flow_run_id}},
                {"task_id": {"_eq": task_id}},
                {"state_id": {"_is_null": True}},
            ]
        }
    ).get({"id", "map_index", "version"})
    # create and insert states for stateless task runs
    task_run_states = [
        models.TaskRunState(
            tenant_id=task.tenant_id,
            task_run_id=task_run.id,
            **models.TaskRunState.fields_from_state(
                Pending(message="Task run created")
            ),
        )
        for task_run in stateless_runs
    ]
    await models.TaskRunState().insert_many(task_run_states)

    # return the task run ids
    return [task_run.id for task_run in task_runs]
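A hedged usage sketch (IDs are hypothetical; the upsert semantics follow the docstring above):

```python
# A minimal sketch: max_map_index=2 yields three children (map_index 0..2);
# re-invoking is an upsert, so existing children are reused rather than duplicated.
import asyncio

async def demo(flow_run_id: str, task_id: str):
    child_ids = await get_or_create_mapped_task_run_children(
        flow_run_id=flow_run_id, task_id=task_id, max_map_index=2
    )
    assert len(child_ids) == 3
    return child_ids

# asyncio.run(demo("<flow-run-id>", "<task-id>"))
```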
Example #13
def test_preparing_state_for_cloud_replaces_cached_inputs_with_safe():
    xres = Result(3, result_handler=JSONResultHandler())
    state = prepare_state_for_cloud(Pending(cached_inputs=dict(x=xres)))
    assert state.is_pending()
    assert state.result == NoResult
    assert state.cached_inputs == dict(x=xres)
Example #14
    def run(
        self,
        state: State = None,
        task_states: Dict[Task, State] = None,
        return_tasks: Iterable[Task] = None,
        parameters: Dict[str, Any] = None,
        task_runner_state_handlers: Iterable[Callable] = None,
        executor: "prefect.engine.executors.Executor" = None,
        context: Dict[str, Any] = None,
        task_contexts: Dict[Task, Dict[str, Any]] = None,
    ) -> State:
        """
        The main endpoint for FlowRunners.  Calling this method will perform all
        computations contained within the Flow and return the final state of the Flow.

        Args:
            - state (State, optional): starting state for the Flow. Defaults to
                `Pending`
            - task_states (dict, optional): dictionary of task states to begin
                computation with, with keys being Tasks and values their corresponding state
            - return_tasks ([Task], optional): list of Tasks to include in the
                final returned Flow state. Defaults to `None`
            - parameters (dict, optional): dictionary of any needed Parameter
                values, with keys being strings representing Parameter names and values being
                their corresponding values
            - task_runner_state_handlers (Iterable[Callable], optional): A list of state change
                handlers that will be provided to the task_runner, and called whenever a task changes
                state.
            - executor (Executor, optional): executor to use when performing
                computation; defaults to the executor specified in your prefect configuration
            - context (Dict[str, Any], optional): prefect.Context to use for the
                execution of each Task run
            - task_contexts (Dict[Task, Dict[str, Any]], optional): contexts that will be provided to each task

        Returns:
            - State: `State` representing the final post-run state of the `Flow`.

        """

        self.logger.info("Beginning Flow run for '{}'".format(self.flow.name))

        # make copies to avoid modifying user inputs
        task_states = dict(task_states or {})
        context = dict(context or {})
        task_contexts = dict(task_contexts or {})
        parameters = dict(parameters or {})
        if executor is None:
            executor = prefect.engine.get_default_executor_class()()

        try:
            state, task_states, context, task_contexts = self.initialize_run(
                state=state,
                task_states=task_states,
                context=context,
                task_contexts=task_contexts,
                parameters=parameters,
            )

            with prefect.context(context):
                state = self.check_flow_is_pending_or_running(state)
                state = self.check_flow_reached_start_time(state)
                state = self.set_flow_to_running(state)
                state = self.get_flow_run_state(
                    state,
                    task_states=task_states,
                    task_contexts=task_contexts,
                    return_tasks=return_tasks,
                    task_runner_state_handlers=task_runner_state_handlers,
                    executor=executor,
                )

        except ENDRUN as exc:
            state = exc.state

        # All other exceptions are trapped and turned into Failed states
        except Exception as exc:
            self.logger.exception(
                "Unexpected error while running flow: {}".format(repr(exc)))
            if prefect.context.get("raise_on_exception"):
                raise exc
            new_state = Failed(
                message="Unexpected error while running flow: {}".format(
                    repr(exc)),
                result=exc,
            )
            state = self.handle_state_change(state or Pending(), new_state)

        return state
Example #15
class TestRunFlowStep:
    def test_running_state_finishes(self):
        flow = Flow(name="test", tasks=[Task()])
        new_state = FlowRunner(flow=flow).get_flow_run_state(
            state=Running(),
            task_states={},
            task_contexts={},
            return_tasks=set(),
            task_runner_state_handlers=[],
            executor=LocalExecutor(),
        )
        assert new_state.is_successful()

    @pytest.mark.parametrize(
        "state",
        [Pending(), Retrying(), Finished(), Success(), Failed(), Skipped()],
    )
    def test_other_states_raise_endrun(self, state):
        flow = Flow(name="test", tasks=[Task()])
        with pytest.raises(ENDRUN):
            FlowRunner(flow=flow).get_flow_run_state(
                state=state,
                task_states={},
                task_contexts={},
                return_tasks=set(),
                task_runner_state_handlers=[],
                executor=Executor(),
            )

    def test_determine_final_state_has_final_say(self):
        class MyFlowRunner(FlowRunner):
            def determine_final_state(self, *args, **kwargs):
                return Failed("Very specific error message")

        flow = Flow(name="test", tasks=[Task()])
        new_state = MyFlowRunner(flow=flow).get_flow_run_state(
            state=Running(),
            task_states={},
            task_contexts={},
            return_tasks=set(),
            task_runner_state_handlers=[],
            executor=LocalExecutor(),
        )
        assert new_state.is_failed()
        assert new_state.message == "Very specific error message"

    def test_determine_final_state_preserves_running_states_when_tasks_still_running(self):
        task = Task()
        flow = Flow(name="test", tasks=[task])
        old_state = Running()
        new_state = FlowRunner(flow=flow).get_flow_run_state(
            state=old_state,
            task_states={
                task: Retrying(start_time=pendulum.now("utc").add(days=1))
            },
            task_contexts={},
            return_tasks=set(),
            task_runner_state_handlers=[],
            executor=LocalExecutor(),
        )
        assert new_state is old_state
Example #16
    def get_flow_run_state(
        self,
        state: State,
        task_states: Dict[Task, State],
        task_contexts: Dict[Task, Dict[str, Any]],
        return_tasks: Set[Task],
        task_runner_state_handlers: Iterable[Callable],
        executor: "prefect.engine.executors.base.Executor",
    ) -> State:
        """
        Runs the flow.

        Args:
            - state (State): starting state for the Flow. Defaults to
                `Pending`
            - task_states (dict): dictionary of task states to begin
                computation with, with keys being Tasks and values their corresponding state
            - task_contexts (Dict[Task, Dict[str, Any]]): contexts that will be provided to each task
            - return_tasks ([Task], optional): list of Tasks to include in the
                final returned Flow state. Defaults to `None`
            - task_runner_state_handlers (Iterable[Callable]): A list of state change
                handlers that will be provided to the task_runner, and called whenever a task changes
                state.
            - executor (Executor): executor to use when performing
                computation; defaults to the executor provided in your prefect configuration

        Returns:
            - State: `State` representing the final post-run state of the `Flow`.

        """

        if not state.is_running():
            self.logger.info("Flow is not in a Running state.")
            raise ENDRUN(state)

        if return_tasks is None:
            return_tasks = set()
        if set(return_tasks).difference(self.flow.tasks):
            raise ValueError(
                "Some tasks in return_tasks were not found in the flow.")

        # -- process each task in order

        with executor.start():

            for task in self.flow.sorted_tasks():

                task_state = task_states.get(task)
                if task_state is None and isinstance(
                        task, prefect.tasks.core.constants.Constant):
                    task_states[task] = task_state = Success(result=task.value)

                # if the state is finished, don't run the task, just use the provided state
                if (isinstance(task_state, State) and task_state.is_finished()
                        and not task_state.is_cached()
                        and not task_state.is_mapped()):
                    continue

                upstream_states = {}  # type: Dict[Edge, Union[State, Iterable]]

                # -- process each edge to the task
                for edge in self.flow.edges_to(task):
                    upstream_states[edge] = task_states.get(
                        edge.upstream_task,
                        Pending(message="Task state not available."))

                # -- run the task

                with prefect.context(task_full_name=task.name,
                                     task_tags=task.tags):
                    task_states[task] = executor.submit(
                        self.run_task,
                        task=task,
                        state=task_state,
                        upstream_states=upstream_states,
                        context=dict(prefect.context,
                                     **task_contexts.get(task, {})),
                        task_runner_state_handlers=task_runner_state_handlers,
                        executor=executor,
                    )

            # ---------------------------------------------
            # Collect results
            # ---------------------------------------------

            # terminal tasks determine if the flow is finished
            terminal_tasks = self.flow.terminal_tasks()

            # reference tasks determine flow state
            reference_tasks = self.flow.reference_tasks()

            # wait until all terminal tasks are finished
            final_tasks = terminal_tasks.union(reference_tasks).union(
                return_tasks)
            final_states = executor.wait({
                t: task_states.get(t, Pending("Task not evaluated by FlowRunner."))
                for t in final_tasks
            })

            # also wait for any children of Mapped tasks to finish, and add them
            # to the dictionary to determine flow state
            all_final_states = final_states.copy()
            for t, s in list(final_states.items()):
                if s.is_mapped():
                    s.map_states = executor.wait(s.map_states)
                    s.result = [ms.result for ms in s.map_states]
                    all_final_states[t] = s.map_states

            assert isinstance(final_states, dict)

        key_states = set(
            flatten_seq([all_final_states[t] for t in reference_tasks]))
        terminal_states = set(
            flatten_seq([all_final_states[t] for t in terminal_tasks]))
        return_states = {t: final_states[t] for t in return_tasks}

        state = self.determine_final_state(
            state=state,
            key_states=key_states,
            return_states=return_states,
            terminal_states=terminal_states,
        )

        return state
Example #17
class TestInitializeRun:
    def test_initialize_sets_none_to_pending(self):
        result = FlowRunner(Flow(name="test")).initialize_run(state=None,
                                                              task_states={},
                                                              context={},
                                                              task_contexts={},
                                                              parameters={})
        assert result.state.is_pending()

    @pytest.mark.parametrize("state", [Pending(), Running()])
    def test_initialize_returns_state_if_provided(self, state):
        result = FlowRunner(Flow(name="test")).initialize_run(state=state,
                                                              task_states={},
                                                              context={},
                                                              task_contexts={},
                                                              parameters={})
        assert result.state is state

    def test_initialize_sets_task_contexts(self):
        t1 = Task(name="t1")
        t2 = Parameter(name="x")
        flow = Flow(name="test", tasks=[t1, t2])

        result = FlowRunner(flow).initialize_run(
            state=Pending(), task_states={}, context={}, task_contexts={}, parameters={}
        )
        assert result.task_contexts == {
            t: dict(task_name=t.name, task_slug=flow.slugs[t])
            for t in flow.tasks
        }

    def test_initialize_puts_parameters_in_context(self):
        x = Parameter(name="x")
        flow = Flow(name="test", tasks=[x])

        result = FlowRunner(flow).initialize_run(
            state=Pending(),
            task_states={},
            context={},
            task_contexts={},
            parameters={"x": 1},
        )
        assert result.context["parameters"] == {"x": 1}

    def test_parameter_precedence(self):
        x = Parameter(name="x")
        flow = Flow(name="test", tasks=[x])

        result = FlowRunner(flow).initialize_run(
            state=Pending(),
            task_states={},
            context={"parameters": {
                "x": 2,
                "y": 1
            }},
            task_contexts={},
            parameters={"x": 1},
        )
        assert result.context["parameters"] == {"x": 1, "y": 1}
Example #18
def test_task_map_with_no_upstream_results_and_a_mapped_state(executor):
    """
    This test makes sure that mapped tasks properly generate children tasks even when
    run multiple times and without available upstream results. In this test, we run the pipeline
    from a variety of starting points, ensuring that some upstream results are unavailable and
    checking that children pipelines are properly regenerated.
    """
    @prefect.task
    def numbers():
        return [1, 2, 3]

    @prefect.task
    def plus_one(x):
        return x + 1

    @prefect.task
    def get_sum(x):
        return sum(x)

    with Flow(name="test") as f:
        n = numbers()
        x = plus_one.map(n)
        y = plus_one.map(x)
        s = get_sum(y)

    # first run with a missing result from `n` but map_states for `x`
    state = FlowRunner(flow=f).run(
        executor=executor,
        task_states={
            n: Success(),
            x: Mapped(map_states=[
                Pending(cached_inputs={"x": Result(i)}) for i in range(1, 4)
            ]),
        },
        return_tasks=f.tasks,
    )

    assert state.is_successful()
    assert state.result[s].result == 12

    # next run with missing results for n and x
    state = FlowRunner(flow=f).run(
        executor=executor,
        task_states={
            n: Success(),
            x: Mapped(map_states=[Success(), Success(), Success()]),
            y: Mapped(map_states=[
                Success(result=3),
                Success(result=4),
                Retrying(cached_inputs={"x": Result(4)}),
            ]),
        },
        return_tasks=f.tasks,
    )

    assert state.is_successful()
    assert state.result[s].result == 12

    # next run with missing results for n, x, and y
    state = FlowRunner(flow=f).run(
        executor=executor,
        task_states={
            n: Success(),
            x: Mapped(map_states=[Success(), Success(), Success()]),
            y: Mapped(map_states=[
                Success(result=3),
                Success(result=4),
                Success(result=5),
            ]),
        },
        return_tasks=f.tasks,
    )

    assert state.is_successful()
    assert state.result[s].result == 12
Example #19
class TestTaskRunStates:
    async def test_set_task_run_state(self, task_run_id):
        result = await api.states.set_task_run_state(task_run_id=task_run_id,
                                                     state=Failed())

        assert result.task_run_id == task_run_id

        query = await models.TaskRun.where(id=task_run_id).first(
            {"version", "state", "serialized_state"})

        assert query.version == 2
        assert query.state == "Failed"
        assert query.serialized_state["type"] == "Failed"

    @pytest.mark.parametrize("state", [Failed(), Success()])
    async def test_set_task_run_state_fails_with_wrong_task_run_id(
            self, state):
        with pytest.raises(ValueError, match="State update failed"):
            await api.states.set_task_run_state(task_run_id=str(uuid.uuid4()),
                                                state=state)

    @pytest.mark.parametrize(
        "state", [s() for s in State.children() if not s().is_running()])
    async def test_state_does_not_set_heartbeat_unless_running(
            self, state, task_run_id):
        task_run = await models.TaskRun.where(id=task_run_id).first({"heartbeat"})
        assert task_run.heartbeat is None

        await api.states.set_task_run_state(task_run_id=task_run_id,
                                            state=state)

        task_run = await models.TaskRun.where(id=task_run_id).first({"heartbeat"})
        assert task_run.heartbeat is None

    async def test_running_state_sets_heartbeat(self, task_run_id,
                                                running_flow_run_id):
        task_run = await models.TaskRun.where(id=task_run_id).first({"heartbeat"})
        assert task_run.heartbeat is None

        dt = pendulum.now("UTC")
        await api.states.set_task_run_state(task_run_id=task_run_id,
                                            state=Running())

        task_run = await models.TaskRun.where(id=task_run_id).first({"heartbeat"})
        assert task_run.heartbeat > dt

    async def test_trigger_failed_state_does_not_set_end_time(
            self, task_run_id):
        await api.states.set_task_run_state(task_run_id=task_run_id,
                                            state=TriggerFailed())
        task_run_info = await models.TaskRun.where(id=task_run_id).first(
            {"id", "start_time", "end_time"})
        assert not task_run_info.start_time
        assert not task_run_info.end_time

    @pytest.mark.parametrize(
        "flow_run_state", [Pending(), Running(), Failed(), Success()]
    )
    async def test_running_states_can_not_be_set_if_flow_run_is_not_running(
            self, flow_run_id, task_run_id, flow_run_state):

        await api.states.set_flow_run_state(flow_run_id=flow_run_id,
                                            state=flow_run_state)

        set_running_coroutine = api.states.set_task_run_state(
            task_run_id=task_run_id, state=Running())

        if flow_run_state.is_running():
            assert await set_running_coroutine
            assert (
                await models.TaskRun.where(id=task_run_id).first({"state"})
            ).state == "Running"
        else:

            with pytest.raises(ValueError, match="is not in a running state"):
                await set_running_coroutine
            assert (
                await models.TaskRun.where(id=task_run_id).first({"state"})
            ).state != "Running"
Example #20
def test_states_are_hashable():
    assert {State(), Pending(), Success()}
Example #21
async def get_or_create_task_run_info(
    flow_run_id: str, task_id: str, map_index: int = None
) -> dict:
    """
    Given a flow_run_id, task_id, and map_index, return details about the corresponding task run.
    If the task run doesn't exist, it will be created.

    Returns:
        - dict: a dict of details about the task run, including its id, version, and state.
    """

    if map_index is None:
        map_index = -1

    task_run = await models.TaskRun.where(
        {
            "flow_run_id": {"_eq": flow_run_id},
            "task_id": {"_eq": task_id},
            "map_index": {"_eq": map_index},
        }
    ).first({"id", "version", "state", "serialized_state"})

    if task_run:
        return dict(
            id=task_run.id,
            version=task_run.version,
            state=task_run.state,
            serialized_state=task_run.serialized_state,
        )

    # if it isn't found, add it to the DB
    task = await models.Task.where(id=task_id).first({"cache_key", "tenant_id"})
    if not task:
        raise ValueError("Invalid task ID")

    db_task_run = models.TaskRun(
        tenant_id=task.tenant_id,
        flow_run_id=flow_run_id,
        task_id=task_id,
        map_index=map_index,
        cache_key=task.cache_key,
        version=0,
    )

    db_task_run_state = models.TaskRunState(
        tenant_id=task.tenant_id,
        state="Pending",
        timestamp=pendulum.now(),
        message="Task run created",
        serialized_state=Pending(message="Task run created").serialize(),
    )

    db_task_run.states = [db_task_run_state]
    run = await db_task_run.insert(
        on_conflict=dict(
            constraint="task_run_unique_identifier_key",
            update_columns=["cache_key"],
        ),
        selection_set={"returning": {"id"}},
    )

    return dict(
        id=run.returning.id,
        version=db_task_run.version,
        state="Pending",
        serialized_state=db_task_run_state.serialized_state,
    )
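A matching sketch for the lookup path (hypothetical IDs; per the code above, `map_index=None` is normalized to -1 and a Pending run is created if none exists):

```python
# A minimal sketch: fetch or create the (flow_run, task) run record; the
# returned dict carries id, version, state, and serialized_state.
import asyncio

async def demo(flow_run_id: str, task_id: str) -> str:
    info = await get_or_create_task_run_info(
        flow_run_id=flow_run_id, task_id=task_id, map_index=None
    )
    # a newly created task run starts out "Pending"
    return info["id"]

# asyncio.run(demo("<flow-run-id>", "<task-id>"))
```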
Example #22
def __init__(self, id, state=None, version=None):
    self.id = id
    self.state = state or Pending()
    self.version = version or 0
Example #23
                # TODO: we only need to rerun these tasks if any pending
                # downstream tasks depend on them.
                if (
                    isinstance(
                        task,
                        (
                            prefect.tasks.core.resource_manager.ResourceSetupTask,
                            prefect.tasks.core.resource_manager.ResourceCleanupTask,
                            prefect.tasks.secrets.SecretBase,
                        ),
                    )
                    and task_state is not None
                    and task_state.is_finished()
                    and not task_state.is_cached()
                ):
                    task_states[task] = task_state = Pending()

                # if the state is finished, don't run the task, just use the provided state;
                # if the state is cached / mapped, we still want to run the task runner
                # pipeline steps to either ensure the cache is still valid or to recreate
                # the mapped pipeline for possible retries
                if (
                    isinstance(task_state, State)
                    and task_state.is_finished()
                    and not task_state.is_cached()
                    and not task_state.is_mapped()
                ):
                    continue