Example #1
    def cache_result(self, state: State, inputs: Dict[str, Result]) -> State:
        """
        Caches the result of a successful task, if appropriate. Alternatively,
        if the task failed, caches the inputs.

        Tasks are cached if:
            - task.cache_for is not None
            - the task state is Successful
            - the task state is not Skipped (which is a subclass of Successful)

        Args:
            - state (State): the current state of this task
            - inputs (Dict[str, Result]): a dictionary of inputs whose keys correspond
                to the task's `run()` arguments.

        Returns:
            - State: the state of the task after running the check

        """
        state.cached_inputs = inputs

        if (state.is_successful() and not state.is_skipped()
                and self.task.cache_for is not None):
            expiration = pendulum.now("utc") + self.task.cache_for
            cached_state = Cached(
                result=state._result,
                cached_inputs=inputs,
                cached_result_expiration=expiration,
                cached_parameters=prefect.context.get("parameters"),
                message=state.message,
            )
            return cached_state

        return state
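A minimal usage sketch (an assumption, not part of the example above): treating this method as Prefect 1.x's TaskRunner.cache_result, a Success state on a task with cache_for set should come back as a Cached state with a future expiration.

# Hedged sketch -- assumes Prefect 1.x imports; illustrative values only.
from datetime import timedelta

from prefect import Task
from prefect.engine.state import Success
from prefect.engine.task_runner import TaskRunner

task = Task(name="add", cache_for=timedelta(hours=1))  # caching enabled via cache_for
runner = TaskRunner(task=task)

new_state = runner.cache_result(state=Success(result=3), inputs={})
print(type(new_state).__name__)            # expected: "Cached"
print(new_state.cached_result_expiration)  # roughly one hour from now (UTC)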
Example #2
async def set_task_run_state(task_run_id: str, state: State, force=False) -> None:
    """
    Updates a task run state.

    Args:
        - task_run_id (str): the task run id to update
        - state (State): the new state
        - force (bool): if True, avoids pipeline checks
    """

    if task_run_id is None:
        raise ValueError(f"Invalid task run ID.")

    task_run = await models.TaskRun.where({"id": {"_eq": task_run_id},}).first(
        {
            "id": True,
            "version": True,
            "state": True,
            "serialized_state": True,
            "flow_run": {"id": True, "state": True},
        }
    )

    if not task_run:
        raise ValueError(f"Invalid task run ID: {task_run_id}.")

    # ------------------------------------------------------
    # if the state is running, ensure the flow run is also running
    # ------------------------------------------------------
    if not force and state.is_running() and task_run.flow_run.state != "Running":
        raise ValueError(
            f"State update failed for task run ID {task_run_id}: provided "
            f"a running state but associated flow run {task_run.flow_run.id} is not "
            "in a running state."
        )

    # ------------------------------------------------------
    # if we have cached inputs on the old state, we need to carry them forward
    # ------------------------------------------------------
    if not state.cached_inputs and task_run.serialized_state.get("cached_inputs", None):
        # load up the old state's cached inputs and apply them to the new state
        serialized_state = state_schema.load(task_run.serialized_state)
        state.cached_inputs = serialized_state.cached_inputs

    # --------------------------------------------------------
    # prepare the new state for the database
    # --------------------------------------------------------

    task_run_state = models.TaskRunState(
        task_run_id=task_run.id,
        version=(task_run.version or 0) + 1,
        timestamp=pendulum.now("UTC"),
        message=state.message,
        result=state.result,
        start_time=getattr(state, "start_time", None),
        state=type(state).__name__,
        serialized_state=state.serialize(),
    )

    await task_run_state.insert()
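A hedged call-site sketch for the function above, assuming a configured server database and an existing task run whose flow run is already "Running"; the identifiers are placeholders, not real IDs.

# Illustrative only: requires the server database and an existing task run.
import asyncio

from prefect.engine.state import Running, Success

async def advance_task_run(task_run_id: str) -> None:
    # Move the task run into Running, then record a terminal Success state.
    await set_task_run_state(task_run_id=task_run_id, state=Running())
    await set_task_run_state(task_run_id=task_run_id, state=Success(message="done"))

# asyncio.run(advance_task_run("<task-run-id>"))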
Example #3
    def check_task_ready_to_map(self, state: State,
                                upstream_states: Dict[Edge, State]) -> State:
        """
        Checks if the parent task is ready to proceed with mapping.

        Args:
            - state (State): the current state of this task
            - upstream_states (Dict[Edge, State]): the upstream states

        Raises:
            - ENDRUN: either way, we don't continue past this point
        """
        if state.is_mapped():
            # this indicates we are executing a re-run of a mapped pipeline;
            # in this case, we populate both `map_states` and `cached_inputs`
            # to ensure the flow runner can properly regenerate the child tasks,
            # regardless of whether we mapped over an exchanged piece of data
            # or a non-data-exchanging upstream dependency
            if len(state.map_states) == 0 and state.n_map_states > 0:  # type: ignore
                state.map_states = [None] * state.n_map_states  # type: ignore
            state.cached_inputs = {
                edge.key: state._result  # type: ignore
                for edge, state in upstream_states.items() if edge.key
            }
            raise ENDRUN(state)

        # we can't map if there are no success states with iterables upstream
        if upstream_states and not any([
                edge.mapped and state.is_successful()
                for edge, state in upstream_states.items()
        ]):
            new_state = Failed(
                "No upstream states can be mapped over.")  # type: State
            raise ENDRUN(new_state)
        elif not all([
                hasattr(state.result, "__getitem__")
                for edge, state in upstream_states.items() if
                state.is_successful() and not state.is_mapped() and edge.mapped
        ]):
            new_state = Failed(
                "At least one upstream state has an unmappable result.")
            raise ENDRUN(new_state)
        else:
            # compute and set n_map_states
            n_map_states = min(
                [
                    len(s.result) for e, s in upstream_states.items()
                    if e.mapped and s.is_successful() and not s.is_mapped()
                ] + [
                    s.n_map_states  # type: ignore
                    for e, s in upstream_states.items()
                    if e.mapped and s.is_mapped()
                ],
                default=0,
            )
            new_state = Mapped("Ready to proceed with mapping.",
                               n_map_states=n_map_states)
            raise ENDRUN(new_state)
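The min(..., default=0) at the end decides how many child tasks get spawned; a standalone sketch of that arithmetic with plain data (illustrative values, no Prefect objects):

# Upstream A: a successful, non-mapped state whose result is a 3-element list.
# Upstream B: an upstream that was itself mapped and produced 5 children.
lengths_from_iterables = [3]  # len(s.result) for mapped edges with plain iterable results
lengths_from_mapped = [5]     # s.n_map_states for mapped edges whose state is already Mapped
n_map_states = min(lengths_from_iterables + lengths_from_mapped, default=0)
print(n_map_states)  # 3 -- the shortest upstream iterable bounds the number of children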
Example #4
async def set_task_run_state(task_run_id: str, state: State,) -> None:
    """
    Updates a task run state.

    Args:
        - task_run_id (str): the task run id to update
        - state (State): the new state
    """

    if task_run_id is None:
        raise ValueError(f"Invalid task run ID.")

    task_run = await models.TaskRun.where({"id": {"_eq": task_run_id},}).first(
        {"id": True, "version": True, "state": True, "serialized_state": True,}
    )

    if not task_run:
        raise ValueError(f"Invalid task run ID: {task_run_id}.")

    # ------------------------------------------------------
    # if we have cached inputs on the old state, we need to carry them forward
    # ------------------------------------------------------

    if not state.cached_inputs and task_run.serialized_state.get("cached_inputs", None):
        # load up the old state's cached inputs and apply them to the new state
        serialized_state = state_schema.load(task_run.serialized_state)
        state.cached_inputs = serialized_state.cached_inputs

    # --------------------------------------------------------
    # prepare the new state for the database
    # --------------------------------------------------------

    task_run_state = models.TaskRunState(
        task_run_id=task_run.id,
        version=(task_run.version or 0) + 1,
        timestamp=pendulum.now("UTC"),
        message=state.message,
        result=state.result,
        start_time=getattr(state, "start_time", None),
        state=type(state).__name__,
        serialized_state=state.serialize(),
    )

    await task_run_state.insert()
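The cached-input carry-forward in the middle of this function is its only non-trivial step. A hedged standalone sketch of the same pattern, assuming state_schema refers to Prefect 1.x's prefect.serialization.state.StateSchema (the import is not shown above, so this is an assumption):

# Assumption: state_schema is prefect.serialization.state.StateSchema();
# the values below are illustrative, not taken from a real task run.
from prefect.engine.result import Result
from prefect.engine.state import Running, Success
from prefect.serialization.state import StateSchema

state_schema = StateSchema()

old_state = Success(message="finished earlier")
old_state.cached_inputs = {"x": Result(2)}  # what a prior run left behind
serialized_old = old_state.serialize()      # stand-in for task_run.serialized_state

new_state = Running()                       # incoming state with no cached inputs
if not new_state.cached_inputs and serialized_old.get("cached_inputs"):
    new_state.cached_inputs = state_schema.load(serialized_old).cached_inputs

print(new_state.cached_inputs.keys())  # dict_keys(['x']) -- inputs carried forward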
Example #5
async def set_task_run_state(
        task_run_id: str,
        state: State,
        version: int = None,
        flow_run_version: int = None) -> models.TaskRunState:
    """
    Updates a task run state.

    Args:
        - task_run_id (str): the task run id to update
        - state (State): the new state
        - version (int): a version to enforce version-locking
        - flow_run_version (int): a flow run version to enforce version-locking

    Returns:
        - models.TaskRunState
    """

    if task_run_id is None:
        raise ValueError(f"Invalid task run ID.")

    where = {
        "id": {
            "_eq": task_run_id
        },
        "_or": [
            {
                # EITHER version locking is enabled and the versions match
                "version": {
                    "_eq": version
                },
                "flow_run": {
                    "version": {
                        "_eq": flow_run_version
                    },
                    "flow": {
                        "flow_group": {
                            "settings": {
                                "_contains": {
                                    "version_locking_enabled": True
                                }
                            }
                        }
                    },
                },
            },
            # OR version locking is not enabled
            {
                "flow_run": {
                    "flow": {
                        "flow_group": {
                            "_not": {
                                "settings": {
                                    "_contains": {
                                        "version_locking_enabled": True
                                    }
                                }
                            }
                        }
                    }
                }
            },
        ],
    }

    task_run = await models.TaskRun.where(where).first({
        "id": True,
        "tenant_id": True,
        "version": True,
        "state": True,
        "serialized_state": True,
        "flow_run": {
            "id": True,
            "state": True
        },
    })

    if not task_run:
        raise ValueError(f"State update failed for task run ID {task_run_id}")

    # ------------------------------------------------------
    # if the state is running, ensure the flow run is also running
    # ------------------------------------------------------
    if state.is_running() and task_run.flow_run.state != "Running":
        raise ValueError(
            f"State update failed for task run ID {task_run_id}: provided "
            f"a running state but associated flow run {task_run.flow_run.id} is not "
            "in a running state.")

    # ------------------------------------------------------
    # if we have cached inputs on the old state, we need to carry them forward
    # ------------------------------------------------------
    if not state.cached_inputs and task_run.serialized_state.get(
            "cached_inputs", None):
        # load up the old state's cached inputs and apply them to the new state
        serialized_state = state_schema.load(task_run.serialized_state)
        state.cached_inputs = serialized_state.cached_inputs

    # --------------------------------------------------------
    # prepare the new state for the database
    # --------------------------------------------------------

    task_run_state = models.TaskRunState(
        id=str(uuid.uuid4()),
        tenant_id=task_run.tenant_id,
        task_run_id=task_run.id,
        version=(task_run.version or 0) + 1,
        timestamp=pendulum.now("UTC"),
        message=state.message,
        result=state.result,
        start_time=getattr(state, "start_time", None),
        state=type(state).__name__,
        serialized_state=state.serialize(),
    )

    await task_run_state.insert()

    # --------------------------------------------------------
    # apply downstream updates
    # --------------------------------------------------------

    # FOR RUNNING STATES:
    #   - update the task run heartbeat
    if state.is_running():
        await api.runs.update_task_run_heartbeat(task_run_id=task_run_id)

    return task_run_state
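The _or filter near the top encodes "versions must match only when version locking is enabled". A plain-Python restatement of that predicate (illustrative values, no database; the function name is made up for this sketch):

def row_matches(row_version, row_flow_run_version, locking_enabled,
                version, flow_run_version) -> bool:
    # EITHER locking is on and both versions agree, OR locking is off entirely.
    versions_match = (row_version == version
                      and row_flow_run_version == flow_run_version)
    return (locking_enabled and versions_match) or not locking_enabled

print(row_matches(3, 7, True, 3, 7))   # True  -- locking on, both versions agree
print(row_matches(3, 7, True, 2, 7))   # False -- locking on, stale task run version
print(row_matches(3, 7, False, 1, 1))  # True  -- locking off, versions ignored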