def cache_result(self, state: State, inputs: Dict[str, Result]) -> State:
    """
    Caches the result of a successful task, if appropriate. Alternatively,
    if the task failed, the inputs are cached on the state instead.

    A result is cached only when all of the following hold:
        - `task.cache_for` is not None
        - the task state is Successful
        - the task state is not Skipped (a subclass of Successful)

    Args:
        - state (State): the current state of this task
        - inputs (Dict[str, Result]): a dictionary of inputs whose keys
            correspond to the task's `run()` arguments

    Returns:
        - State: the state of the task after running the check
    """
    # always record the inputs, even when no Cached state is produced
    state.cached_inputs = inputs

    eligible = (
        state.is_successful()
        and not state.is_skipped()
        and self.task.cache_for is not None
    )
    if not eligible:
        return state

    # build a Cached state whose expiration is offset from "now" by cache_for
    return Cached(
        result=state._result,
        cached_inputs=inputs,
        cached_result_expiration=pendulum.now("utc") + self.task.cache_for,
        cached_parameters=prefect.context.get("parameters"),
        message=state.message,
    )
async def set_task_run_state(task_run_id: str, state: State, force=False) -> None:
    """
    Updates a task run state.

    Args:
        - task_run_id (str): the task run id to update
        - state (State): the new state
        - force (bool): if True, skips the "flow run must be running" pipeline check

    Raises:
        - ValueError: if the task run ID is None or unknown, or if a running
          state is supplied while the associated flow run is not running
    """
    if task_run_id is None:
        raise ValueError("Invalid task run ID.")

    task_run = await models.TaskRun.where({"id": {"_eq": task_run_id},}).first(
        {
            "id": True,
            "version": True,
            "state": True,
            "serialized_state": True,
            "flow_run": {"id": True, "state": True},
        }
    )

    if not task_run:
        raise ValueError(f"Invalid task run ID: {task_run_id}.")

    # ------------------------------------------------------
    # if the state is running, ensure the flow run is also running
    # ------------------------------------------------------
    if not force and state.is_running() and task_run.flow_run.state != "Running":
        raise ValueError(
            f"State update failed for task run ID {task_run_id}: provided "
            f"a running state but associated flow run {task_run.flow_run.id} is not "
            "in a running state."
        )

    # ------------------------------------------------------
    # if we have cached inputs on the old state, we need to carry them forward
    # ------------------------------------------------------
    if not state.cached_inputs and task_run.serialized_state.get("cached_inputs", None):
        # load up the old state's cached inputs and apply them to the new state
        serialized_state = state_schema.load(task_run.serialized_state)
        state.cached_inputs = serialized_state.cached_inputs

    # --------------------------------------------------------
    # prepare the new state for the database
    # --------------------------------------------------------

    # version is incremented on every write to support optimistic concurrency
    task_run_state = models.TaskRunState(
        task_run_id=task_run.id,
        version=(task_run.version or 0) + 1,
        timestamp=pendulum.now("UTC"),
        message=state.message,
        result=state.result,
        start_time=getattr(state, "start_time", None),
        state=type(state).__name__,
        serialized_state=state.serialize(),
    )

    await task_run_state.insert()
def check_task_ready_to_map(
    self, state: State, upstream_states: Dict[Edge, State]
) -> State:
    """
    Checks if the parent task is ready to proceed with mapping.

    Args:
        - state (State): the current state of this task
        - upstream_states (Dict[Edge, Union[State, List[State]]]): the upstream states

    Raises:
        - ENDRUN: either way, we dont continue past this point
    """
    # A Mapped state here indicates a re-run of a mapped pipeline; restore
    # placeholder child states and cached inputs so the flow runner can
    # regenerate the children, whether or not data was exchanged upstream.
    if state.is_mapped():
        if len(state.map_states) == 0 and state.n_map_states > 0:  # type: ignore
            state.map_states = [None] * state.n_map_states  # type: ignore
        state.cached_inputs = {
            edge.key: up._result  # type: ignore
            for edge, up in upstream_states.items()
            if edge.key
        }
        raise ENDRUN(state)

    # mapping requires at least one successful, mapped upstream edge
    has_mappable_upstream = any(
        edge.mapped and up.is_successful() for edge, up in upstream_states.items()
    )
    if upstream_states and not has_mappable_upstream:
        raise ENDRUN(Failed("No upstream states can be mapped over."))

    # every successful, un-mapped, mapped-edge upstream must be indexable
    all_results_indexable = all(
        hasattr(up.result, "__getitem__")
        for edge, up in upstream_states.items()
        if up.is_successful() and not up.is_mapped() and edge.mapped
    )
    if not all_results_indexable:
        raise ENDRUN(Failed("At least one upstream state has an unmappable result."))

    # the child count is the shortest mappable upstream: concrete result
    # lengths for plain successes, declared sizes for Mapped upstreams
    candidate_sizes = [
        len(up.result)
        for edge, up in upstream_states.items()
        if edge.mapped and up.is_successful() and not up.is_mapped()
    ]
    candidate_sizes += [
        up.n_map_states  # type: ignore
        for edge, up in upstream_states.items()
        if edge.mapped and up.is_mapped()
    ]
    raise ENDRUN(
        Mapped(
            "Ready to proceed with mapping.",
            n_map_states=min(candidate_sizes, default=0),
        )
    )
async def set_task_run_state(task_run_id: str, state: State,) -> None:
    """
    Updates a task run state.

    Args:
        - task_run_id (str): the task run id to update
        - state (State): the new state

    Raises:
        - ValueError: if the task run ID is None or does not match a task run
    """
    if task_run_id is None:
        raise ValueError("Invalid task run ID.")

    task_run = await models.TaskRun.where({"id": {"_eq": task_run_id},}).first(
        {"id": True, "version": True, "state": True, "serialized_state": True,}
    )

    if not task_run:
        raise ValueError(f"Invalid task run ID: {task_run_id}.")

    # ------------------------------------------------------
    # if we have cached inputs on the old state, we need to carry them forward
    # ------------------------------------------------------
    if not state.cached_inputs and task_run.serialized_state.get("cached_inputs", None):
        # load up the old state's cached inputs and apply them to the new state
        serialized_state = state_schema.load(task_run.serialized_state)
        state.cached_inputs = serialized_state.cached_inputs

    # --------------------------------------------------------
    # prepare the new state for the database
    # --------------------------------------------------------

    # version is incremented on every write to support optimistic concurrency
    task_run_state = models.TaskRunState(
        task_run_id=task_run.id,
        version=(task_run.version or 0) + 1,
        timestamp=pendulum.now("UTC"),
        message=state.message,
        result=state.result,
        start_time=getattr(state, "start_time", None),
        state=type(state).__name__,
        serialized_state=state.serialize(),
    )

    await task_run_state.insert()
async def set_task_run_state(
    task_run_id: str,
    state: State,
    version: int = None,
    flow_run_version: int = None,
) -> models.TaskRunState:
    """
    Updates a task run state.

    Args:
        - task_run_id (str): the task run id to update
        - state (State): the new state
        - version (int): a version to enforce version-locking
        - flow_run_version (int): a flow run version to enforce version-locking

    Returns:
        - models.TaskRunState

    Raises:
        - ValueError: if the task run ID is None, the task run does not exist
          (or fails the version-locking filter), or a running state is supplied
          while the associated flow run is not running
    """
    if task_run_id is None:
        raise ValueError("Invalid task run ID.")

    # the filter enforces version locking only when the owning flow group has
    # `version_locking_enabled` in its settings; otherwise any version matches
    where = {
        "id": {"_eq": task_run_id},
        "_or": [
            {
                # EITHER version locking is enabled and the versions match
                "version": {"_eq": version},
                "flow_run": {
                    "version": {"_eq": flow_run_version},
                    "flow": {
                        "flow_group": {
                            "settings": {
                                "_contains": {"version_locking_enabled": True}
                            }
                        }
                    },
                },
            },
            # OR version locking is not enabled
            {
                "flow_run": {
                    "flow": {
                        "flow_group": {
                            "_not": {
                                "settings": {
                                    "_contains": {"version_locking_enabled": True}
                                }
                            }
                        }
                    }
                }
            },
        ],
    }
    task_run = await models.TaskRun.where(where).first(
        {
            "id": True,
            "tenant_id": True,
            "version": True,
            "state": True,
            "serialized_state": True,
            "flow_run": {"id": True, "state": True},
        }
    )

    if not task_run:
        raise ValueError(f"State update failed for task run ID {task_run_id}")

    # ------------------------------------------------------
    # if the state is running, ensure the flow run is also running
    # ------------------------------------------------------
    if state.is_running() and task_run.flow_run.state != "Running":
        raise ValueError(
            f"State update failed for task run ID {task_run_id}: provided "
            f"a running state but associated flow run {task_run.flow_run.id} is not "
            "in a running state."
        )

    # ------------------------------------------------------
    # if we have cached inputs on the old state, we need to carry them forward
    # ------------------------------------------------------
    if not state.cached_inputs and task_run.serialized_state.get(
        "cached_inputs", None
    ):
        # load up the old state's cached inputs and apply them to the new state
        serialized_state = state_schema.load(task_run.serialized_state)
        state.cached_inputs = serialized_state.cached_inputs

    # --------------------------------------------------------
    # prepare the new state for the database
    # --------------------------------------------------------

    # version is incremented on every write to support optimistic concurrency
    task_run_state = models.TaskRunState(
        id=str(uuid.uuid4()),
        tenant_id=task_run.tenant_id,
        task_run_id=task_run.id,
        version=(task_run.version or 0) + 1,
        timestamp=pendulum.now("UTC"),
        message=state.message,
        result=state.result,
        start_time=getattr(state, "start_time", None),
        state=type(state).__name__,
        serialized_state=state.serialize(),
    )

    await task_run_state.insert()

    # --------------------------------------------------------
    # apply downstream updates
    # --------------------------------------------------------

    # FOR RUNNING STATES:
    #   - update the task run heartbeat
    if state.is_running():
        await api.runs.update_task_run_heartbeat(task_run_id=task_run_id)

    return task_run_state