def check_task_ready_to_map(self, state: State, upstream_states: Dict[Edge, State]) -> State:
    """
    Decides whether the parent task can be mapped; never returns normally.

    Every code path ends by raising `ENDRUN`, carrying one of:
        - the incoming `state`, unchanged, when it is already mapped
        - a `Failed` state when `upstream_states` is non-empty but no mapped
            edge has a successful upstream state
        - a `Failed` state when a successful, not-yet-mapped upstream state on
            a mapped edge has a result without `__getitem__`
        - a `Mapped` state otherwise

    Args:
        - state (State): the current state of this task
        - upstream_states (Dict[Edge, State]): the upstream states, keyed by
            the edge that connects them to this task

    Raises:
        - ENDRUN: always; the run does not continue past this point
    """
    # already mapped: hand the state straight back
    if state.is_mapped():
        raise ENDRUN(state)

    # we can't map if there are no success states with iterables upstream
    if upstream_states:
        mappable_flags = [
            edge.mapped and upstream.is_successful()
            for edge, upstream in upstream_states.items()
        ]
        if not any(mappable_flags):
            raise ENDRUN(Failed("No upstream states can be mapped over."))

    # every result we are asked to map over must support indexing
    indexable_flags = [
        hasattr(upstream.result, "__getitem__")
        for edge, upstream in upstream_states.items()
        if upstream.is_successful() and not upstream.is_mapped() and edge.mapped
    ]
    if not all(indexable_flags):
        raise ENDRUN(
            Failed("At least one upstream state has an unmappable result."))

    raise ENDRUN(Mapped("Ready to proceed with mapping."))
def check_task_ready_to_map(self, state: State, upstream_states: Dict[Edge, State]) -> State:
    """
    Decides whether the parent task can be mapped; never returns normally.

    Every code path ends by raising `ENDRUN`, carrying one of:
        - the incoming `state` when it is already mapped (after optionally
            repopulating `map_states` and `cached_inputs`, see below)
        - a `Failed` state when `upstream_states` is non-empty but no mapped
            edge has a successful upstream state
        - a `Failed` state when a successful, not-yet-mapped upstream state on
            a mapped edge has a result without `__getitem__`
        - a `Mapped` state carrying the computed `n_map_states` otherwise

    Args:
        - state (State): the current state of this task
        - upstream_states (Dict[Edge, State]): the upstream states, keyed by
            the edge that connects them to this task

    Raises:
        - ENDRUN: always; the run does not continue past this point
    """
    if state.is_mapped():
        # this indicates we are executing a re-run of a mapped pipeline;
        # in this case, we populate both `map_states` and `cached_inputs`
        # to ensure the flow runner can properly regenerate the child tasks,
        # regardless of whether we mapped over an exchanged piece of data
        # or a non-data-exchanging upstream dependency
        needs_repopulating = (
            len(state.map_states) == 0  # type: ignore
            and state.n_map_states > 0  # type: ignore
        )
        if needs_repopulating:
            state.map_states = [None] * state.n_map_states  # type: ignore
            # only edges that carry a key contribute an input
            state.cached_inputs = {
                edge.key: upstream._result  # type: ignore
                for edge, upstream in upstream_states.items()
                if edge.key
            }
        raise ENDRUN(state)

    # we can't map if there are no success states with iterables upstream
    if upstream_states:
        mappable_flags = [
            edge.mapped and upstream.is_successful()
            for edge, upstream in upstream_states.items()
        ]
        if not any(mappable_flags):
            raise ENDRUN(Failed("No upstream states can be mapped over."))

    # every result we are asked to map over must support indexing
    indexable_flags = [
        hasattr(upstream.result, "__getitem__")
        for edge, upstream in upstream_states.items()
        if upstream.is_successful() and not upstream.is_mapped() and edge.mapped
    ]
    if not all(indexable_flags):
        raise ENDRUN(
            Failed("At least one upstream state has an unmappable result."))

    # compute and set n_map_states: the smallest width among all mapped
    # upstreams, or 0 when there are none
    result_widths = [
        len(upstream.result)
        for edge, upstream in upstream_states.items()
        if edge.mapped and upstream.is_successful() and not upstream.is_mapped()
    ]
    mapped_widths = [
        upstream.n_map_states  # type: ignore
        for edge, upstream in upstream_states.items()
        if edge.mapped and upstream.is_mapped()
    ]
    n_map_states = min(result_widths + mapped_widths, default=0)

    raise ENDRUN(
        Mapped("Ready to proceed with mapping.", n_map_states=n_map_states))
def cache_result(self, state: State, inputs: Dict[str, Result]) -> State:
    """
    Wraps a successful task state in a `Cached` state, if appropriate.

    A `Cached` state is produced only when all of the following hold:
        - the task state is Successful
        - the task state is not Skipped
        - task.cache_for is not None

    In every other case the incoming state is returned untouched.

    Args:
        - state (State): the current state of this task
        - inputs (Dict[str, Result]): a dictionary of inputs whose keys
            correspond to the task's `run()` arguments; each value is
            tokenized and stored on the cached state as `hashed_inputs`

    Returns:
        - State: either a new `Cached` state or the original `state`
    """
    should_cache = (
        state.is_successful()
        and not state.is_skipped()
        and self.task.cache_for is not None
    )
    if not should_cache:
        return state

    # the cache entry expires `cache_for` after the current UTC time
    expires_at = pendulum.now("utc") + self.task.cache_for
    cached_payload = state._result

    input_tokens = {}
    for name, res in inputs.items():
        input_tokens[name] = tokenize(res.value)

    return Cached(
        result=cached_payload,
        hashed_inputs=input_tokens,
        cached_result_expiration=expires_at,
        cached_parameters=prefect.context.get("parameters"),
        message=state.message,
    )
def get_task_run_state(
    self,
    state: State,
    inputs: Dict[str, Result],
    timeout_handler: Optional[Callable],
) -> State:
    """
    Executes `task.run()` and converts the outcome into a state.

    A `TimeoutError` raised during the call is turned into a `TimedOut` state
    (or re-raised when the context sets `raise_on_exception`). Other
    exceptions are not handled inside this method. On success the return value
    is wrapped in a `Result`, and its safe value is stored when
    `task.checkpoint` is `True`.

    Args:
        - state (State): the current state of this task
        - inputs (Dict[str, Result]): a dictionary of inputs whose keys
            correspond to the task's `run()` arguments
        - timeout_handler (Callable, optional): function for timing out
            task execution, with call signature `handler(fn, *args, **kwargs)`.
            Falls back to `main_thread_timeout` when falsy

    Returns:
        - State: a `Success` state, or a `TimedOut` state if the call timed out

    Raises:
        - ENDRUN: if the incoming state is not a Running state
        - TimeoutError: if the task timed out and `raise_on_exception` is set
            in the context
    """
    if not state.is_running():
        self.logger.debug(
            "Task '{name}': can't run task because it's not in a "
            "Running state; ending run.".format(
                name=prefect.context.get("task_full_name", self.task.name)
            )
        )
        raise ENDRUN(state)

    try:
        self.logger.debug(
            "Task '{name}': Calling task.run() method...".format(
                name=prefect.context.get("task_full_name", self.task.name)
            )
        )
        handler = timeout_handler or main_thread_timeout
        # unwrap each Result so the task receives plain values
        unwrapped_inputs = {key: res.value for key, res in inputs.items()}
        run_output = handler(
            self.task.run, timeout=self.task.timeout, **unwrapped_inputs
        )

    # inform user of timeout
    except TimeoutError as exc:
        if prefect.context.get("raise_on_exception"):
            raise exc
        return TimedOut(
            "Task timed out during execution.",
            result=exc,
            cached_inputs=inputs,
        )

    wrapped_output = Result(value=run_output, result_handler=self.result_handler)
    success_state = Success(result=wrapped_output, message="Task run succeeded.")

    # checkpoint only when the task explicitly opted in
    if success_state.is_successful() and self.task.checkpoint is True:
        success_state._result.store_safe_value()

    return success_state
def test_no_residential_etl_tasks_fail(etl_flow_state: State) -> None:
    """Check that the ETL flow run ended in a successful state.

    Args:
        etl_flow_state (State): A Prefect State object containing flow run
            information
    """
    flow_succeeded = etl_flow_state.is_successful()
    assert flow_succeeded
def checkpoint_handler(task_runner: DSTaskRunner, old_state: State, new_state: State) -> State:
    """
    A handler designed to implement result caching by filename.

    On the Pending -> Running transition, the task's result handler ``read``
    method is attempted. If it succeeds, its return value becomes the task
    result and a ``Success`` state is returned in place of ``new_state``. If it
    raises ``FileNotFoundError``, ``new_state`` is returned unchanged so the
    task runs normally.

    On the Running -> Successful transition, the result handler's ``write``
    method is applied to ``new_state.result``.

    Both steps are skipped when ``task_runner.result_handler`` is ``None``.

    Parameters
    ----------
    task_runner : instance of DSTaskRunner
        The task runner associated with the flow the handler is used in.
    old_state : instance of prefect.engine.state.State
        The current state of the task.
    new_state : instance of prefect.engine.state.State
        The expected new state of the task.

    Returns
    -------
    new_state : instance of prefect.engine.state.State
        The actual new state of the task.

    Raises
    ------
    AttributeError
        If the ``PREFECT__FLOWS__CHECKPOINTING`` environment variable is set
        to ``"true"``.
    TypeError
        If ``task_runner`` has no ``upstream_states`` attribute, or if the
        result handler's ``read`` call raises ``TypeError``. In the latter
        case the original error is chained as ``__cause__``.
    """
    # Single lookup: a missing variable yields None, which never equals "true".
    if os.environ.get("PREFECT__FLOWS__CHECKPOINTING") == "true":
        raise AttributeError("Cannot use standard prefect checkpointing with this handler")

    # NOTE(review): the guards below test ``task_runner.result_handler`` while
    # the reads/writes go through ``task_runner.task.result_handler``; confirm
    # the two can never differ (e.g. a runner-level handler with a task whose
    # own handler is None).
    if task_runner.result_handler is not None and old_state.is_pending() and new_state.is_running():
        if not hasattr(task_runner, "upstream_states"):
            raise TypeError(
                "upstream_states not found in task runner. Make sure to use "
                "prefect_ds.task_runner.DSTaskRunner."
            )
        input_mapping = _create_input_mapping(task_runner.upstream_states)
        try:
            data = task_runner.task.result_handler.read(input_mapping=input_mapping)
        except FileNotFoundError:
            # Nothing stored yet: let the task run as planned.
            return new_state
        except TypeError as exc:  # unexpected argument input_mapping
            # Chain explicitly so the underlying TypeError (which may also
            # originate inside the handler itself) is reported as the cause.
            raise TypeError(
                "Result handler could not accept input_mapping argument. "
                "Please ensure that you are using a handler from prefect_ds."
            ) from exc
        result = Result(value=data, result_handler=task_runner.task.result_handler)
        state = Success(result=result, message="Task loaded from disk.")
        return state

    if task_runner.result_handler is not None and old_state.is_running() and new_state.is_successful():
        input_mapping = _create_input_mapping(task_runner.upstream_states)
        task_runner.task.result_handler.write(new_state.result, input_mapping=input_mapping)

    return new_state
def get_task_run_state(
    self,
    state: State,
    inputs: Dict[str, Result],
    timeout_handler: Optional[Callable] = None,
) -> State:
    """
    Executes `task.run()` and converts the outcome into a state.

    The following outcomes of the call are translated into states:
        - `KeyboardInterrupt` -> `Cancelled`
        - `TimeoutError` -> `TimedOut` (re-raised instead when the context
            sets `raise_on_exception`)
        - `signals.LOOP` -> the signal's `Looped` state, with its result
            wrapped in a `Result` and the inputs attached
        - normal return -> `Success`, with the inputs attached

    A successful result has its safe value stored when checkpointing is
    enabled in the context, the task has not set `checkpoint` to `False`, and
    a result handler is present.

    Args:
        - state (State): the current state of this task
        - inputs (Dict[str, Result]): a dictionary of inputs whose keys
            correspond to the task's `run()` arguments
        - timeout_handler (Callable, optional): function for timing out
            task execution, with call signature `handler(fn, *args, **kwargs)`.
            Defaults to `prefect.utilities.executors.timeout_handler`

    Returns:
        - State: the state of the task after running it

    Raises:
        - ENDRUN: if the incoming state is not a Running state
        - TimeoutError: if the task timed out and `raise_on_exception` is set
            in the context
    """
    if not state.is_running():
        self.logger.debug(
            "Task '{name}': can't run task because it's not in a "
            "Running state; ending run.".format(
                name=prefect.context.get("task_full_name", self.task.name)
            )
        )
        raise ENDRUN(state)

    try:
        self.logger.debug(
            "Task '{name}': Calling task.run() method...".format(
                name=prefect.context.get("task_full_name", self.task.name)
            )
        )
        handler = timeout_handler or prefect.utilities.executors.timeout_handler
        # unwrap each Result so the task receives plain values
        unwrapped_inputs = {key: res.value for key, res in inputs.items()}

        def _invoke_task():
            # single place that actually calls the task through the handler
            return handler(
                self.task.run, timeout=self.task.timeout, **unwrapped_inputs
            )

        if getattr(self.task, "log_stdout", False):
            # route anything the task prints into the runner's logger
            with redirect_stdout(
                prefect.utilities.logging.RedirectToLog(self.logger)  # type: ignore
            ):
                run_output = _invoke_task()
        else:
            run_output = _invoke_task()

    except KeyboardInterrupt:
        self.logger.debug("Interrupt signal raised, cancelling task run.")
        return Cancelled(message="Interrupt signal raised, cancelling task run.")

    # inform user of timeout
    except TimeoutError as exc:
        if prefect.context.get("raise_on_exception"):
            raise exc
        return TimedOut(
            "Task timed out during execution.",
            result=exc,
            cached_inputs=inputs,
        )

    except signals.LOOP as exc:
        looped_state = exc.state
        assert isinstance(looped_state, Looped)
        looped_state.result = Result(
            value=looped_state.result, result_handler=self.result_handler
        )
        looped_state.cached_inputs = inputs
        # keep the signal's own message when it has one
        looped_state.message = exc.state.message or "Task is looping ({})".format(
            looped_state.loop_count
        )
        return looped_state

    wrapped_output = Result(value=run_output, result_handler=self.result_handler)
    success_state = Success(
        result=wrapped_output,
        message="Task run succeeded.",
        cached_inputs=inputs,
    )

    ## checkpoint tasks if a result_handler is present, except for when the user has opted out by disabling checkpointing
    checkpoint_wanted = (
        success_state.is_successful()
        and prefect.context.get("checkpointing") is True
        and self.task.checkpoint is not False
        and self.result_handler is not None
    )
    if checkpoint_wanted:
        success_state._result.store_safe_value()

    return success_state