def test_manual_only_with_resume_in_context():
    """`manual_only` passes for mixed upstream states when `resume=True` is in context."""
    with context(resume=True):
        # one successful, one failed and one skipped upstream state
        upstream_states = generate_states(success=1, failed=1, skipped=1)
        assert triggers.manual_only(upstream_states)
def test_manual_only_with_empty_set_and_resume_in_context():
    """`manual_only` returns exactly `True` for an empty upstream set when resuming."""
    with context(resume=True):
        outcome = triggers.manual_only(set())
        assert outcome is True
def get_task_run_state(
    self,
    state: State,
    inputs: Dict[str, Result],
    timeout_handler: Optional[Callable],
) -> State:
    """
    Calls `self.task.run` with the unwrapped inputs and converts the outcome
    into a state. A successful result is additionally checkpointed when running
    in cloud and `task.checkpoint` is `True`.

    Args:
        - state (State): the current state of this task; must be Running
        - inputs (Dict[str, Result]): a dictionary of inputs whose keys
            correspond to the task's `run()` arguments; each `Result`'s
            `value` is what gets passed to the task
        - timeout_handler (Callable, optional): function used to call the task
            with a timeout, with call signature `handler(fn, *args, **kwargs)`.
            Falls back to `main_thread_timeout` when falsy

    Returns:
        - State: `Success` wrapping the task's return value, or `TimedOut` if
            the call raised `TimeoutError`

    Raises:
        - ENDRUN: if `state` is not a Running state
        - TimeoutError: re-raised if `raise_on_exception` is set in context
        - any other exception or signal raised by the task is not trapped here
    """
    task_name = prefect.context.get("task_full_name", self.task.name)

    # guard clause: only Running tasks may proceed
    if not state.is_running():
        self.logger.debug(
            "Task '{name}': can't run task because it's not in a "
            "Running state; ending run.".format(name=task_name)
        )
        raise ENDRUN(state)

    try:
        self.logger.debug(
            "Task '{name}': Calling task.run() method...".format(name=task_name)
        )
        run_with_timeout = timeout_handler or main_thread_timeout
        # the task receives plain values, not Result wrappers
        call_kwargs = {key: res.value for key, res in inputs.items()}

        with prefect.context(logger=self.task.logger):
            raw_result = run_with_timeout(
                self.task.run, timeout=self.task.timeout, **call_kwargs
            )

    except TimeoutError as exc:
        # surface the timeout to the user, either as an exception or a state
        if prefect.context.get("raise_on_exception"):
            raise exc
        return TimedOut(
            "Task timed out during execution.", result=exc, cached_inputs=inputs
        )

    wrapped = Result(value=raw_result, result_handler=self.result_handler)
    success = Success(result=wrapped, message="Task run succeeded.")

    # checkpointing only happens for cloud runs of tasks that opted in
    should_checkpoint = (
        success.is_successful()
        and prefect.context.get("cloud") is True
        and self.task.checkpoint is True
    )
    if should_checkpoint:
        success._result.store_safe_value()

    return success
def run(
    self,
    state: State = None,
    task_states: Dict[Task, State] = None,
    return_tasks: Iterable[Task] = None,
    parameters: Dict[str, Any] = None,
    task_runner_state_handlers: Iterable[Callable] = None,
    executor: "prefect.executors.Executor" = None,
    context: Dict[str, Any] = None,
    task_contexts: Dict[Task, Dict[str, Any]] = None,
) -> State:
    """
    The main endpoint for FlowRunners. Initializes the run, walks the flow
    through its state checks and returns the resulting state of the Flow.

    Args:
        - state (State, optional): starting state for the Flow; `Pending()` is
            substituted if an unexpected error occurs while it is still unset
        - task_states (dict, optional): dictionary of task states to begin
            computation with, with keys being Tasks and values their
            corresponding state
        - return_tasks ([Task], optional): list of Tasks to include in the
            final returned Flow state. Defaults to `None`
        - parameters (dict, optional): dictionary of any needed Parameter
            values, with keys being strings representing Parameter names and
            values being their corresponding values
        - task_runner_state_handlers (Iterable[Callable], optional): A list of
            state change handlers that will be provided to the task_runner,
            and called whenever a task changes state.
        - executor (Executor, optional): executor to use when performing
            computation; falls back to the flow's `executor` attribute and
            then to the default executor class from the prefect engine
        - context (Dict[str, Any], optional): values layered on top of the
            current global `prefect.context` for this run
        - task_contexts (Dict[Task, Dict[str, Any]], optional): contexts that
            will be provided to each task

    Returns:
        - State: `State` representing the final post-run state of the `Flow`.

    Raises:
        - Exception: any unexpected error is re-raised when
            `raise_on_exception` is set in the run context; otherwise it is
            converted into a `Failed` state
    """
    self.logger.info("Beginning Flow run for '{}'".format(self.flow.name))

    # work on shallow copies so the caller's dictionaries are left untouched
    parameters = dict(parameters or {})
    task_states = dict(task_states or {})
    task_contexts = dict(task_contexts or {})

    # start from the global context and let the provided context override it
    run_context = dict(prefect.context)
    run_context.update(context or {})

    # executor resolution order: argument, then flow attribute, then default
    if executor is None:
        executor = getattr(self.flow, "executor", None)
    if executor is None:
        executor = prefect.engine.get_default_executor_class()()

    self.logger.debug("Using executor type %s", type(executor).__name__)

    try:
        state, task_states, run_context, task_contexts = self.initialize_run(
            state=state,
            task_states=task_states,
            context=run_context,
            task_contexts=task_contexts,
            parameters=parameters,
        )

        with prefect.context(run_context):
            state = self.check_flow_is_pending_or_running(state)
            state = self.check_flow_reached_start_time(state)
            state = self.set_flow_to_running(state)
            state = self.get_flow_run_state(
                state,
                task_states=task_states,
                task_contexts=task_contexts,
                return_tasks=return_tasks,
                task_runner_state_handlers=task_runner_state_handlers,
                executor=executor,
            )

    except ENDRUN as exc:
        # ENDRUN carries the state the run should end in
        state = exc.state

    except Exception as exc:
        # anything else is logged and, unless re-raised, becomes a Failed state
        failure_message = "Unexpected error while running flow: {}".format(
            repr(exc)
        )
        self.logger.exception(failure_message)
        if run_context.get("raise_on_exception"):
            raise exc
        failed = Failed(message=failure_message, result=exc)
        state = self.handle_state_change(state or Pending(), failed)

    return state
def kube_secret():
    """
    Yield while local secrets are enabled in the temporary config and the
    `KUBERNETES_API_KEY` secret is set to a test value in the prefect context.
    """
    local_secrets_config = {"cloud.use_local_secrets": True}
    test_secrets = dict(KUBERNETES_API_KEY="test_key")
    with set_temporary_config(local_secrets_config), prefect.context(
        secrets=test_secrets
    ):
        yield
def run(
    self,
    state: State = None,
    upstream_states: Dict[Edge, State] = None,
    context: Dict[str, Any] = None,
    executor: "prefect.engine.executors.Executor" = None,
) -> State:
    """
    The main endpoint for TaskRunners. Pushes the task through the runner's
    pipeline of state checks and, if every check passes, executes
    `self.task.run` with the inputs gathered from upstream.

    Args:
        - state (State, optional): initial `State` to begin task run from;
            handed to `initialize_run` as given
        - upstream_states (Dict[Edge, State]): a dictionary representing the
            states of any tasks upstream of this one. The keys of the
            dictionary should correspond to the edges leading to the task.
        - context (dict, optional): prefect Context to use for execution;
            `map_index` and `task_full_name` are written into it
        - executor (Executor, optional): executor to use when performing
            computation; defaults to an instance of the default executor class
            from the prefect engine

    Returns:
        - `State` object representing the final post-run state of the Task

    Raises:
        - signals.PrefectStateSignal: re-raised (after its state is recorded)
            when `raise_on_exception` is set in context
        - Exception: any unexpected error is re-raised when
            `raise_on_exception` is set in context; otherwise it is turned
            into a `Failed` state
    """
    upstream_states = upstream_states or {}
    context = context or {}

    # build the display name, appending "[i]" for mapped children
    map_index = context.setdefault("map_index", None)
    if map_index is None:
        index_suffix = ""
    else:
        index_suffix = "[{}]".format(map_index)
    context["task_full_name"] = "{name}{index}".format(
        name=self.task.name, index=index_suffix
    )

    if executor is None:
        executor = prefect.engine.get_default_executor_class()()

    # A mapped *parent* never runs itself; it produces a Mapped state and
    # spawns children over its inputs. It is recognised by having at least one
    # `mapped` upstream edge while carrying no `map_index` of its own (a
    # `map_index` marks a child run).
    mapped = any(edge.mapped for edge in upstream_states) and map_index is None
    task_inputs = {}  # type: Dict[str, Any]

    self.logger.info(
        "Task '{name}': Starting task run...".format(name=context["task_full_name"])
    )

    try:
        # NOTE: `context` is rebound here, so later lookups of
        # `task_full_name` intentionally go through the returned context
        state, context = self.initialize_run(state, context)

        with prefect.context(context):
            # the task must be in a state from which it can start
            state = self.check_task_is_ready(state)

            # the scheduled start time (if any) must have been reached
            state = self.check_task_reached_start_time(state)

            # every upstream task must have finished
            state = self.check_upstream_finished(
                state, upstream_states=upstream_states
            )

            # mapped parents hand off to their children and stop here
            if mapped:
                state = self.run_mapped_task(
                    state=state,
                    upstream_states=upstream_states,
                    context=context,
                    executor=executor,
                )
                state = self.wait_for_mapped_task(state=state, executor=executor)
                self.logger.debug(
                    "Task '{name}': task has been mapped; ending run.".format(
                        name=context["task_full_name"]
                    )
                )
                raise ENDRUN(state)

            # propagate skips from upstream where required
            state = self.check_upstream_skipped(
                state, upstream_states=upstream_states
            )

            # collect inputs from upstream states and explicitly passed inputs
            task_inputs = self.get_task_inputs(
                state=state, upstream_states=upstream_states
            )

            # look for a usable cached result
            state = self.check_task_is_cached(state, inputs=task_inputs)

            # The trigger check runs only after inputs are collected: triggers
            # may raise a Pause, and caching that state needs `task_inputs`.
            state = self.check_task_trigger(state, upstream_states=upstream_states)

            # transition into Running
            state = self.set_task_to_running(state)

            # actually execute the task
            state = self.get_task_run_state(
                state, inputs=task_inputs, timeout_handler=executor.timeout_handler
            )

            # store the output in the cache when appropriate
            state = self.cache_result(state, inputs=task_inputs)

            # decide whether the task needs to be retried
            state = self.check_for_retry(state, inputs=task_inputs)

    except (ENDRUN, signals.PrefectStateSignal) as exc:
        # pending outcomes (retries, pauses, ...) must carry the task inputs
        signal_state = exc.state
        if signal_state.is_pending():
            signal_state.cached_inputs = task_inputs or {}  # type: ignore
        state = signal_state
        is_signal = not isinstance(exc, ENDRUN)
        if is_signal and prefect.context.get("raise_on_exception"):
            raise exc

    except Exception as exc:
        msg = "Task '{name}': unexpected error while running task: {exc}".format(
            name=context["task_full_name"], exc=repr(exc)
        )
        self.logger.error(msg)
        state = Failed(message=msg, result=exc)
        if prefect.context.get("raise_on_exception"):
            raise exc

    self.logger.info(
        "Task '{name}': finished task run for task with final state: '{state}'".format(
            name=context["task_full_name"], state=type(state).__name__
        )
    )
    return state
def run(
    self,
    state: State = None,
    upstream_states: Dict[Edge, State] = None,
    context: Dict[str, Any] = None,
    is_mapped_parent: bool = False,
) -> State:
    """
    The main endpoint for TaskRunners.  Calling this method will conditionally
    execute `self.task.run` with any provided inputs, assuming the upstream
    dependencies are in a state which allow this Task to run.  Additionally,
    this method will wait for and re-run the task while it ends in a Retrying
    or Queued state whose start time is <= 10 minutes in the future.

    Args:
        - state (State, optional): initial `State` to begin task run from;
            defaults to `Pending()`
        - upstream_states (Dict[Edge, State]): a dictionary representing the
            states of any tasks upstream of this one. The keys of the
            dictionary should correspond to the edges leading to the task.
        - context (dict, optional): prefect Context to use for execution. When
            a dictionary is provided it is updated in place with the refreshed
            `task_run_version` before each in-process re-run
        - is_mapped_parent (bool): a boolean indicating whether this task run
            is the run of a parent mapped task

    Returns:
        - `State` object representing the final post-run state of the Task
    """
    # `context` is optional, but the retry loop below writes the refreshed
    # task run version into it. Substitute an empty dict up front so that write
    # cannot fail on `None`; an empty dict is falsy just like `None`, and a
    # caller-supplied dictionary is still the object that gets updated.
    if context is None:
        context = {}

    with prefect.context(context):
        end_state = super().run(
            state=state,
            upstream_states=upstream_states,
            context=context,
            is_mapped_parent=is_mapped_parent,
        )

        # keep re-running in this process while the next attempt is due soon
        while (end_state.is_retrying() or end_state.is_queued()) and (
            end_state.start_time <= pendulum.now("utc").add(minutes=10)  # type: ignore
        ):
            # narrows the type for static checkers
            assert isinstance(end_state, (Retrying, Queued))

            # sleep until the scheduled start time (never a negative duration)
            naptime = max(
                (end_state.start_time - pendulum.now("utc")).total_seconds(), 0
            )
            time.sleep(naptime)

            # send heartbeat on each iteration to let API know task run is still alive
            self.client.update_task_run_heartbeat(
                task_run_id=prefect.context.get("task_run_id")
            )

            # mapped children will retrieve their latest info inside
            # initialize_run(), but we can load up-to-date versions
            # for all other task runs here
            if prefect.context.get("map_index") in [-1, None]:
                task_run_info = self.client.get_task_run_info(
                    flow_run_id=prefect.context.get("flow_run_id"),
                    task_id=prefect.context.get("task_id"),
                    map_index=prefect.context.get("map_index"),
                )

                # only the version is refreshed from the API; the state passed
                # to the re-run below remains `end_state`
                context.update(task_run_version=task_run_info.version)

            end_state = super().run(
                state=end_state,
                upstream_states=upstream_states,
                context=context,
                is_mapped_parent=is_mapped_parent,
            )

        return end_state
def test_deep_map_with_a_retry(monkeypatch):
    """
    Builds a three-layer mapped Flow whose middle layer fails once. After the
    first run the failing child should be Retrying and its downstream child
    still pending; a second run of the same flow run should leave both
    successful.

    DOES NOT WORK WITH DASK EXECUTORS because of the need for shared state on
    second run

    NOTE(review): `executor` is not a parameter of this test, so it has to be
    resolved from somewhere outside this function - confirm where it is
    defined.
    """
    flow_run_id, task_run_id_1, task_run_id_2, task_run_id_3 = (
        str(uuid.uuid4()) for _ in range(4)
    )

    with prefect.Flow(name="test") as flow:
        t1 = plus_one.map([-1, 0, 1])
        t2 = invert_fail_once.map(t1)
        t3 = plus_one.map(t2)

    # allow exactly one immediate retry of the middle layer
    t2.max_retries = 1
    t2.retry_delay = datetime.timedelta(seconds=0)

    mapped_parents = [t1, t2, t3]
    parent_run_ids = [task_run_id_1, task_run_id_2, task_run_id_3]

    parent_task_runs = [
        TaskRun(id=run_id, task_id=task.id, flow_run_id=flow_run_id)
        for run_id, task in zip(parent_run_ids, mapped_parents)
    ]
    # every remaining task in the flow gets a run keyed by its own task id
    other_task_runs = [
        TaskRun(id=t.id, task_id=t.id, flow_run_id=flow_run_id)
        for t in flow.tasks
        if t not in mapped_parents
    ]

    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=parent_task_runs + other_task_runs,
        monkeypatch=monkeypatch,
    )

    def first_child_run(task):
        """Return the recorded task run for `task` whose `map_index` is 0."""
        return next(
            tr
            for tr in client.task_runs.values()
            if tr.task_id == task.id and tr.map_index == 0
        )

    # FIRST RUN
    with prefect.context(flow_run_id=flow_run_id):
        CloudFlowRunner(flow=flow).run(executor=executor)

    assert client.flow_runs[flow_run_id].state.is_running()
    for parent_run_id in parent_run_ids:
        assert client.task_runs[parent_run_id].state.is_mapped()

    # each mapped task should have 4 task runs recorded against its task id
    for task in mapped_parents:
        run_count = sum(
            1 for tr in client.task_runs.values() if tr.task_id == task.id
        )
        assert run_count == 4

    # t2's first child task should be retrying
    assert isinstance(first_child_run(t2).state, Retrying)

    # t3's first child task should be pending
    assert first_child_run(t3).state.is_pending()

    # RUN A SECOND TIME
    with prefect.context(flow_run_id=flow_run_id):
        CloudFlowRunner(flow=flow).run(executor=executor)

    # after the second run both first children should be successful
    assert first_child_run(t2).state.is_successful()
    assert first_child_run(t3).state.is_successful()
def run_with_ctx(*args: Any, _ctx_dict: dict, **kwargs: Any) -> Any:
    """
    Call `fn` with the given positional and keyword arguments while
    `_ctx_dict` is applied as the prefect context, and return its result.

    `fn` is not defined in this function; presumably it is bound in an
    enclosing scope - verify against the surrounding code.
    """
    with prefect.context(_ctx_dict):
        outcome = fn(*args, **kwargs)
    return outcome
def test_retry_stores_default_run_count_in_context():
    """A `Retrying` state built without arguments picks up `task_run_count` from context."""
    expected_run_count = 5
    with prefect.context(task_run_count=expected_run_count):
        retrying = Retrying()
        assert retrying.run_count == expected_run_count
def test_looped_stores_default_loop_count_in_context():
    """A `Looped` state built without arguments picks up `task_loop_count` from context."""
    expected_loop_count = 5
    with prefect.context(task_loop_count=expected_loop_count):
        looped = Looped()
        assert looped.loop_count == expected_loop_count