def test_task_map_with_no_upstream_results_and_a_mapped_state(executor):
    """
    Mapped tasks must regenerate their children on every run, even when the
    results of upstream tasks are not available.

    The same flow is run three times from different starting points. Each
    starting point withholds a different set of upstream results, and every run
    has to rebuild the child pipelines and arrive at the same total.
    """

    @prefect.task
    def numbers():
        return [1, 2, 3]

    @prefect.task
    def plus_one(x):
        return x + 1

    @prefect.task
    def get_sum(x):
        return sum(x)

    with Flow(name="test") as flow:
        source = numbers()
        first_pass = plus_one.map(source)
        second_pass = plus_one.map(first_pass)
        total = get_sum(second_pass)

    def assert_total_is_twelve(starting_states):
        # run the whole flow from the given task states and check the final sum
        final_state = FlowRunner(flow=flow).run(
            executor=executor,
            task_states=starting_states,
            return_tasks=flow.tasks,
        )
        assert final_state.is_successful()
        assert final_state.result[total].result == 12

    # run 1: `numbers` has no result, but the first map still carries
    # per-child states with cached inputs
    pending_children = [
        Pending(cached_inputs={"x": Result(value)}) for value in range(1, 4)
    ]
    assert_total_is_twelve(
        {
            source: Success(),
            first_pass: Mapped(map_states=pending_children),
        }
    )

    # run 2: neither `numbers` nor the first map has results; the last child
    # of the second map is waiting to be retried
    assert_total_is_twelve(
        {
            source: Success(),
            first_pass: Mapped(map_states=[Success(), Success(), Success()]),
            second_pass: Mapped(
                map_states=[
                    Success(result=3),
                    Success(result=4),
                    Retrying(cached_inputs={"x": Result(4)}),
                ]
            ),
        }
    )

    # run 3: `numbers` and the first map have no results; every child of the
    # second map is already successful
    assert_total_is_twelve(
        {
            source: Success(),
            first_pass: Mapped(map_states=[Success(), Success(), Success()]),
            second_pass: Mapped(
                map_states=[
                    Success(result=3),
                    Success(result=4),
                    Success(result=5),
                ]
            ),
        }
    )
def test_retry_stores_default_run_count_in_context():
    """
    A `Retrying` state created without `run_count` while `task_run_count` is
    set in context reports that value as its run count.
    """
    expected = 5
    with prefect.context(task_run_count=expected):
        retrying = Retrying()
    assert retrying.run_count == expected
async def reap_zombie_task_runs(
    self, heartbeat_cutoff: datetime.datetime = None
) -> int:
    """
    Zombie tasks are tasks that claim to be Running, but haven't updated their
    heartbeat.

    This method either retries them or marks them as failed, and writes a log
    entry to each handled task run describing what was done.

    Args:
        - heartbeat_cutoff (datetime.datetime, optional): forwarded to
            `get_task_runs_where_clause` to select the task runs to handle;
            defaults to ten minutes before the current UTC time

    Returns:
        - int: the number of zombie task runs that were handled
    """
    zombies = 0
    heartbeat_cutoff = heartbeat_cutoff or pendulum.now("utc").subtract(minutes=10)

    where_clause = await self.get_task_runs_where_clause(
        heartbeat_cutoff=heartbeat_cutoff
    )

    # at most 5000 task runs are handled per call, most recently updated first
    task_runs = await models.TaskRun.where(where_clause).get(
        selection_set={
            "id": True,
            "flow_run_id": True,
            "tenant_id": True,
            # Information about the current flow run state
            "flow_run": {"state"},
            # get information about retries from task
            "task": {"max_retries", "retry_delay"},
            # count the number of retrying states for this task run
            with_args(
                "retry_count: states_aggregate",
                {"where": {"state": {"_eq": "Retrying"}}},
            ): {"aggregate": {"count"}},
        },
        limit=5000,
        order_by={"updated": EnumValue("desc")},
        apply_schema=False,
    )

    if task_runs:
        self.logger.info(f"Zombie killer found {len(task_runs)} task runs.")

    # Set task run states to failed
    for tr in task_runs:
        try:
            # if the flow run is running and retries are available, mark as retrying
            if tr.flow_run.state == "Running" and tr.retry_count.aggregate.count < (
                tr.task.max_retries or 0
            ):
                attempt = tr.retry_count.aggregate.count + 1
                # adjacent string literals are joined with no separator, so the
                # first sentence must end with an explicit space
                message = (
                    "No heartbeat detected from the remote task; retrying the run. "
                    f"This will be retry {attempt} of {tr.task.max_retries}."
                )
                retry_delay = orm._as_timedelta(tr.task.retry_delay or "0")
                await prefect.api.states.set_task_run_state(
                    task_run_id=tr.id,
                    state=Retrying(
                        message=message,
                        run_count=attempt,
                        start_time=pendulum.now("UTC") + retry_delay,
                    ),
                )
            # mark failed
            else:
                message = "No heartbeat detected from the remote task; marking the run as failed."
                await prefect.api.states.set_task_run_state(
                    task_run_id=tr.id,
                    state=Failed(message=message),
                )

            # log the state change to the task run
            await prefect.api.logs.create_logs(
                [
                    dict(
                        tenant_id=tr.tenant_id,
                        flow_run_id=tr.flow_run_id,
                        task_run_id=tr.id,
                        name=f"{self.logger.name}.TaskRun",
                        message=message,
                        level="ERROR",
                    )
                ]
            )

            zombies += 1

        except ValueError as exc:
            # a task run whose handling raises ValueError is logged and not
            # counted; the remaining task runs are still processed
            self.logger.error(exc)

    if zombies:
        self.logger.info(f"Addressed {zombies} zombie task runs.")

    return zombies
def test_retry_stores_run_count():
    """An explicit `run_count` passed to `Retrying` is exposed unchanged."""
    expected = 2
    retrying = Retrying(run_count=expected)
    assert retrying.run_count == expected
def test_retry_stores_default_run_count():
    """A `Retrying` state created with no arguments reports a run count of 1."""
    retrying = Retrying()
    assert retrying.run_count == 1
[ dict(state=Cancelled(), assert_true={"is_finished"}), dict(state=Cached(), assert_true={"is_cached", "is_finished", "is_successful"}), dict(state=ClientFailed(), assert_true={"is_meta_state"}), dict(state=Failed(), assert_true={"is_finished", "is_failed"}), dict(state=Finished(), assert_true={"is_finished"}), dict(state=Looped(), assert_true={"is_finished", "is_looped"}), dict(state=Mapped(), assert_true={"is_finished", "is_mapped", "is_successful"}), dict(state=Paused(), assert_true={"is_pending", "is_scheduled"}), dict(state=Pending(), assert_true={"is_pending"}), dict(state=Queued(), assert_true={"is_meta_state", "is_queued", "is_scheduled"}), dict(state=Resume(), assert_true={"is_pending", "is_scheduled"}), dict(state=Retrying(), assert_true={"is_pending", "is_scheduled", "is_retrying"}), dict(state=Running(), assert_true={"is_running"}), dict(state=Scheduled(), assert_true={"is_pending", "is_scheduled"}), dict(state=Skipped(), assert_true={"is_finished", "is_successful", "is_skipped"}), dict(state=Submitted(), assert_true={"is_meta_state", "is_submitted"}), dict(state=Success(), assert_true={"is_finished", "is_successful"}), dict(state=TimedOut(), assert_true={"is_finished", "is_failed"}), dict(state=TriggerFailed(), assert_true={"is_finished", "is_failed"}), dict(state=ValidationFailed(), assert_true={"is_finished", "is_failed"}), ], ) def test_state_is_methods(state_check): """
def test_scheduled_states_have_default_times():
    """
    `Scheduled` and `Retrying` states created without an explicit `start_time`
    get a start time within 0.1 seconds of the moment they are created.
    """
    tolerance = datetime.timedelta(seconds=0.1)
    now = pendulum.now("utc")

    # `now` is captured before the states are created, so the signed
    # difference `now - start_time` is expected to be zero or negative; a
    # one-sided `<` check would therefore pass for any start time in the
    # future. Comparing the absolute difference bounds it on both sides.
    for state_cls in (Scheduled, Retrying):
        assert abs(now - state_cls().start_time) < tolerance
@pytest.mark.parametrize( "state_check", [ dict(state=Cancelled(), assert_true={"is_finished", "is_failed"}), dict(state=Cached(), assert_true={"is_cached", "is_finished", "is_successful"}), dict(state=ClientFailed(), assert_true={"is_meta_state"}), dict(state=Failed(), assert_true={"is_finished", "is_failed"}), dict(state=Finished(), assert_true={"is_finished"}), dict(state=Looped(), assert_true={"is_finished", "is_looped"}), dict(state=Mapped(), assert_true={"is_finished", "is_mapped", "is_successful"}), dict(state=Paused(), assert_true={"is_pending", "is_scheduled"}), dict(state=Pending(), assert_true={"is_pending"}), dict(state=Queued(), assert_true={"is_meta_state", "is_queued"}), dict(state=Resume(), assert_true={"is_pending", "is_scheduled"}), dict( state=Retrying(), assert_true={"is_pending", "is_scheduled", "is_retrying"} ), dict(state=Running(), assert_true={"is_running"}), dict(state=Scheduled(), assert_true={"is_pending", "is_scheduled"}), dict( state=Skipped(), assert_true={"is_finished", "is_successful", "is_skipped"} ), dict(state=Submitted(), assert_true={"is_meta_state", "is_submitted"}), dict(state=Success(), assert_true={"is_finished", "is_successful"}), dict(state=TimedOut(), assert_true={"is_finished", "is_failed"}), dict(state=TriggerFailed(), assert_true={"is_finished", "is_failed"}), ], ) def test_state_is_methods(state_check): """ Iterates over all of the "is_*()" methods of the state, asserting that each one is
def check_for_retry(self, state: State, inputs: Dict[str, Result]) -> State:
    """
    Decides whether a task that ended in a failed state gets another attempt.

    Args:
        - state (State): the current state of this task
        - inputs (Dict[str, Result]): the task's inputs, keyed by the names of
            the arguments of its `run()` method

    Returns:
        - State: a `Retrying` state when the task is going to be retried;
            otherwise the state that was passed in
    """
    # only failed states are candidates for a retry
    if not state.is_failed():
        return state

    # when the task restricts retries to certain exception types, a failure
    # whose result matches none of them is returned as-is
    if self.task.retry_on and state.result is not None:
        is_retryable = any(
            isinstance(state.result, exc_class) for exc_class in self.task.retry_on
        )
        if not is_retryable:
            template = (
                "Task '{name}': Skipping retry. "
                "Exception of type {exc_type!r} is not an instance of "
                "the retry on exception types."
            )
            self.logger.info(
                template.format(
                    name=prefect.context.get("task_full_name", self.task.name),
                    exc_type=type(state.result).__name__,
                )
            )
            return state

    attempt = prefect.context.get("task_run_count", 1)
    looped_result = None
    retry_context = None

    if prefect.context.get("task_loop_count") is not None:
        # carry the result of the current loop iteration over to the retry
        looped_result = self.result.from_value(
            value=prefect.context.get("task_loop_result")
        )

        # the loop result is written out only when checkpointing is switched
        # on, the task has not opted out, and there is a value to write
        should_checkpoint = (
            prefect.context.get("checkpointing") is True
            and self.task.checkpoint is not False
            and looped_result.value is not None
        )
        if should_checkpoint:
            try:
                input_values = {key: res.value for key, res in inputs.items()}
                write_kwargs = {
                    **prefect.context.get("parameters", {}).copy(),
                    **prefect.context,
                    **input_values,
                }
                looped_result = self.result.write(
                    looped_result.value, **write_kwargs
                )
            except ResultNotImplementedError:
                # the configured result cannot write; keep the unwritten result
                pass

        retry_context = {"_loop_count": prefect.context["task_loop_count"]}

    # no retries left: hand back the failed state untouched
    if attempt > self.task.max_retries:
        return state

    retry_at = pendulum.now("utc") + self.task.retry_delay
    note = f"Retrying Task (after attempt {attempt} of {self.task.max_retries + 1})"
    return Retrying(
        start_time=retry_at,
        context=retry_context,
        message=note,
        run_count=attempt,
        result=looped_result,
    )
class TestRunFlowStep:
    """Tests for `FlowRunner.get_flow_run_state` called from various flow states."""

    @staticmethod
    def _step_kwargs(state, executor, task_states=None):
        # keyword arguments shared by every `get_flow_run_state` call below
        return dict(
            state=state,
            task_states={} if task_states is None else task_states,
            task_contexts={},
            return_tasks=set(),
            task_runner_state_handlers=[],
            executor=executor,
        )

    def test_running_state_finishes(self):
        """A running flow with a single task ends up successful."""
        single_task_flow = Flow(name="test", tasks=[Task()])
        result = FlowRunner(flow=single_task_flow).get_flow_run_state(
            **self._step_kwargs(Running(), LocalExecutor())
        )
        assert result.is_successful()

    @pytest.mark.parametrize(
        "state", [Pending(), Retrying(), Finished(), Success(), Failed(), Skipped()]
    )
    def test_other_states_raise_endrun(self, state):
        """Each of the parametrized flow states makes the step raise ENDRUN."""
        single_task_flow = Flow(name="test", tasks=[Task()])
        with pytest.raises(ENDRUN):
            FlowRunner(flow=single_task_flow).get_flow_run_state(
                **self._step_kwargs(state, Executor())
            )

    def test_determine_final_state_has_final_say(self):
        """Whatever `determine_final_state` returns is the state of the step."""

        class MyFlowRunner(FlowRunner):
            def determine_final_state(self, *args, **kwargs):
                return Failed("Very specific error message")

        single_task_flow = Flow(name="test", tasks=[Task()])
        result = MyFlowRunner(flow=single_task_flow).get_flow_run_state(
            **self._step_kwargs(Running(), LocalExecutor())
        )
        assert result.is_failed()
        assert result.message == "Very specific error message"

    def test_determine_final_state_preserves_running_states_when_tasks_still_running(
        self,
    ):
        """
        With the task given a `Retrying` state that starts a day from now, the
        very same running state object is handed back.
        """
        pending_task = Task()
        single_task_flow = Flow(name="test", tasks=[pending_task])
        running = Running()
        retry_tomorrow = Retrying(start_time=pendulum.now("utc").add(days=1))
        result = FlowRunner(flow=single_task_flow).get_flow_run_state(
            **self._step_kwargs(
                running,
                LocalExecutor(),
                task_states={pending_task: retry_tomorrow},
            )
        )
        assert result is running
# set old heartbeat on flow run and task run await models.FlowRun.where(id=running_flow_run_id).update( set={"heartbeat": pendulum.now("utc").subtract(hours=1)}) await models.TaskRun.where(id=task_run_id).update( set={"heartbeat": pendulum.now("utc").subtract(hours=1)}) assert await ZombieKiller().reap_zombie_task_runs() == 1 task_run = await models.TaskRun.where(id=task_run_id).first({"state"}) assert task_run.state == "Failed" @pytest.mark.parametrize( "state", [ Scheduled(start_time=pendulum.now("UTC").add(hours=1)), Retrying(start_time=pendulum.now("UTC").add(hours=1)), ], ) async def test_zombie_killer_does_not_apply_if_task_run_is_scheduled( running_flow_run_id, task_run_id, state): await api.states.set_task_run_state(task_run_id, state=state) # set old heartbeats await models.FlowRun.where(id=running_flow_run_id).update( set={"heartbeat": pendulum.now("utc").subtract(hours=1)}) await models.TaskRun.where(id=task_run_id).update( set={"heartbeat": pendulum.now("utc").subtract(hours=1)}) assert await ZombieKiller().reap_zombie_task_runs() == 0
def test_state_type_methods_with_retry_state(self):
    """
    A `Retrying` state reports itself as pending, retrying and scheduled, and
    as none of cached, running, finished, skipped, successful, failed, mapped
    or meta.
    """
    retrying = Retrying()
    # (method name, expected truthiness), checked in this order
    expectations = [
        ("is_pending", True),
        ("is_retrying", True),
        ("is_cached", False),
        ("is_running", False),
        ("is_finished", False),
        ("is_skipped", False),
        ("is_scheduled", True),
        ("is_successful", False),
        ("is_failed", False),
        ("is_mapped", False),
        ("is_meta_state", False),
    ]
    for method_name, expected in expectations:
        assert bool(getattr(retrying, method_name)()) is expected
async def test_start_two_runs_from_b_and_c(self, flow, agent):
    """
    https://github.com/PrefectHQ/cloud/issues/173

    Simulates a scenario in which A is successful, B wants to be retried
    immediately and C wants to be retried after a delay. When B starts, D must
    not run (because C is not finished); when C starts later, D DOES run
    because it loads B's state.
    """
    flow_run_id = await api.runs.create_flow_run(flow_id=flow.server_id)

    async def put_task_in_state(task, state_cls):
        # look up (or create) the task run, then give it a fresh state
        task_run_id = await api.runs.get_or_create_task_run(
            flow_run_id=flow_run_id, task_id=task.id
        )
        await api.states.set_task_run_state(
            task_run_id=task_run_id, state=state_cls()
        )

    async def run_and_collect():
        # run whatever is scheduled, then read back the flow run's state type
        # and the deserialized state of each task run, keyed by task slug
        await agent.run_scheduled(flow_id=flow.server_id)
        # wait for states to be written to the db
        await asyncio.sleep(1.0)
        flow_run = await models.FlowRun.where(id=flow_run_id).first(
            {
                "serialized_state": True,
                "task_runs": {"task": {"slug"}, "serialized_state": True},
            },
        )
        states_by_slug = {
            task_run.task.slug: state_schema.load(task_run.serialized_state)
            for task_run in flow_run.task_runs
        }
        return flow_run.serialized_state["type"], states_by_slug

    def assert_task_states(states_by_slug, expected):
        for task, state_cls in expected:
            assert isinstance(states_by_slug[task.slug], state_cls)

    # the flow run is running, A has succeeded and B is waiting for a retry
    await api.states.set_flow_run_state(flow_run_id, state=Running())
    await put_task_in_state(flow.a, Success)
    await put_task_in_state(flow.b, Retrying)

    # -------------------------------------------
    # this run should only run B and attempt D
    flow_state_type, task_states = await run_and_collect()
    assert flow_state_type == "Running"
    assert_task_states(
        task_states,
        [
            (flow.a, Success),
            (flow.b, Success),
            (flow.c, Pending),
            (flow.d, Pending),
        ],
    )

    # -------------------------------------------
    # this run should run C and D
    await put_task_in_state(flow.c, Retrying)
    flow_state_type, task_states = await run_and_collect()
    assert flow_state_type == "Success"
    assert_task_states(
        task_states,
        [
            (flow.a, Success),
            (flow.b, Success),
            (flow.c, Success),
            (flow.d, Success),
        ],
    )
except ResultNotImplementedError: ======= except NotImplementedError: >>>>>>> prefect clone pass state_context = {"_loop_count": prefect.context["task_loop_count"]} if run_count <= self.task.max_retries: start_time = pendulum.now("utc") + self.task.retry_delay msg = "Retrying Task (after attempt {n} of {m})".format( n=run_count, m=self.task.max_retries + 1 ) retry_state = Retrying( start_time=start_time, context=state_context, message=msg, run_count=run_count, result=loop_result, ) return retry_state return state def check_task_is_looping( self, state: State, inputs: Dict[str, Result] = None, upstream_states: Dict[Edge, State] = None, context: Dict[str, Any] = None, ) -> State: """