def test_flow_run_method_returns_task_states_even_if_it_doesnt_run():
    # https://github.com/PrefectHQ/prefect/issues/19
    flow = Flow(name="test")
    task1 = SuccessTask()
    task2 = ErrorTask()
    flow.add_edge(task1, task2)

    flow_state = flow.run(state=Success())
    assert flow_state.is_successful()
    assert flow_state.result[task1].is_pending()
    assert flow_state.result[task2].is_pending()

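# SuccessTask and ErrorTask are helpers assumed to be defined elsewhere in
# this test suite; the definitions below are a minimal hypothetical sketch
# consistent with how the tests here use them (one task that returns a value,
# one that raises).
from prefect import Task


class SuccessTask(Task):
    def run(self):
        return 1


class ErrorTask(Task):
    def run(self):
        raise ValueError("custom-error-message")
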
def test_flow_runner_makes_copy_of_task_results_dict():
    """
    Ensure the flow runner copies the ``task_states`` dict rather than
    modifying it in place.
    """
    flow = Flow(name="test")
    t1, t2 = Task(), Task()
    flow.add_edge(t1, t2)

    task_states = {t1: Pending()}
    state = flow.run(task_states=task_states)
    assert state.result[t1] == Success(result=None)
    assert task_states == {t1: Pending()}

def test_flow_runner_does_not_return_task_states_when_it_doesnt_run():
    flow = Flow(name="test")
    task1 = SuccessTask()
    task2 = ErrorTask()
    flow.add_edge(task1, task2)

    flow_state = FlowRunner(flow=flow).run(
        state=Success(result=5), return_tasks=[task1, task2]
    )
    assert isinstance(flow_state, Success)
    assert flow_state.result == 5

def test_load_results_from_upstream_reads_secret_results(self, cloud_api):
    secret_result = SecretResult(prefect.tasks.secrets.PrefectSecret(name="foo"))

    state = Success(result=PrefectResult(location="foo"))

    with prefect.context(secrets=dict(foo=42)):
        edge = Edge(Task(result=secret_result), 2, key="x")
        new_state, upstreams = CloudTaskRunner(task=Task()).load_results(
            state=Pending(), upstream_states={edge: state}
        )

    assert upstreams[edge].result == 42

def test_load_results_from_upstream_reads_results(self, cloud_api):
    result = PrefectResult(location="1")
    state = Success(result=result)
    assert result.value is None

    t = Task(result=PrefectResult())
    edge = Edge(t, 2, key="x")
    new_state, upstreams = CloudTaskRunner(task=Task()).load_results(
        state=Pending(), upstream_states={edge: state}
    )
    assert upstreams[edge].result == 1

def test_creates_subprocess_correctly(self, cloud_mocks, mocks, include_local_env):
    # Return a scheduled flow run to start
    cloud_mocks.FlowRunView.from_flow_run_id().state = Scheduled()
    # Return a finished flow run after the first iteration
    cloud_mocks.FlowRunView().get_latest().state = Success()

    execute_flow_run_in_subprocess("flow-run-id", include_local_env=include_local_env)

    # Should pass the correct flow run id to wait for
    mocks.wait_for_flow_run_start_time.assert_called_once_with("flow-run-id")

    # Merge the starting env and the env generated for a flow run
    base_env = os.environ.copy() if include_local_env else {}
    generated_env = {
        "PREFECT__CLOUD__SEND_FLOW_RUN_LOGS": "True",
        "PREFECT__LOGGING__LEVEL": "INFO",
        "PREFECT__LOGGING__FORMAT": "[%(asctime)s] %(levelname)s - %(name)s | %(message)s",
        "PREFECT__LOGGING__DATEFMT": "%Y-%m-%d %H:%M:%S%z",
        "PREFECT__BACKEND": "cloud",
        "PREFECT__CLOUD__API": "https://api.prefect.io",
        "PREFECT__CLOUD__TENANT_ID": "",
        "PREFECT__CLOUD__API_KEY": cloud_mocks.Client().api_key,
        "PREFECT__CONTEXT__FLOW_RUN_ID": "flow-run-id",
        "PREFECT__CONTEXT__FLOW_ID": cloud_mocks.FlowRunView.from_flow_run_id().flow_id,
        "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudFlowRunner",
        "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudTaskRunner",
    }
    expected_env = {**base_env, **generated_env}

    # Calls the correct command w/ environment variables
    mocks.subprocess.run.assert_called_once_with(
        [sys.executable, "-m", "prefect", "execute", "flow-run"],
        env=expected_env,
    )

    # Return code is checked
    mocks.subprocess.run().check_returncode.assert_called_once()

def test_resource_tasks_always_rerun_on_flow_restart():
    @resource_manager
    class Resource:
        def __init__(self):
            nonlocal init_run
            init_run = True

        def setup(self):
            nonlocal setup_run
            setup_run = True
            return 1

        def cleanup(self, val):
            nonlocal cleanup_run
            cleanup_run = True

    with Flow("test") as flow:
        context = Resource()
        with context as resource:
            a = inc(resource)
            b = inc(resource)
            c = add(a, b)

    # rerun from partial completion
    task_states = {
        context.init_task: Success(result=Resource.resource_class()),
        context.setup_task: Success(),
        context.cleanup_task: Success(),
        a: Success(result=2),
    }
    init_run = setup_run = cleanup_run = False
    res = flow.run(task_states=task_states)
    assert res.is_successful()
    assert res.result[a].result == 2
    assert res.result[b].result == 2
    assert res.result[c].result == 4
    assert not init_run  # existing result used
    assert setup_run  # setup re-run
    assert cleanup_run  # cleanup re-run

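# `inc` and `add` above are assumed to be simple tasks defined elsewhere in
# this module; a hypothetical sketch that matches the asserted arithmetic
# (resource == 1, a == b == 2, c == 4):
import prefect


@prefect.task
def inc(x):
    return x + 1


@prefect.task
def add(x, y):
    return x + y
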
def test_flow_runner_calls_client_the_appropriate_number_of_times(client):
    flow = prefect.Flow(name="test")

    res = CloudFlowRunner(flow=flow).run()

    ## assertions
    assert client.get_flow_run_info.call_count == 1  # one time to pull latest state
    assert client.set_flow_run_state.call_count == 2  # Pending -> Running -> Success

    states = [call[1]["state"] for call in client.set_flow_run_state.call_args_list]
    assert states == [Running(), Success(result={})]

def test_load_results_from_upstream_reads_results_using_upstream_handlers(self):
    class CustomResult(Result):
        def read(self, *args, **kwargs):
            return "foo-bar-baz".split("-")

    state = Success(result=PrefectResult(location="1"))
    edge = Edge(Task(result=CustomResult()), 2, key="x")
    new_state, upstreams = CloudTaskRunner(task=Task()).load_results(
        state=Pending(),
        upstream_states={edge: state},
    )
    assert upstreams[edge].result == ["foo", "bar", "baz"]

def test_state_equality():
    assert State() == State()
    assert Success() == Success()
    assert Success(result=1) == Success(result=1)
    assert not State() == Success()
    assert not Success(result=1) == Success(result=2)
    assert Pending(cached_inputs=dict(x=1)) == Pending(cached_inputs=dict(x=1))
    assert not Pending(cached_inputs=dict(x=1)) == Pending(cached_inputs=dict(x=2))
    assert not Pending(cached_inputs=dict(x=1)) == Pending(cached_inputs=dict(y=1))

def test_viz_if_flow_state_provided(self, state):
    import graphviz

    ipython = MagicMock(
        get_ipython=lambda: MagicMock(config=dict(IPKernelApp=True))
    )
    with patch.dict("sys.modules", IPython=ipython):
        t = Task(name="a_nice_task")
        f = Flow(name="test")
        f.add_task(t)
        graph = f.visualize(flow_state=Success(result={t: state}))
    assert "label=a_nice_task" in graph.source
    assert 'color="' + state.color + '80"' in graph.source
    assert "shape=ellipse" in graph.source

async def test_set_task_run_state_with_result(self, run_query, task_run_id):
    result = Result(10, result_handler=JSONResultHandler())
    result.store_safe_value()
    state = Success(result=result)

    result = await run_query(
        query=self.mutation,
        variables=dict(
            input=dict(
                states=[
                    dict(
                        task_run_id=task_run_id,
                        version=0,
                        state=state.serialize(),
                    )
                ]
            )
        ),
    )

    tr = await models.TaskRun.where(
        id=result.data.set_task_run_states.states[0].id
    ).first({"state", "version"})
    assert tr.version == 1
    assert tr.state == "Success"

def test_model_cleaning_pipeline_save(data_dir, lineup_stats, stats, modeldata, tmpdir):
    """Test persisting model save data."""
    location = tmpdir.mkdir("data")
    flow = gen_pipeline()
    # Generate placeholder data for calls that require pulling from the NBA API
    task_states = {
        flow.get_tasks(name="Load lineup data")[0]: Success(
            message="Skipping", result=None
        ),
        flow.get_tasks(name="Get overall dataset from the Factory")[0]: Success(
            message="Substituted result", result=stats
        ),
        flow.get_tasks(name="Get lineup dataset from the Factory")[0]: Success(
            message="Substituted result", result=lineup_stats
        ),
    }
    run_pipeline(
        flow=flow,
        data_dir=str(data_dir / "2018-19"),
        output_dir=str(location),
        save_data=True,
        mode="model",
        Season="2018-19",
        GameDate="12/25/2018",
        task_states=task_states,
    )
    df = pd.read_csv(
        Path(str(location), "model-data", "data_00218DUMMY1.csv"),
        sep="|",
        index_col=0,
        dtype={
            "GAME_ID": str,
            "HOMEDESCRIPTION": str,
            "VISITORDESCRIPTION": str,
        },
    )
    df["GAME_DATE_EST"] = pd.to_datetime(df["GAME_DATE_EST"])

    assert df.equals(modeldata)

def test_loops_until_flow_run_is_finished(self, cloud_mocks, mocks):
    cloud_mocks.FlowRunView.from_flow_run_id().state = Scheduled()
    cloud_mocks.FlowRunView.from_flow_run_id().get_latest.side_effect = [
        MagicMock(state=Running()),
        MagicMock(state=Running()),
        MagicMock(state=Success()),
    ]

    execute_flow_run_in_subprocess("flow-run-id")

    # Ran the subprocess twice
    assert mocks.subprocess.run.call_count == 2
    # Waited each time
    assert mocks.wait_for_flow_run_start_time.call_count == 2

def checkpoint_handler(
    task_runner: DSTaskRunner, old_state: State, new_state: State
) -> State:
    """
    A handler designed to implement result caching by filename.

    If the result handler's ``read`` method can be successfully run, this
    handler loads the result of that method as the task result and sets the
    task state to ``Success``. Similarly, on successful completion of the
    task, if the task was actually run and not loaded from cache, this
    handler will apply the result handler's ``write`` method to the task.

    Parameters
    ----------
    task_runner : instance of DSTaskRunner
        The task runner associated with the flow the handler is used in.
    old_state : instance of prefect.engine.state.State
        The current state of the task.
    new_state : instance of prefect.engine.state.State
        The expected new state of the task.

    Returns
    -------
    new_state : instance of prefect.engine.state.State
        The actual new state of the task.
    """
    if os.environ.get("PREFECT__FLOWS__CHECKPOINTING") == "true":
        raise AttributeError("Cannot use standard prefect checkpointing with this handler")

    if (
        task_runner.result_handler is not None
        and old_state.is_pending()
        and new_state.is_running()
    ):
        if not hasattr(task_runner, "upstream_states"):
            raise TypeError(
                "upstream_states not found in task runner. Make sure to use "
                "prefect_ds.task_runner.DSTaskRunner."
            )
        input_mapping = _create_input_mapping(task_runner.upstream_states)
        try:
            data = task_runner.task.result_handler.read(input_mapping=input_mapping)
        except FileNotFoundError:
            return new_state
        except TypeError:  # unexpected argument input_mapping
            raise TypeError(
                "Result handler could not accept input_mapping argument. "
                "Please ensure that you are using a handler from prefect_ds."
            )
        result = Result(value=data, result_handler=task_runner.task.result_handler)
        state = Success(result=result, message="Task loaded from disk.")
        return state

    if (
        task_runner.result_handler is not None
        and old_state.is_running()
        and new_state.is_successful()
    ):
        input_mapping = _create_input_mapping(task_runner.upstream_states)
        task_runner.task.result_handler.write(new_state.result, input_mapping=input_mapping)

    return new_state

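# A minimal usage sketch for checkpoint_handler, mirroring the
# direct-invocation pattern in the tests below. PandasResultHandler and
# DSTaskRunner come from prefect_ds; the file name and the exact wiring into
# a full flow run are assumptions, not part of the handler's contract.
def _example_checkpoint_handler_usage(tmp_path):
    result_handler = PandasResultHandler(tmp_path / "frame.csv", "csv")
    task = Task(name="frame_task", result_handler=result_handler)

    runner = DSTaskRunner(task)
    runner.upstream_states = {}  # populated automatically during a real run

    # A Running -> Success transition hits the write branch and persists the
    # task's result to frame.csv.
    checkpoint_handler(runner, Running(), Success(result=pd.DataFrame({"one": [1]})))

    # A later Pending -> Running transition hits the read branch and returns
    # a Success state loaded from disk instead of letting the task re-run.
    loaded = checkpoint_handler(runner, Pending(), Running())
    assert loaded.is_successful() and loaded.message == "Task loaded from disk."
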
def test_secrets_are_rerun_on_restart():
    @prefect.task
    def identity(x):
        return x

    with Flow("test") as flow:
        secret = PrefectSecret("key")
        val = identity(secret)

    with prefect.context(secrets={"key": "val"}):
        state = FlowRunner(flow=flow).run(
            task_states={secret: Success()}, return_tasks=[val]
        )
    assert state.is_successful()
    assert state.result[val].result == "val"

def test_mapped_will_use_partial_existing_map_states_if_available(self, executor):
    with Flow(name="test") as flow:
        res = ReturnTask().map([1, 1])

    state = FlowRunner(flow=flow).run(
        return_tasks=[res],
        executor=executor,
        task_states={res: Mapped(map_states=[None, Success(result=100)])},
    )
    assert state.is_failed()
    assert state.result[res].map_states[0].is_failed()
    assert state.result[res].map_states[1].is_successful()
    assert state.result[res].map_states[1].result == 100

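# ReturnTask is assumed to be defined elsewhere in the test suite; a
# hypothetical stand-in consistent with the assertions above, where the
# un-cached map index (x == 1) must fail while index 1 keeps its pre-seeded
# Success(result=100):
class ReturnTask(Task):
    def run(self, x):
        return 1 / (x - 1)  # raises ZeroDivisionError for x == 1
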
async def test_set_multiple_flow_run_states(
    self, run_query, flow_run_id, flow_run_id_2, flow_run_id_3
):
    result = await run_query(
        query=self.mutation,
        variables=dict(
            input=dict(
                states=[
                    dict(
                        flow_run_id=flow_run_id,
                        version=1,
                        state=Running().serialize(),
                    ),
                    dict(
                        flow_run_id=flow_run_id_2,
                        version=10,
                        state=Success().serialize(),
                    ),
                    dict(
                        flow_run_id=flow_run_id_3,
                        version=3,
                        state=Retrying().serialize(),
                    ),
                ]
            )
        ),
    )
    assert result.data.set_flow_run_states.states == [
        {"id": flow_run_id, "status": "SUCCESS", "message": None},
        {"id": flow_run_id_2, "status": "SUCCESS", "message": None},
        {"id": flow_run_id_3, "status": "SUCCESS", "message": None},
    ]

    fr1 = await models.FlowRun.where(
        id=result.data.set_flow_run_states.states[0].id
    ).first({"state", "version"})
    assert fr1.version == 2
    assert fr1.state == "Running"

    fr2 = await models.FlowRun.where(
        id=result.data.set_flow_run_states.states[1].id
    ).first({"state", "version"})
    assert fr2.version == 3
    assert fr2.state == "Success"

    fr3 = await models.FlowRun.where(
        id=result.data.set_flow_run_states.states[2].id
    ).first({"state", "version"})
    assert fr3.version == 4
    assert fr3.state == "Retrying"

async def test_set_multiple_task_run_states(
    self, run_query, task_run_id, task_run_id_2, task_run_id_3, running_flow_run_id
):
    result = await run_query(
        query=self.mutation,
        variables=dict(
            input=dict(
                states=[
                    dict(task_run_id=task_run_id, state=Running().serialize()),
                    dict(task_run_id=task_run_id_2, state=Success().serialize()),
                    dict(
                        task_run_id=task_run_id_3,
                        version=1,
                        state=Retrying().serialize(),
                    ),
                ]
            )
        ),
    )
    assert result.data.set_task_run_states.states == [
        {"id": task_run_id, "status": "SUCCESS", "message": None},
        {"id": task_run_id_2, "status": "SUCCESS", "message": None},
        {"id": task_run_id_3, "status": "SUCCESS", "message": None},
    ]

    tr1 = await models.TaskRun.where(
        id=result.data.set_task_run_states.states[0].id
    ).first({"state", "version"})
    assert tr1.version == 2
    assert tr1.state == "Running"

    tr2 = await models.TaskRun.where(
        id=result.data.set_task_run_states.states[1].id
    ).first({"state", "version"})
    assert tr2.version == 3
    assert tr2.state == "Success"

    tr3 = await models.TaskRun.where(
        id=result.data.set_task_run_states.states[2].id
    ).first({"state", "version"})
    assert tr3.version == 3
    assert tr3.state == "Retrying"

class TestCheckFlowPendingOrRunning:
    @pytest.mark.parametrize(
        "state", [Pending(), Running(), Retrying(), Scheduled()]
    )
    def test_pending_or_running_are_ok(self, state):
        flow = Flow(name="test", tasks=[Task()])
        new_state = FlowRunner(flow=flow).check_flow_is_pending_or_running(state=state)
        assert new_state is state

    @pytest.mark.parametrize(
        "state", [Finished(), Success(), Failed(), Skipped(), State()]
    )
    def test_not_pending_or_running_raise_endrun(self, state):
        flow = Flow(name="test", tasks=[Task()])
        with pytest.raises(ENDRUN):
            FlowRunner(flow=flow).check_flow_is_pending_or_running(state=state)

def test_task_runner_preserves_location_of_inputs_when_retrying(self, client):
    """
    If a user opts out of checkpointing via checkpoint=False, we don't want
    to surprise them by storing the result in cached_inputs. This test
    ensures that whatever location is provided to a downstream task is the
    one that is used.
    """

    @prefect.task(max_retries=1, retry_delay=datetime.timedelta(days=1))
    def add(x, y):
        return x + y

    x = PrefectResult(value=1)
    y = PrefectResult(value="0", location="foo")
    state = Pending(cached_inputs=dict(x=x, y=y))
    x_state = Success()
    y_state = Success()
    upstream_states = {
        Edge(Task(), Task(), key="x"): x_state,
        Edge(Task(), Task(), key="y"): y_state,
    }
    res = CloudTaskRunner(task=add).run(state=state, upstream_states=upstream_states)

    ## assertions
    assert client.get_task_run_info.call_count == 0  # never called
    assert client.set_task_run_state.call_count == 3  # Pending -> Running -> Failed -> Retrying

    states = [call[1]["state"] for call in client.set_task_run_state.call_args_list]
    assert states[0].is_running()
    assert states[1].is_failed()
    assert isinstance(states[2], Retrying)
    assert states[2].cached_inputs["x"].location is None
    assert states[2].cached_inputs["y"].location == "foo"

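# For reference, the checkpointing opt-out the docstring above refers to is
# the task-level `checkpoint` flag; a minimal sketch (the task name here is
# illustrative, not from the suite):
@prefect.task(checkpoint=False)
def add_without_checkpointing(x, y):
    # With checkpoint=False the engine does not persist this return value,
    # so any result location supplied by upstream inputs passes through as-is.
    return x + y
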
class TestRunFlowStep:
    def test_running_state_finishes(self):
        flow = Flow(name="test", tasks=[Task()])
        new_state = FlowRunner(flow=flow).get_flow_run_state(
            state=Running(),
            task_states={},
            task_contexts={},
            return_tasks=set(),
            task_runner_state_handlers=[],
            executor=LocalExecutor(),
        )
        assert new_state.is_successful()

    @pytest.mark.parametrize(
        "state", [Pending(), Retrying(), Finished(), Success(), Failed(), Skipped()]
    )
    def test_other_states_raise_endrun(self, state):
        flow = Flow(name="test", tasks=[Task()])
        with pytest.raises(ENDRUN):
            FlowRunner(flow=flow).get_flow_run_state(
                state=state,
                task_states={},
                task_contexts={},
                return_tasks=set(),
                task_runner_state_handlers=[],
                executor=Executor(),
            )

    def test_determine_final_state_has_final_say(self):
        class MyFlowRunner(FlowRunner):
            def determine_final_state(self, *args, **kwargs):
                return Failed("Very specific error message")

        flow = Flow(name="test", tasks=[Task()])
        new_state = MyFlowRunner(flow=flow).get_flow_run_state(
            state=Running(),
            task_states={},
            task_contexts={},
            return_tasks=set(),
            task_runner_state_handlers=[],
            executor=LocalExecutor(),
        )
        assert new_state.is_failed()
        assert new_state.message == "Very specific error message"

def test_cloud_task_runner_handles_retries_with_queued_states_from_cloud(client):
    calls = []

    def queued_mock(*args, **kwargs):
        calls.append(kwargs)
        # first retry attempt will get queued
        if len(calls) == 4:
            return Queued()  # immediate start time
        else:
            return kwargs.get("state")

    client.set_task_run_state = queued_mock

    @prefect.task(
        max_retries=2,
        retry_delay=datetime.timedelta(seconds=0),
        result_handler=ResultHandler(),
    )
    def tagged_task(x):
        if prefect.context.get("task_run_count", 1) == 1:
            raise ValueError("gimme a sec")
        return x

    upstream_result = Result(value=42, result_handler=JSONResultHandler())
    res = CloudTaskRunner(task=tagged_task).run(
        context={"task_run_version": 1},
        state=None,
        upstream_states={
            Edge(Task(), tagged_task, key="x"): Success(result=upstream_result)
        },
        executor=prefect.engine.executors.LocalExecutor(),
    )

    assert res.is_successful()
    assert res.result == 42
    # Running -> Failed -> Retrying -> Queued -> Running -> Success
    assert len(calls) == 6
    assert [type(c["state"]).__name__ for c in calls] == [
        "Running",
        "Failed",
        "Retrying",
        "Running",
        "Running",
        "Success",
    ]

    # ensures result handler was called and persisted
    assert calls[2]["state"].cached_inputs["x"].safe_value.value == "42"

def test_viz_reflects_mapping_if_flow_state_provided(self):
    ipython = MagicMock(
        get_ipython=lambda: MagicMock(config=dict(IPKernelApp=True))
    )
    add = AddTask(name="a_nice_task")
    list_task = Task(name="a_list_task")

    map_state = Mapped(map_states=[Success(), Failed()])
    with patch.dict("sys.modules", IPython=ipython):
        with Flow(name="test") as f:
            res = add.map(x=list_task, y=8)
        graph = f.visualize(
            flow_state=Success(result={res: map_state, list_task: Success()})
        )

    # one colored node for each mapped result
    assert (
        'label="a_nice_task <map>" color="{success}80"'.format(success=Success.color)
        in graph.source
    )
    assert (
        'label="a_nice_task <map>" color="{failed}80"'.format(failed=Failed.color)
        in graph.source
    )
    assert (
        'label=a_list_task color="{success}80"'.format(success=Success.color)
        in graph.source
    )
    assert 'label=8 color="#00000080"' in graph.source

    # two edges for each input to add()
    for var in ["x", "y"]:
        for index in [0, 1]:
            assert "{0} [label={1} style=dashed]".format(index, var) in graph.source

def test_writes_checkpointed_file_to_disk_on_success(self, tmp_path):
    result_handler = PandasResultHandler(
        tmp_path / "dummy.csv", "csv", write_kwargs={"index": False}
    )
    task = Task(name="Task", result_handler=result_handler)
    expected_result = pd.DataFrame({"one": [1, 2, 3], "two": [4, 5, 6]})

    task_runner = DSTaskRunner(task)
    task_runner.upstream_states = {}
    old_state = Running()
    new_state = Success(result=expected_result)
    dsh.checkpoint_handler(task_runner, old_state, new_state)

    actual_result = pd.read_csv(tmp_path / "dummy.csv")
    pd.testing.assert_frame_equal(expected_result, actual_result)

def test_load_results_from_upstream_reads_cached_inputs_using_upstream_results(self):
    class CustomResult(Result):
        def read(self, *args, **kwargs):
            self.value = 99
            return self

    result = PrefectResult(location="1")
    state = Pending(cached_inputs=dict(x=result))
    edge = Edge(Task(result=CustomResult()), 2, key="x")
    new_state, upstreams = CloudTaskRunner(task=Task(result=PrefectResult())).load_results(
        state=state, upstream_states={edge: Success(result=result)}
    )

    assert new_state.cached_inputs["x"].value == 99

async def test_set_running_task_run_state_works_when_flow_run_is_not_running_if_force(
    self, flow_run_id, task_run_id
):
    await states.set_flow_run_state(flow_run_id, state=Success())

    await states.set_task_run_state(
        task_run_id=task_run_id, state=Running(), force=True
    )

    query = await models.TaskRun.where(id=task_run_id).first(
        {"version", "state", "serialized_state"}
    )

    assert query.version == 1
    assert query.state == "Running"
    assert query.serialized_state["type"] == "Running"

def test_task_runner_sets_mapped_state_prior_to_executor_mapping(client):
    upstream_states = {
        Edge(Task(), Task(), key="foo", mapped=True): Success(result=[1, 2])
    }

    with pytest.raises(ENDRUN) as exc:
        CloudTaskRunner(task=Task()).check_task_ready_to_map(
            state=Pending(), upstream_states=upstream_states
        )

    ## assertions
    assert client.get_task_run_info.call_count == 0  # never called
    assert client.set_task_run_state.call_count == 1  # Pending -> Mapped
    assert client.get_latest_cached_states.call_count == 0

    last_set_state = client.set_task_run_state.call_args_list[-1][1]["state"]
    assert last_set_state.is_mapped()

async def test_single_run_succeeds(self, flow, agent):
    flow_run_id = await api.runs.create_flow_run(flow_id=flow.server_id)
    await agent.run_scheduled(flow_id=flow.server_id)
    # wait for states to be written to the db
    await asyncio.sleep(1.5)

    fr = await models.FlowRun.where(id=flow_run_id).first(
        {
            "serialized_state": True,
            "task_runs": {"task": {"slug"}, "serialized_state": True},
        },
    )
    task_states = {
        tr.task.slug: state_schema.load(tr.serialized_state) for tr in fr.task_runs
    }

    assert fr.serialized_state["type"] == "Success"
    assert task_states == {t.slug: Success() for t in flow.tasks}

def test_flow_runner_prioritizes_kwarg_states_over_db_states(monkeypatch, state):
    flow = prefect.Flow(name="test")
    db_state = state("already", result=10)
    get_flow_run_info = MagicMock(return_value=MagicMock(state=db_state))
    set_flow_run_state = MagicMock()
    client = MagicMock(
        get_flow_run_info=get_flow_run_info, set_flow_run_state=set_flow_run_state
    )
    monkeypatch.setattr(
        "prefect.engine.cloud.flow_runner.Client", MagicMock(return_value=client)
    )
    res = CloudFlowRunner(flow=flow).run(state=Pending("let's do this"))

    ## assertions
    assert get_flow_run_info.call_count == 1  # one time to pull latest state
    assert set_flow_run_state.call_count == 2  # Pending -> Running -> Success

    states = [call[1]["state"] for call in set_flow_run_state.call_args_list]
    assert states == [Running(), Success(result={})]