def test_viz_reflects_multiple_mapping_if_flow_state_provided(self):
    """
    Visualize a flow in which one mapped task maps over another mapped task,
    and check that the rendered graph contains a dashed ``x`` edge between the
    two sets of mapped children for each map index (0 and 1).
    """
    # pretend we are running inside an IPython kernel
    ipython = MagicMock(
        get_ipython=lambda: MagicMock(config=dict(IPKernelApp=True))
    )
    add = AddTask(name="a_nice_task")
    list_task = Task(name="a_list_task")

    map_state1 = Mapped(map_states=[Success(), TriggerFailed()])
    map_state2 = Mapped(map_states=[Success(), Failed()])

    with patch.dict("sys.modules", IPython=ipython):
        with Flow(name="test") as f:
            first_res = add.map(x=list_task, y=8)
            with pytest.warns(
                UserWarning
            ):  # making a copy of a task with dependencies
                res = first_res.map(x=first_res, y=9)
        graph = f.visualize(
            flow_state=Success(
                result={
                    res: map_state1,
                    list_task: Success(),
                    first_res: map_state2,
                }
            )
        )

    # The expected edge must be looked up in the rendered source: asserting
    # the formatted string on its own is always true (non-empty string) and
    # would verify nothing.
    for map_index in ("0", "1"):
        expected_edge = "{first} -> {second} [label=x style=dashed]".format(
            first=str(id(first_res)) + map_index,
            second=str(id(res)) + map_index,
        )
        assert expected_edge in graph.source
def test_state_load_result_loads_map_states(self):
    """
    Loading the result of a ``Mapped`` state should also load the results of
    its mapped children.

    See https://github.com/PrefectHQ/prefect/pull/2952
    """

    class MyResult(Result):
        # "reading" simply stores the requested location as the value
        def read(self, *args, **kwargs):
            loaded = self.copy()
            loaded.value = kwargs.get("location", args[0])
            return loaded

    def child_results():
        # children may be None, hence the getattr with a default
        return [getattr(child, "result", None) for child in state.map_states]

    state = Mapped(
        map_states=[
            None,
            Success("1", result=MyResult(location="foo")),
            Success("2", result=MyResult(location="bar")),
        ]
    )

    # nothing is loaded up front
    assert state.message is None
    assert state.result is None
    assert child_results() == [None] * 3

    new_state = state.load_result(MyResult(location=""))

    # parent and children now expose the loaded values
    assert new_state.result == [None, "foo", "bar"]
    assert not new_state._result.location
    assert child_results() == [None, "foo", "bar"]
def test_n_map_states():
    """
    ``n_map_states`` equals the number of ``map_states`` when only those are
    given, and equals the explicit value whenever one is passed.
    """
    from_children = Mapped(map_states=[1, 2])
    assert from_children.n_map_states == 2

    explicit_only = Mapped(n_map_states=4)
    assert explicit_only.n_map_states == 4

    # an explicit count takes precedence over the length of map_states
    both_given = Mapped(map_states=[1, 2], n_map_states=4)
    assert both_given.n_map_states == 4
def test_viz_reflects_mapping_if_flow_state_provided(self):
    """
    Visualizing a flow together with a flow state that holds a ``Mapped``
    task state should render one colored node per mapped child and one dashed
    edge per child for every mapped input.
    """
    # pretend we are running inside an IPython kernel
    fake_ipython = MagicMock(
        get_ipython=lambda: MagicMock(config=dict(IPKernelApp=True))
    )
    add = AddTask(name="a_nice_task")
    list_task = Task(name="a_list_task")
    map_state = Mapped(map_states=[Success(), Failed()])

    with patch.dict("sys.modules", IPython=fake_ipython):
        with Flow(name="test") as f:
            res = add.map(x=list_task, y=8)
        flow_state = Success(result={res: map_state, list_task: Success()})
        graph = f.visualize(flow_state=flow_state)

    # one colored node for each mapped result
    expected_nodes = [
        'label="a_nice_task <map>" color="#00800080"',
        'label="a_nice_task <map>" color="#FF000080"',
        'label=a_list_task color="#00800080"',
        'label=8 color="#00000080"',
    ]
    for node in expected_nodes:
        assert node in graph.source

    # two edges for each input to add()
    expected_edges = [
        "{0} [label={1} style=dashed]".format(index, var)
        for var in ["x", "y"]
        for index in [0, 1]
    ]
    for edge in expected_edges:
        assert edge in graph.source
def test_task_map_with_no_upstream_results_and_a_mapped_state(executor):
    """
    Mapped tasks must generate their children even when the upstream result
    is unavailable.

    The flow is started with a ``Success`` state for ``numbers`` that carries
    no result, and a ``Mapped`` state for the mapped task that already holds
    three ``Pending`` children.

    Note that upstream results will be hydrated from remote locations when
    running with a Cloud TaskRunner.
    """

    @prefect.task
    def numbers():
        return [1, 2, 3]

    @prefect.task
    def identity(x):
        return x

    with Flow(name="test") as f:
        n = numbers()
        x = identity.map(n)

    # `n` has no result, but `x` already knows about its three children
    pending_children = [Pending() for _ in range(3)]
    starting_states = {n: Success(), x: Mapped(map_states=pending_children)}

    runner = FlowRunner(flow=f)
    state = runner.run(
        executor=executor,
        task_states=starting_states,
        return_tasks=f.tasks,
    )

    assert state.is_successful()
    assert state.result[x].result == [None] * 3
def test_mapped_retries_regenerate_child_pipelines():
    """
    Mimics a Cloud scenario: a reduce task is retried at some later point and
    the mapped children pipelines have to be regenerated.

    The second run is given states that only carry metadata about the earlier
    run (no actual data). The child runs should still be produced based only
    on the ``n_map_states`` attribute of the parent.
    """
    identity_task = IdTask()
    list_task = ListTask()

    with Flow("test") as flow:
        mapped = identity_task.map(list_task)
        reduced = identity_task(mapped)

    # first pass: an ordinary run
    first_pass = flow.run()
    assert first_pass.is_successful()
    assert first_pass.result[mapped].is_mapped()
    assert first_pass.result[reduced].is_successful()
    assert first_pass.result[reduced].result == [1, 2, 3]

    # second pass: metadata-only states for the upstream tasks
    metadata_only_states = {
        mapped: Mapped(n_map_states=3),
        list_task: Success(result=Result()),
    }
    second_pass = flow.run(task_states=metadata_only_states)
    assert second_pass.is_successful()
    assert second_pass.result[mapped].is_mapped()
    assert second_pass.result[reduced].is_successful()
def check_task_ready_to_map(self, state: State, upstream_states: Dict[Edge, State]) -> State:
    """
    Decide whether the parent task can proceed with mapping.

    This method never returns normally; every path ends by raising ``ENDRUN``
    with the state the run should finish in.

    Args:
        - state (State): the current state of this task
        - upstream_states (Dict[Edge, Union[State, List[State]]]): the upstream states

    Raises:
        - ENDRUN: always; it carries the given state if that state is already
            mapped, a `Failed` state if mapping is not possible, and a
            `Mapped` state otherwise
    """
    # already mapped: hand the state back untouched
    if state.is_mapped():
        raise ENDRUN(state)

    # we can't map if there are no success states with iterables upstream
    if upstream_states:
        mappable = [
            edge.mapped and upstream.is_successful()
            for edge, upstream in upstream_states.items()
        ]
        if not any(mappable):
            failed = Failed("No upstream states can be mapped over.")  # type: State
            raise ENDRUN(failed)

    # every successful, not-yet-mapped upstream reached through a mapped edge
    # must have an indexable result
    indexable = [
        hasattr(upstream.result, "__getitem__")
        for edge, upstream in upstream_states.items()
        if upstream.is_successful() and not upstream.is_mapped() and edge.mapped
    ]
    if not all(indexable):
        raise ENDRUN(Failed("At least one upstream state has an unmappable result."))

    raise ENDRUN(Mapped("Ready to proceed with mapping."))
def check_task_ready_to_map(self, state: State, upstream_states: Dict[Edge, State]) -> State:
    """
    Decide whether the parent task can proceed with mapping.

    This method never returns normally; every path ends by raising ``ENDRUN``
    with the state the run should finish in.

    Args:
        - state (State): the current state of this task
        - upstream_states (Dict[Edge, Union[State, List[State]]]): the upstream states

    Raises:
        - ENDRUN: always; it carries the (updated) given state if that state
            is already mapped, a `Failed` state if mapping is not possible,
            and otherwise a `Mapped` state with `n_map_states` set
    """
    if state.is_mapped():
        # this indicates we are executing a re-run of a mapped pipeline;
        # in this case, we populate both `map_states` and `cached_inputs`
        # to ensure the flow runner can properly regenerate the child tasks,
        # regardless of whether we mapped over an exchanged piece of data
        # or a non-data-exchanging upstream dependency
        if len(state.map_states) == 0 and state.n_map_states > 0:  # type: ignore
            state.map_states = [None] * state.n_map_states  # type: ignore
        state.cached_inputs = {
            edge.key: upstream._result  # type: ignore
            for edge, upstream in upstream_states.items()
            if edge.key
        }
        raise ENDRUN(state)

    # we can't map if there are no success states with iterables upstream
    if upstream_states:
        mappable = [
            edge.mapped and upstream.is_successful()
            for edge, upstream in upstream_states.items()
        ]
        if not any(mappable):
            failed = Failed("No upstream states can be mapped over.")  # type: State
            raise ENDRUN(failed)

    # every successful, not-yet-mapped upstream reached through a mapped edge
    # must have an indexable result
    indexable = [
        hasattr(upstream.result, "__getitem__")
        for edge, upstream in upstream_states.items()
        if upstream.is_successful() and not upstream.is_mapped() and edge.mapped
    ]
    if not all(indexable):
        raise ENDRUN(Failed("At least one upstream state has an unmappable result."))

    # compute and set n_map_states: the shortest length among all mapped inputs
    lengths_from_results = [
        len(upstream.result)
        for edge, upstream in upstream_states.items()
        if edge.mapped and upstream.is_successful() and not upstream.is_mapped()
    ]
    lengths_from_mapped = [
        upstream.n_map_states  # type: ignore
        for edge, upstream in upstream_states.items()
        if edge.mapped and upstream.is_mapped()
    ]
    n_map_states = min(lengths_from_results + lengths_from_mapped, default=0)

    raise ENDRUN(
        Mapped("Ready to proceed with mapping.", n_map_states=n_map_states)
    )
def test_mapped_will_use_existing_map_states_if_available(self, executor):
    """
    When a ``Mapped`` state with child states is supplied for a mapped task,
    the run should use those children: the second child keeps its
    pre-supplied result of 100.
    """
    with Flow(name="test") as flow:
        res = ReturnTask().map([0, 1])

    existing_children = [Success(), Success(result=100)]
    runner = FlowRunner(flow=flow)
    state = runner.run(
        return_tasks=[res],
        executor=executor,
        task_states={res: Mapped(map_states=existing_children)},
    )

    assert state.is_successful()
    second_child = state.result[res].map_states[1]
    assert second_child.is_successful()
    assert second_child.result == 100
def run_mapped_task(
    self,
    state: State,
    upstream_states: Dict[Edge, State],
    context: Dict[str, Any],
    executor: "prefect.engine.executors.Executor",
) -> State:
    """
    If the task is being mapped, submits children tasks for execution. Returns a `Mapped` state.

    The method works in three steps:
        1. build one dictionary of upstream states per map index, stopping at the end
           of the shortest mapped input
        2. move this task into a `Mapped` state that holds the initial child states
           (taken from `state.map_states` if `state` is already `Mapped`, padded with `None`)
        3. submit one child run per map index through `executor.map` and move into a
           `Mapped` state holding whatever `executor.map` returned

    Args:
        - state (State): the current task state
        - upstream_states (Dict[Edge, State]): the upstream states
        - context (dict): prefect Context to use for execution; it is copied and
            extended with `map_index` for each child run
        - executor (Executor): executor to use when performing computation

    Returns:
        - State: the state returned by `handle_state_change`; this is returned early
            if the first state change does not yield the proposed `Mapped` state

    Raises:
        - ENDRUN: if the current state is not `Running`
            (NOTE(review): nothing in this method raises ENDRUN directly; presumably
            this can only come from `handle_state_change` - confirm)
    """

    # one entry per map index: the upstream states the corresponding child will see
    map_upstream_states = []

    # we don't know how long the iterables are, but we want to iterate until we reach
    # the end of the shortest one
    counter = itertools.count()

    # infinite loop, if upstream_states has any entries
    # NOTE(review): the only exit from this loop is an IndexError; if no edge is mapped,
    # nothing below raises one. Presumably callers guarantee at least one mapped edge - confirm.
    while True and upstream_states:
        i = next(counter)
        states = {}

        try:

            for edge, upstream_state in upstream_states.items():

                # if the edge is not mapped over, then we take its state
                if not edge.mapped:
                    states[edge] = upstream_state

                # if the edge is mapped and the upstream state is Mapped, then we are mapping
                # over a mapped task. In this case, we take the appropriately-indexed upstream
                # state from the upstream task's `Mapped.map_states` array.
                # Note that these "states" might actually be futures at this time; we aren't
                # blocking until they finish.
                elif edge.mapped and upstream_state.is_mapped():
                    states[edge] = upstream_state.map_states[i]  # type: ignore

                # Otherwise, we are mapping over the result of a "vanilla" task. In this
                # case, we create a copy of the upstream state but set the result to the
                # appropriately-indexed item from the upstream task's `State.result`
                # array.
                else:
                    states[edge] = copy.copy(upstream_state)

                    # if the current state is already Mapped, then we might be executing
                    # a re-run of the mapping pipeline. In that case, the upstream states
                    # might not have `result` attributes (as any required results could be
                    # in the `cached_inputs` attribute of one of the child states).
                    # Therefore, we only try to get a result if EITHER this task's
                    # state is not already mapped OR the upstream result is not None.
                    if not state.is_mapped() or upstream_state._result != NoResult:
                        upstream_result = Result(
                            upstream_state.result[i],
                            result_handler=upstream_state._result.result_handler,  # type: ignore
                        )
                        states[edge].result = upstream_result
                    elif state.is_mapped():
                        # no upstream result to index into, so the number of existing
                        # child states bounds the iteration instead
                        if i >= len(state.map_states):  # type: ignore
                            raise IndexError()

            # only add this iteration if we made it through all iterables
            map_upstream_states.append(states)

        # index error means we reached the end of the shortest iterable
        except IndexError:
            break

    def run_fn(
        state: State, map_index: int, upstream_states: Dict[Edge, State]
    ) -> State:
        # runs a single child: same task, with `map_index` added to a copy of the context
        map_context = context.copy()
        map_context.update(map_index=map_index)
        with prefect.context(self.context):
            return self.run(
                upstream_states=upstream_states,
                # if we set the state here, then it will not be processed by `initialize_run()`
                state=state,
                context=map_context,
                executor=executor,
            )

    # generate initial states, if available
    if isinstance(state, Mapped):
        initial_states = list(state.map_states)  # type: List[Optional[State]]
    else:
        initial_states = []
    # pad with None so there is an initial state for every generated child
    # (multiplying a list by a non-positive number yields an empty list, so this
    # never shortens `initial_states`)
    initial_states.extend([None] * (len(map_upstream_states) - len(initial_states)))

    current_state = Mapped(
        message="Preparing to submit {} mapped tasks.".format(len(initial_states)),
        map_states=initial_states,  # type: ignore
    )
    state = self.handle_state_change(old_state=state, new_state=current_state)
    # if the state change produced a different state than the one proposed, stop here
    if state is not current_state:
        return state

    # map over the initial states, a counter representing the map_index, and also the mapped upstream states
    map_states = executor.map(
        run_fn, initial_states, range(len(map_upstream_states)), map_upstream_states
    )

    self.logger.debug(
        "{} mapped tasks submitted for execution.".format(len(map_states))
    )
    new_state = Mapped(
        message="Mapped tasks submitted for execution.", map_states=map_states
    )
    return self.handle_state_change(old_state=state, new_state=new_state)
def test_task_map_with_no_upstream_results_and_a_mapped_state(executor):
    """
    Mapped tasks must generate their children even when run multiple times
    and without available upstream results.

    The same flow is run from three different starting points. In each one
    some upstream results are missing, and in each one the final sum must
    still come out as 12.
    """

    @prefect.task
    def numbers():
        return [1, 2, 3]

    @prefect.task
    def plus_one(x):
        return x + 1

    @prefect.task
    def get_sum(x):
        return sum(x)

    with Flow(name="test") as f:
        n = numbers()
        x = plus_one.map(n)
        y = plus_one.map(x)
        s = get_sum(y)

    def run_from(task_states):
        # fresh runner for every starting point
        return FlowRunner(flow=f).run(
            executor=executor,
            task_states=task_states,
            return_tasks=f.tasks,
        )

    # first run with a missing result from `n` but map_states for `x`
    state = run_from(
        {
            n: Success(),
            x: Mapped(
                map_states=[
                    Pending(cached_inputs={"x": Result(i)}) for i in range(1, 4)
                ]
            ),
        }
    )
    assert state.is_successful()
    assert state.result[s].result == 12

    # next run with missing results for n and x; the last child of `y`
    # is retrying from its cached inputs
    state = run_from(
        {
            n: Success(),
            x: Mapped(map_states=[Success(), Success(), Success()]),
            y: Mapped(
                map_states=[
                    Success(result=3),
                    Success(result=4),
                    Retrying(cached_inputs={"x": Result(4)}),
                ]
            ),
        }
    )
    assert state.is_successful()
    assert state.result[s].result == 12

    # next run with missing results for n and x, and every child of `y`
    # already successful
    state = run_from(
        {
            n: Success(),
            x: Mapped(map_states=[Success(), Success(), Success()]),
            y: Mapped(
                map_states=[
                    Success(result=3),
                    Success(result=4),
                    Success(result=5),
                ]
            ),
        }
    )
    assert state.is_successful()
    assert state.result[s].result == 12
def test_trigger_failed_is_failed(self): assert issubclass(TriggerFailed, Failed) @pytest.mark.parametrize( "state_check", [ dict(state=Cancelled(), assert_true={"is_finished"}), dict(state=Cached(), assert_true={"is_cached", "is_finished", "is_successful"}), dict(state=ClientFailed(), assert_true={"is_meta_state"}), dict(state=Failed(), assert_true={"is_finished", "is_failed"}), dict(state=Finished(), assert_true={"is_finished"}), dict(state=Looped(), assert_true={"is_finished", "is_looped"}), dict(state=Mapped(), assert_true={"is_finished", "is_mapped", "is_successful"}), dict(state=Paused(), assert_true={"is_pending", "is_scheduled"}), dict(state=Pending(), assert_true={"is_pending"}), dict(state=Queued(), assert_true={"is_meta_state", "is_queued"}), dict(state=Resume(), assert_true={"is_pending", "is_scheduled"}), dict(state=Retrying(), assert_true={"is_pending", "is_scheduled", "is_retrying"}), dict(state=Running(), assert_true={"is_running"}), dict(state=Scheduled(), assert_true={"is_pending", "is_scheduled"}), dict(state=Skipped(), assert_true={"is_finished", "is_successful", "is_skipped"}), dict(state=Submitted(), assert_true={"is_meta_state", "is_submitted"}), dict(state=Success(), assert_true={"is_finished", "is_successful"}), dict(state=TimedOut(), assert_true={"is_finished", "is_failed"}), dict(state=TriggerFailed(), assert_true={"is_finished", "is_failed"}),
assert issubclass(TimedOut, Failed) def test_trigger_failed_is_failed(self): assert issubclass(TriggerFailed, Failed) @pytest.mark.parametrize( "state_check", [ dict(state=Cancelled(), assert_true={"is_finished", "is_failed"}), dict(state=Cached(), assert_true={"is_cached", "is_finished", "is_successful"}), dict(state=ClientFailed(), assert_true={"is_meta_state"}), dict(state=Failed(), assert_true={"is_finished", "is_failed"}), dict(state=Finished(), assert_true={"is_finished"}), dict(state=Looped(), assert_true={"is_finished", "is_looped"}), dict(state=Mapped(), assert_true={"is_finished", "is_mapped", "is_successful"}), dict(state=Paused(), assert_true={"is_pending", "is_scheduled"}), dict(state=Pending(), assert_true={"is_pending"}), dict(state=Queued(), assert_true={"is_meta_state", "is_queued"}), dict(state=Resume(), assert_true={"is_pending", "is_scheduled"}), dict( state=Retrying(), assert_true={"is_pending", "is_scheduled", "is_retrying"} ), dict(state=Running(), assert_true={"is_running"}), dict(state=Scheduled(), assert_true={"is_pending", "is_scheduled"}), dict( state=Skipped(), assert_true={"is_finished", "is_successful", "is_skipped"} ), dict(state=Submitted(), assert_true={"is_meta_state", "is_submitted"}), dict(state=Success(), assert_true={"is_finished", "is_successful"}), dict(state=TimedOut(), assert_true={"is_finished", "is_failed"}),
def test_state_type_methods_with_mapped_state(self):
    """
    A ``Mapped`` state reports itself as finished, successful and mapped, and
    answers false to every other state-type check.
    """
    mapped = Mapped()

    holds = ("is_finished", "is_successful", "is_mapped")
    does_not_hold = (
        "is_cached",
        "is_pending",
        "is_retrying",
        "is_running",
        "is_skipped",
        "is_scheduled",
        "is_failed",
        "is_meta_state",
    )

    for check in holds:
        assert getattr(mapped, check)()
    for check in does_not_hold:
        assert not getattr(mapped, check)()