def check_task_ready_to_map(self, state: State, upstream_states: Dict[Edge, State]) -> State:
    """
    Decides whether this (parent) task can be mapped and ends the run with the outcome.

    Every code path raises `ENDRUN`. The state carried by the signal is:
        - the incoming state, if it is already mapped
        - `Failed`, if upstream states were provided but none of them is both
          mapped over and successful
        - `Failed`, if a successful, not-yet-mapped upstream state that is mapped
          over holds a result without `__getitem__`
        - `Mapped` otherwise

    Args:
        - state (State): the current state of this task
        - upstream_states (Dict[Edge, State]): the upstream states, keyed by edge

    Raises:
        - ENDRUN: always; execution never continues past this check
    """
    if state.is_mapped():
        raise ENDRUN(state)

    # one flag per upstream edge: is it mapped over AND did it succeed?
    mappable_flags = [
        edge.mapped and upstream.is_successful()
        for edge, upstream in upstream_states.items()
    ]
    if upstream_states and not any(mappable_flags):
        raise ENDRUN(Failed("No upstream states can be mapped over."))

    # only successful, not-yet-mapped states on mapped edges need an indexable result
    indexable_flags = [
        hasattr(upstream.result, "__getitem__")
        for edge, upstream in upstream_states.items()
        if upstream.is_successful() and not upstream.is_mapped() and edge.mapped
    ]
    if not all(indexable_flags):
        raise ENDRUN(Failed("At least one upstream state has an unmappable result."))

    raise ENDRUN(Mapped("Ready to proceed with mapping."))
def check_task_ready_to_map(self, state: State, upstream_states: Dict[Edge, State]) -> State:
    """
    Decides whether this (parent) task can be mapped and ends the run with the outcome.

    Every code path raises `ENDRUN`. The state carried by the signal is:
        - the incoming state, if it is already mapped; its `map_states` and
          `cached_inputs` are refreshed before the signal is raised
        - `Failed`, if upstream states were provided but none of them is both
          mapped over and successful
        - `Failed`, if a successful, not-yet-mapped upstream state that is mapped
          over holds a result without `__getitem__`
        - `Mapped` otherwise, with `n_map_states` set to the smallest length found
          among the mapped-over upstream states (0 if there are none)

    Args:
        - state (State): the current state of this task
        - upstream_states (Dict[Edge, State]): the upstream states, keyed by edge

    Raises:
        - ENDRUN: always; execution never continues past this check
    """
    if state.is_mapped():
        # An already-mapped state means a mapped pipeline is being re-run. Both
        # `map_states` and `cached_inputs` are populated so the child tasks can be
        # regenerated, whether the mapping was over exchanged data or over an
        # upstream dependency that exchanges no data.
        if len(state.map_states) == 0 and state.n_map_states > 0:  # type: ignore
            state.map_states = [None] * state.n_map_states  # type: ignore
        inputs_by_key = {
            edge.key: upstream._result  # type: ignore
            for edge, upstream in upstream_states.items()
            if edge.key
        }
        state.cached_inputs = inputs_by_key  # type: ignore
        raise ENDRUN(state)

    # one flag per upstream edge: is it mapped over AND did it succeed?
    mappable_flags = [
        edge.mapped and upstream.is_successful()
        for edge, upstream in upstream_states.items()
    ]
    if upstream_states and not any(mappable_flags):
        raise ENDRUN(Failed("No upstream states can be mapped over."))

    # only successful, not-yet-mapped states on mapped edges need an indexable result
    indexable_flags = [
        hasattr(upstream.result, "__getitem__")
        for edge, upstream in upstream_states.items()
        if upstream.is_successful() and not upstream.is_mapped() and edge.mapped
    ]
    if not all(indexable_flags):
        raise ENDRUN(Failed("At least one upstream state has an unmappable result."))

    # lengths contributed by plain (non-mapped) successful upstream results
    result_lengths = [
        len(upstream.result)
        for edge, upstream in upstream_states.items()
        if edge.mapped and upstream.is_successful() and not upstream.is_mapped()
    ]
    # lengths contributed by upstream states that are themselves mapped
    mapped_lengths = [
        upstream.n_map_states  # type: ignore
        for edge, upstream in upstream_states.items()
        if edge.mapped and upstream.is_mapped()
    ]
    n_map_states = min(result_lengths + mapped_lengths, default=0)

    raise ENDRUN(Mapped("Ready to proceed with mapping.", n_map_states=n_map_states))
def check_task_is_ready(self, state: State) -> State:
    """
    Checks whether the task may proceed, based on its current state.

    Pending, Mapped and Cached states are handed back unchanged. Any other state
    (running, finished, or unrecognized) ends the run. The checks run in a fixed
    order, and the Cached check comes before the finished check.

    Args:
        - state (State): the current state of this task

    Returns:
        - State: the same state that was passed in, when the task may proceed

    Raises:
        - ENDRUN: if the task is not ready to run
    """

    def display_name() -> str:
        # looked up lazily so the context is only read when a message is logged
        return prefect.context.get("task_full_name", self.task.name)

    # a pending task is ready as-is
    if state.is_pending():
        return state

    # a mapped task still proceeds, so that its children tasks get generated
    if state.is_mapped():
        self.logger.debug(
            "Task '{name}': task is mapped, but run will proceed so children are generated.".format(
                name=display_name()
            )
        )
        return state

    # the task is already running
    if state.is_running():
        self.logger.debug(
            "Task '{name}': task is already running.".format(name=display_name())
        )
        raise ENDRUN(state)

    # cached states are returned before the finished check below can end the run
    if state.is_cached():
        return state

    # the task is already finished
    if state.is_finished():
        self.logger.debug(
            "Task '{name}': task is already finished.".format(name=display_name())
        )
        raise ENDRUN(state)

    # anything else is neither pending nor otherwise runnable
    self.logger.debug(
        "Task '{name}' is not ready to run or state was unrecognized ({state}).".format(
            name=display_name(),
            state=state,
        )
    )
    raise ENDRUN(state)
def wait_for_mapped_task(
    self, state: State, executor: "prefect.engine.executors.Executor"
) -> State:
    """
    Waits, via the executor, on the children of a mapped state.

    A state that is not mapped is returned untouched. For a mapped state,
    `state.map_states` is replaced in place by whatever `executor.wait` returns
    for the current `map_states`.

    Args:
        - state (State): the current state (acted upon only if it is mapped)
        - executor (Executor): the run's executor

    Returns:
        - State: the same state object that was passed in
    """
    if not state.is_mapped():
        return state

    assert isinstance(state, Mapped)  # mypy assert
    finished_children = executor.wait(state.map_states)
    state.map_states = finished_children
    return state
def load_results(
    self, state: State, upstream_states: Dict[Edge, State]
) -> Tuple[State, Dict[Edge, State]]:
    """
    Populates the result objects needed for this task run, on the task's own
    state and on each upstream state.

    For every edge, the result used is the upstream task's `result`, falling back
    to `self.flow_result` when that is falsy. `upstream_states` is updated in
    place with the states returned by `load_result`.

    Args:
        - state (State): the task's current state
        - upstream_states (Dict[Edge, State]): the upstream states, keyed by edge

    Returns:
        - Tuple[State, dict]: a tuple of (state, upstream_states)

    Raises:
        - ENDRUN: if anything fails while loading; the signal carries the state
            returned by `handle_state_change` for a `Failed` state
    """

    def result_for(edge):
        # the upstream task's own result wins; otherwise use the flow-level one
        return edge.upstream_task.result or self.flow_result

    results_by_key = {}

    try:
        # loading here ensures mapped children are only loaded once
        if state.is_mapped():
            state = state.load_result(self.result)

        for edge, upstream_state in upstream_states.items():
            upstream_states[edge] = upstream_state.load_result(result_for(edge))
            # only keyed edges contribute to the cached results
            if edge.key is not None:
                results_by_key[edge.key] = result_for(edge)

        state.load_cached_results(results_by_key)
    except Exception as exc:
        failure = Failed(
            message=f"Failed to retrieve task results: {exc}", result=exc
        )
        final_state = self.handle_state_change(old_state=state, new_state=failure)
        raise ENDRUN(final_state) from exc
    else:
        return state, upstream_states
def run_mapped_task(
    self,
    state: State,
    upstream_states: Dict[Edge, State],
    context: Dict[str, Any],
    executor: "prefect.engine.executors.Executor",
) -> State:
    """
    Builds the per-child upstream states for a mapped task and submits one child
    run per map index to the executor.

    Children are generated index by index until the shortest mapped-over upstream
    is exhausted. If `upstream_states` is empty, or none of its edges is mapped,
    no per-index upstream states are generated.

    Args:
        - state (State): the current task state
        - upstream_states (Dict[Edge, State]): the upstream states
        - context (dict, optional): prefect Context to use for execution
        - executor (Executor): executor to use when performing computation

    Returns:
        - State: the state returned by `handle_state_change`. This is normally the
            `Mapped` state holding the output of `executor.map`. If the state
            change to the intermediate "Preparing to submit" state returns a
            different state, that state is returned and nothing is submitted.
    """
    map_upstream_states = []

    # The loop below only stops when a mapped edge raises IndexError. Without any
    # mapped edge it would run forever, so it is skipped entirely in that case
    # (this also covers an empty `upstream_states`).
    has_mapped_edge = any(edge.mapped for edge in upstream_states)

    if has_mapped_edge:
        # we don't know how long the iterables are, so iterate until the end of
        # the shortest one is reached
        for i in itertools.count():
            states = {}

            try:
                for edge, upstream_state in upstream_states.items():

                    # if the edge is not mapped over, then we take its state
                    if not edge.mapped:
                        states[edge] = upstream_state

                    # if the edge is mapped and the upstream state is Mapped, then we
                    # are mapping over a mapped task. In this case, we take the
                    # appropriately-indexed upstream state from the upstream task's
                    # `Mapped.map_states` array.
                    # Note that these "states" might actually be futures at this
                    # time; we aren't blocking until they finish.
                    elif edge.mapped and upstream_state.is_mapped():
                        states[edge] = upstream_state.map_states[i]  # type: ignore

                    # Otherwise, we are mapping over the result of a "vanilla" task.
                    # In this case, we create a copy of the upstream state but set
                    # the result to the appropriately-indexed item from the upstream
                    # task's `State.result` array.
                    else:
                        states[edge] = copy.copy(upstream_state)

                        # if the current state is already Mapped, then we might be
                        # executing a re-run of the mapping pipeline. In that case,
                        # the upstream states might not have `result` attributes (as
                        # any required results could be in the `cached_inputs`
                        # attribute of one of the child states). Therefore, we only
                        # try to get a result if EITHER this task's state is not
                        # already mapped OR the upstream result is not NoResult.
                        if not state.is_mapped() or upstream_state._result != NoResult:
                            upstream_result = Result(
                                upstream_state.result[i],
                                result_handler=upstream_state._result.result_handler,  # type: ignore
                            )
                            states[edge].result = upstream_result
                        # with no upstream result to index, the existing children
                        # of this mapped state bound the number of iterations
                        elif state.is_mapped() and i >= len(state.map_states):  # type: ignore
                            raise IndexError()

            # index error means we reached the end of the shortest iterable
            except IndexError:
                break
            else:
                # only add this iteration if we made it through all iterables
                map_upstream_states.append(states)

    def run_fn(
        state: State, map_index: int, upstream_states: Dict[Edge, State]
    ) -> State:
        # each child runs with its own copy of the context carrying its map index
        map_context = context.copy()
        map_context.update(map_index=map_index)
        with prefect.context(self.context):
            return self.run(
                upstream_states=upstream_states,
                # if we set the state here, then it will not be processed by `initialize_run()`
                state=state,
                context=map_context,
                executor=executor,
            )

    # generate initial states, if available
    if isinstance(state, Mapped):
        initial_states = list(state.map_states)  # type: List[Optional[State]]
    else:
        initial_states = []
    # pad with None so every generated child index has an initial state
    initial_states.extend([None] * (len(map_upstream_states) - len(initial_states)))

    current_state = Mapped(
        message="Preparing to submit {} mapped tasks.".format(len(initial_states)),
        map_states=initial_states,  # type: ignore
    )
    state = self.handle_state_change(old_state=state, new_state=current_state)
    # a different state coming back means the change was overridden; stop here
    if state is not current_state:
        return state

    # map over the initial states, a counter representing the map_index, and also
    # the mapped upstream states
    map_states = executor.map(
        run_fn, initial_states, range(len(map_upstream_states)), map_upstream_states
    )

    self.logger.debug(
        "{} mapped tasks submitted for execution.".format(len(map_states))
    )
    new_state = Mapped(
        message="Mapped tasks submitted for execution.", map_states=map_states
    )
    return self.handle_state_change(old_state=state, new_state=new_state)