def _validate_subset_info(subset_info, execution_plan, pipeline_context): if not subset_info: return for step_key in subset_info.subset: if not execution_plan.has_step(step_key): raise DagsterExecutionStepNotFoundError( 'Step {step_key} does not exist.'.format(step_key=step_key), step_key=step_key) for step_key, input_dict in subset_info.input_step_factory_fns.items(): if not execution_plan.has_step(step_key): raise DagsterExecutionStepNotFoundError( 'Step {step_key} does not exist.'.format(step_key=step_key), step_key=step_key) step = execution_plan.get_step_by_key(step_key) for input_name in input_dict.keys(): if not step.has_step_input(input_name): raise DagsterInvalidSubplanInputNotFoundError( 'Input {input_name} on {step_key} does not exist.'.format( input_name=input_name, step_key=step_key), pipeline_name=pipeline_context.pipeline_def.name, step_keys=list(subset_info.subset), input_name=input_name, step=step, )
def __new__(
    cls, pipeline, step_dict, step_handles_to_execute, environment_config, known_state=None,
):
    """Validate the step selection and build the plan together with its lookup maps.

    ``executable_map`` maps the key of each selected ``ExecutionStep`` to its handle.
    ``resolvable_map`` groups the handles of selected ``UnresolvedExecutionStep``s under the
    key of the step that resolves them.

    Raises:
        DagsterExecutionStepNotFoundError: a selected handle is not a key of ``step_dict``.
        DagsterInvariantViolationError: a selected unresolved step is resolved by a step that
            is not among the selected executable steps.
    """
    check.list_param(
        step_handles_to_execute,
        "step_handles_to_execute",
        of_type=(StepHandle, UnresolvedStepHandle, ResolvedFromDynamicStepHandle),
    )

    missing_steps = [
        handle.to_key() for handle in step_handles_to_execute if handle not in step_dict
    ]
    if missing_steps:
        plural = "s" if len(missing_steps) > 1 else ""
        raise DagsterExecutionStepNotFoundError(
            "Execution plan does not contain step{plural}: {steps}".format(
                plural=plural, steps=", ".join(missing_steps)
            ),
            step_keys=missing_steps,
        )

    # Every handle is known to be present at this point, so plain indexing is safe.
    selected_steps = [step_dict[handle] for handle in step_handles_to_execute]

    executable_map = {
        step.key: step.handle for step in selected_steps if isinstance(step, ExecutionStep)
    }

    # The executable map must be complete before unresolved steps are checked against it.
    resolvable_map: Dict[str, List[UnresolvedStepHandle]] = defaultdict(list)
    for step in selected_steps:
        if not isinstance(step, UnresolvedExecutionStep):
            continue
        if step.resolved_by_step_key not in executable_map:
            raise DagsterInvariantViolationError(
                f'UnresolvedExecutionStep "{step.key}" is resolved by "{step.resolved_by_step_key}" '
                "which is not part of the current step selection"
            )
        resolvable_map[step.resolved_by_step_key].append(step.handle)

    return super(ExecutionPlan, cls).__new__(
        cls,
        pipeline=check.inst_param(pipeline, "pipeline", IPipeline),
        step_dict=check.dict_param(
            step_dict,
            "step_dict",
            key_type=StepHandleTypes,
            value_type=(ExecutionStep, UnresolvedExecutionStep),
        ),
        executable_map=executable_map,
        resolvable_map=resolvable_map,
        step_handles_to_execute=step_handles_to_execute,
        environment_config=check.inst_param(
            environment_config, "environment_config", EnvironmentConfig
        ),
        known_state=check.opt_inst_param(known_state, "known_state", KnownExecutionState),
    )
def build_subset_plan(self, step_keys_to_execute: List[str]) -> "ExecutionPlan":
    """Return a new plan over the same steps, restricted to ``step_keys_to_execute``.

    Each key is parsed into a handle with ``StepHandle.parse_from_key``. The pipeline, step
    dict, environment config and known state of this plan are passed on unchanged.

    Raises:
        DagsterExecutionStepNotFoundError: one or more parsed handles are not in
            ``self.step_dict``; all offending keys are reported together.
    """
    check.list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)

    step_handles_to_execute = list(map(StepHandle.parse_from_key, step_keys_to_execute))

    bad_keys = [
        handle.to_key() for handle in step_handles_to_execute if handle not in self.step_dict
    ]
    if bad_keys:
        raise DagsterExecutionStepNotFoundError(
            f"Can not build subset plan from unknown step{'s' if len(bad_keys)> 1 else ''}: {', '.join(bad_keys)}",
            step_keys=bad_keys,
        )

    return ExecutionPlan(
        self.pipeline,
        self.step_dict,
        step_handles_to_execute,
        self.environment_config,
        self.known_state,
    )
def __new__(
    cls, pipeline, step_dict, deps, artifacts_persisted, step_keys_to_execute,
):
    """Validate the requested step keys and the field types, then build the plan.

    ``steps`` is derived from the values of ``step_dict``.

    Raises:
        DagsterExecutionStepNotFoundError: one or more of ``step_keys_to_execute`` is not a
            key of ``step_dict``; all missing keys are reported together.
    """
    missing_steps = [key for key in step_keys_to_execute if key not in step_dict]
    if missing_steps:
        plural = "s" if len(missing_steps) > 1 else ""
        message = "Execution plan does not contain step{plural}: {steps}".format(
            plural=plural, steps=", ".join(missing_steps)
        )
        raise DagsterExecutionStepNotFoundError(message, step_keys=missing_steps)

    # Type checks run in field order, matching the order of the keyword arguments below.
    checked_pipeline = check.inst_param(pipeline, "pipeline", IPipeline)
    checked_step_dict = check.dict_param(
        step_dict, "step_dict", key_type=str, value_type=ExecutionStep
    )
    checked_deps = check.dict_param(deps, "deps", key_type=str, value_type=set)
    steps = list(step_dict.values())
    checked_persisted = check.bool_param(artifacts_persisted, "artifacts_persisted")
    checked_keys = check.list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)

    return super(ExecutionPlan, cls).__new__(
        cls,
        pipeline=checked_pipeline,
        step_dict=checked_step_dict,
        deps=checked_deps,
        steps=steps,
        artifacts_persisted=checked_persisted,
        step_keys_to_execute=checked_keys,
    )
def _check_reexecution_config(pipeline_context, execution_plan, run_config):
    '''Validate the reexecution settings of ``run_config`` against storage and the plan.

    Raises:
        DagsterInvariantViolationError: the run storage is not persistent.
        DagsterRunNotFoundError: the previous run id is not present in run storage.
        DagsterExecutionStepNotFoundError: a step output handle names a step that is not in
            ``execution_plan``.
        DagsterStepOutputNotFoundError: a step output handle names an output its step lacks.
    '''
    run_storage = pipeline_context.run_storage
    check.invariant(run_storage)

    if not run_storage.is_persistent:
        raise DagsterInvariantViolationError(
            'Cannot perform reexecution with non persistent run storage.'
        )

    reexecution_config = run_config.reexecution_config
    previous_run_id = reexecution_config.previous_run_id

    if not run_storage.has_run(previous_run_id):
        raise DagsterRunNotFoundError(
            'Run id {} set as previous run id was not found in run storage'.format(
                previous_run_id
            ),
            invalid_run_id=previous_run_id,
        )

    for handle in reexecution_config.step_output_handles:
        step_key = handle.step_key
        output_name = handle.output_name

        if not execution_plan.has_step(step_key):
            raise DagsterExecutionStepNotFoundError(
                (
                    'Step {step_key} was specified as a step from a previous run. '
                    'It does not exist.'
                ).format(step_key=step_key),
                step_key=step_key,
            )

        step = execution_plan.get_step_by_key(step_key)
        if step.has_step_output(output_name):
            continue

        raise DagsterStepOutputNotFoundError(
            (
                'You specified a step_output_handle in the ReexecutionConfig that does '
                'not exist: Step {step_key} does not have output {output_name}.'
            ).format(step_key=step_key, output_name=output_name),
            step_key=step_key,
            output_name=output_name,
        )
def execute_plan(execution_plan, environment_dict=None, run_config=None, step_keys_to_execute=None):
    '''Execute ``execution_plan`` and return the resulting events as a list.

    This is the entry point of dagster-graphql executions. For the dagster CLI entry point,
    see execute_pipeline() above.

    The events produced by ``invoke_executor_on_plan`` are collected into a list while the
    pipeline context is still open.

    Raises:
        DagsterExecutionStepNotFoundError: a key in ``step_keys_to_execute`` is not a step of
            ``execution_plan``.
    '''
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict')
    run_config = check_run_config_param(run_config, execution_plan.pipeline_def)
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    # `or ()` turns both None and an empty list into a no-op loop.
    for step_key in step_keys_to_execute or ():
        if execution_plan.has_step(step_key):
            continue
        raise DagsterExecutionStepNotFoundError(
            'Execution plan does not contain step "{}"'.format(step_key), step_key=step_key
        )

    with scoped_pipeline_context(
        execution_plan.pipeline_def, environment_dict, run_config
    ) as pipeline_context:
        _setup_reexecution(run_config, pipeline_context, execution_plan)
        events = invoke_executor_on_plan(pipeline_context, execution_plan, step_keys_to_execute)
        return list(events)
def _validate_added_outputs(added_outputs, execution_plan, subset_info, pipeline_context): if not added_outputs: return for step_key, outputs_for_step in added_outputs.output_step_factory_fns.items( ): if not _is_step_in_subset(execution_plan, subset_info, step_key): raise DagsterExecutionStepNotFoundError( 'Step {step_key} does not exist.'.format(step_key=step_key), step_key=step_key) step = execution_plan.get_step_by_key(step_key) for output in outputs_for_step: check.inst(output, OutputStepFactoryEntry) output_name = output.output_name if not step.has_step_output(output_name): raise DagsterInvalidSubplanOutputNotFoundError( 'Execution step {step_key} does not have output {output}'. format(step_key=step_key, output=output_name), pipeline_name=pipeline_context.pipeline_def.name, step_keys=list(subset_info.subset), step=step, output_name=output_name, )
def __new__(
    cls, pipeline_def, step_dict, deps, artifacts_persisted, previous_run_id, step_keys_to_execute,
):
    '''Validate the requested step keys and the field types, then build the plan.

    ``steps`` is derived from the values of ``step_dict``.

    Raises:
        DagsterExecutionStepNotFoundError: one or more of ``step_keys_to_execute`` is not a
            key of ``step_dict``; all missing keys are reported together.
    '''
    missing_steps = [key for key in step_keys_to_execute if key not in step_dict]
    if missing_steps:
        plural = 's' if len(missing_steps) > 1 else ''
        message = 'Execution plan does not contain step{plural}: {steps}'.format(
            plural=plural, steps=', '.join(missing_steps)
        )
        raise DagsterExecutionStepNotFoundError(message, step_keys=missing_steps)

    # Type checks run in field order, matching the order of the keyword arguments below.
    checked_pipeline_def = check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    checked_step_dict = check.dict_param(
        step_dict, 'step_dict', key_type=str, value_type=ExecutionStep
    )
    checked_deps = check.dict_param(deps, 'deps', key_type=str, value_type=set)
    steps = list(step_dict.values())
    checked_persisted = check.bool_param(artifacts_persisted, 'artifacts_persisted')
    checked_previous_run_id = check.opt_str_param(previous_run_id, 'previous_run_id')
    checked_keys = check.list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    return super(ExecutionPlan, cls).__new__(
        cls,
        pipeline_def=checked_pipeline_def,
        step_dict=checked_step_dict,
        deps=checked_deps,
        steps=steps,
        artifacts_persisted=checked_persisted,
        previous_run_id=checked_previous_run_id,
        step_keys_to_execute=checked_keys,
    )
def __new__(
    cls, pipeline, step_dict, step_handles_to_execute, artifacts_persisted, environment_config,
):
    """Validate the selected step handles and build the plan with its executable map.

    ``executable_map`` maps the key of each selected step to that step's handle.

    Raises:
        DagsterExecutionStepNotFoundError: one or more selected handles are not keys of
            ``step_dict``; all missing keys are reported together.
    """
    check.list_param(step_handles_to_execute, "step_handles_to_execute", of_type=StepHandle)

    missing_steps = [
        handle.to_key() for handle in step_handles_to_execute if handle not in step_dict
    ]
    if missing_steps:
        plural = "s" if len(missing_steps) > 1 else ""
        raise DagsterExecutionStepNotFoundError(
            "Execution plan does not contain step{plural}: {steps}".format(
                plural=plural, steps=", ".join(missing_steps)
            ),
            step_keys=missing_steps,
        )

    # Every handle is known to be present at this point, so plain indexing is safe.
    selected_steps = (step_dict[handle] for handle in step_handles_to_execute)
    executable_map = {step.key: step.handle for step in selected_steps}

    return super(ExecutionPlan, cls).__new__(
        cls,
        pipeline=check.inst_param(pipeline, "pipeline", IPipeline),
        step_dict=check.dict_param(
            step_dict, "step_dict", key_type=StepHandle, value_type=ExecutionStep,
        ),
        executable_map=executable_map,
        artifacts_persisted=check.bool_param(artifacts_persisted, "artifacts_persisted"),
        step_handles_to_execute=step_handles_to_execute,
        environment_config=check.inst_param(
            environment_config, "environment_config", EnvironmentConfig
        ),
    )
def _resolve_step_keys(execution_plan, step_keys_to_execute): if step_keys_to_execute is None: step_keys_to_execute = [step.key for step in execution_plan.topological_steps()] else: for step_key in step_keys_to_execute: if not execution_plan.has_step(step_key): raise DagsterExecutionStepNotFoundError( 'Execution plan does not contain step \'{}\''.format(step_key), step_key=step_key, ) return step_keys_to_execute
def _compute_step_maps(step_dict, step_handles_to_execute, known_state):
    """Build the executable and resolvable step maps for a step selection.

    Args:
        step_dict: mapping from step handle to step object; indexed by every selected handle.
        step_handles_to_execute: list of selected handles (``StepHandle``,
            ``UnresolvedStepHandle`` or ``ResolvedFromDynamicStepHandle``).
        known_state: when truthy, its ``dynamic_mappings`` are handed to
            ``_update_from_resolved_dynamic_outputs`` together with both maps.

    Returns:
        A ``(executable_map, resolvable_map)`` tuple. ``executable_map`` maps the key of each
        selected ``ExecutionStep`` to its handle. ``resolvable_map`` groups the handles of
        selected unresolved steps under their ``resolved_by_step_keys`` value.

    Raises:
        DagsterExecutionStepNotFoundError: a selected handle is not a key of ``step_dict``.
        DagsterInvariantViolationError: an unresolved step is resolved by a step key that is
            not part of the selection.
    """
    check.list_param(
        step_handles_to_execute,
        "step_handles_to_execute",
        of_type=(StepHandle, UnresolvedStepHandle, ResolvedFromDynamicStepHandle),
    )

    missing_steps = [
        step_handle.to_key()
        for step_handle in step_handles_to_execute
        if step_handle not in step_dict
    ]
    if missing_steps:
        raise DagsterExecutionStepNotFoundError(
            "Execution plan does not contain step{plural}: {steps}".format(
                plural="s" if len(missing_steps) > 1 else "", steps=", ".join(missing_steps)
            ),
            step_keys=missing_steps,
        )

    # Only used for membership tests, so a set avoids a linear scan per resolving key.
    step_keys_to_execute = {step_handle.to_key() for step_handle in step_handles_to_execute}

    executable_map = {}
    # NOTE(review): the annotation says str keys, but entries are keyed by the whole
    # `resolved_by_step_keys` value below -- confirm the intended key type.
    resolvable_map: Dict[str, List[UnresolvedStepHandle]] = defaultdict(list)
    for handle in step_handles_to_execute:
        step = step_dict[handle]
        if isinstance(step, ExecutionStep):
            executable_map[step.key] = step.handle
        elif isinstance(step, (UnresolvedMappedExecutionStep, UnresolvedCollectExecutionStep)):
            for key in step.resolved_by_step_keys:
                if key not in step_keys_to_execute:
                    # Report the key that actually failed the check. The previous message
                    # read `step.resolved_by_step_key`, which is not the value being tested.
                    raise DagsterInvariantViolationError(
                        f'Unresolved ExecutionStep "{step.key}" is resolved by "{key}" '
                        "which is not part of the current step selection"
                    )
            resolvable_map[step.resolved_by_step_keys].append(step.handle)
        else:
            check.invariant(
                step.key in executable_map, "Expect all steps to be executable or resolvable"
            )

    if known_state:
        _update_from_resolved_dynamic_outputs(
            step_dict,
            executable_map,
            resolvable_map,
            step_handles_to_execute,
            known_state.dynamic_mappings,
        )

    return (executable_map, resolvable_map)
def invoke_executor_on_plan(pipeline_context, execution_plan, step_keys_to_execute=None):
    '''Validate the requested step keys and hand the plan to the configured engine.

    Args:
        pipeline_context: context whose ``executor_config.get_engine()`` supplies the engine.
        execution_plan: plan to execute; must contain every requested step key.
        step_keys_to_execute: optional list of step keys; passed through to the engine
            unchanged (including None or an empty list).

    Returns:
        Whatever the engine's ``execute`` returns. Engine execution returns a generator of
        yielded events, so this function also returns a generator.

    Raises:
        DagsterExecutionStepNotFoundError: a requested key is not a step of ``execution_plan``.
    '''
    if step_keys_to_execute:
        for step_key in step_keys_to_execute:
            if not execution_plan.has_step(step_key):
                # Carry a message naming the step; the error used to be raised without one.
                raise DagsterExecutionStepNotFoundError(
                    'Execution plan does not contain step "{}"'.format(step_key),
                    step_key=step_key,
                )

    engine = pipeline_context.executor_config.get_engine()
    return engine.execute(pipeline_context, execution_plan, step_keys_to_execute)
def _resolve_step_keys(execution_plan, step_keys_to_execute): if step_keys_to_execute is None: step_keys_to_execute = [step.key for step in execution_plan.topological_steps()] else: missing_steps = [ step_key for step_key in step_keys_to_execute if not execution_plan.has_step(step_key) ] if missing_steps: raise DagsterExecutionStepNotFoundError( 'Execution plan does not contain step{plural}: {steps}'.format( plural='s' if len(missing_steps) > 1 else '', steps=', '.join(missing_steps) ), step_keys=missing_steps, ) return step_keys_to_execute
def parse_step_selection(step_deps, step_selection):
    """Resolve a list of step selection queries into the set of selected step keys.

    Args:
        step_deps (Dict[str, Set[str]]): execution step dependencies, where each key is a
            step key and each value is the set of that step's direct upstream step keys.
        step_selection (List[str]): step key selection queries (a single step key is also a
            valid query).

    Returns:
        FrozenSet[str]: the union of the step keys selected by every clause. Empty only when
        ``step_selection`` itself is empty.

    Raises:
        DagsterExecutionStepNotFoundError: the selection names a step key that is not a key
            of ``step_deps``.
        DagsterInvalidSubsetError: a clause selects no steps.
    """
    check.list_param(step_selection, "step_selection", of_type=str)

    # Invert the upstream map. Seed every known step first so that steps without any
    # downstream dependents still appear as keys.
    downstream_deps = defaultdict(set, {step_key: set() for step_key in step_deps})
    for downstream_key, upstream_keys in step_deps.items():
        for upstream_key in upstream_keys:
            downstream_deps[upstream_key].add(downstream_key)

    graph = {"upstream": step_deps, "downstream": downstream_deps}

    step_keys = parse_items_from_selection(step_selection)
    invalid_keys = [key for key in step_keys if key not in step_deps]
    if invalid_keys:
        raise DagsterExecutionStepNotFoundError(
            f"Step selection refers to unknown step{'s' if len(invalid_keys)> 1 else ''}: {', '.join(invalid_keys)}",
            step_keys=invalid_keys,
        )

    selected = set()
    for clause in step_selection:
        subset = clause_to_subset(graph, clause)
        if len(subset) == 0:
            raise DagsterInvalidSubsetError(
                "No qualified steps to execute found for step_selection={requested}".format(
                    requested=step_selection
                ),
            )
        selected |= set(subset)

    return frozenset(selected)
def build_subset_plan(
    self,
    step_keys_to_execute: List[str],
    pipeline_def: PipelineDefinition,
    environment_config: EnvironmentConfig,
) -> "ExecutionPlan":
    """Return a new plan over the same steps, restricted to ``step_keys_to_execute``.

    Each key is parsed into a handle with ``StepHandle.parse_from_key``. The step maps and
    the artifacts-persisted value are recomputed for the selection via
    ``_compute_step_maps`` and ``_compute_artifacts_persisted``.

    Raises:
        DagsterExecutionStepNotFoundError: one or more parsed handles are not in
            ``self.step_dict``; all offending keys are reported together.
    """
    check.list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)

    step_handles_to_execute = list(map(StepHandle.parse_from_key, step_keys_to_execute))

    bad_keys = [
        handle.to_key() for handle in step_handles_to_execute if handle not in self.step_dict
    ]
    if bad_keys:
        raise DagsterExecutionStepNotFoundError(
            f"Can not build subset plan from unknown step{'s' if len(bad_keys)> 1 else ''}: {', '.join(bad_keys)}",
            step_keys=bad_keys,
        )

    executable_map, resolvable_map = _compute_step_maps(
        self.step_dict, step_handles_to_execute, self.known_state,
    )

    # Positional arguments are evaluated left to right, so build the plan's leading
    # arguments in the same order before computing the trailing one.
    plan_args = (
        self.step_dict,
        executable_map,
        resolvable_map,
        step_handles_to_execute,
        self.known_state,
    )
    artifacts_persisted = _compute_artifacts_persisted(
        self.step_dict,
        step_handles_to_execute,
        pipeline_def,
        environment_config,
        executable_map,
    )
    return ExecutionPlan(*plan_args, artifacts_persisted)
def build_subset_plan(self, step_keys_to_execute: List[str]) -> "ExecutionPlan":
    """Return a new plan over the same steps, restricted to ``step_keys_to_execute``.

    Each key is parsed into a handle with ``StepHandle.parse_from_key``. A
    ``ResolvedFromDynamicStepHandle`` that is not yet in ``self.step_dict`` but whose
    unresolved form is gets resolved on the fly through ``self.resolve``.

    Raises:
        DagsterExecutionStepNotFoundError: one or more handles are neither in
            ``self.step_dict`` nor resolvable as described above; all offending keys are
            reported together.
    """
    check.list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)

    step_handles_to_execute = list(map(StepHandle.parse_from_key, step_keys_to_execute))

    bad_keys = []
    for handle in step_handles_to_execute:
        if handle in self.step_dict:
            # no further processing required
            continue

        can_resolve = (
            isinstance(handle, ResolvedFromDynamicStepHandle)
            and handle.unresolved_form in self.step_dict
        )
        if not can_resolve:
            bad_keys.append(handle.to_key())
            continue

        unresolved_step = cast(UnresolvedExecutionStep, self.step_dict[handle.unresolved_form])
        # self.step_dict updated as side effect
        self.resolve(
            unresolved_step.resolved_by_step_key,
            {unresolved_step.resolved_by_output_name: [handle.mapping_key]},
        )
        check.invariant(
            handle in self.step_dict,
            f"Handle did not resolve as expected, not found in step dict {handle}",
        )

    if bad_keys:
        raise DagsterExecutionStepNotFoundError(
            f"Can not build subset plan from unknown step{'s' if len(bad_keys)> 1 else ''}: {', '.join(bad_keys)}",
            step_keys=bad_keys,
        )

    return ExecutionPlan(
        self.pipeline, self.step_dict, step_handles_to_execute, self.environment_config,
    )
def invoke_executor_on_plan(pipeline_context, execution_plan, step_keys_to_execute=None):
    '''Validate the requested step keys and run the plan on the configured engine.

    Args:
        pipeline_context: context whose ``executor_config`` selects the engine.
        execution_plan: plan to execute; must contain every requested step key.
        step_keys_to_execute: optional list of step keys; passed through to the engine
            unchanged (including None or an empty list).

    Returns:
        Whatever the engine's ``execute`` returns. Engine execution returns a generator of
        yielded events, so this function also returns a generator.

    Raises:
        DagsterExecutionStepNotFoundError: a requested key is not a step of ``execution_plan``.
    '''
    if step_keys_to_execute:
        for step_key in step_keys_to_execute:
            if not execution_plan.has_step(step_key):
                # Carry a message naming the step; the error used to be raised without one.
                raise DagsterExecutionStepNotFoundError(
                    'Execution plan does not contain step "{}"'.format(step_key),
                    step_key=step_key,
                )

    # Toggle engine based on executor config supplied by the pipeline context
    def get_engine_for_config(cfg):
        if isinstance(cfg, InProcessExecutorConfig):
            return InProcessEngine
        elif isinstance(cfg, MultiprocessExecutorConfig):
            return MultiprocessingEngine
        else:
            check.failed('Unsupported config {}'.format(cfg))

    return get_engine_for_config(pipeline_context.executor_config).execute(
        pipeline_context, execution_plan, step_keys_to_execute
    )
def _steps_execution_iterator(pipeline_context, execution_plan, run_config, step_keys_to_execute):
    '''Iterate over the execution of individual steps, yielding the associated events.

    Does not yield pipeline-level events, aside from the init failure event when the context
    fails to construct and the success event when there is nothing to execute.

    When ``step_keys_to_execute`` is falsy, every step of the plan is selected in
    ``topological_steps()`` order.

    Raises:
        DagsterExecutionStepNotFoundError: a selected key is not a step of ``execution_plan``.
    '''
    check.inst_param(
        pipeline_context, 'pipeline_context', (DagsterEvent, SystemPipelineExecutionContext)
    )
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.inst_param(run_config, 'run_config', RunConfig)
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    init_failed = (
        isinstance(pipeline_context, DagsterEvent)
        and pipeline_context.event_type  # pylint: disable=no-member
        == DagsterEventType.PIPELINE_INIT_FAILURE
    )
    if init_failed:
        # Context construction failed: the "context" is the failure event, so pass it along.
        return ensure_gen(pipeline_context)

    step_keys_to_execute = step_keys_to_execute or [
        step.key for step in execution_plan.topological_steps()
    ]

    if not step_keys_to_execute:
        pipeline_context.log.debug(
            'Pipeline {pipeline} has no steps to execute and no execution will happen'.format(
                pipeline=pipeline_context.pipeline_def.display_name
            )
        )
        return ensure_gen(DagsterEvent.pipeline_success(pipeline_context))

    for step_key in step_keys_to_execute:
        if execution_plan.has_step(step_key):
            continue
        raise DagsterExecutionStepNotFoundError(
            'Execution plan does not contain step \'{}\''.format(step_key), step_key=step_key,
        )

    _setup_reexecution(run_config, pipeline_context, execution_plan)

    return _invoke_executor_on_plan(pipeline_context, execution_plan, step_keys_to_execute)
def _execute_plan_iterator(pipeline_context, execution_plan, run_config, step_keys_to_execute):
    '''Validate the arguments and return the event iterator for executing the plan.

    If ``pipeline_context`` is a pipeline init failure event, that event alone is returned.
    When ``step_keys_to_execute`` is falsy, every step of the plan is selected in
    ``topological_steps()`` order; if the plan has no steps, only a pipeline success event
    is returned.

    Raises:
        DagsterExecutionStepNotFoundError: a selected key is not a step of ``execution_plan``.
    '''
    check.inst_param(
        pipeline_context, 'pipeline_context', (DagsterEvent, SystemPipelineExecutionContext)
    )
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.inst_param(run_config, 'run_config', RunConfig)
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    init_failed = (
        isinstance(pipeline_context, DagsterEvent)
        and pipeline_context.event_type  # pylint: disable=no-member
        == DagsterEventType.PIPELINE_INIT_FAILURE
    )
    if init_failed:
        # Context construction failed: the "context" is the failure event, so pass it along.
        return ensure_gen(pipeline_context)

    step_keys_to_execute = step_keys_to_execute or [
        step.key for step in execution_plan.topological_steps()
    ]

    if not step_keys_to_execute:
        pipeline_context.log.debug(
            'Pipeline {pipeline} has no steps to execute and no execution will happen'.format(
                pipeline=pipeline_context.pipeline_def.display_name
            )
        )
        return ensure_gen(DagsterEvent.pipeline_success(pipeline_context))

    for step_key in step_keys_to_execute:
        if execution_plan.has_step(step_key):
            continue
        raise DagsterExecutionStepNotFoundError(
            'Execution plan does not contain step "{}"'.format(step_key), step_key=step_key
        )

    _setup_reexecution(run_config, pipeline_context, execution_plan)

    return _invoke_executor_on_plan(pipeline_context, execution_plan, step_keys_to_execute)