def test_cant_load_old_snapshot():
    snapshot = deserialize_json_to_dagster_namedtuple(OLD_EXECUTION_PLAN_SNAPSHOT)
    with pytest.raises(
        DagsterInvariantViolationError,
        match="Tried to reconstruct an old ExecutionPlanSnapshot that was created before snapshots had enough information to fully reconstruct the ExecutionPlan",
    ):
        ExecutionPlan.rebuild_from_snapshot("noop_pipeline", snapshot)
def test_execution_plan_snapshot_backcompat():
    src_dir = file_relative_path(__file__, "test_execution_plan_snapshots/")
    snapshot_dirs = [f for f in os.listdir(src_dir) if not os.path.isfile(os.path.join(src_dir, f))]
    for snapshot_dir_path in snapshot_dirs:
        print(f"Executing a saved run from {snapshot_dir_path}")  # pylint: disable=print-call

        with copy_directory(os.path.join(src_dir, snapshot_dir_path)) as test_dir:
            with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
                runs = instance.get_runs()
                assert len(runs) == 1

                run = runs[0]
                assert run.status == PipelineRunStatus.NOT_STARTED

                the_pipeline = InMemoryPipeline(dynamic_pipeline)

                # First create a brand new plan from the pipeline and validate it
                new_plan = create_execution_plan(the_pipeline, run_config=run.run_config)
                _validate_execution_plan(new_plan)

                # Create a snapshot and rebuild it, validate the rebuilt plan
                new_plan_snapshot = snapshot_from_execution_plan(new_plan, run.pipeline_snapshot_id)
                rebuilt_plan = ExecutionPlan.rebuild_from_snapshot(
                    "dynamic_pipeline", new_plan_snapshot
                )
                _validate_execution_plan(rebuilt_plan)

                # Then validate the plan built from the historical snapshot on the run
                stored_snapshot = instance.get_execution_plan_snapshot(
                    run.execution_plan_snapshot_id
                )
                rebuilt_plan = ExecutionPlan.rebuild_from_snapshot(
                    "dynamic_pipeline", stored_snapshot
                )
                _validate_execution_plan(rebuilt_plan)

                # Finally, execute the run (using the historical execution plan snapshot)
                result = execute_run(the_pipeline, run, instance, raise_on_error=True)
                assert result.success
def test_using_file_system_for_subplan_invalid_step():
    pipeline = define_inty_pipeline()
    run_config = {"storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()
    environment_config = EnvironmentConfig.build(pipeline, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline), environment_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )

    with pytest.raises(DagsterExecutionStepNotFoundError):
        execute_plan(
            execution_plan.build_subset_plan(["nope.compute"], pipeline, environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
def create_execution_plan(
    pipeline: Union[IPipeline, PipelineDefinition],
    run_config: Optional[dict] = None,
    mode: Optional[str] = None,
    step_keys_to_execute: Optional[List[str]] = None,
) -> ExecutionPlan:
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)

    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    mode = check.opt_str_param(mode, "mode", default=pipeline_def.get_default_mode_name())
    check.opt_list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)

    environment_config = EnvironmentConfig.build(pipeline_def, run_config, mode=mode)

    return ExecutionPlan.build(
        pipeline, environment_config, mode=mode, step_keys_to_execute=step_keys_to_execute
    )
def test_compile():
    environment_config = EnvironmentConfig.build(
        composition,
        {"solids": {"add_four": {"inputs": {"num": {"value": 1}}}}},
    )

    plan = ExecutionPlan.build(InMemoryPipeline(composition), environment_config)

    res = coalesce_execution_steps(plan)

    assert set(res.keys()) == {
        "add_four.add_two.add_one",
        "add_four.add_two.add_one_2",
        "add_four.add_two_2.add_one",
        "add_four.add_two_2.add_one_2",
        "div_four.div_two",
        "div_four.div_two_2",
        "int_to_float",
    }
def test_using_file_system_for_subplan_missing_input():
    pipeline = define_inty_pipeline(using_file_system=True)

    instance = DagsterInstance.ephemeral()
    resolved_run_config = ResolvedRunConfig.build(pipeline)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline), resolved_run_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )

    events = execute_plan(
        execution_plan.build_subset_plan(["add_one"], pipeline, resolved_run_config),
        InMemoryPipeline(pipeline),
        instance,
        pipeline_run=pipeline_run,
    )
    failures = [event for event in events if event.event_type_value == "STEP_FAILURE"]
    assert len(failures) == 1
    assert failures[0].step_key == "add_one"
    assert "DagsterExecutionLoadInputError" in failures[0].event_specific_data.error.message
def create_execution_plan(
    pipeline: Union[IPipeline, PipelineDefinition],
    run_config: Optional[dict] = None,
    mode: Optional[str] = None,
    step_keys_to_execute: Optional[List[str]] = None,
    known_state: Optional[KnownExecutionState] = None,
) -> ExecutionPlan:
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)

    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    mode = check.opt_str_param(mode, "mode", default=pipeline_def.get_default_mode_name())
    check.opt_nullable_list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)

    resolved_run_config = ResolvedRunConfig.build(pipeline_def, run_config, mode=mode)

    return ExecutionPlan.build(
        pipeline,
        resolved_run_config,
        step_keys_to_execute=step_keys_to_execute,
        known_state=known_state,
    )
def test_compile():
    # TODO: remove dependency on legacy_examples
    # https://github.com/dagster-io/dagster/issues/2653
    environment_config = EnvironmentConfig.build(
        composition,
        {'solids': {'add_four': {'inputs': {'num': {'value': 1}}}}},
    )

    plan = ExecutionPlan.build(InMemoryExecutablePipeline(composition), environment_config)

    res = coalesce_execution_steps(plan)

    assert set(res.keys()) == {
        'add_four.add_two.add_one',
        'add_four.add_two.add_one_2',
        'add_four.add_two_2.add_one',
        'add_four.add_two_2.add_one_2',
        'div_four.div_two',
        'div_four.div_two_2',
        'int_to_float',
    }
def create_execution_plan(pipeline, environment_dict=None, mode=None, step_keys_to_execute=None):
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict', key_type=str)
    mode = check.opt_str_param(mode, 'mode', default=pipeline_def.get_default_mode_name())
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    environment_config = EnvironmentConfig.build(pipeline_def, environment_dict, mode=mode)

    return ExecutionPlan.build(
        pipeline, environment_config, mode=mode, step_keys_to_execute=step_keys_to_execute
    )
def test_execution_plan_reexecution_with_in_memory():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    run_config = {"solids": {"add_one": {"inputs": {"num": {"value": 3}}}}}
    result = execute_pipeline(pipeline_def, run_config=run_config, instance=instance)
    assert result.success

    ## re-execute add_two

    environment_config = EnvironmentConfig.build(pipeline_def, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline_def), environment_config)

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        execution_plan=execution_plan,
        run_config=run_config,
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
    )

    with pytest.raises(DagsterInvariantViolationError):
        execute_plan(
            execution_plan.build_subset_plan(["add_two"], pipeline_def, environment_config),
            InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
def test_compile():
    environment_config = EnvironmentConfig.build(
        composition,
        {'solids': {'add_four': {'inputs': {'num': {'value': 1}}}}},
    )

    plan = ExecutionPlan.build(InMemoryExecutablePipeline(composition), environment_config)

    res = coalesce_execution_steps(plan)

    assert set(res.keys()) == {
        'add_four.add_two.add_one',
        'add_four.add_two.add_one_2',
        'add_four.add_two_2.add_one',
        'add_four.add_two_2.add_one_2',
        'div_four.div_two',
        'div_four.div_two_2',
        'int_to_float',
    }
def test_compile():
    run_config = RunConfig()
    environment_config = EnvironmentConfig.build(
        composition,
        {'solids': {'add_four': {'inputs': {'num': {'value': 1}}}}},
        run_config=None,
    )

    plan = ExecutionPlan.build(
        composition, environment_config, composition.get_mode_definition(run_config.mode)
    )

    res = coalesce_execution_steps(plan)

    assert set(res.keys()) == {
        'add_four.add_two.add_one',
        'add_four.add_two.add_one_2',
        'add_four.add_two_2.add_one',
        'add_four.add_two_2.add_one_2',
        'div_four.div_two',
        'div_four.div_two_2',
    }
def test_using_file_system_for_subplan_missing_input():
    pipeline = define_inty_pipeline()
    run_config = {"storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()
    environment_config = EnvironmentConfig.build(pipeline, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline), environment_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )

    events = execute_plan(
        execution_plan.build_subset_plan(["add_one"], pipeline, environment_config),
        InMemoryPipeline(pipeline),
        instance,
        run_config=run_config,
        pipeline_run=pipeline_run,
    )
    failures = [event for event in events if event.event_type_value == "STEP_FAILURE"]
    assert len(failures) == 1
    assert failures[0].step_key == "add_one"
    assert "DagsterStepOutputNotFoundError" in failures[0].event_specific_data.error.message
def execute_run_host_mode(
    pipeline: ReconstructablePipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    executor_defs: Optional[List[ExecutorDefinition]] = None,
    raise_on_error: bool = False,
):
    check.inst_param(pipeline, "pipeline", ReconstructablePipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    check.opt_list_param(executor_defs, "executor_defs", of_type=ExecutorDefinition)
    executor_defs = executor_defs if executor_defs is not None else default_executors

    if pipeline_run.status == PipelineRunStatus.CANCELED:
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(
            message,
            pipeline_run,
        )
        raise DagsterInvariantViolationError(message)

    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Pipeline run {} ({}) in state {}, expected NOT_STARTED or STARTING".format(
            pipeline_run.pipeline_name, pipeline_run.run_id, pipeline_run.status
        ),
    )

    if pipeline_run.solids_to_execute:
        pipeline = pipeline.subset_for_execution_from_existing_pipeline(
            frozenset(pipeline_run.solids_to_execute)
        )

    execution_plan_snapshot = instance.get_execution_plan_snapshot(
        pipeline_run.execution_plan_snapshot_id
    )
    execution_plan = ExecutionPlan.rebuild_from_snapshot(
        pipeline_run.pipeline_name,
        execution_plan_snapshot,
    )

    _execute_run_iterable = ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=pipeline_execution_iterator,
        execution_context_manager=PlanOrchestrationContextManager(
            context_event_generator=host_mode_execution_context_event_generator,
            pipeline=pipeline,
            execution_plan=execution_plan,
            run_config=pipeline_run.run_config,
            pipeline_run=pipeline_run,
            instance=instance,
            raise_on_error=raise_on_error,
            executor_defs=executor_defs,
            output_capture=None,
        ),
    )
    event_list = list(_execute_run_iterable)
    return event_list
def inner_plan_execution_iterator(
    pipeline_context: PlanExecutionContext, execution_plan: ExecutionPlan
) -> Iterator[DagsterEvent]:
    check.inst_param(pipeline_context, "pipeline_context", PlanExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

    with execution_plan.start(retry_mode=pipeline_context.retry_mode) as active_execution:

        # It would be good to implement a reference tracking algorithm here to
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        while not active_execution.is_complete:
            step = active_execution.get_next_step()
            step_context = cast(
                StepExecutionContext,
                pipeline_context.for_step(
                    step, active_execution.retry_state.get_attempt_count(step.key)
                ),
            )
            step_event_list = []

            missing_resources = [
                resource_key
                for resource_key in step_context.required_resource_keys
                if not hasattr(step_context.resources, resource_key)
            ]
            check.invariant(
                len(missing_resources) == 0,
                (
                    "Expected step context for solid {solid_name} to have all required resources, but "
                    "missing {missing_resources}."
                ).format(solid_name=step_context.solid.name, missing_resources=missing_resources),
            )

            # capture all of the logs for this step
            with pipeline_context.instance.compute_log_manager.watch(
                step_context.pipeline_run, step_context.step.key
            ):
                for step_event in check.generator(_dagster_event_sequence_for_step(step_context)):
                    check.inst(step_event, DagsterEvent)
                    step_event_list.append(step_event)
                    yield step_event
                    active_execution.handle_event(step_event)

                active_execution.verify_complete(pipeline_context, step.key)

            # process skips from failures or uncovered inputs
            for event in active_execution.plan_events_iterator(pipeline_context):
                step_event_list.append(event)
                yield event

            # pass a list of step events to hooks
            for hook_event in _trigger_hook(step_context, step_event_list):
                yield hook_event
def test_using_intermediate_file_system_for_subplan_multiprocessing():
    with instance_for_test() as instance:
        run_config = {"intermediate_storage": {"filesystem": {}}}

        pipeline = reconstructable(define_inty_pipeline)

        environment_config = EnvironmentConfig.build(
            pipeline.get_definition(), run_config=run_config
        )
        execution_plan = ExecutionPlan.build(pipeline, environment_config)
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline.get_definition(), execution_plan=execution_plan
        )

        assert execution_plan.get_step_by_key("return_one")

        return_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(
                    ["return_one"], pipeline.get_definition(), environment_config
                ),
                pipeline,
                instance,
                run_config=dict(run_config, execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            )
        )

        intermediate_storage = build_fs_intermediate_storage(
            instance.intermediates_directory, pipeline_run.run_id
        )
        assert get_step_output(return_one_step_events, "return_one")
        assert intermediate_storage.has_intermediate(None, StepOutputHandle("return_one"))
        assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("return_one")).obj == 1

        add_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(
                    ["add_one"], pipeline.get_definition(), environment_config
                ),
                pipeline,
                instance,
                run_config=dict(run_config, execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            )
        )

        assert get_step_output(add_one_step_events, "add_one")
        assert intermediate_storage.has_intermediate(None, StepOutputHandle("add_one"))
        assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_one")).obj == 2
def create_execution_plan(pipeline, environment_dict=None, mode=None):
    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict', key_type=str)
    check.opt_str_param(mode, 'mode')

    environment_config = create_environment_config(pipeline, environment_dict, mode)
    return ExecutionPlan.build(pipeline, environment_config)
def _execute_pipeline_iterator(context_or_failure_event):
    # Due to use of context managers, if the user land code in context or resource init fails
    # we can get either a pipeline_context or the failure event here.
    if (
        isinstance(context_or_failure_event, DagsterEvent)
        and context_or_failure_event.event_type == DagsterEventType.PIPELINE_INIT_FAILURE
    ):
        yield context_or_failure_event
        return

    pipeline_context = context_or_failure_event
    check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
    yield DagsterEvent.pipeline_start(pipeline_context)

    execution_plan = ExecutionPlan.build(
        pipeline_context.pipeline_def,
        pipeline_context.environment_config,
        pipeline_context.mode_def,
    )

    steps = execution_plan.topological_steps()
    if not steps:
        pipeline_context.log.debug(
            'Pipeline {pipeline} has no nodes and no execution will happen'.format(
                pipeline=pipeline_context.pipeline_def.display_name
            )
        )
        yield DagsterEvent.pipeline_success(pipeline_context)
        return

    _setup_reexecution(pipeline_context.run_config, pipeline_context, execution_plan)

    pipeline_context.log.debug(
        'About to execute the compute node graph in the following order {order}'.format(
            order=[step.key for step in steps]
        )
    )

    check.invariant(
        len([step_input for step_input in steps[0].step_inputs if step_input.is_from_output]) == 0
    )

    pipeline_success = True

    try:
        for event in invoke_executor_on_plan(
            pipeline_context, execution_plan, pipeline_context.run_config.step_keys_to_execute
        ):
            if event.is_step_failure:
                pipeline_success = False
            yield event
    finally:
        if pipeline_success:
            yield DagsterEvent.pipeline_success(pipeline_context)
        else:
            yield DagsterEvent.pipeline_failure(pipeline_context)
def create_execution_plan(pipeline, environment_dict=None, run_config=None):
    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict', key_type=str)
    run_config = check.opt_inst_param(run_config, 'run_config', RunConfig, RunConfig())
    environment_config = EnvironmentConfig.build(pipeline, environment_dict, run_config)
    return ExecutionPlan.build(
        pipeline, environment_config, pipeline.get_mode_definition(run_config.mode)
    )
def execute_run_host_mode(
    pipeline: ReconstructablePipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    get_executor_def_fn: Optional[Callable[[Optional[str]], ExecutorDefinition]] = None,
    raise_on_error: bool = False,
):
    check.inst_param(pipeline, "pipeline", ReconstructablePipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    check.opt_callable_param(get_executor_def_fn, "get_executor_def_fn")

    if pipeline_run.status == PipelineRunStatus.CANCELED:
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(
            message,
            pipeline_run,
        )
        raise DagsterInvariantViolationError(message)

    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Pipeline run {} ({}) in state {}, expected NOT_STARTED or STARTING".format(
            pipeline_run.pipeline_name, pipeline_run.run_id, pipeline_run.status
        ),
    )

    if pipeline_run.solids_to_execute:
        pipeline = pipeline.subset_for_execution_from_existing_pipeline(
            pipeline_run.solids_to_execute
        )

    execution_plan_snapshot = instance.get_execution_plan_snapshot(
        pipeline_run.execution_plan_snapshot_id
    )
    execution_plan = ExecutionPlan.rebuild_from_snapshot(
        pipeline_run.pipeline_name,
        execution_plan_snapshot,
    )

    _execute_run_iterable = ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=pipeline_execution_iterator,
        execution_context_manager=HostModeRunWorkerExecutionContextManager(
            execution_plan=execution_plan,
            recon_pipeline=pipeline,
            pipeline_run=pipeline_run,
            instance=instance,
            run_config=pipeline_run.run_config,
            get_executor_def_fn=get_executor_def_fn,
            raise_on_error=raise_on_error,
        ),
    )
    event_list = list(_execute_run_iterable)
    return event_list
def test_using_file_system_for_subplan_multiprocessing():
    with instance_for_test() as instance:
        pipeline = reconstructable(define_reconstructable_inty_pipeline)

        resolved_run_config = ResolvedRunConfig.build(pipeline.get_definition())
        execution_plan = ExecutionPlan.build(pipeline, resolved_run_config)
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline.get_definition(), execution_plan=execution_plan
        )

        assert execution_plan.get_step_by_key("return_one")

        return_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(
                    ["return_one"], pipeline.get_definition(), resolved_run_config
                ),
                pipeline,
                instance,
                run_config=dict(execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            )
        )

        assert get_step_output(return_one_step_events, "return_one")
        with open(
            os.path.join(instance.storage_directory(), pipeline_run.run_id, "return_one", "result"),
            "rb",
        ) as read_obj:
            assert pickle.load(read_obj) == 1

        add_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(
                    ["add_one"], pipeline.get_definition(), resolved_run_config
                ),
                pipeline,
                instance,
                run_config=dict(execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            )
        )

        assert get_step_output(add_one_step_events, "add_one")
        with open(
            os.path.join(instance.storage_directory(), pipeline_run.run_id, "add_one", "result"),
            "rb",
        ) as read_obj:
            assert pickle.load(read_obj) == 2
def test_using_intermediates_file_system_for_subplan():
    pipeline = define_inty_pipeline()
    run_config = {"intermediate_storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()
    environment_config = EnvironmentConfig.build(pipeline, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline), environment_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )

    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline, environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
    )

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id
    )
    assert get_step_output(return_one_step_events, "return_one")
    assert intermediate_storage.has_intermediate(None, StepOutputHandle("return_one"))
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("return_one")).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline, environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
    )

    assert get_step_output(add_one_step_events, "add_one")
    assert intermediate_storage.has_intermediate(None, StepOutputHandle("add_one"))
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_one")).obj == 2
def rebuild_execution_plan_from_snapshot(
    pipeline: IPipeline,
    run_config: Optional[dict],
    mode: Optional[str],
    execution_plan_snapshot: ExecutionPlanSnapshot,
) -> ExecutionPlan:
    pipeline_def = pipeline.get_definition()
    environment_config = EnvironmentConfig.build(pipeline_def, run_config, mode=mode)
    return ExecutionPlan.rebuild_from_snapshot(
        pipeline,
        pipeline_def.name,
        execution_plan_snapshot,
        environment_config,
    )
def _get_execution_plan_from_run(
    pipeline: IPipeline, pipeline_run: PipelineRun, instance: DagsterInstance
) -> ExecutionPlan:
    if pipeline_run.execution_plan_snapshot_id:
        execution_plan_snapshot = instance.get_execution_plan_snapshot(
            pipeline_run.execution_plan_snapshot_id
        )
        if execution_plan_snapshot.can_reconstruct_plan:
            return ExecutionPlan.rebuild_from_snapshot(
                pipeline_run.pipeline_name,
                execution_plan_snapshot,
            )
    return create_execution_plan(
        pipeline,
        run_config=pipeline_run.run_config,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )
def test_using_file_system_for_subplan():
    pipeline = define_inty_pipeline(using_file_system=True)

    instance = DagsterInstance.ephemeral()
    resolved_run_config = ResolvedRunConfig.build(pipeline)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline), resolved_run_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )

    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline, resolved_run_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        )
    )

    assert get_step_output(return_one_step_events, "return_one")
    with open(
        os.path.join(instance.storage_directory(), pipeline_run.run_id, "return_one", "result"),
        "rb",
    ) as read_obj:
        assert pickle.load(read_obj) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline, resolved_run_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        )
    )

    assert get_step_output(add_one_step_events, "add_one")
    with open(
        os.path.join(instance.storage_directory(), pipeline_run.run_id, "add_one", "result"),
        "rb",
    ) as read_obj:
        assert pickle.load(read_obj) == 2
def execute_list_versions_command(instance, kwargs):
    check.inst_param(instance, "instance", DagsterInstance)

    config = list(check.opt_tuple_param(kwargs.get("config"), "config", default=(), of_type=str))
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")

    if preset and config:
        raise click.UsageError("Can not use --preset with --config.")

    pipeline_origin = get_pipeline_python_origin_from_kwargs(kwargs)
    pipeline = recon_pipeline_from_origin(pipeline_origin)
    run_config = get_run_config_from_file_list(config)

    environment_config = EnvironmentConfig.build(pipeline.get_definition(), run_config, mode=mode)
    execution_plan = ExecutionPlan.build(pipeline, environment_config)

    step_output_versions = resolve_step_output_versions(
        pipeline.get_definition(), execution_plan, environment_config
    )
    memoized_plan = resolve_memoized_execution_plan(
        execution_plan, pipeline.get_definition(), run_config, instance, environment_config
    )
    # the step keys that we need to execute are those which do not have their inputs populated.
    step_keys_not_stored = set(memoized_plan.step_keys_to_execute)

    table = []
    for step_output_handle, version in step_output_versions.items():
        table.append(
            [
                "{key}.{output}".format(
                    key=step_output_handle.step_key, output=step_output_handle.output_name
                ),
                version,
                "stored"
                if step_output_handle.step_key not in step_keys_not_stored
                else "to-be-recomputed",
            ]
        )

    table_str = tabulate(
        table, headers=["Step Output", "Version", "Status of Output"], tablefmt="github"
    )
    click.echo(table_str)
def test_execute_step_wrong_step_key():
    pipeline = define_inty_pipeline()
    instance = DagsterInstance.ephemeral()

    environment_config = EnvironmentConfig.build(pipeline)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline), environment_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(
            execution_plan.build_subset_plan(["nope.compute"], pipeline, environment_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        )

    assert exc_info.value.step_keys == ["nope.compute"]
    assert str(exc_info.value) == "Can not build subset plan from unknown step: nope.compute"

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(
            execution_plan.build_subset_plan(
                ["nope.compute", "nuh_uh.compute"], pipeline, environment_config
            ),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        )

    assert exc_info.value.step_keys == ["nope.compute", "nuh_uh.compute"]
    assert (
        str(exc_info.value)
        == "Can not build subset plan from unknown steps: nope.compute, nuh_uh.compute"
    )
def execute(self):
    from dagster.core.execution.api import scoped_pipeline_context

    check.inst(self.run_config.executor_config, MultiprocessExecutorConfig)
    pipeline = self.run_config.executor_config.handle.build_pipeline_definition()

    with scoped_pipeline_context(
        pipeline, self.environment_dict, self.run_config.with_tags(pid=str(os.getpid()))
    ) as pipeline_context:
        execution_plan = ExecutionPlan.build(
            pipeline_context.pipeline_def, pipeline_context.environment_config
        )

        for step_event in InProcessEngine.execute(
            pipeline_context, execution_plan, step_keys_to_execute=[self.step_key]
        ):
            yield step_event
def _get_execution_plan_from_run(
    pipeline: IPipeline, pipeline_run: PipelineRun, instance: DagsterInstance
) -> ExecutionPlan:
    if (
        # need to rebuild execution plan so it matches the subsetted graph
        pipeline.solids_to_execute is None
        and pipeline_run.execution_plan_snapshot_id
    ):
        execution_plan_snapshot = instance.get_execution_plan_snapshot(
            pipeline_run.execution_plan_snapshot_id
        )
        if execution_plan_snapshot.can_reconstruct_plan:
            return ExecutionPlan.rebuild_from_snapshot(
                pipeline_run.pipeline_name,
                execution_plan_snapshot,
            )
    return create_execution_plan(
        pipeline,
        run_config=pipeline_run.run_config,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
        instance_ref=instance.get_ref() if instance.is_persistent else None,
    )
def test_using_intermediates_to_override():
    pipeline = define_inty_pipeline()
    run_config = {"storage": {"filesystem": {}}, "intermediate_storage": {"in_memory": {}}}

    instance = DagsterInstance.ephemeral()
    resolved_run_config = ResolvedRunConfig.build(pipeline, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline), resolved_run_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )

    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline, resolved_run_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
    )

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id
    )
    assert get_step_output(return_one_step_events, "return_one")
    assert not intermediate_storage.has_intermediate(None, StepOutputHandle("return_one"))