def _create_step_events_for_output(step_context, output):
    """Yield every event generated for a single output emitted by a step.

    The events come, in order, from the type-checked output event sequence,
    from setting intermediates, and from creating output materializations.

    Args:
        step_context (SystemStepExecutionContext): Context of the executing step.
        output (Output): The output emitted by the step's compute function.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.inst_param(output, "output", Output)

    current_step = step_context.step
    output_def = current_step.step_output_named(output.output_name)

    # Resolve versions for the whole plan, then pick out the one for this output.
    all_versions = resolve_step_output_versions(
        step_context.execution_plan,
        step_context.environment_config,
        step_context.mode_def,
    )
    output_version = all_versions[
        StepOutputHandle(step_context.step.key, output.output_name)
    ]

    for type_check_event in _type_checked_step_output_event_sequence(
        step_context, output, output_version
    ):
        yield type_check_event

    handle = StepOutputHandle.from_step(
        step=current_step, output_name=output.output_name
    )

    for intermediates_event in _set_intermediates(
        step_context, output_def, handle, output, output_version
    ):
        yield intermediates_event

    for materialization_event in _create_output_materializations(
        step_context, output.output_name, output.value
    ):
        yield materialization_event
def test_resolve_memoized_execution_plan_yes_stored_results():
    """Check the memoized plan when the IO manager already holds the first solid's output."""
    io_manager = VersionedInMemoryIOManager()
    versioned_pipeline = versioned_pipeline_factory(io_manager)
    plan = create_execution_plan(versioned_pipeline)
    run_config = ResolvedRunConfig.build(versioned_pipeline)

    stored_handle = StepOutputHandle("versioned_solid_no_input", "result")
    all_versions = resolve_step_output_versions(versioned_pipeline, plan, run_config)
    stored_version = all_versions[stored_handle]

    # Seed the manager with a value keyed by (step key, output name, version).
    storage_key = (
        stored_handle.step_key,
        stored_handle.output_name,
        stored_version,
    )
    io_manager.values[storage_key] = 4

    with DagsterInstance.ephemeral() as dagster_instance:
        memoized_plan = resolve_memoized_execution_plan(
            plan,
            versioned_pipeline,
            {},
            dagster_instance,
            run_config,
        )

        # Only the downstream solid is left to execute.
        assert memoized_plan.step_keys_to_execute == ["versioned_solid_takes_input"]

        expected_handle = StepOutputHandle(
            step_key="versioned_solid_no_input", output_name="result"
        )

        downstream_step = memoized_plan.get_step_by_key("versioned_solid_takes_input")
        input_source = downstream_step.step_input_dict["intput"].source
        assert input_source.step_output_handle == expected_handle
def resolve_unmemoized_steps(self, execution_plan, run_config, mode):
    """Find the steps whose versioned outputs have no stored address.

    Returns:
        List[str]: Step keys for all steps that don't have existing results stored for their
        versions.

    Raises:
        DagsterInvariantViolationError: If any step output resolves to a ``None`` version.
    """
    pipeline_name = execution_plan.pipeline.get_definition().name

    versions_by_handle = resolve_step_output_versions(
        execution_plan, run_config=run_config, mode=mode
    )

    # Every output must carry a version; fail on the first one that does not.
    for handle, version in versions_by_handle.items():
        if version is None:
            raise DagsterInvariantViolationError(
                "While creating a memoized pipeline run, a version is None for step "
                "{step_output}. Versions must be non-null values when running a memoized "
                "pipeline.".format(step_output=handle.step_key)
            )

    address_query = {
        (pipeline_name, handle): version
        for handle, version in versions_by_handle.items()
    }
    stored_addresses = self.get_addresses_for_step_output_versions(address_query)

    # Collect into a set so a step with several missing outputs appears once.
    unmemoized_step_keys = set()
    for handle in versions_by_handle:
        if (pipeline_name, handle) not in stored_addresses:
            unmemoized_step_keys.add(handle.step_key)
    return list(unmemoized_step_keys)
def resolve_memoized_execution_plan(self, execution_plan, run_config, mode):
    """Build a memoized plan from the steps that have no stored address.

    Returns:
        ExecutionPlan: Execution plan configured to only run unmemoized steps.

    Raises:
        DagsterInvariantViolationError: If every resolved step output version is ``None``.
    """
    pipeline_def = execution_plan.pipeline.get_definition()
    pipeline_name = pipeline_def.name

    environment_config = EnvironmentConfig.build(pipeline_def, run_config, mode)
    mode_def = pipeline_def.get_mode_definition(mode)
    versions_by_handle = resolve_step_output_versions(
        execution_plan, environment_config, mode_def
    )

    if not any(version is not None for version in versions_by_handle.values()):
        raise DagsterInvariantViolationError(
            "While creating a memoized pipeline run, no steps have versions. At least one step "
            "must have a version."
        )

    # Only outputs with a truthy version are looked up.
    address_query = {}
    for handle, version in versions_by_handle.items():
        if version:
            address_query[(pipeline_name, handle)] = version
    stored_addresses = self.get_addresses_for_step_output_versions(address_query)

    # Any output without an address marks its step for execution.
    pending_step_keys = set()
    for handle in versions_by_handle:
        if (pipeline_name, handle) not in stored_addresses:
            pending_step_keys.add(handle.step_key)
    step_keys_to_execute = list(pending_step_keys)

    return execution_plan.build_memoized_plan(step_keys_to_execute, stored_addresses)
def test_addresses_for_version(version_storing_context):
    """After a run, the instance returns the address yielded for a versioned output."""

    @solid(version="abc")
    def solid1(_):
        yield Output(5, address="some_address")

    @solid(version="123")
    def solid2(_, _input1):
        pass

    @pipeline
    def my_pipeline():
        solid2(solid1())

    with version_storing_context() as (instance, _):
        execute_pipeline(instance=instance, pipeline=my_pipeline)

        handle = StepOutputHandle("solid1.compute", "result")
        all_versions = resolve_step_output_versions(
            create_execution_plan(my_pipeline), run_config={}, mode="default"
        )
        output_version = all_versions[handle]

        lookup_key = ("my_pipeline", handle)
        found_addresses = instance.get_addresses_for_step_output_versions(
            {lookup_key: output_version}
        )
        assert found_addresses == {("my_pipeline", handle): "some_address"}
def resolve_unmemoized_steps(self, execution_plan, run_config, mode):
    """Find the steps that have at least one output with no stored address.

    Returns:
        List[str]: Step keys for all steps that don't have existing results stored for their
        versions.

    Raises:
        DagsterInvariantViolationError: If every resolved step output version is ``None``.
    """
    pipeline_name = execution_plan.pipeline.get_definition().name

    versions_by_handle = resolve_step_output_versions(
        execution_plan, run_config=run_config, mode=mode
    )

    if not any(version is not None for version in versions_by_handle.values()):
        raise DagsterInvariantViolationError(
            "While creating a memoized pipeline run, no steps have versions. At least one step "
            "must have a version."
        )

    # Only outputs with a truthy version are looked up.
    address_query = {}
    for handle, version in versions_by_handle.items():
        if version:
            address_query[(pipeline_name, handle)] = version
    stored_addresses = self.get_addresses_for_step_output_versions(address_query)

    unmemoized_step_keys = set()
    for handle in versions_by_handle:
        if (pipeline_name, handle) not in stored_addresses:
            unmemoized_step_keys.add(handle.step_key)
    return list(unmemoized_step_keys)
def test_resolve_memoized_execution_plan_partial_versioning():
    """Check the memoized plan for a partially versioned pipeline with one stored output."""
    io_manager = VersionedInMemoryIOManager()
    pipeline_under_test = partially_versioned_pipeline_factory(io_manager)
    plan = create_execution_plan(pipeline_under_test)
    run_config = ResolvedRunConfig.build(pipeline_under_test)

    stored_handle = StepOutputHandle("versioned_solid_no_input", "result")
    all_versions = resolve_step_output_versions(pipeline_under_test, plan, run_config)
    stored_version = all_versions[stored_handle]

    # Seed the manager with a value keyed by (step key, output name, version).
    storage_key = (
        stored_handle.step_key,
        stored_handle.output_name,
        stored_version,
    )
    io_manager.values[storage_key] = 4

    with DagsterInstance.ephemeral() as instance:
        memoized_plan = resolve_memoized_execution_plan(
            plan,
            pipeline_under_test,
            {},
            instance,
            run_config,
        )
        assert memoized_plan.step_keys_to_execute == ["solid_takes_input"]
def resolve_step_output_versions_for_test(execution_plan, run_config=None, mode=None):
    """Resolve step output versions from just an execution plan.

    Builds the environment config and mode definition from the plan's pipeline
    definition, then delegates to ``resolve_step_output_versions``.

    Args:
        execution_plan: Plan whose ``pipeline_def`` supplies the config and mode.
        run_config: Run config passed to ``EnvironmentConfig.build``.
        mode: Mode name used for both the environment config and the mode definition.
    """
    pipeline_def = execution_plan.pipeline_def
    environment_config = EnvironmentConfig.build(pipeline_def, run_config, mode)
    mode_def = pipeline_def.get_mode_definition(mode)
    return resolve_step_output_versions(
        execution_plan=execution_plan,
        environment_config=environment_config,
        mode_def=mode_def,
    )
def _type_checked_step_output_event_sequence(step_context, output):
    """Type check an output, yield its step output event, and raise on failure.

    Args:
        step_context (SystemStepExecutionContext): Context of the executing step.
        output (Output): The output emitted by the step.

    Yields:
        The event built by ``_create_step_output_event``, carrying the type check
        result and the resolved version of this output.

    Raises:
        DagsterTypeCheckDidNotPass: After the event is yielded, if the type check
            did not succeed.
    """
    from dagster.core.execution.api import create_execution_plan

    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.inst_param(output, "output", Output)

    output_def = step_context.step.step_output_named(output.output_name)

    # Build a fresh plan for the pipeline to resolve versions against.
    speculative_plan = create_execution_plan(
        step_context.pipeline_def,
        run_config=step_context.run_config,
        mode=step_context.mode_def.name,
    )
    all_versions = resolve_step_output_versions(
        speculative_plan,
        run_config=step_context.run_config,
        mode=step_context.mode_def.name,
    )
    output_version = all_versions[
        StepOutputHandle(step_context.step.key, output.output_name)
    ]

    def _describe_type_check_error():
        # Built lazily: the boundary is handed the callable, not the message.
        return (
            'In solid "{handle}" the output "{output_name}" received '
            "value {output_value} of Python type {output_type} which "
            "does not pass the typecheck for Dagster type "
            "{dagster_type_name}. Step {step_key}."
        ).format(
            handle=str(step_context.step.solid_handle),
            output_name=output.output_name,
            output_value=output.value,
            output_type=type(output.value),
            dagster_type_name=output_def.dagster_type.name,
            step_key=step_context.step.key,
        )

    # Only the type check itself runs inside the user-code error boundary.
    with user_code_error_boundary(DagsterTypeCheckError, _describe_type_check_error):
        type_check = _do_type_check(
            step_context.for_type(output_def.dagster_type),
            output_def.dagster_type,
            output.value,
        )

    # The event is emitted whether or not the check passed.
    yield _create_step_output_event(
        step_context,
        output,
        type_check=type_check,
        success=type_check.success,
        version=output_version,
    )

    if type_check.success:
        return

    raise DagsterTypeCheckDidNotPass(
        description="Type check failed for step output {output_name} of type {dagster_type}.".format(
            output_name=output.output_name,
            dagster_type=output_def.dagster_type.name,
        ),
        metadata_entries=type_check.metadata_entries,
        dagster_type=output_def.dagster_type,
    )
def step_output_version(
    pipeline_def: "PipelineDefinition",
    execution_plan: "ExecutionPlan",
    resolved_run_config: "ResolvedRunConfig",
    step_output_handle: "StepOutputHandle",
) -> Optional[str]:
    """Return the resolved version for one step output.

    Args:
        pipeline_def: Pipeline definition passed to the version resolver.
        execution_plan: Execution plan passed to the version resolver.
        resolved_run_config: Resolved run config passed to the version resolver.
        step_output_handle: Handle of the step output to look up.

    Returns:
        The version mapped to ``step_output_handle``, or ``None`` when the handle
        is not among the resolved versions.
    """
    # Imported at call time rather than at module level.
    from dagster.core.execution.resolve_versions import resolve_step_output_versions

    versions_by_handle = resolve_step_output_versions(
        pipeline_def, execution_plan, resolved_run_config
    )
    return versions_by_handle.get(step_output_handle)
def _step_output_version(
    pipeline_def: PipelineDefinition,
    execution_plan: "ExecutionPlan",
    environment_config: "EnvironmentConfig",
    step_output_handle: StepOutputHandle,
) -> Optional[str]:
    """Return the resolved version for one step output.

    Args:
        pipeline_def: Pipeline definition passed to the version resolver.
        execution_plan: Execution plan passed to the version resolver.
        environment_config: Environment config passed to the version resolver.
        step_output_handle: Handle of the step output to look up.

    Returns:
        The version mapped to ``step_output_handle``, or ``None`` when the handle
        is not among the resolved versions.
    """
    # Imported at call time rather than at module level.
    from dagster.core.execution.resolve_versions import resolve_step_output_versions

    versions_by_handle = resolve_step_output_versions(
        pipeline_def, execution_plan, environment_config
    )
    return versions_by_handle.get(step_output_handle)
def test_resolve_step_output_versions_no_external_dependencies():
    """Resolved versions for both solids' outputs equal the expected version helpers."""
    plan = create_execution_plan(versioned_pipeline)
    resolved = resolve_step_output_versions(plan, run_config={}, mode="default")

    first_handle = StepOutputHandle("versioned_solid_no_input.compute", "result")
    assert resolved[first_handle] == versioned_pipeline_expected_step1_output_version()

    second_handle = StepOutputHandle("versioned_solid_takes_input.compute", "result")
    assert resolved[second_handle] == versioned_pipeline_expected_step2_output_version()
def execute_list_versions_command(instance, kwargs):
    """Print a table of step outputs with their versions and stored addresses.

    Args:
        instance (DagsterInstance): Instance queried for stored output addresses.
        kwargs (dict): CLI arguments; ``config``, ``preset`` and ``mode`` are read here,
            and the whole mapping is used to locate the pipeline.

    Raises:
        click.UsageError: If both ``preset`` and ``config`` are provided.
    """
    check.inst_param(instance, "instance", DagsterInstance)

    config = list(
        check.opt_tuple_param(kwargs.get("config"), "config", default=(), of_type=str)
    )
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")

    if preset and config:
        raise click.UsageError("Can not use --preset with --config.")

    pipeline = recon_pipeline_from_origin(get_pipeline_python_origin_from_kwargs(kwargs))
    run_config = get_run_config_from_file_list(config)

    pipeline_def = pipeline.get_definition()
    pipeline_name = pipeline_def.name

    execution_plan = create_execution_plan(
        pipeline.get_definition(), run_config=run_config, mode=mode
    )
    environment_config = EnvironmentConfig.build(
        pipeline_def, run_config=run_config, mode=mode
    )
    versions_by_handle = resolve_step_output_versions(
        execution_plan,
        environment_config=environment_config,
        mode_def=pipeline_def.get_mode_definition(mode),
    )

    # Only outputs with a truthy version are looked up on the instance.
    address_query = {}
    for handle, version in versions_by_handle.items():
        if version:
            address_query[(pipeline_name, handle)] = version
    stored_addresses = instance.get_addresses_for_step_output_versions(address_query)

    # One row per step output; outputs with no address show the string "None".
    rows = [
        [
            "{key}.{output}".format(key=handle.step_key, output=handle.output_name),
            version,
            stored_addresses.get((pipeline_name, handle), "None"),
        ]
        for handle, version in versions_by_handle.items()
    ]

    click.echo(
        tabulate(rows, headers=["Step Output", "Version", "Address"], tablefmt="github")
    )
def execute_list_versions_command(instance, kwargs):
    """Print a table of step outputs with their versions and memoization status.

    Args:
        instance (DagsterInstance): Instance passed to the memoized plan resolution.
        kwargs (dict): CLI arguments; ``config``, ``preset`` and ``mode`` are read here,
            and the whole mapping is used to locate the pipeline.

    Raises:
        click.UsageError: If both ``preset`` and ``config`` are provided.
    """
    check.inst_param(instance, "instance", DagsterInstance)

    config = list(
        check.opt_tuple_param(kwargs.get("config"), "config", default=(), of_type=str)
    )
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")

    if preset and config:
        raise click.UsageError("Can not use --preset with --config.")

    pipeline = recon_pipeline_from_origin(get_pipeline_python_origin_from_kwargs(kwargs))
    run_config = get_run_config_from_file_list(config)

    environment_config = EnvironmentConfig.build(
        pipeline.get_definition(), run_config, mode=mode
    )
    execution_plan = ExecutionPlan.build(pipeline, environment_config)

    versions_by_handle = resolve_step_output_versions(
        pipeline.get_definition(), execution_plan, environment_config
    )
    memoized_plan = resolve_memoized_execution_plan(
        execution_plan,
        pipeline.get_definition(),
        run_config,
        instance,
        environment_config,
    )

    # Steps the memoized plan still has to execute are reported as not stored.
    pending_step_keys = set(memoized_plan.step_keys_to_execute)

    rows = [
        [
            "{key}.{output}".format(key=handle.step_key, output=handle.output_name),
            version,
            "to-be-recomputed" if handle.step_key in pending_step_keys else "stored",
        ]
        for handle, version in versions_by_handle.items()
    ]

    click.echo(
        tabulate(
            rows,
            headers=["Step Output", "Version", "Status of Output"],
            tablefmt="github",
        )
    )
def resolve_unmemoized_steps(self, execution_plan, run_config, mode):
    """Find the steps that have at least one output with no stored address.

    Returns:
        List[str]: Step keys for all steps that don't have existing results stored for their
        versions.
    """
    pipeline_name = execution_plan.pipeline.get_definition().name

    versions_by_handle = resolve_step_output_versions(
        execution_plan, run_config=run_config, mode=mode
    )

    # Every resolved output is looked up, whatever its version value.
    address_query = {}
    for handle, version in versions_by_handle.items():
        address_query[(pipeline_name, handle)] = version
    stored_addresses = self.get_addresses_for_step_output_versions(address_query)

    unmemoized_step_keys = set()
    for handle in versions_by_handle:
        if (pipeline_name, handle) not in stored_addresses:
            unmemoized_step_keys.add(handle.step_key)
    return list(unmemoized_step_keys)
def test_resolve_step_output_versions_no_external_dependencies():
    """Resolved versions for both solids' outputs equal the expected version helpers."""
    versioned_pipeline = versioned_pipeline_factory()
    plan = create_execution_plan(versioned_pipeline, run_config={}, mode="main")
    run_config = ResolvedRunConfig.build(versioned_pipeline, run_config={}, mode="main")

    resolved = resolve_step_output_versions(versioned_pipeline, plan, run_config)

    first_handle = StepOutputHandle("versioned_solid_no_input", "result")
    assert resolved[first_handle] == versioned_pipeline_expected_step1_output_version()

    second_handle = StepOutputHandle("versioned_solid_takes_input", "result")
    assert resolved[second_handle] == versioned_pipeline_expected_step2_output_version()
def _type_check_and_store_output(
    step_context: StepExecutionContext,
    output: Union[DynamicOutput, Output],
    input_lineage: List[AssetLineageInfo],
) -> Iterator[DagsterEvent]:
    """Capture, type check and store a step output, yielding the resulting events.

    Args:
        step_context: Context of the executing step.
        output: The output (or dynamic output) emitted by the step.
        input_lineage: Asset lineage info forwarded to the store step.

    Yields:
        Events from type checking the output, then from storing it, then from
        creating its type materializations.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.inst_param(output, "output", (Output, DynamicOutput))
    check.list_param(input_lineage, "input_lineage", AssetLineageInfo)

    # Only dynamic outputs carry a mapping key.
    mapping_key = output.mapping_key if isinstance(output, DynamicOutput) else None

    handle = StepOutputHandle(
        step_key=step_context.step.key,
        output_name=output.output_name,
        mapping_key=mapping_key,
    )

    # If we are executing using the execute_in_process API, then we allow for the outputs of
    # solids to be directly captured to a dictionary after they are computed.
    if step_context.output_capture is not None:
        step_context.output_capture[handle] = output.value

    # Capture output at the step level for threading the computed output values to hook context.
    if step_context.step_output_capture is not None:
        step_context.step_output_capture[handle] = output.value

    # Versions are resolved only when the pipeline definition is tagged for memoized runs.
    if MEMOIZED_RUN_TAG in step_context.pipeline.get_definition().tags:
        version = resolve_step_output_versions(
            step_context.pipeline_def,
            step_context.execution_plan,
            step_context.resolved_run_config,
        ).get(handle)
    else:
        version = None

    yield from _type_check_output(step_context, handle, output, version)
    yield from _store_output(step_context, handle, output, input_lineage)
    yield from _create_type_materializations(
        step_context, output.output_name, output.value
    )
def default_mode_output_versions(pipeline_def):
    """Resolve step output versions for a pipeline in its "default" mode with empty run config.

    Args:
        pipeline_def: Pipeline definition to plan and resolve versions for.

    Returns:
        Whatever ``resolve_step_output_versions`` returns for the plan.
    """
    plan = create_execution_plan(pipeline_def)
    environment_config = EnvironmentConfig.build(pipeline_def, {}, "default")
    mode_def = pipeline_def.get_mode_definition("default")
    return resolve_step_output_versions(plan, environment_config, mode_def)