コード例 #1
0
ファイル: execute_step.py プロジェクト: G9999/dagster
def _create_step_events_for_output(step_context, output):
    """Yield, in order, everything produced while handling one step output.

    The output is first run through
    ``_type_checked_step_output_event_sequence``, then ``_set_intermediates``,
    and finally ``_create_output_materializations``; whatever each helper
    yields is re-yielded unchanged.

    Args:
        step_context: Must be a ``SystemStepExecutionContext``.
        output: Must be an ``Output``.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.inst_param(output, "output", Output)

    current_step = step_context.step
    output_def = current_step.step_output_named(output.output_name)

    # Resolve versions for the whole plan, then pick out this output's entry.
    versions_by_handle = resolve_step_output_versions(
        step_context.execution_plan,
        step_context.environment_config,
        step_context.mode_def,
    )
    output_version = versions_by_handle[
        StepOutputHandle(step_context.step.key, output.output_name)
    ]

    type_check_events = _type_checked_step_output_event_sequence(
        step_context, output, output_version
    )
    for event in type_check_events:
        yield event

    # Built only after the type-check sequence has been fully consumed.
    handle = StepOutputHandle.from_step(
        step=current_step, output_name=output.output_name
    )

    intermediate_events = _set_intermediates(
        step_context, output_def, handle, output, output_version
    )
    for event in intermediate_events:
        yield event

    materialization_events = _create_output_materializations(
        step_context, output.output_name, output.value
    )
    for event in materialization_events:
        yield event
コード例 #2
0
def test_resolve_memoized_execution_plan_yes_stored_results():
    """A stored first-solid result leaves only the downstream solid to execute."""
    io_manager = VersionedInMemoryIOManager()
    pipeline_under_test = versioned_pipeline_factory(io_manager)

    plan = create_execution_plan(pipeline_under_test)
    run_config_resolved = ResolvedRunConfig.build(pipeline_under_test)

    stored_handle = StepOutputHandle("versioned_solid_no_input", "result")
    all_versions = resolve_step_output_versions(
        pipeline_under_test, plan, run_config_resolved
    )
    stored_version = all_versions[stored_handle]

    # Seed the manager with a value under the resolved version of the first
    # solid's output before resolving the memoized plan.
    storage_key = (
        stored_handle.step_key,
        stored_handle.output_name,
        stored_version,
    )
    io_manager.values[storage_key] = 4

    with DagsterInstance.ephemeral() as instance:
        memoized_plan = resolve_memoized_execution_plan(
            plan,
            pipeline_under_test,
            {},
            instance,
            run_config_resolved,
        )

        assert memoized_plan.step_keys_to_execute == [
            "versioned_solid_takes_input"
        ]

        expected = StepOutputHandle(
            step_key="versioned_solid_no_input", output_name="result"
        )

        # The downstream step's input should point at the upstream output.
        downstream_step = memoized_plan.get_step_by_key(
            "versioned_solid_takes_input"
        )
        input_source = downstream_step.step_input_dict["intput"].source
        assert input_source.step_output_handle == expected
コード例 #3
0
ファイル: __init__.py プロジェクト: lewismacdonald/dagster
    def resolve_unmemoized_steps(self, execution_plan, run_config, mode):
        """
        Collect the step keys whose outputs have no stored address for their version.

        Raises:
            DagsterInvariantViolationError: If any resolved step output version is None.

        Returns:
            List[str]: Step keys for all steps that don't have existing results stored for their
                versions.
        """
        pipeline_name = execution_plan.pipeline.get_definition().name
        versions_by_handle = resolve_step_output_versions(
            execution_plan, run_config=run_config, mode=mode
        )

        # Every output must carry a version before any address lookup happens.
        for handle, output_version in versions_by_handle.items():
            if output_version is not None:
                continue
            raise DagsterInvariantViolationError(
                "While creating a memoized pipeline run, a version is None for step "
                "{step_output}. Versions must be non-null values when running a memoized "
                "pipeline.".format(step_output=handle.step_key)
            )

        version_lookup = {}
        for handle, output_version in versions_by_handle.items():
            version_lookup[(pipeline_name, handle)] = output_version
        stored_addresses = self.get_addresses_for_step_output_versions(
            version_lookup
        )

        # A set de-duplicates steps that have several outputs.
        unmemoized_keys = set()
        for handle in versions_by_handle.keys():
            if (pipeline_name, handle) not in stored_addresses:
                unmemoized_keys.add(handle.step_key)
        return list(unmemoized_keys)
コード例 #4
0
    def resolve_memoized_execution_plan(self, execution_plan, run_config,
                                        mode):
        """
        Build a memoized plan from the steps that have no stored address for their version.

        Raises:
            DagsterInvariantViolationError: If every resolved step output version is None.

        Returns:
            ExecutionPlan: Execution plan configured to only run unmemoized steps.
        """
        pipeline_def = execution_plan.pipeline.get_definition()
        pipeline_name = pipeline_def.name

        environment_config = EnvironmentConfig.build(pipeline_def, run_config, mode)
        mode_def = pipeline_def.get_mode_definition(mode)
        versions_by_handle = resolve_step_output_versions(
            execution_plan, environment_config, mode_def
        )

        has_any_version = any(
            output_version is not None
            for output_version in versions_by_handle.values()
        )
        if not has_any_version:
            raise DagsterInvariantViolationError(
                "While creating a memoized pipeline run, no steps have versions. At least one step "
                "must have a version.")

        # Only outputs with a truthy version are sent to the address lookup.
        versioned_outputs = {}
        for handle, output_version in versions_by_handle.items():
            if not output_version:
                continue
            versioned_outputs[(pipeline_name, handle)] = output_version
        stored_addresses = self.get_addresses_for_step_output_versions(
            versioned_outputs
        )

        # A set de-duplicates steps that have several outputs.
        pending_keys = set()
        for handle in versions_by_handle.keys():
            if (pipeline_name, handle) not in stored_addresses:
                pending_keys.add(handle.step_key)
        step_keys_to_execute = list(pending_keys)

        return execution_plan.build_memoized_plan(step_keys_to_execute,
                                                  stored_addresses)
コード例 #5
0
def test_addresses_for_version(version_storing_context):
    """The address an output was yielded with is returned when looked up by its version."""

    @solid(version="abc")
    def solid1(_):
        yield Output(5, address="some_address")

    @solid(version="123")
    def solid2(_, _input1):
        pass

    @pipeline
    def my_pipeline():
        solid2(solid1())

    # The context manager yields a pair; only the first element is used here.
    with version_storing_context() as (instance, _):
        execute_pipeline(instance=instance, pipeline=my_pipeline)

        handle = StepOutputHandle("solid1.compute", "result")
        plan = create_execution_plan(my_pipeline)
        versions = resolve_step_output_versions(
            plan, run_config={}, mode="default"
        )
        version = versions[handle]

        found = instance.get_addresses_for_step_output_versions(
            {("my_pipeline", handle): version}
        )
        assert found == {("my_pipeline", handle): "some_address"}
コード例 #6
0
    def resolve_unmemoized_steps(self, execution_plan, run_config, mode):
        """
        Determine which steps have no stored address matching their resolved version.

        Raises:
            DagsterInvariantViolationError: If every resolved step output version is None.

        Returns:
            List[str]: Step keys for all steps that don't have existing results stored for their
                versions.
        """
        pipeline_name = execution_plan.pipeline.get_definition().name
        versions_by_handle = resolve_step_output_versions(
            execution_plan, run_config=run_config, mode=mode
        )

        has_any_version = any(
            output_version is not None
            for output_version in versions_by_handle.values()
        )
        if not has_any_version:
            raise DagsterInvariantViolationError(
                "While creating a memoized pipeline run, no steps have versions. At least one step "
                "must have a version.")

        # Only outputs with a truthy version are sent to the address lookup.
        versioned_outputs = {}
        for handle, output_version in versions_by_handle.items():
            if output_version:
                versioned_outputs[(pipeline_name, handle)] = output_version
        stored_addresses = self.get_addresses_for_step_output_versions(
            versioned_outputs
        )

        # A set de-duplicates steps that have several outputs.
        pending_keys = set()
        for handle in versions_by_handle.keys():
            if (pipeline_name, handle) not in stored_addresses:
                pending_keys.add(handle.step_key)
        return list(pending_keys)
コード例 #7
0
def test_resolve_memoized_execution_plan_partial_versioning():
    """With the versioned solid's result stored, only ``solid_takes_input`` is left to run."""
    io_manager = VersionedInMemoryIOManager()
    pipeline_under_test = partially_versioned_pipeline_factory(io_manager)

    plan = create_execution_plan(pipeline_under_test)
    run_config_resolved = ResolvedRunConfig.build(pipeline_under_test)

    stored_handle = StepOutputHandle("versioned_solid_no_input", "result")
    all_versions = resolve_step_output_versions(
        pipeline_under_test, plan, run_config_resolved
    )
    stored_version = all_versions[stored_handle]

    # Seed the manager with a value under the resolved version.
    storage_key = (
        stored_handle.step_key,
        stored_handle.output_name,
        stored_version,
    )
    io_manager.values[storage_key] = 4

    with DagsterInstance.ephemeral() as instance:
        memoized_plan = resolve_memoized_execution_plan(
            plan,
            pipeline_under_test,
            {},
            instance,
            run_config_resolved,
        )
        assert memoized_plan.step_keys_to_execute == ["solid_takes_input"]
コード例 #8
0
def resolve_step_output_versions_for_test(execution_plan,
                                          run_config=None,
                                          mode=None):
    """Call ``resolve_step_output_versions`` from a plan plus raw run config and mode.

    The environment config and mode definition that
    ``resolve_step_output_versions`` expects are derived from the plan's
    ``pipeline_def``.

    Args:
        execution_plan: Plan to resolve versions for; must expose ``pipeline_def``.
        run_config: Passed through to ``EnvironmentConfig.build`` (default None).
        mode: Mode name used for both the environment config and the mode
            definition lookup (default None).

    Returns:
        Whatever ``resolve_step_output_versions`` returns.
    """
    environment_config = EnvironmentConfig.build(
        execution_plan.pipeline_def, run_config, mode
    )
    mode_def = execution_plan.pipeline_def.get_mode_definition(mode)
    return resolve_step_output_versions(
        execution_plan=execution_plan,
        environment_config=environment_config,
        mode_def=mode_def,
    )
コード例 #9
0
ファイル: execute_step.py プロジェクト: markjm610/dagster
def _type_checked_step_output_event_sequence(step_context, output):
    """Type-check one step output, yield its step-output event, and fail if the check fails.

    A fresh execution plan is created from the step context so that the
    output's version can be resolved and attached to the event.

    Args:
        step_context: Must be a ``SystemStepExecutionContext``.
        output: Must be an ``Output``.

    Yields:
        The result of ``_create_step_output_event`` for this output.

    Raises:
        DagsterTypeCheckDidNotPass: After the event has been yielded, if the
            type check reports ``success`` as falsy.
    """
    from dagster.core.execution.api import create_execution_plan

    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.inst_param(output, "output", Output)

    output_def = step_context.step.step_output_named(output.output_name)
    plan = create_execution_plan(
        step_context.pipeline_def,
        run_config=step_context.run_config,
        mode=step_context.mode_def.name,
    )

    versions_by_handle = resolve_step_output_versions(
        plan,
        run_config=step_context.run_config,
        mode=step_context.mode_def.name,
    )
    output_version = versions_by_handle[
        StepOutputHandle(step_context.step.key, output.output_name)
    ]

    def _boundary_message():
        # Handed to the error boundary as a callable, so the message is only
        # formatted when the boundary invokes it.
        template = (
            'In solid "{handle}" the output "{output_name}" received '
            "value {output_value} of Python type {output_type} which "
            "does not pass the typecheck for Dagster type "
            "{dagster_type_name}. Step {step_key}."
        )
        return template.format(
            handle=str(step_context.step.solid_handle),
            output_name=output.output_name,
            output_value=output.value,
            output_type=type(output.value),
            dagster_type_name=output_def.dagster_type.name,
            step_key=step_context.step.key,
        )

    with user_code_error_boundary(DagsterTypeCheckError, _boundary_message):
        check_result = _do_type_check(
            step_context.for_type(output_def.dagster_type),
            output_def.dagster_type,
            output.value,
        )

        # The event is emitted whether or not the check passed; its
        # ``success`` field carries the outcome.
        yield _create_step_output_event(
            step_context,
            output,
            type_check=check_result,
            success=check_result.success,
            version=output_version,
        )

        if not check_result.success:
            failure_description = (
                "Type check failed for step output {output_name} of type {dagster_type}."
                .format(
                    output_name=output.output_name,
                    dagster_type=output_def.dagster_type.name,
                )
            )
            raise DagsterTypeCheckDidNotPass(
                description=failure_description,
                metadata_entries=check_result.metadata_entries,
                dagster_type=output_def.dagster_type,
            )
コード例 #10
0
ファイル: output.py プロジェクト: keyz/dagster
def step_output_version(
    pipeline_def: "PipelineDefinition",
    execution_plan: "ExecutionPlan",
    resolved_run_config: "ResolvedRunConfig",
    step_output_handle: "StepOutputHandle",
) -> Optional[str]:
    """Return the resolved version of one step output, or None when it has no entry.

    Args:
        pipeline_def: Pipeline definition passed to ``resolve_step_output_versions``.
        execution_plan: Execution plan passed to ``resolve_step_output_versions``.
        resolved_run_config: Resolved run config passed to
            ``resolve_step_output_versions``.
        step_output_handle: Handle of the output whose version is wanted.

    Returns:
        The version mapped to ``step_output_handle``, or None if the handle is
        not among the resolved versions.
    """
    # Function-scope import kept from the original; presumably it avoids a
    # circular import at module load time.
    from dagster.core.execution.resolve_versions import resolve_step_output_versions

    step_output_versions = resolve_step_output_versions(
        pipeline_def, execution_plan, resolved_run_config)
    # One lookup instead of a membership test followed by indexing.
    return step_output_versions.get(step_output_handle)
コード例 #11
0
def _step_output_version(
    pipeline_def: PipelineDefinition,
    execution_plan: "ExecutionPlan",
    environment_config: "EnvironmentConfig",
    step_output_handle: StepOutputHandle,
) -> Optional[str]:
    """Return the resolved version of one step output, or None when it has no entry.

    Args:
        pipeline_def: Pipeline definition passed to ``resolve_step_output_versions``.
        execution_plan: Execution plan passed to ``resolve_step_output_versions``.
        environment_config: Environment config passed to
            ``resolve_step_output_versions``.
        step_output_handle: Handle of the output whose version is wanted.

    Returns:
        The version mapped to ``step_output_handle``, or None if the handle is
        not among the resolved versions.
    """
    # Function-scope import kept from the original; presumably it avoids a
    # circular import at module load time.
    from dagster.core.execution.resolve_versions import resolve_step_output_versions

    step_output_versions = resolve_step_output_versions(
        pipeline_def, execution_plan, environment_config)
    # One lookup instead of a membership test followed by indexing.
    return step_output_versions.get(step_output_handle)
コード例 #12
0
def test_resolve_step_output_versions_no_external_dependencies():
    """Resolved versions of both solids' outputs match the expected-version helpers."""
    plan = create_execution_plan(versioned_pipeline)
    resolved = resolve_step_output_versions(plan, run_config={}, mode="default")

    first_handle = StepOutputHandle("versioned_solid_no_input.compute", "result")
    assert resolved[first_handle] == versioned_pipeline_expected_step1_output_version()

    second_handle = StepOutputHandle("versioned_solid_takes_input.compute", "result")
    assert resolved[second_handle] == versioned_pipeline_expected_step2_output_version()
コード例 #13
0
def execute_list_versions_command(instance, kwargs):
    """Echo a table of each step output's version and stored address.

    The pipeline is reconstructed from ``kwargs``, its step output versions are
    resolved, and the instance is asked for the addresses stored under those
    versions. The result is printed with ``click.echo`` as a GitHub-style table.

    Args:
        instance: Must be a ``DagsterInstance``.
        kwargs: Command options; ``config``, ``preset`` and ``mode`` are read
            here, and the whole mapping is passed on to
            ``get_pipeline_python_origin_from_kwargs``.

    Raises:
        click.UsageError: If both ``preset`` and ``config`` are supplied.
    """
    check.inst_param(instance, "instance", DagsterInstance)

    config = list(
        check.opt_tuple_param(kwargs.get("config"),
                              "config",
                              default=(),
                              of_type=str))
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")

    if preset and config:
        raise click.UsageError("Can not use --preset with --config.")

    pipeline_origin = get_pipeline_python_origin_from_kwargs(kwargs)
    pipeline = recon_pipeline_from_origin(pipeline_origin)
    run_config = get_run_config_from_file_list(config)
    pipeline_def = pipeline.get_definition()
    pipeline_name = pipeline_def.name
    # Reuse the definition fetched above rather than calling
    # pipeline.get_definition() a second time.
    execution_plan = create_execution_plan(pipeline_def,
                                           run_config=run_config,
                                           mode=mode)
    step_output_versions = resolve_step_output_versions(
        execution_plan,
        environment_config=EnvironmentConfig.build(pipeline_def,
                                                   run_config=run_config,
                                                   mode=mode),
        mode_def=pipeline_def.get_mode_definition(mode),
    )
    # Only outputs with a truthy version are sent to the address lookup.
    step_output_addresses = instance.get_addresses_for_step_output_versions({
        (pipeline_name, step_output_handle): version
        for step_output_handle, version in step_output_versions.items()
        if version
    })
    # Outputs without a stored address show the literal string "None".
    table = [
        [
            "{key}.{output}".format(key=step_output_handle.step_key,
                                    output=step_output_handle.output_name),
            version,
            step_output_addresses.get((pipeline_name, step_output_handle),
                                      "None"),
        ]
        for step_output_handle, version in step_output_versions.items()
    ]
    table_str = tabulate(table,
                         headers=["Step Output", "Version", "Address"],
                         tablefmt="github")
    click.echo(table_str)
コード例 #14
0
ファイル: pipeline.py プロジェクト: sarahmk125/dagster
def execute_list_versions_command(instance, kwargs):
    """Echo a table of each step output's version and whether it is already stored.

    The pipeline is reconstructed from ``kwargs``, its step output versions are
    resolved, and a memoized execution plan is built. An output is reported as
    "stored" when its step is not among the memoized plan's
    ``step_keys_to_execute``, otherwise as "to-be-recomputed". The result is
    printed with ``click.echo`` as a GitHub-style table.

    Args:
        instance: Must be a ``DagsterInstance``.
        kwargs: Command options; ``config``, ``preset`` and ``mode`` are read
            here, and the whole mapping is passed on to
            ``get_pipeline_python_origin_from_kwargs``.

    Raises:
        click.UsageError: If both ``preset`` and ``config`` are supplied.
    """
    check.inst_param(instance, "instance", DagsterInstance)

    config = list(
        check.opt_tuple_param(kwargs.get("config"),
                              "config",
                              default=(),
                              of_type=str))
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")

    if preset and config:
        raise click.UsageError("Can not use --preset with --config.")

    pipeline_origin = get_pipeline_python_origin_from_kwargs(kwargs)
    pipeline = recon_pipeline_from_origin(pipeline_origin)
    run_config = get_run_config_from_file_list(config)

    # Fetch the definition once so every consumer below sees the same object.
    pipeline_def = pipeline.get_definition()

    environment_config = EnvironmentConfig.build(pipeline_def,
                                                 run_config,
                                                 mode=mode)
    execution_plan = ExecutionPlan.build(pipeline, environment_config)

    step_output_versions = resolve_step_output_versions(
        pipeline_def, execution_plan, environment_config)
    memoized_plan = resolve_memoized_execution_plan(execution_plan,
                                                    pipeline_def,
                                                    run_config, instance,
                                                    environment_config)
    # Steps left in the memoized plan still have to run, so their outputs are
    # reported as not stored.
    step_keys_not_stored = set(memoized_plan.step_keys_to_execute)
    table = [
        [
            "{key}.{output}".format(key=step_output_handle.step_key,
                                    output=step_output_handle.output_name),
            version,
            "stored" if step_output_handle.step_key not in step_keys_not_stored
            else "to-be-recomputed",
        ]
        for step_output_handle, version in step_output_versions.items()
    ]
    table_str = tabulate(
        table,
        headers=["Step Output", "Version", "Status of Output"],
        tablefmt="github")
    click.echo(table_str)
コード例 #15
0
    def resolve_unmemoized_steps(self, execution_plan, run_config, mode):
        """
        Collect the step keys whose outputs have no stored address for their version.

        Returns:
            List[str]: Step keys for all steps that don't have existing results stored for their
                versions.
        """
        pipeline_name = execution_plan.pipeline.get_definition().name
        versions_by_handle = resolve_step_output_versions(
            execution_plan, run_config=run_config, mode=mode
        )

        # Every resolved version is sent to the lookup, unfiltered.
        version_lookup = {}
        for handle, output_version in versions_by_handle.items():
            version_lookup[(pipeline_name, handle)] = output_version
        stored_addresses = self.get_addresses_for_step_output_versions(
            version_lookup
        )

        # A set de-duplicates steps that have several outputs.
        pending_keys = set()
        for handle in versions_by_handle.keys():
            if (pipeline_name, handle) not in stored_addresses:
                pending_keys.add(handle.step_key)
        return list(pending_keys)
コード例 #16
0
def test_resolve_step_output_versions_no_external_dependencies():
    """Resolved versions of both solids' outputs match the expected-version helpers."""
    pipeline_under_test = versioned_pipeline_factory()
    plan = create_execution_plan(pipeline_under_test, run_config={}, mode="main")
    run_config_resolved = ResolvedRunConfig.build(
        pipeline_under_test, run_config={}, mode="main"
    )

    resolved = resolve_step_output_versions(
        pipeline_under_test, plan, run_config_resolved
    )

    first_handle = StepOutputHandle("versioned_solid_no_input", "result")
    assert resolved[first_handle] == versioned_pipeline_expected_step1_output_version()

    second_handle = StepOutputHandle("versioned_solid_takes_input", "result")
    assert resolved[second_handle] == versioned_pipeline_expected_step2_output_version()
コード例 #17
0
def _type_check_and_store_output(
    step_context: StepExecutionContext,
    output: Union[DynamicOutput, Output],
    input_lineage: List[AssetLineageInfo],
) -> Iterator[DagsterEvent]:
    """Capture, type-check and store one step output, yielding what each stage produces.

    In order: the output value is recorded in the context's capture
    dictionaries when those are set, then the items from
    ``_type_check_output``, ``_store_output`` and
    ``_create_type_materializations`` are re-yielded unchanged.

    Args:
        step_context: Must be a ``StepExecutionContext``.
        output: An ``Output`` or ``DynamicOutput``.
        input_lineage: List of ``AssetLineageInfo``, forwarded to ``_store_output``.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.inst_param(output, "output", (Output, DynamicOutput))
    check.list_param(input_lineage, "input_lineage", AssetLineageInfo)

    # Only dynamic outputs contribute a mapping key to the handle.
    if isinstance(output, DynamicOutput):
        mapping_key = output.mapping_key
    else:
        mapping_key = None

    handle = StepOutputHandle(
        step_key=step_context.step.key,
        output_name=output.output_name,
        mapping_key=mapping_key,
    )

    # If we are executing using the execute_in_process API, then we allow for the outputs of solids
    # to be directly captured to a dictionary after they are computed.
    if step_context.output_capture is not None:
        step_context.output_capture[handle] = output.value
    # Capture output at the step level for threading the computed output values to hook context.
    if step_context.step_output_capture is not None:
        step_context.step_output_capture[handle] = output.value

    # Versions are only resolved when the pipeline's tags contain the
    # memoized-run tag; otherwise the output has no version.
    if MEMOIZED_RUN_TAG in step_context.pipeline.get_definition().tags:
        versions_by_handle = resolve_step_output_versions(
            step_context.pipeline_def,
            step_context.execution_plan,
            step_context.resolved_run_config,
        )
        output_version = versions_by_handle.get(handle)
    else:
        output_version = None

    type_check_events = _type_check_output(
        step_context, handle, output, output_version
    )
    for event in type_check_events:
        yield event

    storage_events = _store_output(step_context, handle, output, input_lineage)
    for event in storage_events:
        yield event

    materialization_events = _create_type_materializations(
        step_context, output.output_name, output.value
    )
    for event in materialization_events:
        yield event
コード例 #18
0
def default_mode_output_versions(pipeline_def):
    """Resolve step output versions for ``pipeline_def`` in its "default" mode.

    The plan is created with ``create_execution_plan`` and the environment
    config is built from an empty run config.

    Args:
        pipeline_def: Pipeline definition to resolve versions for.

    Returns:
        Whatever ``resolve_step_output_versions`` returns.
    """
    plan = create_execution_plan(pipeline_def)
    environment_config = EnvironmentConfig.build(pipeline_def, {}, "default")
    default_mode_def = pipeline_def.get_mode_definition("default")
    return resolve_step_output_versions(plan, environment_config, default_mode_def)