def test_default_unmemoized_steps():
    speculative_execution_plan = create_execution_plan(no_version_pipeline)

    with pytest.raises(
            DagsterInvariantViolationError,
            match=
        ("While creating a memoized pipeline run, no steps have versions. At least one step "
         "must have a version."),
    ):
        resolve_memoized_execution_plan(speculative_execution_plan)
Example #2
0
def test_resolve_memoized_execution_plan_yes_stored_results():
    manager = VersionedInMemoryIOManager()
    versioned_pipeline = versioned_pipeline_factory(manager)
    speculative_execution_plan = create_execution_plan(versioned_pipeline)
    step_output_handle = StepOutputHandle("versioned_solid_no_input", "result")
    step_output_version = speculative_execution_plan.resolve_step_output_versions()[
        step_output_handle
    ]
    manager.values[
        (step_output_handle.step_key, step_output_handle.output_name, step_output_version)
    ] = 4

    with DagsterInstance.ephemeral() as dagster_instance:

        memoized_execution_plan = resolve_memoized_execution_plan(
            speculative_execution_plan, {}, dagster_instance
        )

        assert memoized_execution_plan.step_keys_to_execute == ["versioned_solid_takes_input"]

        expected_handle = StepOutputHandle(
            step_key="versioned_solid_no_input", output_name="result"
        )

        assert (
            memoized_execution_plan.get_step_by_key("versioned_solid_takes_input")
            .step_input_dict["intput"]
            .source.step_output_handle
            == expected_handle
        )
Example #3
0
def test_resolve_memoized_execution_plan_partial_versioning():
    manager = VersionedInMemoryIOManager()

    partially_versioned_pipeline = partially_versioned_pipeline_factory(
        manager)
    speculative_execution_plan = create_execution_plan(
        partially_versioned_pipeline)

    resolved_run_config = ResolvedRunConfig.build(partially_versioned_pipeline)

    step_output_handle = StepOutputHandle("versioned_solid_no_input", "result")

    step_output_version = resolve_step_output_versions(
        partially_versioned_pipeline, speculative_execution_plan,
        resolved_run_config)[step_output_handle]
    manager.values[(step_output_handle.step_key,
                    step_output_handle.output_name, step_output_version)] = 4

    with DagsterInstance.ephemeral() as instance:
        assert (resolve_memoized_execution_plan(
            speculative_execution_plan,
            partially_versioned_pipeline,
            {},
            instance,
            resolved_run_config,
        ).step_keys_to_execute == ["solid_takes_input"])
Example #4
0
def get_step_keys_to_execute(pipeline, run_config, mode, instance):
    memoized_execution_plan = resolve_memoized_execution_plan(
        create_execution_plan(pipeline, run_config=run_config, mode=mode),
        pipeline,
        run_config,
        instance,
        ResolvedRunConfig.build(pipeline, run_config=run_config, mode=mode),
    )
    return memoized_execution_plan.step_keys_to_execute
def test_resolve_memoized_execution_plan_no_stored_results():
    versioned_pipeline = versioned_pipeline_factory(VersionedInMemoryObjectManager())
    speculative_execution_plan = create_execution_plan(versioned_pipeline)

    memoized_execution_plan = resolve_memoized_execution_plan(speculative_execution_plan)

    assert set(memoized_execution_plan.step_keys_to_execute) == {
        "versioned_solid_no_input",
        "versioned_solid_takes_input",
    }
Example #6
0
def test_resolve_memoized_execution_plan_no_stored_results():
    versioned_pipeline = versioned_pipeline_factory(VersionedInMemoryIOManager())
    speculative_execution_plan = create_execution_plan(versioned_pipeline)

    with DagsterInstance.ephemeral() as dagster_instance:
        memoized_execution_plan = resolve_memoized_execution_plan(
            speculative_execution_plan, {}, dagster_instance
        )

        assert set(memoized_execution_plan.step_keys_to_execute) == {
            "versioned_solid_no_input",
            "versioned_solid_takes_input",
        }
def test_resolve_memoized_execution_plan_partial_versioning():
    object_manager = VersionedInMemoryObjectManager()

    partially_versioned_pipeline = partially_versioned_pipeline_factory(object_manager)
    speculative_execution_plan = create_execution_plan(partially_versioned_pipeline)
    step_output_handle = StepOutputHandle("versioned_solid_no_input", "result")

    step_output_version = speculative_execution_plan.resolve_step_output_versions()[
        step_output_handle
    ]
    object_manager.values[
        (step_output_handle.step_key, step_output_handle.output_name, step_output_version)
    ] = 4

    assert resolve_memoized_execution_plan(speculative_execution_plan).step_keys_to_execute == [
        "solid_takes_input"
    ]
Example #8
0
def execute_list_versions_command(instance, kwargs):
    check.inst_param(instance, "instance", DagsterInstance)

    config = list(
        check.opt_tuple_param(kwargs.get("config"),
                              "config",
                              default=(),
                              of_type=str))
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")

    if preset and config:
        raise click.UsageError("Can not use --preset with --config.")

    pipeline_origin = get_pipeline_python_origin_from_kwargs(kwargs)
    pipeline = recon_pipeline_from_origin(pipeline_origin)
    run_config = get_run_config_from_file_list(config)

    environment_config = EnvironmentConfig.build(pipeline.get_definition(),
                                                 run_config,
                                                 mode=mode)
    execution_plan = ExecutionPlan.build(pipeline, environment_config)

    step_output_versions = resolve_step_output_versions(
        pipeline.get_definition(), execution_plan, environment_config)
    memoized_plan = resolve_memoized_execution_plan(execution_plan,
                                                    pipeline.get_definition(),
                                                    run_config, instance,
                                                    environment_config)
    # the step keys that we need to execute are those which do not have their inputs populated.
    step_keys_not_stored = set(memoized_plan.step_keys_to_execute)
    table = []
    for step_output_handle, version in step_output_versions.items():
        table.append([
            "{key}.{output}".format(key=step_output_handle.step_key,
                                    output=step_output_handle.output_name),
            version,
            "stored" if step_output_handle.step_key not in step_keys_not_stored
            else "to-be-recomputed",
        ])
    table_str = tabulate(
        table,
        headers=["Step Output", "Version", "Status of Output"],
        tablefmt="github")
    click.echo(table_str)
def test_resolve_memoized_execution_plan_yes_stored_results():
    object_manager = VersionedInMemoryObjectManager()
    versioned_pipeline = versioned_pipeline_factory(object_manager)
    speculative_execution_plan = create_execution_plan(versioned_pipeline)
    step_output_handle = StepOutputHandle("versioned_solid_no_input", "result")
    step_output_version = speculative_execution_plan.resolve_step_output_versions()[
        step_output_handle
    ]
    object_manager.values[
        (step_output_handle.step_key, step_output_handle.output_name, step_output_version)
    ] = 4

    memoized_execution_plan = resolve_memoized_execution_plan(speculative_execution_plan)

    assert memoized_execution_plan.step_keys_to_execute == ["versioned_solid_takes_input"]

    expected_handle = StepOutputHandle(step_key="versioned_solid_no_input", output_name="result")

    assert (
        memoized_execution_plan.get_step_by_key("versioned_solid_takes_input")
        .step_input_dict["intput"]
        .source.step_output_handle
        == expected_handle
    )
def get_step_keys_to_execute(pipeline, run_config, mode):
    memoized_execution_plan = resolve_memoized_execution_plan(
        create_execution_plan(pipeline, run_config=run_config, mode=mode),
        run_config)
    return memoized_execution_plan.step_keys_to_execute
Example #11
0
File: api.py Project: prezi/dagster
def execute_run(
    pipeline: IPipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    raise_on_error: bool = False,
) -> PipelineExecutionResult:
    """Executes an existing pipeline run synchronously.

    Synchronous version of execute_run_iterator.

    Args:
        pipeline (IPipeline): The pipeline to execute.
        pipeline_run (PipelineRun): The run to execute
        instance (DagsterInstance): The instance in which the run has been created.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``False``.

    Returns:
        PipelineExecutionResult: The result of the execution.
    """
    if isinstance(pipeline, PipelineDefinition):
        raise DagsterInvariantViolationError(
            "execute_run requires an IPipeline but received a PipelineDefinition "
            "directly instead. To support hand-off to other processes provide a "
            "ReconstructablePipeline which can be done using reconstructable(). For in "
            "process only execution you can use InMemoryPipeline.")

    check.inst_param(pipeline, "pipeline", IPipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)

    if pipeline_run.status == PipelineRunStatus.CANCELED:
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(
            message,
            pipeline_run,
        )
        raise DagsterInvariantViolationError(message)

    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Pipeline run {} ({}) in state {}, expected NOT_STARTED or STARTING"
        .format(pipeline_run.pipeline_name, pipeline_run.run_id,
                pipeline_run.status),
    )
    pipeline_def = pipeline.get_definition()
    if pipeline_run.solids_to_execute:
        if isinstance(pipeline_def, PipelineSubsetDefinition):
            check.invariant(
                pipeline_run.solids_to_execute == pipeline.solids_to_execute,
                "Cannot execute PipelineRun with solids_to_execute {solids_to_execute} that "
                "conflicts with pipeline subset {pipeline_solids_to_execute}.".
                format(
                    pipeline_solids_to_execute=str_format_set(
                        pipeline.solids_to_execute),
                    solids_to_execute=str_format_set(
                        pipeline_run.solids_to_execute),
                ),
            )
        else:
            # when `execute_run` is directly called, the sub pipeline hasn't been created
            # note that when we receive the solids to execute via PipelineRun, it won't support
            # solid selection query syntax
            pipeline = pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute)

    execution_plan = _get_execution_plan_from_run(pipeline, pipeline_run,
                                                  instance)

    if is_memoized_run(pipeline_run.tags):
        resolved_run_config = ResolvedRunConfig.build(
            pipeline.get_definition(), pipeline_run.run_config,
            pipeline_run.mode)

        execution_plan = resolve_memoized_execution_plan(
            execution_plan,
            pipeline.get_definition(),
            pipeline_run.run_config,
            instance,
            resolved_run_config,
        )

    output_capture: Optional[Dict[StepOutputHandle, Any]] = {}

    _execute_run_iterable = ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=pipeline_execution_iterator,
        execution_context_manager=PlanOrchestrationContextManager(
            context_event_generator=orchestration_context_event_generator,
            pipeline=pipeline,
            execution_plan=execution_plan,
            pipeline_run=pipeline_run,
            instance=instance,
            run_config=pipeline_run.run_config,
            raise_on_error=raise_on_error,
            executor_defs=None,
            output_capture=output_capture,
        ),
    )
    event_list = list(_execute_run_iterable)

    return PipelineExecutionResult(
        pipeline.get_definition(),
        pipeline_run.run_id,
        event_list,
        lambda: scoped_pipeline_context(
            execution_plan,
            pipeline,
            pipeline_run.run_config,
            pipeline_run,
            instance,
        ),
        output_capture=output_capture,
    )
Example #12
0
def execute_run(
    pipeline: IPipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    raise_on_error: bool = False,
) -> PipelineExecutionResult:
    """Executes an existing pipeline run synchronously.

    Synchronous version of execute_run_iterator.

    Args:
        pipeline (IPipeline): The pipeline to execute.
        pipeline_run (PipelineRun): The run to execute
        instance (DagsterInstance): The instance in which the run has been created.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``False``.

    Returns:
        PipelineExecutionResult: The result of the execution.
    """
    if isinstance(pipeline, PipelineDefinition):
        raise DagsterInvariantViolationError(
            "execute_run requires an IPipeline but received a PipelineDefinition "
            "directly instead. To support hand-off to other processes provide a "
            "ReconstructablePipeline which can be done using reconstructable(). For in "
            "process only execution you can use InMemoryPipeline.")

    check.inst_param(pipeline, "pipeline", IPipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)

    if pipeline_run.status == PipelineRunStatus.CANCELED:
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(
            message,
            pipeline_run,
        )
        raise DagsterInvariantViolationError(message)

    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Pipeline run {} ({}) in state {}, expected NOT_STARTED or STARTING"
        .format(pipeline_run.pipeline_name, pipeline_run.run_id,
                pipeline_run.status),
    )
    pipeline_def = pipeline.get_definition()
    if pipeline_run.solids_to_execute:
        if isinstance(pipeline_def, PipelineSubsetDefinition):
            check.invariant(
                pipeline_run.solids_to_execute == pipeline.solids_to_execute,
                "Cannot execute PipelineRun with solids_to_execute {solids_to_execute} that "
                "conflicts with pipeline subset {pipeline_solids_to_execute}.".
                format(
                    pipeline_solids_to_execute=str_format_set(
                        pipeline.solids_to_execute),
                    solids_to_execute=str_format_set(
                        pipeline_run.solids_to_execute),
                ),
            )
        else:
            # when `execute_run` is directly called, the sub pipeline hasn't been created
            # note that when we receive the solids to execute via PipelineRun, it won't support
            # solid selection query syntax
            pipeline = pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute)

    execution_plan = create_execution_plan(
        pipeline,
        run_config=pipeline_run.run_config,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )

    if is_memoized_run(pipeline_run.tags):
        execution_plan = resolve_memoized_execution_plan(execution_plan)

    _execute_run_iterable = _ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=_pipeline_execution_iterator,
        execution_context_manager=PipelineExecutionContextManager(
            execution_plan=execution_plan,
            pipeline_run=pipeline_run,
            instance=instance,
            run_config=pipeline_run.run_config,
            raise_on_error=raise_on_error,
        ),
    )
    event_list = list(_execute_run_iterable)
    pipeline_context = _execute_run_iterable.pipeline_context

    # workaround for mem_io_manager to work in reconstruct_context, e.g. result.result_for_solid
    # in-memory values dict will get lost when the resource is re-initiated in reconstruct_context
    # so instead of re-initiating every single resource, we pass the resource instances to
    # reconstruct_context directly to avoid re-building from resource def.
    resource_instances_to_override = {}
    if pipeline_context:  # None if we have a pipeline failure
        for (
                key,
                resource_instance,
        ) in pipeline_context.scoped_resources_builder.resource_instance_dict.items(
        ):
            if isinstance(resource_instance, InMemoryIOManager):
                resource_instances_to_override[key] = resource_instance

    return PipelineExecutionResult(
        pipeline.get_definition(),
        pipeline_run.run_id,
        event_list,
        lambda hardcoded_resources_arg: scoped_pipeline_context(
            execution_plan,
            pipeline_run.run_config,
            pipeline_run,
            instance,
            intermediate_storage=pipeline_context.intermediate_storage,
            resource_instances_to_override=hardcoded_resources_arg,
        ),
        resource_instances_to_override=resource_instances_to_override,
    )
Example #13
0
def execute_run(pipeline, pipeline_run, instance, raise_on_error=False):
    """Executes an existing pipeline run synchronously.

    Synchronous version of execute_run_iterator.

    Args:
        pipeline (IPipeline): The pipeline to execute.
        pipeline_run (PipelineRun): The run to execute
        instance (DagsterInstance): The instance in which the run has been created.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``False``.

    Returns:
        PipelineExecutionResult: The result of the execution.
    """
    if isinstance(pipeline, PipelineDefinition):
        raise DagsterInvariantViolationError(
            "execute_run requires an IPipeline but received a PipelineDefinition "
            "directly instead. To support hand-off to other processes provide a "
            "ReconstructablePipeline which can be done using reconstructable(). For in "
            "process only execution you can use InMemoryPipeline.")

    check.inst_param(pipeline, "pipeline", IPipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Pipeline run {} ({}) in state {}, expected NOT_STARTED or STARTING"
        .format(pipeline_run.pipeline_name, pipeline_run.run_id,
                pipeline_run.status),
    )
    pipeline_def = pipeline.get_definition()
    if pipeline_run.solids_to_execute:
        if isinstance(pipeline_def, PipelineSubsetDefinition):
            check.invariant(
                pipeline_run.solids_to_execute == pipeline.solids_to_execute,
                "Cannot execute PipelineRun with solids_to_execute {solids_to_execute} that "
                "conflicts with pipeline subset {pipeline_solids_to_execute}.".
                format(
                    pipeline_solids_to_execute=str_format_set(
                        pipeline.solids_to_execute),
                    solids_to_execute=str_format_set(
                        pipeline_run.solids_to_execute),
                ),
            )
        else:
            # when `execute_run` is directly called, the sub pipeline hasn't been created
            # note that when we receive the solids to execute via PipelineRun, it won't support
            # solid selection query syntax
            pipeline = pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute)

    execution_plan = create_execution_plan(
        pipeline,
        run_config=pipeline_run.run_config,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )

    if is_memoized_run(pipeline_run.tags):
        execution_plan = resolve_memoized_execution_plan(execution_plan)

    _execute_run_iterable = _ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=_pipeline_execution_iterator,
        execution_context_manager=PipelineExecutionContextManager(
            execution_plan=execution_plan,
            pipeline_run=pipeline_run,
            instance=instance,
            run_config=pipeline_run.run_config,
            raise_on_error=raise_on_error,
        ),
    )
    event_list = list(_execute_run_iterable)
    pipeline_context = _execute_run_iterable.pipeline_context

    return PipelineExecutionResult(
        pipeline.get_definition(),
        pipeline_run.run_id,
        event_list,
        lambda: scoped_pipeline_context(
            execution_plan,
            pipeline_run.run_config,
            pipeline_run,
            instance,
            intermediate_storage=pipeline_context.intermediate_storage,
        ),
    )