Example #1
def test_cant_load_old_snapshot():
    snapshot = deserialize_json_to_dagster_namedtuple(
        OLD_EXECUTION_PLAN_SNAPSHOT)
    with pytest.raises(
            DagsterInvariantViolationError,
            match=
            "Tried to reconstruct an old ExecutionPlanSnapshot that was created before snapshots had enough information to fully reconstruct the ExecutionPlan",
    ):
        ExecutionPlan.rebuild_from_snapshot("noop_pipeline", snapshot)
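For context, OLD_EXECUTION_PLAN_SNAPSHOT is a JSON fixture produced by dagster's serdes machinery on an older release. A minimal sketch of how such a fixture could have been captured, assuming the serdes helper serialize_dagster_namedtuple (the inverse of deserialize_json_to_dagster_namedtuple) alongside the snapshot_from_execution_plan helper used in Example #2:

def capture_plan_snapshot_json(pipeline, pipeline_snapshot_id):
    # Build a plan, snapshot it, and serialize to JSON. Run against an old
    # enough dagster release, the resulting string is the kind of fixture
    # that trips the reconstruction error asserted above.
    plan = create_execution_plan(pipeline)
    snapshot = snapshot_from_execution_plan(plan, pipeline_snapshot_id)
    return serialize_dagster_namedtuple(snapshot)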
Example #2
def test_execution_plan_snapshot_backcompat():

    src_dir = file_relative_path(__file__, "test_execution_plan_snapshots/")
    snapshot_dirs = [
        f for f in os.listdir(src_dir)
        if not os.path.isfile(os.path.join(src_dir, f))
    ]
    for snapshot_dir_path in snapshot_dirs:
        print(f"Executing a saved run from {snapshot_dir_path}")  # pylint: disable=print-call

        with copy_directory(os.path.join(src_dir,
                                         snapshot_dir_path)) as test_dir:
            with DagsterInstance.from_ref(
                    InstanceRef.from_dir(test_dir)) as instance:
                runs = instance.get_runs()
                assert len(runs) == 1

                run = runs[0]
                assert run.status == PipelineRunStatus.NOT_STARTED

                the_pipeline = InMemoryPipeline(dynamic_pipeline)

                # First create a brand new plan from the pipeline and validate it
                new_plan = create_execution_plan(the_pipeline,
                                                 run_config=run.run_config)
                _validate_execution_plan(new_plan)

                # Create a snapshot and rebuild it, validate the rebuilt plan
                new_plan_snapshot = snapshot_from_execution_plan(
                    new_plan, run.pipeline_snapshot_id)
                rebuilt_plan = ExecutionPlan.rebuild_from_snapshot(
                    "dynamic_pipeline", new_plan_snapshot)
                _validate_execution_plan(rebuilt_plan)

                # Then validate the plan built from the historical snapshot on the run
                stored_snapshot = instance.get_execution_plan_snapshot(
                    run.execution_plan_snapshot_id)

                rebuilt_plan = ExecutionPlan.rebuild_from_snapshot(
                    "dynamic_pipeline", stored_snapshot)
                _validate_execution_plan(rebuilt_plan)

                # Finally, execute the run (using the historical execution plan snapshot)
                result = execute_run(the_pipeline,
                                     run,
                                     instance,
                                     raise_on_error=True)
                assert result.success
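_validate_execution_plan is a helper defined elsewhere in the test module. A minimal stand-in sketch, using only plan accessors that appear in these examples (topological_steps from Example #18, get_step_by_key from the subplan tests):

def _validate_execution_plan(plan):
    # Hypothetical: check the topological ordering is non-empty and every
    # step in it resolves back through the plan by key.
    steps = plan.topological_steps()
    assert steps
    for step in steps:
        assert plan.get_step_by_key(step.key)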
Example #3
def test_using_file_system_for_subplan_invalid_step():
    pipeline = define_inty_pipeline()

    run_config = {"storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()

    environment_config = EnvironmentConfig.build(
        pipeline,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)

    with pytest.raises(DagsterExecutionStepNotFoundError):
        execute_plan(
            execution_plan.build_subset_plan(["nope.compute"], pipeline,
                                             environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
Example #4
def create_execution_plan(
    pipeline: Union[IPipeline, PipelineDefinition],
    run_config: Optional[dict] = None,
    mode: Optional[str] = None,
    step_keys_to_execute: Optional[List[str]] = None,
) -> ExecutionPlan:
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)

    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    mode = check.opt_str_param(mode,
                               "mode",
                               default=pipeline_def.get_default_mode_name())
    check.opt_list_param(step_keys_to_execute,
                         "step_keys_to_execute",
                         of_type=str)

    environment_config = EnvironmentConfig.build(pipeline_def,
                                                 run_config,
                                                 mode=mode)

    return ExecutionPlan.build(pipeline,
                               environment_config,
                               mode=mode,
                               step_keys_to_execute=step_keys_to_execute)
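A short usage sketch for create_execution_plan, assuming a trivial pipeline built with the legacy @solid/@pipeline decorators of the same dagster era (the step-ordering accessor is the topological_steps method seen in Example #18; accessor names vary across versions):

from dagster import pipeline, solid

@solid
def return_one(_):
    return 1

@pipeline
def noop_pipeline():
    return_one()

plan = create_execution_plan(noop_pipeline)
print([step.key for step in plan.topological_steps()])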
Example #5
def test_compile():
    environment_config = EnvironmentConfig.build(
        composition,
        {"solids": {
            "add_four": {
                "inputs": {
                    "num": {
                        "value": 1
                    }
                }
            }
        }},
    )

    plan = ExecutionPlan.build(InMemoryPipeline(composition),
                               environment_config)

    res = coalesce_execution_steps(plan)

    assert set(res.keys()) == {
        "add_four.add_two.add_one",
        "add_four.add_two.add_one_2",
        "add_four.add_two_2.add_one",
        "add_four.add_two_2.add_one_2",
        "div_four.div_two",
        "div_four.div_two_2",
        "int_to_float",
    }
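The composition pipeline is imported from a shared fixture module; its shape can be read off the asserted step keys, which show two levels of nested composites with auto-aliased invocations (add_one, add_one_2). A hypothetical reconstruction of the add_four branch (the div_four and int_to_float branches would follow the same pattern):

from dagster import composite_solid, pipeline, solid

@solid
def add_one(_, num: int) -> int:
    return num + 1

@composite_solid
def add_two(num: int) -> int:
    # two invocations of add_one yield step keys add_one and add_one_2
    return add_one(add_one(num))

@composite_solid
def add_four(num: int) -> int:
    return add_two(add_two(num))

@pipeline
def composition():
    add_four()  # num is satisfied by the run config above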
Example #6
def test_using_file_system_for_subplan_missing_input():
    pipeline = define_inty_pipeline(using_file_system=True)

    instance = DagsterInstance.ephemeral()
    resolved_run_config = ResolvedRunConfig.build(pipeline)
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        resolved_run_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)

    events = execute_plan(
        execution_plan.build_subset_plan(["add_one"], pipeline,
                                         resolved_run_config),
        InMemoryPipeline(pipeline),
        instance,
        pipeline_run=pipeline_run,
    )
    failures = [
        event for event in events if event.event_type_value == "STEP_FAILURE"
    ]
    assert len(failures) == 1
    assert failures[0].step_key == "add_one"
    assert "DagsterExecutionLoadInputError" in failures[
        0].event_specific_data.error.message
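Many of these tests share a define_inty_pipeline fixture. A plausible sketch, assuming it chains return_one into add_one and, when using_file_system is set, attaches the built-in fs_io_manager so step outputs are pickled to disk (which is what the subplan tests below read back):

from dagster import (InputDefinition, Int, ModeDefinition, fs_io_manager,
                     pipeline, solid)

def define_inty_pipeline(using_file_system=False):
    @solid
    def return_one(_):
        return 1

    @solid(input_defs=[InputDefinition("num", Int)])
    def add_one(_, num):
        return num + 1

    mode_defs = ([ModeDefinition(resource_defs={"io_manager": fs_io_manager})]
                 if using_file_system else None)

    @pipeline(mode_defs=mode_defs)
    def inty_pipeline():
        add_one(return_one())

    return inty_pipeline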
Example #7
File: api.py Project: prezi/dagster
def create_execution_plan(
    pipeline: Union[IPipeline, PipelineDefinition],
    run_config: Optional[dict] = None,
    mode: Optional[str] = None,
    step_keys_to_execute: Optional[List[str]] = None,
    known_state: Optional[KnownExecutionState] = None,
) -> ExecutionPlan:
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    mode = check.opt_str_param(mode,
                               "mode",
                               default=pipeline_def.get_default_mode_name())
    check.opt_nullable_list_param(step_keys_to_execute,
                                  "step_keys_to_execute",
                                  of_type=str)

    resolved_run_config = ResolvedRunConfig.build(pipeline_def,
                                                  run_config,
                                                  mode=mode)

    return ExecutionPlan.build(
        pipeline,
        resolved_run_config,
        step_keys_to_execute=step_keys_to_execute,
        known_state=known_state,
    )
Example #8
def test_compile():
    # TODO: remove dependency on legacy_examples
    # https://github.com/dagster-io/dagster/issues/2653
    environment_config = EnvironmentConfig.build(
        composition,
        {'solids': {
            'add_four': {
                'inputs': {
                    'num': {
                        'value': 1
                    }
                }
            }
        }},
    )

    plan = ExecutionPlan.build(InMemoryExecutablePipeline(composition),
                               environment_config)

    res = coalesce_execution_steps(plan)

    assert set(res.keys()) == {
        'add_four.add_two.add_one',
        'add_four.add_two.add_one_2',
        'add_four.add_two_2.add_one',
        'add_four.add_two_2.add_one_2',
        'div_four.div_two',
        'div_four.div_two_2',
        'int_to_float',
    }
Example #9
def create_execution_plan(pipeline,
                          environment_dict=None,
                          mode=None,
                          step_keys_to_execute=None):
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)

    environment_dict = check.opt_dict_param(environment_dict,
                                            'environment_dict',
                                            key_type=str)
    mode = check.opt_str_param(mode,
                               'mode',
                               default=pipeline_def.get_default_mode_name())
    check.opt_list_param(step_keys_to_execute,
                         'step_keys_to_execute',
                         of_type=str)

    environment_config = EnvironmentConfig.build(pipeline_def,
                                                 environment_dict,
                                                 mode=mode)

    return ExecutionPlan.build(pipeline,
                               environment_config,
                               mode=mode,
                               step_keys_to_execute=step_keys_to_execute)
Example #10
def test_execution_plan_reexecution_with_in_memory():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    run_config = {"solids": {"add_one": {"inputs": {"num": {"value": 3}}}}}
    result = execute_pipeline(pipeline_def,
                              run_config=run_config,
                              instance=instance)

    assert result.success

    ## re-execute add_two

    environment_config = EnvironmentConfig.build(pipeline_def,
                                                 run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline_def),
                                         environment_config)

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        execution_plan=execution_plan,
        run_config=run_config,
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
    )

    with pytest.raises(DagsterInvariantViolationError):
        execute_plan(
            execution_plan.build_subset_plan(["add_two"], pipeline_def,
                                             environment_config),
            InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
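define_addy_pipeline is another shared fixture; the run_config and the re-executed step imply a two-step chain. A hypothetical sketch:

from dagster import InputDefinition, Int, pipeline, solid

def define_addy_pipeline():
    @solid(input_defs=[InputDefinition("num", Int)])
    def add_one(_, num):
        return num + 1

    @solid(input_defs=[InputDefinition("num", Int)])
    def add_two(_, num):
        return num + 2

    @pipeline
    def addy_pipeline():
        add_two(add_one())

    return addy_pipeline

The pytest.raises above then makes sense: re-executing only add_two with in-memory intermediates fails because add_one's output from the parent run was never persisted anywhere the subset plan can load it from.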
Example #11
def test_compile():
    environment_config = EnvironmentConfig.build(
        composition,
        {'solids': {
            'add_four': {
                'inputs': {
                    'num': {
                        'value': 1
                    }
                }
            }
        }},
    )

    plan = ExecutionPlan.build(InMemoryExecutablePipeline(composition),
                               environment_config)

    res = coalesce_execution_steps(plan)

    assert set(res.keys()) == {
        'add_four.add_two.add_one',
        'add_four.add_two.add_one_2',
        'add_four.add_two_2.add_one',
        'add_four.add_two_2.add_one_2',
        'div_four.div_two',
        'div_four.div_two_2',
        'int_to_float',
    }
Example #12
def test_compile():
    run_config = RunConfig()
    environment_config = EnvironmentConfig.build(
        composition,
        {'solids': {
            'add_four': {
                'inputs': {
                    'num': {
                        'value': 1
                    }
                }
            }
        }},
        run_config=None)

    plan = ExecutionPlan.build(
        composition, environment_config,
        composition.get_mode_definition(run_config.mode))

    res = coalesce_execution_steps(plan)

    assert set(res.keys()) == {
        'add_four.add_two.add_one',
        'add_four.add_two.add_one_2',
        'add_four.add_two_2.add_one',
        'add_four.add_two_2.add_one_2',
        'div_four.div_two',
        'div_four.div_two_2',
    }
Example #13
def test_using_file_system_for_subplan_missing_input():
    pipeline = define_inty_pipeline()
    run_config = {"storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()
    environment_config = EnvironmentConfig.build(
        pipeline,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)

    events = execute_plan(
        execution_plan.build_subset_plan(["add_one"], pipeline,
                                         environment_config),
        InMemoryPipeline(pipeline),
        instance,
        run_config=run_config,
        pipeline_run=pipeline_run,
    )
    failures = [
        event for event in events if event.event_type_value == "STEP_FAILURE"
    ]
    assert len(failures) == 1
    assert failures[0].step_key == "add_one"
    assert "DagsterStepOutputNotFoundError" in failures[
        0].event_specific_data.error.message
Example #14
def execute_run_host_mode(
    pipeline: ReconstructablePipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    executor_defs: Optional[List[ExecutorDefinition]] = None,
    raise_on_error: bool = False,
):
    check.inst_param(pipeline, "pipeline", ReconstructablePipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    check.opt_list_param(executor_defs,
                         "executor_defs",
                         of_type=ExecutorDefinition)
    executor_defs = executor_defs if executor_defs is not None else default_executors

    if pipeline_run.status == PipelineRunStatus.CANCELED:
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(
            message,
            pipeline_run,
        )
        raise DagsterInvariantViolationError(message)

    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Pipeline run {} ({}) in state {}, expected NOT_STARTED or STARTING"
        .format(pipeline_run.pipeline_name, pipeline_run.run_id,
                pipeline_run.status),
    )

    if pipeline_run.solids_to_execute:
        pipeline = pipeline.subset_for_execution_from_existing_pipeline(
            frozenset(pipeline_run.solids_to_execute))

    execution_plan_snapshot = instance.get_execution_plan_snapshot(
        pipeline_run.execution_plan_snapshot_id)
    execution_plan = ExecutionPlan.rebuild_from_snapshot(
        pipeline_run.pipeline_name,
        execution_plan_snapshot,
    )

    _execute_run_iterable = ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=pipeline_execution_iterator,
        execution_context_manager=PlanOrchestrationContextManager(
            context_event_generator=host_mode_execution_context_event_generator,
            pipeline=pipeline,
            execution_plan=execution_plan,
            run_config=pipeline_run.run_config,
            pipeline_run=pipeline_run,
            instance=instance,
            raise_on_error=raise_on_error,
            executor_defs=executor_defs,
            output_capture=None,
        ),
    )
    event_list = list(_execute_run_iterable)
    return event_list
Example #15
def inner_plan_execution_iterator(
        pipeline_context: PlanExecutionContext,
        execution_plan: ExecutionPlan) -> Iterator[DagsterEvent]:
    check.inst_param(pipeline_context, "pipeline_context",
                     PlanExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

    with execution_plan.start(
            retry_mode=pipeline_context.retry_mode) as active_execution:

        # It would be good to implement a reference tracking algorithm here to
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        while not active_execution.is_complete:
            step = active_execution.get_next_step()
            step_context = cast(
                StepExecutionContext,
                pipeline_context.for_step(
                    step,
                    active_execution.retry_state.get_attempt_count(step.key)),
            )
            step_event_list = []

            missing_resources = [
                resource_key
                for resource_key in step_context.required_resource_keys
                if not hasattr(step_context.resources, resource_key)
            ]
            check.invariant(
                len(missing_resources) == 0,
                ("Expected step context for solid {solid_name} to have all required resources, but "
                 "missing {missing_resources}.").format(
                     solid_name=step_context.solid.name,
                     missing_resources=missing_resources),
            )

            # capture all of the logs for this step
            with pipeline_context.instance.compute_log_manager.watch(
                    step_context.pipeline_run, step_context.step.key):

                for step_event in check.generator(
                        _dagster_event_sequence_for_step(step_context)):
                    check.inst(step_event, DagsterEvent)
                    step_event_list.append(step_event)
                    yield step_event
                    active_execution.handle_event(step_event)

                active_execution.verify_complete(pipeline_context, step.key)

            # process skips from failures or uncovered inputs
            for event in active_execution.plan_events_iterator(
                    pipeline_context):
                step_event_list.append(event)
                yield event

            # pass a list of step events to hooks
            for hook_event in _trigger_hook(step_context, step_event_list):
                yield hook_event
Example #16
def test_using_intermediate_file_system_for_subplan_multiprocessing():
    with instance_for_test() as instance:

        run_config = {"intermediate_storage": {"filesystem": {}}}

        pipeline = reconstructable(define_inty_pipeline)

        environment_config = EnvironmentConfig.build(
            pipeline.get_definition(),
            run_config=run_config,
        )
        execution_plan = ExecutionPlan.build(
            pipeline,
            environment_config,
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline.get_definition(),
            execution_plan=execution_plan)

        assert execution_plan.get_step_by_key("return_one")

        return_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(["return_one"],
                                                 pipeline.get_definition(),
                                                 environment_config),
                pipeline,
                instance,
                run_config=dict(run_config, execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            ))

        intermediate_storage = build_fs_intermediate_storage(
            instance.intermediates_directory, pipeline_run.run_id)

        assert get_step_output(return_one_step_events, "return_one")
        assert intermediate_storage.has_intermediate(
            None, StepOutputHandle("return_one"))
        assert (intermediate_storage.get_intermediate(
            None, Int, StepOutputHandle("return_one")).obj == 1)

        add_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(["add_one"],
                                                 pipeline.get_definition(),
                                                 environment_config),
                pipeline,
                instance,
                run_config=dict(run_config, execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            ))

        assert get_step_output(add_one_step_events, "add_one")
        assert intermediate_storage.has_intermediate(
            None, StepOutputHandle("add_one"))
        assert (intermediate_storage.get_intermediate(
            None, Int, StepOutputHandle("add_one")).obj == 2)
Example #17
def create_execution_plan(pipeline, environment_dict=None, mode=None):
    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    environment_dict = check.opt_dict_param(environment_dict,
                                            'environment_dict',
                                            key_type=str)
    check.opt_str_param(mode, 'mode')
    environment_config = create_environment_config(pipeline, environment_dict,
                                                   mode)
    return ExecutionPlan.build(pipeline, environment_config)
Example #18
def _execute_pipeline_iterator(context_or_failure_event):
    # Due to use of context managers, if user-land code in context or resource init fails
    # we can get either a pipeline_context or the failure event here.
    if (isinstance(context_or_failure_event, DagsterEvent)
            and context_or_failure_event.event_type
            == DagsterEventType.PIPELINE_INIT_FAILURE):
        yield context_or_failure_event
        return

    pipeline_context = context_or_failure_event
    check.inst_param(pipeline_context, 'pipeline_context',
                     SystemPipelineExecutionContext)
    yield DagsterEvent.pipeline_start(pipeline_context)

    execution_plan = ExecutionPlan.build(
        pipeline_context.pipeline_def,
        pipeline_context.environment_config,
        pipeline_context.mode_def,
    )

    steps = execution_plan.topological_steps()

    if not steps:
        pipeline_context.log.debug(
            'Pipeline {pipeline} has no nodes and no execution will happen'.
            format(pipeline=pipeline_context.pipeline_def.display_name))
        yield DagsterEvent.pipeline_success(pipeline_context)
        return

    _setup_reexecution(pipeline_context.run_config, pipeline_context,
                       execution_plan)

    pipeline_context.log.debug(
        'About to execute the compute node graph in the following order {order}'
        .format(order=[step.key for step in steps]))

    check.invariant(
        len([
            step_input for step_input in steps[0].step_inputs
            if step_input.is_from_output
        ]) == 0)

    pipeline_success = True

    try:
        for event in invoke_executor_on_plan(
                pipeline_context, execution_plan,
                pipeline_context.run_config.step_keys_to_execute):
            if event.is_step_failure:
                pipeline_success = False
            yield event
    finally:
        if pipeline_success:
            yield DagsterEvent.pipeline_success(pipeline_context)
        else:
            yield DagsterEvent.pipeline_failure(pipeline_context)
Example #19
def create_execution_plan(pipeline, environment_dict=None, run_config=None):
    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict', key_type=str)
    run_config = check.opt_inst_param(run_config, 'run_config', RunConfig, RunConfig())

    environment_config = EnvironmentConfig.build(pipeline, environment_dict, run_config)

    return ExecutionPlan.build(
        pipeline, environment_config, pipeline.get_mode_definition(run_config.mode)
    )
Example #20
def execute_run_host_mode(
    pipeline: ReconstructablePipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    get_executor_def_fn: Optional[Callable[[Optional[str]], ExecutorDefinition]] = None,
    raise_on_error: bool = False,
):
    check.inst_param(pipeline, "pipeline", ReconstructablePipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    check.opt_callable_param(get_executor_def_fn, "get_executor_def_fn")

    if pipeline_run.status == PipelineRunStatus.CANCELED:
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(
            message,
            pipeline_run,
        )
        raise DagsterInvariantViolationError(message)

    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Pipeline run {} ({}) in state {}, expected NOT_STARTED or STARTING"
        .format(pipeline_run.pipeline_name, pipeline_run.run_id,
                pipeline_run.status),
    )

    if pipeline_run.solids_to_execute:
        pipeline = pipeline.subset_for_execution_from_existing_pipeline(
            pipeline_run.solids_to_execute)

    execution_plan_snapshot = instance.get_execution_plan_snapshot(
        pipeline_run.execution_plan_snapshot_id)
    execution_plan = ExecutionPlan.rebuild_from_snapshot(
        pipeline_run.pipeline_name,
        execution_plan_snapshot,
    )

    _execute_run_iterable = ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=pipeline_execution_iterator,
        execution_context_manager=HostModeRunWorkerExecutionContextManager(
            execution_plan=execution_plan,
            recon_pipeline=pipeline,
            pipeline_run=pipeline_run,
            instance=instance,
            run_config=pipeline_run.run_config,
            get_executor_def_fn=get_executor_def_fn,
            raise_on_error=raise_on_error,
        ),
    )
    event_list = list(_execute_run_iterable)
    return event_list
Example #21
def test_using_file_system_for_subplan_multiprocessing():
    with instance_for_test() as instance:
        pipeline = reconstructable(define_reconstructable_inty_pipeline)

        resolved_run_config = ResolvedRunConfig.build(
            pipeline.get_definition())
        execution_plan = ExecutionPlan.build(
            pipeline,
            resolved_run_config,
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline.get_definition(),
            execution_plan=execution_plan)

        assert execution_plan.get_step_by_key("return_one")

        return_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(["return_one"],
                                                 pipeline.get_definition(),
                                                 resolved_run_config),
                pipeline,
                instance,
                run_config=dict(execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            ))

        assert get_step_output(return_one_step_events, "return_one")
        with open(
                os.path.join(instance.storage_directory(), pipeline_run.run_id,
                             "return_one", "result"),
                "rb",
        ) as read_obj:
            assert pickle.load(read_obj) == 1

        add_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(["add_one"],
                                                 pipeline.get_definition(),
                                                 resolved_run_config),
                pipeline,
                instance,
                run_config=dict(execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            ))

        assert get_step_output(add_one_step_events, "add_one")
        with open(
                os.path.join(instance.storage_directory(), pipeline_run.run_id,
                             "add_one", "result"),
                "rb",
        ) as read_obj:
            assert pickle.load(read_obj) == 2
Example #22
def test_using_intermediates_file_system_for_subplan():
    pipeline = define_inty_pipeline()

    run_config = {"intermediate_storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()
    environment_config = EnvironmentConfig.build(
        pipeline,
        run_config=run_config,
    )

    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)
    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline,
                                             environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        ))

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id)
    assert get_step_output(return_one_step_events, "return_one")
    assert intermediate_storage.has_intermediate(
        None, StepOutputHandle("return_one"))
    assert intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("return_one")).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline,
                                             environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        ))

    assert get_step_output(add_one_step_events, "add_one")
    assert intermediate_storage.has_intermediate(None,
                                                 StepOutputHandle("add_one"))
    assert intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("add_one")).obj == 2
Example #23
def rebuild_execution_plan_from_snapshot(
    pipeline: IPipeline,
    run_config: Optional[dict],
    mode: Optional[str],
    execution_plan_snapshot: ExecutionPlanSnapshot,
) -> ExecutionPlan:
    pipeline_def = pipeline.get_definition()
    environment_config = EnvironmentConfig.build(pipeline_def,
                                                 run_config,
                                                 mode=mode)
    return ExecutionPlan.rebuild_from_snapshot(
        pipeline,
        pipeline_def.name,
        execution_plan_snapshot,
        environment_config,
    )
Example #24
File: api.py Project: prezi/dagster
def _get_execution_plan_from_run(pipeline: IPipeline,
                                 pipeline_run: PipelineRun,
                                 instance: DagsterInstance) -> ExecutionPlan:
    if pipeline_run.execution_plan_snapshot_id:
        execution_plan_snapshot = instance.get_execution_plan_snapshot(
            pipeline_run.execution_plan_snapshot_id)
        if execution_plan_snapshot.can_reconstruct_plan:
            return ExecutionPlan.rebuild_from_snapshot(
                pipeline_run.pipeline_name,
                execution_plan_snapshot,
            )
    return create_execution_plan(
        pipeline,
        run_config=pipeline_run.run_config,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )
Example #25
def test_using_file_system_for_subplan():
    pipeline = define_inty_pipeline(using_file_system=True)

    instance = DagsterInstance.ephemeral()

    resolved_run_config = ResolvedRunConfig.build(pipeline)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline),
                                         resolved_run_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)
    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline,
                                             resolved_run_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        ))

    assert get_step_output(return_one_step_events, "return_one")
    with open(
            os.path.join(instance.storage_directory(), pipeline_run.run_id,
                         "return_one", "result"),
            "rb",
    ) as read_obj:
        assert pickle.load(read_obj) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline,
                                             resolved_run_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        ))

    assert get_step_output(add_one_step_events, "add_one")
    with open(
            os.path.join(instance.storage_directory(), pipeline_run.run_id,
                         "add_one", "result"),
            "rb",
    ) as read_obj:
        assert pickle.load(read_obj) == 2
Example #26
def execute_list_versions_command(instance, kwargs):
    check.inst_param(instance, "instance", DagsterInstance)

    config = list(
        check.opt_tuple_param(kwargs.get("config"),
                              "config",
                              default=(),
                              of_type=str))
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")

    if preset and config:
        raise click.UsageError("Can not use --preset with --config.")

    pipeline_origin = get_pipeline_python_origin_from_kwargs(kwargs)
    pipeline = recon_pipeline_from_origin(pipeline_origin)
    run_config = get_run_config_from_file_list(config)

    environment_config = EnvironmentConfig.build(pipeline.get_definition(),
                                                 run_config,
                                                 mode=mode)
    execution_plan = ExecutionPlan.build(pipeline, environment_config)

    step_output_versions = resolve_step_output_versions(
        pipeline.get_definition(), execution_plan, environment_config)
    memoized_plan = resolve_memoized_execution_plan(execution_plan,
                                                    pipeline.get_definition(),
                                                    run_config, instance,
                                                    environment_config)
    # the step keys that we need to execute are those which do not have their inputs populated.
    step_keys_not_stored = set(memoized_plan.step_keys_to_execute)
    table = []
    for step_output_handle, version in step_output_versions.items():
        table.append([
            "{key}.{output}".format(key=step_output_handle.step_key,
                                    output=step_output_handle.output_name),
            version,
            "stored" if step_output_handle.step_key not in step_keys_not_stored
            else "to-be-recomputed",
        ])
    table_str = tabulate(
        table,
        headers=["Step Output", "Version", "Status of Output"],
        tablefmt="github")
    click.echo(table_str)
Example #27
def test_execute_step_wrong_step_key():
    pipeline = define_inty_pipeline()
    instance = DagsterInstance.ephemeral()

    environment_config = EnvironmentConfig.build(pipeline)
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(
            execution_plan.build_subset_plan(["nope.compute"], pipeline,
                                             environment_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        )

    assert exc_info.value.step_keys == ["nope.compute"]

    assert (str(exc_info.value) ==
            "Can not build subset plan from unknown step: nope.compute")

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(
            execution_plan.build_subset_plan(
                ["nope.compute", "nuh_uh.compute"], pipeline,
                environment_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        )

    assert exc_info.value.step_keys == ["nope.compute", "nuh_uh.compute"]

    assert (
        str(exc_info.value) ==
        "Can not build subset plan from unknown steps: nope.compute, nuh_uh.compute"
    )
Example #28
    def execute(self):
        from dagster.core.execution.api import scoped_pipeline_context

        check.inst(self.run_config.executor_config, MultiprocessExecutorConfig)
        pipeline = self.run_config.executor_config.handle.build_pipeline_definition(
        )

        with scoped_pipeline_context(
                pipeline, self.environment_dict,
                self.run_config.with_tags(
                    pid=str(os.getpid()))) as pipeline_context:

            execution_plan = ExecutionPlan.build(
                pipeline_context.pipeline_def,
                pipeline_context.environment_config)

            for step_event in InProcessEngine.execute(
                    pipeline_context,
                    execution_plan,
                    step_keys_to_execute=[self.step_key]):
                yield step_event
Example #29
def _get_execution_plan_from_run(pipeline: IPipeline,
                                 pipeline_run: PipelineRun,
                                 instance: DagsterInstance) -> ExecutionPlan:
    if (
            # need to rebuild execution plan so it matches the subsetted graph
            pipeline.solids_to_execute is None
            and pipeline_run.execution_plan_snapshot_id):
        execution_plan_snapshot = instance.get_execution_plan_snapshot(
            pipeline_run.execution_plan_snapshot_id)
        if execution_plan_snapshot.can_reconstruct_plan:
            return ExecutionPlan.rebuild_from_snapshot(
                pipeline_run.pipeline_name,
                execution_plan_snapshot,
            )
    return create_execution_plan(
        pipeline,
        run_config=pipeline_run.run_config,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
        instance_ref=instance.get_ref() if instance.is_persistent else None,
    )
Example #30
def test_using_intermediates_to_override():
    pipeline = define_inty_pipeline()

    run_config = {
        "storage": {
            "filesystem": {}
        },
        "intermediate_storage": {
            "in_memory": {}
        }
    }

    instance = DagsterInstance.ephemeral()
    resolved_run_config = ResolvedRunConfig.build(
        pipeline,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        resolved_run_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)
    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline,
                                             resolved_run_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        ))

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id)
    assert get_step_output(return_one_step_events, "return_one")
    assert not intermediate_storage.has_intermediate(
        None, StepOutputHandle("return_one"))