def test_using_gcs_for_subplan(gcs_bucket):
    pipeline_def = define_inty_pipeline()

    environment_dict = {
        'storage': {
            'gcs': {
                'config': {
                    'gcs_bucket': gcs_bucket
                }
            }
        }
    }

    run_id = str(uuid.uuid4())

    execution_plan = create_execution_plan(pipeline_def,
                                           environment_dict=environment_dict,
                                           run_config=RunConfig(run_id=run_id))

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun.create_empty_run(
        pipeline_def.name, run_id=run_id, environment_dict=environment_dict)

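    # Execute only the 'return_one.compute' step; its output intermediate is written to GCS.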
    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    assert get_step_output(return_one_step_events, 'return_one.compute')
    with scoped_pipeline_context(pipeline_def, environment_dict, pipeline_run,
                                 instance) as context:
        store = GCSIntermediateStore(
            gcs_bucket,
            run_id,
            client=context.scoped_resources_builder.build().gcs.client)
        assert store.has_intermediate(context, 'return_one.compute')
        assert store.get_intermediate(context, 'return_one.compute',
                                      Int).obj == 1

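    # Execute the downstream 'add_one.compute' step in a second subplan; it reads its input from the GCS intermediate store.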
    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    assert get_step_output(add_one_step_events, 'add_one.compute')
    with scoped_pipeline_context(pipeline_def, environment_dict, pipeline_run,
                                 instance) as context:
        assert store.has_intermediate(context, 'add_one.compute')
        assert store.get_intermediate(context, 'add_one.compute', Int).obj == 2
def test_using_intermediate_file_system_for_subplan_multiprocessing():
    with instance_for_test() as instance:

        run_config = {"intermediate_storage": {"filesystem": {}}}

        pipeline = reconstructable(define_inty_pipeline)

        environment_config = EnvironmentConfig.build(
            pipeline.get_definition(),
            run_config=run_config,
        )
        execution_plan = ExecutionPlan.build(
            pipeline,
            environment_config,
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline.get_definition(),
            execution_plan=execution_plan)

        assert execution_plan.get_step_by_key("return_one")

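        # Execute only the 'return_one' step, using the multiprocess executor enabled via the run_config override.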
        return_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(["return_one"],
                                                 pipeline.get_definition(),
                                                 environment_config),
                pipeline,
                instance,
                run_config=dict(run_config, execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            ))

        intermediate_storage = build_fs_intermediate_storage(
            instance.intermediates_directory, pipeline_run.run_id)

        assert get_step_output(return_one_step_events, "return_one")
        assert intermediate_storage.has_intermediate(
            None, StepOutputHandle("return_one"))
        assert (intermediate_storage.get_intermediate(
            None, Int, StepOutputHandle("return_one")).obj == 1)

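        # Execute the downstream 'add_one' step; it consumes the intermediate persisted by the previous subplan run.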
        add_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(["add_one"],
                                                 pipeline.get_definition(),
                                                 environment_config),
                pipeline,
                instance,
                run_config=dict(run_config, execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            ))

        assert get_step_output(add_one_step_events, "add_one")
        assert intermediate_storage.has_intermediate(
            None, StepOutputHandle("add_one"))
        assert (intermediate_storage.get_intermediate(
            None, Int, StepOutputHandle("add_one")).obj == 2)
def test_execute_step_wrong_step_key():
    pipeline = define_inty_pipeline()

    execution_plan = create_execution_plan(pipeline)

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(execution_plan, step_keys_to_execute=['nope'])

    assert exc_info.value.step_key == 'nope'

    assert str(exc_info.value) == 'Execution plan does not contain step "nope"'
def test_using_file_system_for_subplan_multiprocessing():
    with instance_for_test() as instance:
        pipeline = reconstructable(define_reconstructable_inty_pipeline)

        resolved_run_config = ResolvedRunConfig.build(
            pipeline.get_definition())
        execution_plan = ExecutionPlan.build(
            pipeline,
            resolved_run_config,
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline.get_definition(),
            execution_plan=execution_plan)

        assert execution_plan.get_step_by_key("return_one")

        return_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(["return_one"],
                                                 pipeline.get_definition(),
                                                 resolved_run_config),
                pipeline,
                instance,
                run_config=dict(execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            ))

        assert get_step_output(return_one_step_events, "return_one")
        with open(
                os.path.join(instance.storage_directory(), pipeline_run.run_id,
                             "return_one", "result"),
                "rb",
        ) as read_obj:
            assert pickle.load(read_obj) == 1

        add_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(["add_one"],
                                                 pipeline.get_definition(),
                                                 resolved_run_config),
                pipeline,
                instance,
                run_config=dict(execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            ))

        assert get_step_output(add_one_step_events, "add_one")
        with open(
                os.path.join(instance.storage_directory(), pipeline_run.run_id,
                             "add_one", "result"),
                "rb",
        ) as read_obj:
            assert pickle.load(read_obj) == 2
def test_using_s3_for_subplan(s3_bucket):
    pipeline = define_inty_pipeline()

    environment_dict = {'storage': {'s3': {'s3_bucket': s3_bucket}}}

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']

    run_id = str(uuid.uuid4())

    store = S3IntermediateStore(s3_bucket, run_id)

    try:
        return_one_step_events = list(
            execute_plan(
                execution_plan,
                environment_dict=environment_dict,
                run_config=RunConfig(run_id=run_id),
                step_keys_to_execute=step_keys,
            )
        )

        assert get_step_output(return_one_step_events, 'return_one.compute')
        with scoped_pipeline_context(
            pipeline, environment_dict, RunConfig(run_id=run_id)
        ) as context:
            assert store.has_intermediate(context, 'return_one.compute')
            assert store.get_intermediate(context, 'return_one.compute', Int) == 1

        add_one_step_events = list(
            execute_plan(
                execution_plan,
                environment_dict=environment_dict,
                run_config=RunConfig(run_id=run_id),
                step_keys_to_execute=['add_one.compute'],
            )
        )

        assert get_step_output(add_one_step_events, 'add_one.compute')
        with scoped_pipeline_context(
            pipeline, environment_dict, RunConfig(run_id=run_id)
        ) as context:
            assert store.has_intermediate(context, 'add_one.compute')
            assert store.get_intermediate(context, 'add_one.compute', Int) == 2
    finally:
        with scoped_pipeline_context(
            pipeline, environment_dict, RunConfig(run_id=run_id)
        ) as context:
            store.rm_intermediate(context, 'return_one.compute')
            store.rm_intermediate(context, 'add_one.compute')
def _do_execute_plan(graphene_info, execution_params, dauphin_pipeline):
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.inst_param(execution_params, 'execution_params', ExecutionParams)

    run_id = execution_params.execution_metadata.run_id
    run_config = RunConfig(
        run_id=run_id, mode=execution_params.mode, tags=execution_params.execution_metadata.tags
    )

    execution_plan = create_execution_plan(
        pipeline=dauphin_pipeline.get_dagster_pipeline(),
        environment_dict=execution_params.environment_dict,
        run_config=run_config,
    )

    if execution_params.step_keys:
        for step_key in execution_params.step_keys:
            if not execution_plan.has_step(step_key):
                raise UserFacingGraphQLError(
                    graphene_info.schema.type_named('InvalidStepError')(invalid_step_key=step_key)
                )

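    # Capture Dagster events emitted for this run via the instance's event listener.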
    event_logs = []

    def _on_event_record(record):
        if record.is_dagster_event:
            event_logs.append(record)

    graphene_info.context.instance.add_event_listener(run_id, _on_event_record)

    execute_plan(
        execution_plan=execution_plan,
        environment_dict=execution_params.environment_dict,
        run_config=run_config,
        step_keys_to_execute=execution_params.step_keys,
        instance=graphene_info.context.instance,
    )

    def to_graphql_event(event_record):
        return from_dagster_event_record(
            graphene_info, event_record, dauphin_pipeline, execution_plan
        )

    return graphene_info.schema.type_named('ExecutePlanSuccess')(
        pipeline=dauphin_pipeline,
        has_failures=any(
            er
            for er in event_logs
            if er.is_dagster_event and er.dagster_event.event_type == DagsterEventType.STEP_FAILURE
        ),
        step_events=list(map(to_graphql_event, event_logs)),
        raw_event_records=list(map(serialize_dagster_namedtuple, event_logs)),
    )
def test_configured_input_should_skip_step():
    called = {}

    @solid(output_defs=[OutputDefinition(is_required=False)])
    def one(_):
        yield Output(1)

    @solid
    def solid_should_not_skip(_, input_one, input_two):  # pylint: disable=unused-argument
        called["yup"] = True

    @pipeline
    def my_pipeline():
        solid_should_not_skip(one())

    run_config = {
        "solids": {
            "solid_should_not_skip": {
                "inputs": {
                    "input_two": {
                        "value": "2"
                    }
                }
            }
        }
    }
    execute_pipeline(my_pipeline, run_config=run_config)
    assert called.get("yup")

    # ensure should_skip_step behaves the same as execute_pipeline
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name="my_pipeline",
                               run_id=make_new_run_id())
    execute_plan(
        create_execution_plan(
            my_pipeline,
            step_keys_to_execute=["one"],
            run_config=run_config,
        ),
        InMemoryPipeline(my_pipeline),
        instance,
        pipeline_run,
        run_config=run_config,
    )
    assert not should_skip_step(
        create_execution_plan(
            my_pipeline,
            step_keys_to_execute=["solid_should_not_skip"],
            run_config=run_config,
        ),
        instance,
        pipeline_run.run_id,
    )
def test_using_intermediates_file_system_for_subplan():
    pipeline = define_inty_pipeline()

    run_config = {"intermediate_storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()
    environment_config = EnvironmentConfig.build(
        pipeline,
        run_config=run_config,
    )

    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)
    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline,
                                             environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        ))

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id)
    assert get_step_output(return_one_step_events, "return_one")
    assert intermediate_storage.has_intermediate(
        None, StepOutputHandle("return_one"))
    assert intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("return_one")).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline,
                                             environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        ))

    assert get_step_output(add_one_step_events, "add_one")
    assert intermediate_storage.has_intermediate(None,
                                                 StepOutputHandle("add_one"))
    assert intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("add_one")).obj == 2
def test_using_s3_for_subplan(s3_bucket):
    pipeline_def = define_inty_pipeline()

    environment_dict = {'storage': {'s3': {'config': {'s3_bucket': s3_bucket}}}}

    run_id = str(uuid.uuid4())

    execution_plan = create_execution_plan(
        pipeline_def, environment_dict=environment_dict, run_config=RunConfig(run_id=run_id)
    )

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']
    instance = DagsterInstance.ephemeral()

    return_one_step_events = list(
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(run_id=run_id),
            step_keys_to_execute=step_keys,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, 'return_one.compute')
    with scoped_pipeline_context(
        pipeline_def, environment_dict, RunConfig(run_id=run_id), instance
    ) as context:
        store = S3IntermediateStore(
            s3_bucket, run_id, s3_session=context.scoped_resources_builder.build().s3.session
        )
        assert store.has_intermediate(context, 'return_one.compute')
        assert store.get_intermediate(context, 'return_one.compute', Int).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(run_id=run_id),
            step_keys_to_execute=['add_one.compute'],
            instance=instance,
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.compute')
    with scoped_pipeline_context(
        pipeline_def, environment_dict, RunConfig(run_id=run_id), instance
    ) as context:
        assert store.has_intermediate(context, 'add_one.compute')
        assert store.get_intermediate(context, 'add_one.compute', Int).obj == 2
def test_execution_plan_wrong_invalid_output_name():
    pipeline_def = define_addy_pipeline()

    old_run_id = str(uuid.uuid4())
    environment_dict = {
        'solids': {
            'add_one': {
                'inputs': {
                    'num': {
                        'value': 3
                    }
                }
            }
        }
    }
    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(storage_mode=RunStorageMode.FILESYSTEM,
                             run_id=old_run_id),
    )

    new_run_id = str(uuid.uuid4())

    run_config = RunConfig(
        run_id=new_run_id,
        reexecution_config=ReexecutionConfig(
            previous_run_id=result.run_id,
            step_output_handles=[
                StepOutputHandle('add_one.compute', 'not_an_output')
            ],
        ),
        storage_mode=RunStorageMode.FILESYSTEM,
    )

    execution_plan = create_execution_plan(pipeline_def,
                                           environment_dict=environment_dict)

    with pytest.raises(DagsterStepOutputNotFoundError) as exc_info:
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=run_config,
            step_keys_to_execute=['add_two.compute'],
        )

    assert str(exc_info.value) == (
        'You specified a step_output_handle in the ReexecutionConfig that does not exist: '
        'Step add_one.compute does not have output not_an_output.')

    assert exc_info.value.step_key == 'add_one.compute'
    assert exc_info.value.output_name == 'not_an_output'
def test_resource_init_failure():
    @resource
    def failing_resource(_init_context):
        raise Exception("Uh oh")

    @solid(required_resource_keys={"failing_resource"})
    def failing_resource_solid(_context):
        pass

    the_pipeline = PipelineDefinition(
        name="test_resource_init_failure",
        solid_defs=[failing_resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={"failing_resource": failing_resource})
        ],
    )

    res = execute_pipeline(the_pipeline, raise_on_error=False)

    event_types = [event.event_type_value for event in res.event_list]
    assert DagsterEventType.PIPELINE_FAILURE.value in event_types

    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(the_pipeline)
    pipeline_run = instance.create_run_for_pipeline(
        the_pipeline, execution_plan=execution_plan)

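    # execute_plan should surface the resource initialization error directly rather than swallowing it.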
    with pytest.raises(
            DagsterResourceFunctionError,
            match=
            "Error executing resource_fn on ResourceDefinition failing_resource",
    ):

        execute_plan(
            execution_plan,
            InMemoryPipeline(the_pipeline),
            pipeline_run=pipeline_run,
            instance=instance,
        )

    # Test that the pipeline init failure event fires even if we are raising errors
    events = []
    try:
        for event in execute_pipeline_iterator(the_pipeline):
            events.append(event)
    except DagsterResourceFunctionError:
        pass

    event_types = [event.event_type_value for event in events]
    assert DagsterEventType.PIPELINE_FAILURE.value in event_types
def test_using_file_system_for_subplan_multiprocessing():

    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.local_temp()

    pipeline = reconstructable(define_inty_pipeline)

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline.get_definition(), execution_plan=execution_plan
    )

    assert execution_plan.get_step_by_key('return_one.compute')

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['return_one.compute']),
            instance,
            environment_dict=dict(environment_dict, execution={'multiprocess': {}}),
            pipeline_run=pipeline_run,
        )
    )

    store = build_fs_intermediate_store(instance.intermediates_directory, pipeline_run.run_id)
    intermediates_manager = IntermediateStoreIntermediatesManager(store)

    assert get_step_output(return_one_step_events, 'return_one.compute')
    assert intermediates_manager.has_intermediate(None, StepOutputHandle('return_one.compute'))
    assert (
        intermediates_manager.get_intermediate(
            None, Int, StepOutputHandle('return_one.compute')
        ).obj
        == 1
    )

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            instance,
            environment_dict=dict(environment_dict, execution={'multiprocess': {}}),
            pipeline_run=pipeline_run,
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.compute')
    assert intermediates_manager.has_intermediate(None, StepOutputHandle('add_one.compute'))
    assert (
        intermediates_manager.get_intermediate(None, Int, StepOutputHandle('add_one.compute')).obj
        == 2
    )
def test_using_file_system_for_subplan_multiprocessing():
    pipeline = define_inty_pipeline()

    environment_dict = {'storage': {'filesystem': {}}}

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']

    run_id = str(uuid.uuid4())

    return_one_step_events = list(
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(
                run_id=run_id,
                executor_config=MultiprocessExecutorConfig(
                    handle=ExecutionTargetHandle.for_pipeline_fn(define_inty_pipeline)
                ),
            ),
            step_keys_to_execute=step_keys,
        )
    )

    store = FileSystemIntermediateStore(run_id)

    assert get_step_output(return_one_step_events, 'return_one.compute')
    assert store.has_intermediate(None, 'return_one.compute')
    assert store.get_intermediate(None, 'return_one.compute', Int) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(
                run_id=run_id,
                executor_config=MultiprocessExecutorConfig(
                    handle=ExecutionTargetHandle.for_pipeline_fn(define_inty_pipeline)
                ),
            ),
            step_keys_to_execute=['add_one.compute'],
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.compute')
    assert store.has_intermediate(None, 'add_one.compute')
    assert store.get_intermediate(None, 'add_one.compute', Int) == 2
def test_using_file_system_for_subplan_missing_input():
    pipeline = define_inty_pipeline()
    environment_dict = {'storage': {'filesystem': {}}}

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)
    pipeline_run = PipelineRun.create_empty_run(pipeline.name, make_new_run_id())

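    # Executing 'add_one.compute' alone fails because its upstream 'return_one.compute' output was never produced.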
    with pytest.raises(DagsterStepOutputNotFoundError):
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            DagsterInstance.ephemeral(),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
        )
def test_execution_plan_reexecution_with_in_memory():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    old_run_id = make_new_run_id()
    environment_dict = {
        'solids': {
            'add_one': {
                'inputs': {
                    'num': {
                        'value': 3
                    }
                }
            }
        }
    }
    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(run_id=old_run_id),
        instance=instance,
    )

    assert result.success

    ## re-execute add_two

    new_run_id = make_new_run_id()

    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name,
        run_id=new_run_id,
        environment_dict=environment_dict,
        mode='default',
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
    )

    execution_plan = create_execution_plan(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(run_id=new_run_id, previous_run_id=result.run_id),
    )

    with pytest.raises(DagsterInvariantViolationError):
        execute_plan(
            execution_plan.build_subset_plan(['add_two.compute']),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        )
def test_fan_out_should_skip_step():
    @solid(output_defs=[
        OutputDefinition(Int, "out_1", is_required=False),
        OutputDefinition(Int, "out_2", is_required=False),
        OutputDefinition(Int, "out_3", is_required=False),
    ])
    def foo(_):
        yield Output(1, "out_1")

    @solid
    def bar(_, input_arg):
        return input_arg

    @pipeline
    def optional_outputs():
        foo_res = foo()
        # pylint: disable=no-member
        bar.alias("bar_1")(input_arg=foo_res.out_1)
        bar.alias("bar_2")(input_arg=foo_res.out_2)
        bar.alias("bar_3")(input_arg=foo_res.out_3)

    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name="optional_outputs",
                               run_id=make_new_run_id())
    execute_plan(
        create_execution_plan(optional_outputs, step_keys_to_execute=["foo"]),
        InMemoryPipeline(optional_outputs),
        instance,
        pipeline_run,
    )

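    # Only 'out_1' was yielded by foo, so 'bar_1' should run while 'bar_2' and 'bar_3' should be skipped.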
    assert not should_skip_step(
        create_execution_plan(optional_outputs, step_keys_to_execute=["bar_1"
                                                                      ]),
        instance,
        pipeline_run.run_id,
    )
    assert should_skip_step(
        create_execution_plan(optional_outputs,
                              step_keys_to_execute=["bar_2"]),
        instance,
        pipeline_run.run_id,
    )
    assert should_skip_step(
        create_execution_plan(optional_outputs,
                              step_keys_to_execute=["bar_3"]),
        instance,
        pipeline_run.run_id,
    )
def test_using_file_system_for_subplan_multiprocessing():

    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.local_temp()

    execution_plan = create_execution_plan(
        ExecutionTargetHandle.for_pipeline_fn(
            define_inty_pipeline).build_pipeline_definition(),
        environment_dict=environment_dict,
    )

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']

    run_id = str(uuid.uuid4())
    instance.create_empty_run(run_id, execution_plan.pipeline_def.name)

    return_one_step_events = list(
        execute_plan(
            execution_plan,
            instance,
            environment_dict=dict(environment_dict,
                                  execution={'multiprocess': {}}),
            run_config=RunConfig(run_id=run_id),
            step_keys_to_execute=step_keys,
        ))

    store = build_fs_intermediate_store(instance.intermediates_directory,
                                        run_id)

    assert get_step_output(return_one_step_events, 'return_one.compute')
    assert store.has_intermediate(None, 'return_one.compute')
    assert store.get_intermediate(None, 'return_one.compute', Int).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan,
            instance,
            environment_dict=dict(environment_dict,
                                  execution={'multiprocess': {}}),
            run_config=RunConfig(run_id=run_id),
            step_keys_to_execute=['add_one.compute'],
        ))

    assert get_step_output(add_one_step_events, 'add_one.compute')
    assert store.has_intermediate(None, 'add_one.compute')
    assert store.get_intermediate(None, 'add_one.compute', Int).obj == 2
def test_using_intermediates_to_override():
    pipeline = define_inty_pipeline()

    run_config = {
        'storage': {
            'filesystem': {}
        },
        'intermediate_storage': {
            'in_memory': {}
        }
    }

    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(
        pipeline,
        run_config=run_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)
    assert execution_plan.get_step_by_key('return_one.compute')

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['return_one.compute']),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        ))

    store = build_fs_intermediate_store(instance.intermediates_directory,
                                        pipeline_run.run_id)
    intermediates_manager = IntermediateStoreIntermediatesManager(store)
    assert get_step_output(return_one_step_events, 'return_one.compute')
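    # With in_memory intermediate storage overriding filesystem storage, no intermediate should land in the filesystem store.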
    assert not intermediates_manager.has_intermediate(
        None, StepOutputHandle('return_one.compute'))
def test_using_file_system_for_subplan_missing_input():
    pipeline = define_inty_pipeline()
    run_config = {'storage': {'filesystem': {}}}

    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline, run_config=run_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)

    with pytest.raises(DagsterStepOutputNotFoundError):
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
def test_execution_plan_simple_two_steps():
    pipeline_def = define_two_int_pipeline()
    execution_plan = create_execution_plan(pipeline_def)

    assert isinstance(execution_plan.steps, list)
    assert len(execution_plan.steps) == 2

    assert execution_plan.get_step_by_key('return_one.compute')
    assert execution_plan.get_step_by_key('add_one.compute')

    pipeline_run = PipelineRun.create_empty_run(pipeline_def.name, make_new_run_id())
    events = execute_plan(
        execution_plan, pipeline_run=pipeline_run, instance=DagsterInstance.ephemeral()
    )
    # start, out, success, start, input, out, success
    assert [e.event_type_value for e in events] == [
        'STEP_START',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'STEP_START',
        'STEP_INPUT',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
    ]

    output_events = [e for e in events if e.event_type_value == 'STEP_OUTPUT']

    assert output_events[0].step_key == 'return_one.compute'
    assert output_events[0].is_successful_output

    assert output_events[1].step_key == 'add_one.compute'
    assert output_events[1].is_successful_output
def query_on_dask_worker(
    dependencies,
    recon_pipeline,
    pipeline_run,
    run_config,
    step_keys,
    mode,
    instance_ref,
):  # pylint: disable=unused-argument
    """Note that we need to pass "dependencies" to ensure Dask sequences futures during task
    scheduling, even though we do not use this argument within the function.
    """

    with DagsterInstance.from_ref(instance_ref) as instance:
        execution_plan = create_execution_plan(
            recon_pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute),
            run_config=run_config,
            step_keys_to_execute=step_keys,
            mode=mode,
        )

        return execute_plan(execution_plan,
                            instance,
                            pipeline_run,
                            run_config=run_config)
def test_reentrant_execute_plan():
    called = {}

    @solid
    def has_tag(context):
        assert context.has_tag("foo")
        assert context.get_tag("foo") == "bar"
        called["yup"] = True

    pipeline_def = PipelineDefinition(name="has_tag_pipeline",
                                      solid_defs=[has_tag])
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline_def)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        tags={"foo": "bar"},
        execution_plan=execution_plan)
    step_events = execute_plan(execution_plan,
                               pipeline_run=pipeline_run,
                               instance=instance)

    assert called["yup"]
    assert len(step_events) == 4

    assert step_events[1].logging_tags["foo"] == "bar"
def test_execution_plan_for_composite_solid_with_config_mapping():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {
                    'foo': 'baz',
                    'bar': 3
                }
            }
        }
    }
    execution_plan = create_execution_plan(
        composite_pipeline_with_config_mapping,
        environment_dict=environment_dict)
    instance = DagsterInstance.ephemeral()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=composite_pipeline_with_config_mapping,
        execution_plan=execution_plan)

    events = execute_plan(
        execution_plan,
        environment_dict=environment_dict,
        pipeline_run=pipeline_run,
        instance=instance,
    )

    assert [e.event_type_value for e in events] == [
        'STEP_START',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'STEP_START',
        'STEP_INPUT',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
    ]
def test_execution_plan_for_composite_solid_with_config_mapping():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {
                    'foo': 'baz',
                    'bar': 3
                }
            }
        }
    }
    execution_plan = create_execution_plan(
        composite_pipeline_with_config_mapping,
        environment_dict=environment_dict)
    pipeline_run = PipelineRun.create_empty_run(
        composite_pipeline_with_config_mapping.name, str(uuid.uuid4()))

    events = execute_plan(
        execution_plan,
        environment_dict=environment_dict,
        pipeline_run=pipeline_run,
        instance=DagsterInstance.ephemeral(),
    )

    assert [e.event_type_value for e in events] == [
        'ENGINE_EVENT',
        'STEP_START',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'STEP_START',
        'STEP_INPUT',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'ENGINE_EVENT',
    ]
def test_execution_plan_two_outputs():
    @solid(output_defs=[
        OutputDefinition(Int, "num_one"),
        OutputDefinition(Int, "num_two")
    ])
    def return_one_two(_context):
        yield Output(1, "num_one")
        yield Output(2, "num_two")

    pipeline_def = PipelineDefinition(name="return_one_two_pipeline",
                                      solid_defs=[return_one_two])

    execution_plan = create_execution_plan(pipeline_def)

    instance = DagsterInstance.ephemeral()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan)
    step_events = execute_plan(execution_plan,
                               pipeline_run=pipeline_run,
                               instance=instance)

    assert step_events[1].step_key == "return_one_two.compute"
    assert step_events[1].step_output_data.output_name == "num_one"
    assert step_events[3].step_key == "return_one_two.compute"
    assert step_events[3].step_output_data.output_name == "num_two"
def test_using_file_system_for_subplan_missing_input():
    pipeline = define_inty_pipeline()
    run_config = {"storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()
    environment_config = EnvironmentConfig.build(
        pipeline,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)

    events = execute_plan(
        execution_plan.build_subset_plan(["add_one"], pipeline,
                                         environment_config),
        InMemoryPipeline(pipeline),
        instance,
        run_config=run_config,
        pipeline_run=pipeline_run,
    )
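    # The 'add_one' step fails because its upstream 'return_one' output was never written.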
    failures = [
        event for event in events if event.event_type_value == "STEP_FAILURE"
    ]
    assert len(failures) == 1
    assert failures[0].step_key == "add_one"
    assert "DagsterStepOutputNotFoundError" in failures[
        0].event_specific_data.error.message
def test_execution_plan_simple_two_steps():
    pipeline_def = define_two_int_pipeline()
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline_def)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan)

    assert isinstance(execution_plan.steps, list)
    assert len(execution_plan.steps) == 2

    assert execution_plan.get_step_by_key("return_one.compute")
    assert execution_plan.get_step_by_key("add_one.compute")

    events = execute_plan(execution_plan,
                          pipeline_run=pipeline_run,
                          instance=instance)
    step_starts = find_events(events, event_type="STEP_START")
    assert len(step_starts) == 2
    step_successes = find_events(events, event_type="STEP_SUCCESS")
    assert len(step_successes) == 2

    output_events = find_events(events, event_type="STEP_OUTPUT")

    assert output_events[0].step_key == "return_one.compute"
    assert output_events[0].is_successful_output

    assert output_events[1].step_key == "add_one.compute"
    assert output_events[1].is_successful_output
def test_using_file_system_for_subplan_invalid_step():
    pipeline = define_inty_pipeline()

    environment_dict = {'storage': {'filesystem': {}}}

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)

    run_id = str(uuid.uuid4())

    with pytest.raises(DagsterExecutionStepNotFoundError):
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(run_id=run_id),
            step_keys_to_execute=['nope'],
        )
def test_using_intermediates_to_override():
    pipeline = define_inty_pipeline()

    run_config = {
        "storage": {
            "filesystem": {}
        },
        "intermediate_storage": {
            "in_memory": {}
        }
    }

    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(
        pipeline,
        run_config=run_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)
    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"]),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        ))

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id)
    assert get_step_output(return_one_step_events, "return_one")
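    # Since in_memory intermediate storage overrides the filesystem storage, nothing should be persisted to disk.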
    assert not intermediate_storage.has_intermediate(
        None, StepOutputHandle("return_one"))
def test_execution_plan_for_composite_solid_with_config_mapping():
    run_config = {
        "solids": {
            "composite_with_nested_config_solid_and_config_mapping": {
                "config": {
                    "foo": "baz",
                    "bar": 3
                }
            }
        }
    }
    execution_plan = create_execution_plan(
        composite_pipeline_with_config_mapping, run_config=run_config)
    instance = DagsterInstance.ephemeral()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=composite_pipeline_with_config_mapping,
        execution_plan=execution_plan)

    events = execute_plan(
        execution_plan,
        run_config=run_config,
        pipeline_run=pipeline_run,
        instance=instance,
    )

    assert [e.event_type_value for e in events] == [
        "STEP_START",
        "STEP_OUTPUT",
        "STEP_SUCCESS",
        "STEP_START",
        "STEP_INPUT",
        "STEP_OUTPUT",
        "STEP_SUCCESS",
    ]