Example #1
def yield_empty_pipeline_context(run_id=None, instance=None):
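    # Wrap an empty PipelineDefinition in an InMemoryPipeline, register a run
    # for it on the (possibly ephemeral) instance, and yield a scoped
    # execution context for that run.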
    pipeline = InMemoryPipeline(PipelineDefinition([]))
    pipeline_def = pipeline.get_definition()
    instance = check.opt_inst_param(instance,
                                    "instance",
                                    DagsterInstance,
                                    default=DagsterInstance.ephemeral())

    execution_plan = create_execution_plan(pipeline)

    pipeline_run = instance.create_run(
        pipeline_name="<empty>",
        run_id=run_id,
        run_config=None,
        mode=None,
        solids_to_execute=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
        execution_plan_snapshot=snapshot_from_execution_plan(
            execution_plan, pipeline_def.get_pipeline_snapshot_id()),
        parent_pipeline_snapshot=pipeline_def.get_parent_pipeline_snapshot(),
    )
    with scoped_pipeline_context(execution_plan, {}, pipeline_run,
                                 instance) as context:
        yield context
Example #2
def test_execution_plan_reexecution_with_in_memory():
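    # Re-executing the "add_two" subset of a run whose intermediates were kept
    # in memory is expected to raise: nothing was persisted for downstream reuse.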
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    run_config = {"solids": {"add_one": {"inputs": {"num": {"value": 3}}}}}
    result = execute_pipeline(pipeline_def,
                              run_config=run_config,
                              instance=instance)

    assert result.success

    # re-execute add_two

    environment_config = EnvironmentConfig.build(pipeline_def,
                                                 run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline_def),
                                         environment_config)

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        execution_plan=execution_plan,
        run_config=run_config,
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
    )

    with pytest.raises(DagsterInvariantViolationError):
        execute_plan(
            execution_plan.build_subset_plan(["add_two"], pipeline_def,
                                             environment_config),
            InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
Example #3
def test_using_file_system_for_subplan_missing_input():
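    # Executing only "add_one" as a subset plan produces a STEP_FAILURE: its
    # input comes from "return_one", which never ran, so the step output is
    # missing from filesystem storage.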
    pipeline = define_inty_pipeline()
    run_config = {"storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()
    environment_config = EnvironmentConfig.build(
        pipeline,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)

    events = execute_plan(
        execution_plan.build_subset_plan(["add_one"], pipeline,
                                         environment_config),
        InMemoryPipeline(pipeline),
        instance,
        run_config=run_config,
        pipeline_run=pipeline_run,
    )
    failures = [
        event for event in events if event.event_type_value == "STEP_FAILURE"
    ]
    assert len(failures) == 1
    assert failures[0].step_key == "add_one"
    assert "DagsterStepOutputNotFoundError" in failures[
        0].event_specific_data.error.message
Example #4
def test_using_file_system_for_subplan_invalid_step():
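    # Asking for an unknown step key in a subset plan raises
    # DagsterExecutionStepNotFoundError before anything executes.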
    pipeline = define_inty_pipeline()

    run_config = {"storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()

    environment_config = EnvironmentConfig.build(
        pipeline,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)

    with pytest.raises(DagsterExecutionStepNotFoundError):
        execute_plan(
            execution_plan.build_subset_plan(["nope.compute"], pipeline,
                                             environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
Example #5
def test_execute_canceled_state():
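    # A run already in CANCELED state must never start executing: both
    # execute_run and execute_run_iterator emit a single "not starting" message.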
    def event_callback(_record):
        pass

    with instance_for_test() as instance:
        pipeline_def = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[
                ModeDefinition(
                    resource_defs={
                        "a": resource_a,
                        "b": resource_b
                    },
                    logger_defs={
                        "callback": construct_event_logger(event_callback)
                    },
                )
            ],
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={
                "loggers": {
                    "callback": {}
                }
            },
            mode="default",
        ).with_status(PipelineRunStatus.CANCELED)

        execute_run(InMemoryPipeline(pipeline_def),
                    pipeline_run,
                    instance=instance)

        logs = instance.all_logs(pipeline_run.run_id)

        assert len(logs) == 1
        assert (
            "Not starting execution since the run was canceled before execution could start"
            in logs[0].message)

        iter_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={
                "loggers": {
                    "callback": {}
                }
            },
            mode="default",
        ).with_status(PipelineRunStatus.CANCELED)

        iter_events = list(
            execute_run_iterator(InMemoryPipeline(pipeline_def),
                                 iter_run,
                                 instance=instance))

        assert len(iter_events) == 1
        assert (
            "Not starting execution since the run was canceled before execution could start"
            in iter_events[0].message)
Example #6
def test_using_file_system_for_subplan_missing_input():
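    # Filesystem-backed variant: running "add_one" alone now fails while loading
    # its input (DagsterExecutionLoadInputError) because the upstream
    # "return_one" output was never written.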
    pipeline = define_inty_pipeline(using_file_system=True)

    instance = DagsterInstance.ephemeral()
    resolved_run_config = ResolvedRunConfig.build(pipeline)
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        resolved_run_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)

    events = execute_plan(
        execution_plan.build_subset_plan(["add_one"], pipeline,
                                         resolved_run_config),
        InMemoryPipeline(pipeline),
        instance,
        pipeline_run=pipeline_run,
    )
    failures = [
        event for event in events if event.event_type_value == "STEP_FAILURE"
    ]
    assert len(failures) == 1
    assert failures[0].step_key == "add_one"
    assert "DagsterExecutionLoadInputError" in failures[
        0].event_specific_data.error.message
Example #7
def test_subset_for_execution():
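    # The "*add_nums" selection picks add_nums plus all of its upstream solids.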
    pipeline = InMemoryPipeline(foo_pipeline)
    sub_pipeline = pipeline.subset_for_execution(["*add_nums"])
    assert sub_pipeline.solid_selection == ["*add_nums"]
    assert sub_pipeline.solids_to_execute == {"add_nums", "return_one", "return_two"}

    result = execute_pipeline(sub_pipeline)
    assert result.success
Example #8
def test_execute_run_iterator():
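    # Closing the iterator mid-run triggers generator cleanup: the run is marked
    # failed (GeneratorExit surfaces in the failure event) while resources are
    # still torn down ("CLEANING A"/"CLEANING B").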
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    with instance_for_test() as instance:
        pipeline_def = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[
                ModeDefinition(
                    resource_defs={"a": resource_a, "b": resource_b},
                    logger_defs={"callback": construct_event_logger(event_callback)},
                )
            ],
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def, run_config={"loggers": {"callback": {}}}, mode="default",
        )

        iterator = execute_run_iterator(
            InMemoryPipeline(pipeline_def), pipeline_run, instance=instance
        )

        event_type = None
        while event_type != "STEP_START":
            event = next(iterator)
            event_type = event.event_type_value

        iterator.close()
        events = [record.dagster_event for record in records if record.is_dagster_event]
        messages = [record.user_message for record in records if not record.is_dagster_event]
        pipeline_failure_events = [event for event in events if event.is_pipeline_failure]
        assert len(pipeline_failure_events) == 1
        assert "GeneratorExit" in pipeline_failure_events[0].pipeline_failure_data.error.message
        assert len([message for message in messages if message == "CLEANING A"]) > 0
        assert len([message for message in messages if message == "CLEANING B"]) > 0

        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def, run_config={"loggers": {"callback": {}}}, mode="default",
        ).with_status(PipelineRunStatus.SUCCESS)

        with pytest.raises(
            check.CheckError,
            match=r"Pipeline run basic_resource_pipeline \({}\) in state"
            r" PipelineRunStatus.SUCCESS, expected PipelineRunStatus.NOT_STARTED".format(
                pipeline_run.run_id
            ),
        ):
            execute_run_iterator(InMemoryPipeline(pipeline_def), pipeline_run, instance=instance)
Example #9
def test_using_intermediates_file_system_for_subplan():
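    # Execute "return_one" and "add_one" as separate subset plans and verify
    # each output can be read back from filesystem intermediate storage.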
    pipeline = define_inty_pipeline()

    run_config = {"intermediate_storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()
    environment_config = EnvironmentConfig.build(
        pipeline,
        run_config=run_config,
    )

    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)
    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline,
                                             environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        ))

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id)
    assert get_step_output(return_one_step_events, "return_one")
    assert intermediate_storage.has_intermediate(
        None, StepOutputHandle("return_one"))
    assert intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("return_one")).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline,
                                             environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        ))

    assert get_step_output(add_one_step_events, "add_one")
    assert intermediate_storage.has_intermediate(None,
                                                 StepOutputHandle("add_one"))
    assert intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("add_one")).obj == 2
Example #10
def create_test_pipeline_execution_context(logger_defs=None):
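    # Hand-assemble a PlanExecutionContext for an empty pipeline, wiring up the
    # log manager, executor, scoped resources, and in-memory intermediate storage.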
    from dagster.core.storage.intermediate_storage import build_in_mem_intermediates_storage

    loggers = check.opt_dict_param(
        logger_defs, "logger_defs", key_type=str, value_type=LoggerDefinition
    )
    mode_def = ModeDefinition(logger_defs=loggers)
    pipeline_def = PipelineDefinition(
        name="test_legacy_context", solid_defs=[], mode_defs=[mode_def]
    )
    run_config = {"loggers": {key: {} for key in loggers}}
    pipeline_run = PipelineRun(pipeline_name="test_legacy_context", run_config=run_config)
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline=pipeline_def, run_config=run_config)
    creation_data = create_context_creation_data(
        InMemoryPipeline(pipeline_def), execution_plan, run_config, pipeline_run, instance
    )
    log_manager = create_log_manager(creation_data)
    scoped_resources_builder = ScopedResourcesBuilder()
    executor = create_executor(creation_data)

    return PlanExecutionContext(
        plan_data=create_plan_data(creation_data, True, executor.retries),
        execution_data=create_execution_data(
            context_creation_data=creation_data,
            scoped_resources_builder=scoped_resources_builder,
            intermediate_storage=build_in_mem_intermediates_storage(pipeline_run.run_id),
        ),
        log_manager=log_manager,
        output_capture=None,
    )
Example #11
def test_reentrant_execute_plan():
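    # Tags set on the run are visible through the step context and attached to
    # step events as logging tags.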
    called = {}

    @solid
    def has_tag(context):
        assert context.has_tag("foo")
        assert context.get_tag("foo") == "bar"
        called["yup"] = True

    pipeline_def = PipelineDefinition(name="has_tag_pipeline", solid_defs=[has_tag])
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline_def)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, tags={"foo": "bar"}, execution_plan=execution_plan
    )
    step_events = execute_plan(
        execution_plan, InMemoryPipeline(pipeline_def), pipeline_run=pipeline_run, instance=instance
    )

    assert called["yup"]

    assert (
        find_events(step_events, event_type="STEP_OUTPUT")[0].logging_tags["pipeline_tags"]
        == "{'foo': 'bar'}"
    )
Example #12
def test_execution_plan_simple_two_steps():
    pipeline_def = define_two_int_pipeline()
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline_def)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan
    )

    assert isinstance(execution_plan.steps, list)
    assert len(execution_plan.steps) == 2

    assert execution_plan.get_step_by_key("return_one")
    assert execution_plan.get_step_by_key("add_one")

    events = execute_plan(
        execution_plan, InMemoryPipeline(pipeline_def), pipeline_run=pipeline_run, instance=instance
    )
    step_starts = find_events(events, event_type="STEP_START")
    assert len(step_starts) == 2
    step_successes = find_events(events, event_type="STEP_SUCCESS")
    assert len(step_successes) == 2

    output_events = find_events(events, event_type="STEP_OUTPUT")

    assert output_events[0].step_key == "return_one"
    assert output_events[0].is_successful_output

    assert output_events[1].step_key == "add_one"
    assert output_events[1].is_successful_output
Example #13
def test_execution_plan_wrong_run_id():
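    # Pointing parent_run_id/root_run_id at a run id that is absent from the
    # instance raises DagsterRunNotFoundError when the plan executes.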
    pipeline_def = define_addy_pipeline(using_file_system=True)

    unrun_id = "not_a_run"
    run_config = {"solids": {"add_one": {"inputs": {"num": {"value": 3}}}}}

    instance = DagsterInstance.ephemeral()

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        execution_plan=execution_plan,
        run_config=run_config,
        parent_run_id=unrun_id,
        root_run_id=unrun_id,
    )

    with pytest.raises(DagsterRunNotFoundError) as exc_info:
        execute_plan(
            execution_plan,
            InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )

    assert str(exc_info.value) == "Run id {} set as parent run id was not found in instance".format(
        unrun_id
    )

    assert exc_info.value.invalid_run_id == unrun_id
Example #14
def core_execute_in_process(
    node: NodeDefinition,
    run_config: Dict[str, Any],
    ephemeral_pipeline: PipelineDefinition,
    instance: Optional[DagsterInstance],
    output_capturing_enabled: bool,
    raise_on_error: bool,
    run_tags: Optional[Dict[str, Any]] = None,
    run_id: Optional[str] = None,
) -> ExecuteInProcessResult:
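    # Shared implementation for in-process execution: build a plan, create the
    # run (on an ephemeral instance when none is supplied), drive the
    # orchestration iterator, and collect events and captured outputs.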
    pipeline_def = ephemeral_pipeline
    mode_def = pipeline_def.get_mode_definition()
    pipeline = InMemoryPipeline(pipeline_def)

    execution_plan = create_execution_plan(
        pipeline,
        run_config=run_config,
        mode=mode_def.name,
        instance_ref=instance.get_ref() if instance and instance.is_persistent else None,
    )

    output_capture: Dict[StepOutputHandle, Any] = {}

    with ephemeral_instance_if_missing(instance) as execute_instance:
        pipeline_run = execute_instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config=run_config,
            mode=mode_def.name,
            tags={**pipeline_def.tags, **(run_tags or {})},
            run_id=run_id,
        )
        run_id = pipeline_run.run_id

        execute_run_iterable = ExecuteRunWithPlanIterable(
            execution_plan=execution_plan,
            iterator=pipeline_execution_iterator,
            execution_context_manager=PlanOrchestrationContextManager(
                context_event_generator=orchestration_context_event_generator,
                pipeline=pipeline,
                execution_plan=execution_plan,
                pipeline_run=pipeline_run,
                instance=execute_instance,
                run_config=run_config,
                executor_defs=None,
                output_capture=output_capture if output_capturing_enabled else None,
                raise_on_error=raise_on_error,
            ),
        )

        event_list = []

        for event in execute_run_iterable:
            event_list.append(event)

            if event.is_pipeline_event:
                execute_instance.handle_run_event(run_id, event)

    return ExecuteInProcessResult(
        node, event_list, execute_instance.get_run_by_id(run_id), output_capture
    )
Example #15
def test_execute_run_bad_state():
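    # execute_run refuses a run whose status is already SUCCESS; only runs in
    # NOT_STARTED state may be started.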
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    with instance_for_test() as instance:
        pipeline_def = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[
                ModeDefinition(
                    resource_defs={"a": resource_a, "b": resource_b},
                    logger_defs={"callback": construct_event_logger(event_callback)},
                )
            ],
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def, run_config={"loggers": {"callback": {}}}, mode="default",
        ).with_status(PipelineRunStatus.SUCCESS)

        with pytest.raises(
            check.CheckError,
            match=r"Pipeline run basic_resource_pipeline \({}\) in state"
            r" PipelineRunStatus.SUCCESS, expected PipelineRunStatus.NOT_STARTED".format(
                pipeline_run.run_id
            ),
        ):
            execute_run(InMemoryPipeline(pipeline_def), pipeline_run, instance=instance)
Example #16
def test_execution_plan_two_outputs():
    @solid(output_defs=[
        OutputDefinition(Int, "num_one"),
        OutputDefinition(Int, "num_two")
    ])
    def return_one_two(_context):
        yield Output(1, "num_one")
        yield Output(2, "num_two")

    pipeline_def = PipelineDefinition(name="return_one_two_pipeline",
                                      solid_defs=[return_one_two])

    execution_plan = create_execution_plan(pipeline_def)

    instance = DagsterInstance.ephemeral()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan)
    events = execute_plan(execution_plan,
                          InMemoryPipeline(pipeline_def),
                          pipeline_run=pipeline_run,
                          instance=instance)

    output_events = find_events(events, event_type="STEP_OUTPUT")
    assert output_events[0].step_key == "return_one_two"
    assert output_events[0].step_output_data.output_name == "num_one"
    assert output_events[1].step_key == "return_one_two"
    assert output_events[1].step_output_data.output_name == "num_two"
Example #17
def _synthesize_events(solids_fn, run_id=None, check_success=True):
    events = []

    def _append_event(event):
        events.append(event)

    @pipeline(mode_defs=[_mode_def(_append_event)])
    def a_pipe():
        solids_fn()

    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            a_pipe,
            run_id=run_id,
            run_config={"loggers": {
                "callback": {},
                "console": {}
            }})

        result = execute_run(InMemoryPipeline(a_pipe), pipeline_run, instance)

        if check_success:
            assert result.success

        return events, result
Example #18
def test_compile():
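    # Plans built from nested composite solids get fully qualified step keys;
    # coalesce_execution_steps groups the plan's steps under those keys.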
    environment_config = EnvironmentConfig.build(
        composition,
        {"solids": {
            "add_four": {
                "inputs": {
                    "num": {
                        "value": 1
                    }
                }
            }
        }},
    )

    plan = ExecutionPlan.build(InMemoryPipeline(composition),
                               environment_config)

    res = coalesce_execution_steps(plan)

    assert set(res.keys()) == {
        "add_four.add_two.add_one",
        "add_four.add_two.add_one_2",
        "add_four.add_two_2.add_one",
        "add_four.add_two_2.add_one_2",
        "div_four.div_two",
        "div_four.div_two_2",
        "int_to_float",
    }
Example #19
def synthesize_events(solids_fn, run_id=None):
    events = []

    def _append_event(event):
        events.append(event)

    @pipeline(mode_defs=[mode_def(_append_event)])
    def a_pipe():
        solids_fn()

    instance = DagsterInstance.local_temp()

    pipeline_run = instance.create_run_for_pipeline(
        a_pipe,
        run_id=run_id,
        run_config={"loggers": {
            "callback": {},
            "console": {}
        }})

    result = execute_run(InMemoryPipeline(a_pipe), pipeline_run, instance)

    assert result.success

    return events, result
Example #20
def _check_pipeline(pipeline):
    # backcompat
    if isinstance(pipeline, PipelineDefinition):
        pipeline = InMemoryPipeline(pipeline)

    check.inst_param(pipeline, "pipeline", IPipeline)
    return pipeline
Example #21
def test_single_step_resource_event_logs():
    # Test that logs are attributed to the right step for single-step plans,
    # which often represent sub-plans in a multiprocess execution environment.
    # Likely to be rewritten with the refactor detailed in
    # https://github.com/dagster-io/dagster/issues/2239
    USER_SOLID_MESSAGE = "I AM A SOLID"
    USER_RESOURCE_MESSAGE = "I AM A RESOURCE"
    events = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        events.append(record)

    @solid(required_resource_keys={"a"})
    def resource_solid(context):
        context.log.info(USER_SOLID_MESSAGE)

    @resource
    def resource_a(context):
        context.log.info(USER_RESOURCE_MESSAGE)
        return "A"

    the_pipeline = PipelineDefinition(
        name="resource_logging_pipeline",
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={"a": resource_a},
                logger_defs={
                    "callback": construct_event_logger(event_callback)
                },
            )
        ],
    )

    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            the_pipeline,
            run_config={"loggers": {
                "callback": {}
            }},
            step_keys_to_execute=["resource_solid"],
        )

        result = execute_run(InMemoryPipeline(the_pipeline), pipeline_run,
                             instance)

        assert result.success
        log_messages = [
            event for event in events if isinstance(event, EventRecord)
            and event.level == coerce_valid_log_level("INFO")
        ]
        assert len(log_messages) == 2

        resource_log_message = next(
            iter([
                message for message in log_messages
                if message.user_message == USER_RESOURCE_MESSAGE
            ]))
        assert resource_log_message.step_key == "resource_solid"
Example #22
def test_clean_event_generator_exit():
    """Testing for generator cleanup
    (see https://amir.rachum.com/blog/2017/03/03/generator-cleanup/)
    """
    from dagster.core.execution.context.init import InitResourceContext
    from dagster.core.definitions.resource import ScopedResourcesBuilder

    pipeline_def = gen_basic_resource_pipeline()
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline_def)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan)
    log_manager = DagsterLogManager(run_id=pipeline_run.run_id,
                                    logging_tags={},
                                    loggers=[])
    resolved_run_config = ResolvedRunConfig.build(pipeline_def)
    execution_plan = create_execution_plan(pipeline_def)

    resource_name, resource_def = next(
        iter(pipeline_def.get_default_mode().resource_defs.items()))
    resource_context = InitResourceContext(
        resource_def=resource_def,
        resources=ScopedResourcesBuilder().build(None),
        resource_config=None,
        pipeline_run=pipeline_run,
        instance=instance,
    )
    generator = single_resource_event_generator(resource_context,
                                                resource_name, resource_def)
    next(generator)
    generator.close()

    resource_defs = pipeline_def.get_mode_definition(resolved_run_config.mode)

    generator = resource_initialization_event_generator(
        resource_defs=resource_defs,
        resource_configs=resolved_run_config.resources,
        log_manager=log_manager,
        execution_plan=execution_plan,
        pipeline_run=pipeline_run,
        resource_keys_to_init={"a"},
        instance=instance,
        emit_persistent_events=True,
        pipeline_def_for_backwards_compat=pipeline_def,
    )
    next(generator)
    generator.close()

    generator = PlanExecutionContextManager(  # pylint: disable=protected-access
        pipeline=InMemoryPipeline(pipeline_def),
        execution_plan=execution_plan,
        run_config={},
        pipeline_run=pipeline_run,
        instance=instance,
        retry_mode=RetryMode.DISABLED,
        scoped_resources_builder_cm=resource_initialization_manager,
    ).get_generator()
    next(generator)
    generator.close()
Example #23
def execute_pipeline_with_steps(pipeline_def, step_keys_to_execute=None):
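    # Helper: build a (possibly partial) plan and execute it against a fresh
    # ephemeral instance, returning the raw list of step events.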
    plan = create_execution_plan(pipeline_def, step_keys_to_execute=step_keys_to_execute)
    with DagsterInstance.ephemeral() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            step_keys_to_execute=step_keys_to_execute,
        )
        return execute_plan(plan, InMemoryPipeline(pipeline_def), instance, pipeline_run)
Example #24
def test_using_file_system_for_subplan():
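    # Happy path: execute each step as its own subset plan and read the pickled
    # step outputs straight off the instance's storage directory.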
    pipeline = define_inty_pipeline(using_file_system=True)

    instance = DagsterInstance.ephemeral()

    resolved_run_config = ResolvedRunConfig.build(pipeline)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline),
                                         resolved_run_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)
    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline,
                                             resolved_run_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        ))

    assert get_step_output(return_one_step_events, "return_one")
    with open(
            os.path.join(instance.storage_directory(), pipeline_run.run_id,
                         "return_one", "result"),
            "rb",
    ) as read_obj:
        assert pickle.load(read_obj) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline,
                                             resolved_run_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        ))

    assert get_step_output(add_one_step_events, "add_one")
    with open(
            os.path.join(instance.storage_directory(), pipeline_run.run_id,
                         "add_one", "result"),
            "rb",
    ) as read_obj:
        assert pickle.load(read_obj) == 2
Example #25
def test_execute_plan_iterator():
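    # As with execute_run_iterator, closing the plan iterator mid-run still
    # tears down resources ("CLEANING A"/"CLEANING B").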
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    with instance_for_test() as instance:
        pipeline = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[
                ModeDefinition(
                    resource_defs={
                        "a": resource_a,
                        "b": resource_b
                    },
                    logger_defs={
                        "callback": construct_event_logger(event_callback)
                    },
                )
            ],
        )
        run_config = {"loggers": {"callback": {}}}

        execution_plan = create_execution_plan(pipeline, run_config=run_config)
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline,
            run_config={"loggers": {
                "callback": {}
            }},
            execution_plan=execution_plan,
        )

        iterator = execute_plan_iterator(
            execution_plan,
            InMemoryPipeline(pipeline),
            pipeline_run,
            instance,
            run_config=run_config,
        )

        event_type = None
        while event_type != "STEP_START":
            event = next(iterator)
            event_type = event.event_type_value

        iterator.close()
        messages = [
            record.user_message for record in records
            if not record.is_dagster_event
        ]
        assert len(
            [message for message in messages if message == "CLEANING A"]) > 0
        assert len(
            [message for message in messages if message == "CLEANING B"]) > 0
Example #26
def test_adls2_pipeline_with_custom_prefix(storage_account, file_system):
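    # End-to-end ADLS2 intermediate storage with a custom prefix: run the
    # pipeline, then rebuild the storage inside a scoped context and read the
    # intermediates back.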
    adls2_prefix = "custom_prefix"

    pipe = define_inty_pipeline(should_throw=False)
    run_config = {
        "resources": {
            "adls2": {
                "config": {
                    "storage_account": storage_account,
                    "credential": get_azure_credential()
                }
            }
        },
        "intermediate_storage": {
            "adls2": {
                "config": {
                    "adls2_file_system": file_system,
                    "adls2_prefix": adls2_prefix
                }
            }
        },
    }

    pipeline_run = PipelineRun(pipeline_name=pipe.name, run_config=run_config)
    instance = DagsterInstance.ephemeral()

    result = execute_pipeline(
        pipe,
        run_config=run_config,
    )
    assert result.success

    execution_plan = create_execution_plan(pipe, run_config)
    with scoped_pipeline_context(
            execution_plan,
            InMemoryPipeline(pipe),
            run_config,
            pipeline_run,
            instance,
    ) as context:
        resource = context.scoped_resources_builder.build(
            required_resource_keys={"adls2"}).adls2
        intermediate_storage = ADLS2IntermediateStorage(
            run_id=result.run_id,
            file_system=file_system,
            prefix=adls2_prefix,
            adls2_client=resource.adls2_client,
            blob_client=resource.blob_client,
        )
        assert intermediate_storage.root == "/".join(
            ["custom_prefix", "storage", result.run_id])
        assert (intermediate_storage.get_intermediate(
            context, Int, StepOutputHandle("return_one")).obj == 1)
        assert (intermediate_storage.get_intermediate(
            context, Int, StepOutputHandle("add_one")).obj == 2)
Example #27
def test_execute_step_wrong_step_key():
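    # Unknown step keys are rejected with DagsterExecutionStepNotFoundError,
    # with singular and plural variants of the error message.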
    pipeline = define_inty_pipeline()
    instance = DagsterInstance.ephemeral()

    environment_config = EnvironmentConfig.build(pipeline)
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(
            execution_plan.build_subset_plan(["nope.compute"], pipeline,
                                             environment_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        )

    assert exc_info.value.step_keys == ["nope.compute"]

    assert str(exc_info.value
               ) == "Can not build subset plan from unknown step: nope.compute"

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(
            execution_plan.build_subset_plan(
                ["nope.compute", "nuh_uh.compute"], pipeline,
                environment_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        )

    assert exc_info.value.step_keys == ["nope.compute", "nuh_uh.compute"]

    assert (
        str(exc_info.value) ==
        "Can not build subset plan from unknown steps: nope.compute, nuh_uh.compute"
    )
Example #28
def test_execute_run_iterator():
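    # Variant of the iterator test against a local temp instance: closing the
    # iterator mid-run fails the run and still performs resource cleanup.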
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    instance = DagsterInstance.local_temp()

    pipeline_def = PipelineDefinition(
        name="basic_resource_pipeline",
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "a": resource_a,
                    "b": resource_b
                },
                logger_defs={
                    "callback": construct_event_logger(event_callback)
                },
            )
        ],
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        run_config={"loggers": {
            "callback": {}
        }},
        mode="default",
    )

    iterator = execute_run_iterator(InMemoryPipeline(pipeline_def),
                                    pipeline_run,
                                    instance=instance)

    event_type = None
    while event_type != "STEP_START":
        event = next(iterator)
        event_type = event.event_type_value

    iterator.close()
    events = [
        record.dagster_event for record in records if record.is_dagster_event
    ]
    messages = [
        record.user_message for record in records
        if not record.is_dagster_event
    ]
    assert len([event for event in events if event.is_pipeline_failure]) > 0
    assert len([message
                for message in messages if message == "CLEANING A"]) > 0
    assert len([message
                for message in messages if message == "CLEANING B"]) > 0
Example #29
def test_configured_input_should_skip_step():
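    # The optional output is actually yielded, so the downstream solid must run;
    # should_skip_step on the second subset plan has to agree with execute_pipeline.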
    called = {}

    @solid(output_defs=[OutputDefinition(is_required=False)])
    def one(_):
        yield Output(1)

    @solid
    def solid_should_not_skip(_, input_one, input_two):  # pylint: disable=unused-argument
        called["yup"] = True

    @pipeline
    def my_pipeline():
        solid_should_not_skip(one())

    run_config = {
        "solids": {
            "solid_should_not_skip": {
                "inputs": {
                    "input_two": {
                        "value": "2"
                    }
                }
            }
        }
    }
    execute_pipeline(my_pipeline, run_config=run_config)
    assert called.get("yup")

    # ensure should_skip_step behaves the same as execute_pipeline
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name="my_pipeline",
                               run_id=make_new_run_id())
    execute_plan(
        create_execution_plan(
            my_pipeline,
            step_keys_to_execute=["one"],
            run_config=run_config,
        ),
        InMemoryPipeline(my_pipeline),
        instance,
        pipeline_run,
        run_config=run_config,
    )
    assert not should_skip_step(
        create_execution_plan(
            my_pipeline,
            step_keys_to_execute=["solid_should_not_skip"],
            run_config=run_config,
        ),
        instance,
        pipeline_run.run_id,
    )
Example #30
def test_using_intermediates_to_override():
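    # The "intermediate_storage" config takes precedence over the legacy
    # "storage" key, so nothing lands in the filesystem intermediate store.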
    pipeline = define_inty_pipeline()

    run_config = {
        "storage": {
            "filesystem": {}
        },
        "intermediate_storage": {
            "in_memory": {}
        }
    }

    instance = DagsterInstance.ephemeral()
    resolved_run_config = ResolvedRunConfig.build(
        pipeline,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        resolved_run_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)
    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline,
                                             resolved_run_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        ))

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id)
    assert get_step_output(return_one_step_events, "return_one")
    assert not intermediate_storage.has_intermediate(
        None, StepOutputHandle("return_one"))