Example #1
0
def _sum_only_pipeline():
    """Build a pipeline containing only the two sum solids, with no dependencies."""
    solids = [sum_table, sum_sq_table]
    return PipelineDefinition(solid_defs=solids, dependencies={})
Example #2
0
def test_list_dependencies():
    """Passing a list (instead of a dict) for ``dependencies`` must be rejected."""
    expected = 'The expected type for "dependencies" is Dict'
    with pytest.raises(DagsterInvalidDefinitionError, match=expected):
        PipelineDefinition(
            solid_defs=solid_a_b_list(),
            name="test",
            dependencies=[],
        )
def test_mode_takes_a_name():
    """A ModeDefinition constructed with an explicit name can be attached to a pipeline."""
    named_mode = ModeDefinition(name='a_mode')
    pipeline_def = PipelineDefinition(name='takes a mode',
                                      solids=[],
                                      mode_definitions=[named_mode])
    assert pipeline_def
Example #4
0
# pylint: disable=unused-argument

from dagster import DependencyDefinition, InputDefinition, PipelineDefinition, pipeline, solid


@solid
def return_one(context):
    """Solid that takes no inputs and returns the constant 1."""
    return 1


@solid(input_defs=[InputDefinition("number", int)])
def add_one(context, number):
    """Solid that returns its int input ``number`` incremented by one."""
    return number + 1


@pipeline
def one_plus_one_pipeline():
    """Pipeline wiring return_one's output into add_one's "number" input."""
    add_one(return_one())


# Equivalent construction of the one-plus-one pipeline without the @pipeline
# decorator: the dependency dict wires return_one's output to add_one's
# "number" input explicitly.
one_plus_one_pipeline_def = PipelineDefinition(
    name="one_plus_one_pipeline",
    solid_defs=[return_one, add_one],
    dependencies={"add_one": {"number": DependencyDefinition("return_one")}},
)
def single_int_output_pipeline():
    """Build a one-solid pipeline whose solid emits the Int value 1."""
    int_output = OutputDefinition(Int)

    @lambda_solid(output_def=int_output)
    def return_one():
        return 1

    solid_defs = [return_one]
    return PipelineDefinition(name="single_int_output_pipeline", solid_defs=solid_defs)
def create_diamond_pipeline():
    """Assemble the diamond-shaped pipeline from the shared solids and deps helpers."""
    solid_defs = create_diamond_solids()
    deps = diamond_deps()
    return PipelineDefinition(name='diamond_pipeline', solid_defs=solid_defs, dependencies=deps)
def test_empty_pipeline_execution():
    """Executing a pipeline with no solids at all should still succeed."""
    empty_pipeline = PipelineDefinition(solid_defs=[])
    assert execute_pipeline(empty_pipeline).success
Example #8
0
def define_no_repo_registration_error_pipeline():
    """Build the single-solid pipeline used to trigger the repo-registration error."""
    solids = [no_repo_reg_solid()]
    return PipelineDefinition(name='repo_registration_error', solids=solids)
Example #9
0
def define_hello_world_pipeline():
    """Build the single-solid hello-world pipeline."""
    hello_solid = define_hello_world_solid()
    return PipelineDefinition(name='hello_world_pipeline', solids=[hello_solid])
Example #10
0
def define_pandas_source_test_pipeline():
    """Build the pipeline exercising the pandas source-input transform."""
    source_solid = define_pandas_source_test_solid()
    return PipelineDefinition(name='input_transform_test_pipeline',
                              solids=[source_solid])
Example #11
0
def define_error_pipeline():
    """Build a pipeline around the dagstermill solid for the failing notebook."""
    error_solid = dm.define_dagstermill_solid('error_solid', nb_test_path('error_notebook'))
    return PipelineDefinition(name='error_pipeline', solids=[error_solid])
Example #12
0
def test_builtins_available():
    """Every builtin runtime type is resolvable on a pipeline and flagged builtin."""
    pipeline_def = PipelineDefinition(name='test_builting_available', solids=[])
    for builtin_type in ALL_RUNTIME_BUILTINS:
        type_name = builtin_type.name
        assert pipeline_def.has_runtime_type(type_name)
        assert pipeline_def.runtime_type_named(type_name).is_builtin
Example #13
0
def test_execute_run_iterator():
    """Exercise execute_run_iterator across several run states.

    Scenarios, all against one basic_resource_pipeline definition:
    1. closing the iterator mid-run records exactly one pipeline-failure
       event mentioning GeneratorExit, and the resource-cleanup messages
       ("CLEANING A"/"CLEANING B") are still logged;
    2. re-starting a run already in SUCCESS state raises — except under a
       run-monitoring-enabled instance, where the duplicate start is ignored
       (and resume_from_failure on a SUCCESS run still check-fails);
    3. a run in CANCELED state yields exactly one "not starting" event.
    """
    # EventLogEntry records captured by the callback logger below.
    records = []

    def event_callback(record):
        assert isinstance(record, EventLogEntry)
        records.append(record)

    with instance_for_test() as instance:
        pipeline_def = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[
                ModeDefinition(
                    resource_defs={
                        "a": resource_a,
                        "b": resource_b
                    },
                    logger_defs={
                        # routes all run events through event_callback above
                        "callback": construct_event_logger(event_callback)
                    },
                )
            ],
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={"loggers": {
                "callback": {}
            }},
            mode="default",
        )

        iterator = execute_run_iterator(InMemoryPipeline(pipeline_def),
                                        pipeline_run,
                                        instance=instance)

        # Advance the run just past the first step start, then abandon it.
        event_type = None
        while event_type != "STEP_START":
            event = next(iterator)
            event_type = event.event_type_value

        # Closing the generator mid-run raises GeneratorExit inside it.
        iterator.close()
        events = [
            record.dagster_event for record in records
            if record.is_dagster_event
        ]
        messages = [
            record.user_message for record in records
            if not record.is_dagster_event
        ]
        # Exactly one pipeline-failure event, caused by the GeneratorExit...
        pipeline_failure_events = [
            event for event in events if event.is_pipeline_failure
        ]
        assert len(pipeline_failure_events) == 1
        assert "GeneratorExit" in pipeline_failure_events[
            0].pipeline_failure_data.error.message
        # ...and both resources were still torn down despite the abort.
        assert len(
            [message for message in messages if message == "CLEANING A"]) > 0
        assert len(
            [message for message in messages if message == "CLEANING B"]) > 0

        # Scenario 2: a run already marked SUCCESS cannot be started again.
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={
                "loggers": {
                    "callback": {}
                }
            },
            mode="default",
        ).with_status(PipelineRunStatus.SUCCESS)

        with pytest.raises(
                Exception,
                match=r"basic_resource_pipeline \({}\) started a new "
                r"run while the run was already in state DagsterRunStatus.SUCCESS."
                .format(pipeline_run.run_id),
        ):
            execute_run_iterator(InMemoryPipeline(pipeline_def),
                                 pipeline_run,
                                 instance=instance)

        # With run monitoring enabled, the same duplicate start is tolerated
        # and reported via a single informational event instead of raising.
        with instance_for_test(
                overrides={
                    "run_launcher": {
                        "module":
                        "dagster_tests.daemon_tests.test_monitoring_daemon",
                        "class": "TestRunLauncher",
                    },
                    "run_monitoring": {
                        "enabled": True
                    },
                }) as run_monitoring_instance:
            event = next(
                execute_run_iterator(InMemoryPipeline(pipeline_def),
                                     pipeline_run,
                                     instance=run_monitoring_instance))
            assert (
                "Ignoring a duplicate run that was started from somewhere other than the run monitor daemon"
                in event.message)

            # resume_from_failure requires STARTED/STARTING, so a SUCCESS
            # run fails the check instead.
            with pytest.raises(
                    check.CheckError,
                    match=
                    r"in state DagsterRunStatus.SUCCESS, expected STARTED or STARTING "
                    r"because it's resuming from a run worker failure",
            ):
                execute_run_iterator(
                    InMemoryPipeline(pipeline_def),
                    pipeline_run,
                    instance=run_monitoring_instance,
                    resume_from_failure=True,
                )

        # Scenario 3: a CANCELED run produces a single "not starting" event.
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={
                "loggers": {
                    "callback": {}
                }
            },
            mode="default",
        ).with_status(PipelineRunStatus.CANCELED)

        events = list(
            execute_run_iterator(InMemoryPipeline(pipeline_def),
                                 pipeline_run,
                                 instance=instance))

        assert len(events) == 1
        assert (
            events[0].message ==
            "Not starting execution since the run was canceled before execution could start"
        )
Example #14
0
def silencing_pipeline(solids, dependencies=None):
    """Construct a pipeline using the silencing default context definitions.

    Args:
        solids: solid definitions to include in the pipeline.
        dependencies: optional dependency dict; defaults to None.
    """
    context_defs = silencing_default_context()
    return PipelineDefinition(solids=solids,
                              dependencies=dependencies,
                              context_definitions=context_defs)
Example #15
0
def test_default_mode_definition():
    """A pipeline constructed without explicit modes is still a valid definition."""
    assert PipelineDefinition(name="takesamode", solid_defs=[])
Example #16
0
def define_hello_world_with_output_pipeline():
    """Build the hello-world pipeline whose solid declares an output."""
    solids = [define_hello_world_with_output()]
    return PipelineDefinition(name='hello_world_with_output_pipeline', solids=solids)
Example #17
0
def test_mode_takes_a_name():
    """A named ModeDefinition can be supplied via mode_defs."""
    named_mode = ModeDefinition(name="a_mode")
    pipeline_def = PipelineDefinition(name="takesamode",
                                      solid_defs=[],
                                      mode_defs=[named_mode])
    assert pipeline_def
Example #18
0
def define_hello_world_explicit_yield_pipeline():
    """Build the hello-world pipeline whose solid yields its result explicitly."""
    solids = [define_hello_world_explicit_yield()]
    return PipelineDefinition(name='hello_world_explicit_yield_pipeline', solids=solids)
def test_pipeline_execution_graph_diamond():
    """Run the shared execution checks against a freshly built diamond pipeline."""
    diamond = PipelineDefinition(
        solid_defs=create_diamond_solids(),
        dependencies=diamond_deps(),
    )
    return _do_test(diamond)
Example #20
0
def define_hello_logging_pipeline():
    """Build the single-solid pipeline used for the logging example."""
    logging_solid = define_hello_logging_solid()
    return PipelineDefinition(name='hello_logging_pipeline', solids=[logging_solid])
def test_reexecution_fs_storage_with_subset():
    """Re-execution with filesystem storage and solid subsets.

    Walks through: a full two-solid run, re-execution of a single step from
    that run, a one-solid subset execution, re-execution of that subset, and
    the error cases where step_keys_to_execute name a step that is not in
    the (subset) execution plan.
    """
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={'add_one': {
            'num': DependencyDefinition('return_one')
        }},
    )
    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.ephemeral()
    # Baseline: full pipeline run, add_one yields 1 + 1 == 2.
    pipeline_result = execute_pipeline(pipeline_def,
                                       environment_dict,
                                       instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').output_value() == 2

    # This is how this is actually done in dagster_graphql.implementation.pipeline_execution_manager
    reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        step_keys_to_execute=['return_one.compute'],
        parent_run_id=pipeline_result.run_id,
        root_run_id=pipeline_result.run_id,
    )
    # Re-executing only return_one.compute: add_one is present but skipped.
    reexecution_result_no_subset = execute_run(pipeline_def,
                                               reexecution_pipeline_run,
                                               instance)
    assert reexecution_result_no_subset.success
    assert len(reexecution_result_no_subset.solid_result_list) == 2
    assert reexecution_result_no_subset.result_for_solid('add_one').skipped
    assert reexecution_result_no_subset.result_for_solid(
        'return_one').output_value() == 1

    # Subset execution: only return_one participates; add_one is absent
    # entirely, so asking for its result raises.
    pipeline_result_subset = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        instance=instance,
        solid_subset=['return_one'],
    )
    assert pipeline_result_subset.success
    assert len(pipeline_result_subset.solid_result_list) == 1
    with pytest.raises(DagsterInvariantViolationError):
        pipeline_result_subset.result_for_solid('add_one')
    assert pipeline_result_subset.result_for_solid(
        'return_one').output_value() == 1

    # Re-execute the subset run with the matching step key.
    reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=pipeline_result_subset.run_id,
        root_run_id=pipeline_result_subset.run_id,
        solid_subset=['return_one'],
        step_keys_to_execute=['return_one.compute'],
    )

    reexecution_result = execute_run(pipeline_def, reexecution_pipeline_run,
                                     instance)

    assert reexecution_result.success
    assert len(reexecution_result.solid_result_list) == 1
    with pytest.raises(DagsterInvariantViolationError):
        pipeline_result_subset.result_for_solid('add_one')
    assert reexecution_result.result_for_solid(
        'return_one').output_value() == 1

    # A step key outside the subset's execution plan is rejected up front.
    with pytest.raises(
            DagsterExecutionStepNotFoundError,
            match=re.escape(
                'Execution plan does not contain step: add_one.compute'),
    ):
        instance.create_run_for_pipeline(
            pipeline_def,
            environment_dict=environment_dict,
            parent_run_id=pipeline_result_subset.run_id,
            root_run_id=pipeline_result_subset.run_id,
            solid_subset=['return_one'],
            step_keys_to_execute=['add_one.compute'],
        )

    # Re-execution of a re-execution (second generation) still works.
    re_reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=reexecution_result.run_id,
        root_run_id=reexecution_result.run_id,
        solid_subset=['return_one'],
        step_keys_to_execute=['return_one.compute'],
    )

    re_reexecution_result = execute_run(pipeline_def,
                                        re_reexecution_pipeline_run, instance)

    assert re_reexecution_result.success
    assert len(re_reexecution_result.solid_result_list) == 1
    assert re_reexecution_result.result_for_solid(
        'return_one').output_value() == 1

    # And the same out-of-plan step key is rejected at the second generation.
    with pytest.raises(
            DagsterExecutionStepNotFoundError,
            match=re.escape('Execution plan does not contain step: add_one'),
    ):
        instance.create_run_for_pipeline(
            pipeline_def,
            environment_dict=environment_dict,
            parent_run_id=reexecution_result.run_id,
            root_run_id=reexecution_result.run_id,
            solid_subset=['return_one'],
            step_keys_to_execute=['add_one.compute'],
        )
Example #22
0
def test_multiple_outputs_only_emit_one():
    """A solid declaring two outputs but yielding only one.

    The downstream solid wired to the emitted output runs; the one wired to
    the un-emitted output is never called. Asking results for undefined or
    un-emitted outputs, or for the never-run solid, raises
    DagsterInvariantViolationError.
    """
    def _t_fn(*_args):
        # Emits output_one only; output_two is declared but never yielded.
        yield Result(output_name='output_one', value='foo')

    solid = SolidDefinition(
        name='multiple_outputs',
        inputs=[],
        outputs=[
            OutputDefinition(name='output_one'),
            OutputDefinition(name='output_two')
        ],
        transform_fn=_t_fn,
    )

    # Records that the downstream of output_one actually executed.
    called = {}

    def _transform_fn_one(*_args, **_kwargs):
        called['one'] = True

    downstream_one = SolidDefinition(
        name='downstream_one',
        inputs=[InputDefinition('some_input')],
        outputs=[],
        transform_fn=_transform_fn_one,
    )

    def _transform_fn_two(*_args, **_kwargs):
        # Wired to the never-emitted output_two, so this must never run.
        raise Exception('do not call me')

    downstream_two = SolidDefinition(
        name='downstream_two',
        inputs=[InputDefinition('some_input')],
        outputs=[],
        transform_fn=_transform_fn_two,
    )

    pipeline = PipelineDefinition(
        solids=[solid, downstream_one, downstream_two],
        dependencies={
            'downstream_one': {
                'some_input': DependencyDefinition(solid.name,
                                                   output='output_one')
            },
            'downstream_two': {
                'some_input': DependencyDefinition(solid.name,
                                                   output='output_two')
            },
        },
    )

    result = execute_pipeline(pipeline)
    assert result.success

    assert called['one']
    solid_result = result.result_for_solid('multiple_outputs')
    # Only the emitted output appears among the transformed values.
    assert set(solid_result.transformed_values.keys()) == set(['output_one'])

    # Output name that was never declared on the solid.
    with pytest.raises(
            DagsterInvariantViolationError,
            match='not_defined not defined in solid multiple_outputs'):
        solid_result.transformed_value('not_defined')

    # Output declared but never emitted.
    with pytest.raises(DagsterInvariantViolationError,
                       match='Did not find result output_two'):
        solid_result.transformed_value('output_two')

    # Solid name not present in the pipeline at all.
    with pytest.raises(
            DagsterInvariantViolationError,
            match=
            'Try to get result for solid not_present in <<unnamed>>. No such solid.',
    ):
        result.result_for_solid('not_present')

    # Solid present in the pipeline but never executed.
    with pytest.raises(
            DagsterInvariantViolationError,
            match=
            'Did not find result for solid downstream_two in pipeline execution result',
    ):
        result.result_for_solid('downstream_two')
Example #23
0
def dummy_init_logger_context(logger_def, run_id):
    """Build an InitLoggerContext with empty config around an empty pipeline."""
    empty_pipeline = PipelineDefinition([])
    return InitLoggerContext({}, empty_pipeline, logger_def, run_id)
Example #24
0
def define_empty_pipeline():
    """Build a pipeline containing no solids at all."""
    return PipelineDefinition(
        name='empty_pipeline',
        solid_defs=[],
    )
def single_string_output_pipeline():
    """Build a one-solid pipeline whose solid emits the String value "foo"."""
    string_output = OutputDefinition(String)

    @lambda_solid(output_def=string_output)
    def return_foo():
        return "foo"

    solid_defs = [return_foo]
    return PipelineDefinition(name="single_string_output_pipeline", solid_defs=solid_defs)
Example #26
0
def define_multi_mode_with_resources_pipeline():
    """Build the 'multi_mode_with_resources' pipeline.

    A single solid applies the mode-selected 'op' resource to the value 3.
    Three modes bind 'op' to an adder, a multiplier, or a two-number adder,
    and two presets pin configurations: 'add' from a YAML file and
    'multiproc' inline (multiprocess execution with filesystem storage).
    """
    # API red alert. One has to wrap a type in Field because it is callable
    @resource(config_schema=Int)
    def adder_resource(init_context):
        # Callable resource: adds the configured int to its argument.
        return lambda x: x + init_context.resource_config

    @resource(config_schema=Int)
    def multer_resource(init_context):
        # Callable resource: multiplies its argument by the configured int.
        return lambda x: x * init_context.resource_config

    @resource(config_schema={'num_one': Int, 'num_two': Int})
    def double_adder_resource(init_context):
        # Callable resource: adds both configured ints to its argument.
        return (lambda x: x + init_context.resource_config['num_one'] +
                init_context.resource_config['num_two'])

    @solid(required_resource_keys={'op'})
    def apply_to_three(context):
        # Whatever callable the active mode bound to 'op', applied to 3.
        return context.resources.op(3)

    return PipelineDefinition(
        name='multi_mode_with_resources',
        solid_defs=[apply_to_three],
        mode_defs=[
            ModeDefinition(name='add_mode',
                           resource_defs={'op': adder_resource}),
            ModeDefinition(name='mult_mode',
                           resource_defs={'op': multer_resource}),
            ModeDefinition(
                name='double_adder_mode',
                resource_defs={'op': double_adder_resource},
                description='Mode that adds two numbers to thing',
            ),
        ],
        preset_defs=[
            PresetDefinition.from_files(
                'add',
                mode='add_mode',
                config_files=[
                    file_relative_path(
                        __file__,
                        '../environments/multi_mode_with_resources/add_mode.yaml'
                    )
                ],
            ),
            PresetDefinition(
                'multiproc',
                mode='add_mode',
                run_config={
                    'resources': {
                        'op': {
                            'config': 2
                        }
                    },
                    'execution': {
                        'multiprocess': {}
                    },
                    'storage': {
                        'filesystem': {}
                    },
                },
            ),
        ],
    )
def test_default_mode_definition():
    """A pipeline built without any mode definitions is still valid."""
    assert PipelineDefinition(name='takes a mode', solids=[])
Example #28
0
def test_solid_def():
    """Inspect the structural API of a two-solid pipeline definition.

    Verifies solid lookup, input/output defs and handles (including the
    exact str/repr forms and handle equality) plus the dependency-structure
    accessors on a produce_string -> solid_one pipeline.
    """
    @lambda_solid
    def produce_string():
        return "foo"

    @solid(
        input_defs=[InputDefinition("input_one", String)],
        output_defs=[OutputDefinition(Any)],
        config_schema={"another_field": Int},
    )
    def solid_one(_context, input_one):
        # Only the definition's structure is under test; the body never runs.
        raise Exception("should not execute")

    pipeline_def = PipelineDefinition(
        solid_defs=[produce_string, solid_one],
        dependencies={"solid_one": {"input_one": DependencyDefinition("produce_string")}},
    )

    assert len(pipeline_def.solids[0].output_handles()) == 1

    assert isinstance(pipeline_def.solid_named("solid_one"), Solid)

    solid_one_solid = pipeline_def.solid_named("solid_one")

    assert solid_one_solid.has_input("input_one")

    assert isinstance(solid_one_solid.input_def_named("input_one"), InputDefinition)

    assert len(solid_one_solid.input_dict) == 1
    assert len(solid_one_solid.output_dict) == 1

    # str and repr of handles are pinned to the exact same format.
    assert str(solid_one_solid.input_handle("input_one")) == (
        "SolidInputHandle(input_name=\"'input_one'\", solid_name=\"'solid_one'\")"
    )

    assert repr(solid_one_solid.input_handle("input_one")) == (
        "SolidInputHandle(input_name=\"'input_one'\", solid_name=\"'solid_one'\")"
    )

    assert str(solid_one_solid.output_handle("result")) == (
        "SolidOutputHandle(output_name=\"'result'\", solid_name=\"'solid_one'\")"
    )

    assert repr(solid_one_solid.output_handle("result")) == (
        "SolidOutputHandle(output_name=\"'result'\", solid_name=\"'solid_one'\")"
    )

    # Handles constructed independently compare equal.
    assert solid_one_solid.output_handle("result") == SolidOutputHandle(
        solid_one_solid, solid_one_solid.output_dict["result"]
    )

    # Dependency structure: one upstream edge into solid_one ...
    assert (
        len(pipeline_def.dependency_structure.input_to_upstream_outputs_for_solid("solid_one")) == 1
    )

    # ... and one downstream edge out of produce_string.
    assert (
        len(
            pipeline_def.dependency_structure.output_to_downstream_inputs_for_solid(
                "produce_string"
            )
        )
        == 1
    )

    assert len(pipeline_def.dependency_structure.input_handles()) == 1

    assert len(pipeline_def.dependency_structure.items()) == 1
Example #29
0
def test_execute_canceled_state():
    """A run already in CANCELED state must not start execution.

    execute_run raises DagsterInvariantViolationError and leaves exactly one
    "not starting" log entry; execute_run_iterator instead yields exactly
    one event carrying the same message.
    """
    def event_callback(_record):
        # Events are not inspected in this test; the logger just needs a sink.
        pass

    with instance_for_test() as instance:
        pipeline_def = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[
                ModeDefinition(
                    resource_defs={
                        "a": resource_a,
                        "b": resource_b
                    },
                    logger_defs={
                        "callback": construct_event_logger(event_callback)
                    },
                )
            ],
        )
        # Force the run into CANCELED before any attempt to execute it.
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={
                "loggers": {
                    "callback": {}
                }
            },
            mode="default",
        ).with_status(PipelineRunStatus.CANCELED)

        with pytest.raises(DagsterInvariantViolationError):
            execute_run(
                InMemoryPipeline(pipeline_def),
                pipeline_run,
                instance=instance,
            )

        logs = instance.all_logs(pipeline_run.run_id)

        assert len(logs) == 1
        assert (
            "Not starting execution since the run was canceled before execution could start"
            in logs[0].message)

        # Same scenario through the iterator API: no exception, one event.
        iter_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={
                "loggers": {
                    "callback": {}
                }
            },
            mode="default",
        ).with_status(PipelineRunStatus.CANCELED)

        iter_events = list(
            execute_run_iterator(InMemoryPipeline(pipeline_def),
                                 iter_run,
                                 instance=instance))

        assert len(iter_events) == 1
        assert (
            "Not starting execution since the run was canceled before execution could start"
            in iter_events[0].message)
Example #30
0
def test_invalid_item_in_solid_list():
    """A non-solid entry in the solids list raises DagsterInvalidDefinitionError."""
    expected = "Invalid item in solid list: 'not_a_solid'"
    with pytest.raises(DagsterInvalidDefinitionError, match=expected):
        PipelineDefinition(solids=['not_a_solid'])