Esempio n. 1
0
def test_failure_propagation():
    """Check that a failure in one branch skips every solid downstream of it.

    Topology under test::

          B =========== C
         //             \\
        A                F (skipped)
         \\             //
          D (fails) == E (skipped)
    """

    def success_fn(_context, inputs):
        return inputs

    def fail_fn(_context, inputs):
        check.failed("user error")
        return inputs

    def build_solid(name, upstream_names, compute_fn=success_fn):
        # Every non-root solid has one input per upstream solid and a single default output.
        return single_output_solid(
            name=name,
            input_defs=[InputDefinition(name=upstream) for upstream in upstream_names],
            compute_fn=compute_fn,
            output_def=OutputDefinition(),
        )

    solid_a = create_root_success_solid("A")
    solid_b = build_solid("B", ["A"])
    solid_c = build_solid("C", ["B"])
    solid_d = build_solid("D", ["A"], compute_fn=fail_fn)
    solid_e = build_solid("E", ["D"])
    solid_f = build_solid("F", ["C", "E"])

    # (downstream solid, input name, upstream solid) for each edge of the graph.
    edges = [
        ("B", "A", solid_a),
        ("D", "A", solid_a),
        ("C", "B", solid_b),
        ("E", "D", solid_d),
        ("F", "C", solid_c),
        ("F", "E", solid_e),
    ]
    dependencies = {}
    for downstream, input_name, upstream_solid in edges:
        dependencies.setdefault(downstream, {})[input_name] = DependencyDefinition(
            upstream_solid.name
        )

    pipeline_def = PipelineDefinition(
        solid_defs=[solid_a, solid_b, solid_c, solid_d, solid_e, solid_f],
        dependencies=dependencies,
    )

    pipeline_result = execute_pipeline(pipeline_def, raise_on_error=False)
    result_for = pipeline_result.result_for_solid

    # The healthy branch runs to completion.
    for succeeded_name in ("A", "B", "C"):
        assert result_for(succeeded_name).success

    # D is the solid that actually fails.
    failed = result_for("D")
    assert not failed.success
    assert failed.failure_data.error.cls_name == "CheckError"

    # Everything downstream of D is skipped rather than executed.
    for skipped_name in ("E", "F"):
        downstream_result = result_for(skipped_name)
        assert not downstream_result.success
        assert downstream_result.skipped
Esempio n. 2
0
def define_hello_world_explicit_yield_pipeline():
    '''Build a pipeline whose only solid comes from ``define_hello_world_explicit_yield()``.'''
    explicit_yield_solid = define_hello_world_explicit_yield()
    return PipelineDefinition(
        name='hello_world_explicit_yield_pipeline',
        solid_defs=[explicit_yield_solid],
    )
def test_failure_propagation():
    '''Check that a failure in one branch skips every solid downstream of it.

    Topology under test::

          B =========== C
         //             \\
        A                F (skipped)
         \\             //
          D (fails) == E (skipped)
    '''

    def success_fn(_context, inputs):
        return inputs

    def fail_fn(_context, inputs):
        check.failed('user error')
        return inputs

    def make_solid(solid_name, input_names, compute_fn):
        # One input per upstream solid, a single default output.
        return single_output_solid(
            name=solid_name,
            input_defs=[InputDefinition(name=input_name) for input_name in input_names],
            compute_fn=compute_fn,
            output_def=OutputDefinition(),
        )

    solid_a = create_root_success_solid('A')
    solid_b = make_solid('B', ['A'], success_fn)
    solid_c = make_solid('C', ['B'], success_fn)
    solid_d = make_solid('D', ['A'], fail_fn)
    solid_e = make_solid('E', ['D'], success_fn)
    solid_f = make_solid('F', ['C', 'E'], success_fn)

    all_solids = [solid_a, solid_b, solid_c, solid_d, solid_e, solid_f]

    from_a = DependencyDefinition(solid_a.name)
    dependency_map = {
        'B': {'A': from_a},
        'D': {'A': DependencyDefinition(solid_a.name)},
        'C': {'B': DependencyDefinition(solid_b.name)},
        'E': {'D': DependencyDefinition(solid_d.name)},
        'F': {
            'C': DependencyDefinition(solid_c.name),
            'E': DependencyDefinition(solid_e.name),
        },
    }

    pipeline_def = PipelineDefinition(solid_defs=all_solids, dependencies=dependency_map)
    pipeline_result = execute_pipeline(pipeline_def, raise_on_error=False)

    # The healthy A -> B -> C branch succeeds.
    for ok_name in ('A', 'B', 'C'):
        assert pipeline_result.result_for_solid(ok_name).success

    # D fails with the check error raised inside fail_fn.
    d_result = pipeline_result.result_for_solid('D')
    assert not d_result.success
    assert d_result.failure_data.error.cls_name == 'CheckError'

    # E and F sit downstream of the failure and are reported as skipped.
    for skipped_name in ('E', 'F'):
        skipped_result = pipeline_result.result_for_solid(skipped_name)
        assert not skipped_result.success
        assert skipped_result.skipped
Esempio n. 4
0
def define_test_all_scalars_pipeline():
    '''Build a pipeline of unconnected solids that produce or consume each basic type.

    There is a ``produce_*`` and a ``take_*`` solid for Any, Bool, Float, Int, Path and
    String, plus ``take_*`` solids for ``List[String]`` and ``Optional[String]``. No
    dependencies are declared between the solids.
    '''

    # Producers: no inputs, one typed output.
    @lambda_solid(output_def=OutputDefinition(Any))
    def produce_any():
        return True

    @lambda_solid(output_def=OutputDefinition(Bool))
    def produce_bool():
        return True

    @lambda_solid(output_def=OutputDefinition(Float))
    def produce_float():
        return 3.14

    @lambda_solid(output_def=OutputDefinition(Int))
    def produce_int():
        return 2

    @lambda_solid(output_def=OutputDefinition(Path))
    def produce_path():
        return '/path/to/foo'

    @lambda_solid(output_def=OutputDefinition(String))
    def produce_string():
        return 'foo'

    # Consumers: one typed input, returned unchanged.
    @lambda_solid(input_defs=[InputDefinition('any_value', Any)])
    def take_any(any_value):
        return any_value

    @lambda_solid(input_defs=[InputDefinition('bool_value', Bool)])
    def take_bool(bool_value):
        return bool_value

    @lambda_solid(input_defs=[InputDefinition('float_number', Float)])
    def take_float(float_number):
        return float_number

    @lambda_solid(input_defs=[InputDefinition('num', Int)])
    def take_int(num):
        return num

    @lambda_solid(input_defs=[InputDefinition('nullable_string', Optional[String])])
    def take_nullable_string(nullable_string):
        return nullable_string

    @lambda_solid(input_defs=[InputDefinition('path', Path)])
    def take_path(path):
        return path

    @lambda_solid(input_defs=[InputDefinition('string', String)])
    def take_string(string):
        return string

    @lambda_solid(input_defs=[InputDefinition('string_list', List[String])])
    def take_string_list(string_list):
        return string_list

    producers = [
        produce_any,
        produce_bool,
        produce_float,
        produce_int,
        produce_path,
        produce_string,
    ]
    consumers = [
        take_any,
        take_bool,
        take_float,
        take_int,
        take_nullable_string,
        take_path,
        take_string,
        take_string_list,
    ]

    return PipelineDefinition(
        name='test_all_scalars_pipeline',
        solid_defs=producers + consumers,
    )
Esempio n. 5
0
def define_hello_world_config_pipeline():
    '''Build a pipeline whose only solid comes from ``define_hello_world_config_solid()``.'''
    config_solid = define_hello_world_config_solid()
    return PipelineDefinition(
        name='hello_world_config_pipeline',
        solid_defs=[config_solid],
    )
Esempio n. 6
0
def create_diamond_pipeline():
    """Assemble "diamond_pipeline" from ``create_diamond_solids()`` and ``diamond_deps()``."""
    diamond_solids = create_diamond_solids()
    diamond_dependencies = diamond_deps()
    return PipelineDefinition(
        name="diamond_pipeline",
        solid_defs=diamond_solids,
        dependencies=diamond_dependencies,
    )
Esempio n. 7
0
def test_empty_pipeline_execution():
    """Executing a pipeline that has no solids should report success."""
    empty_pipeline = PipelineDefinition(solid_defs=[])
    execution_result = execute_pipeline(empty_pipeline)

    assert execution_result.success
Esempio n. 8
0
def _sum_only_pipeline():
    """Build an unnamed pipeline of ``sum_table`` and ``sum_sq_table`` with no dependencies."""
    table_solids = [sum_table, sum_sq_table]
    return PipelineDefinition(solid_defs=table_solids, dependencies={})
Esempio n. 9
0
def test_nameless():
    """A pipeline built without a name gets a generated "__pipeline..." name."""
    anonymous_pipeline = PipelineDefinition([return_one])

    generated_name = anonymous_pipeline.name
    assert generated_name.startswith("__pipeline")

    generated_display_name = anonymous_pipeline.display_name
    assert generated_display_name.startswith("__pipeline")
Esempio n. 10
0
def define_empty_pipeline():
    """Build the pipeline named "empty_pipeline", which contains no solids."""
    no_solids = []
    return PipelineDefinition(name="empty_pipeline", solid_defs=no_solids)
Esempio n. 11
0
def define_hello_world_pipeline():
    """Build "hello_world_pipeline" around the solid from ``define_hello_world_solid()``."""
    hello_world_solid = define_hello_world_solid()
    return PipelineDefinition(
        name="hello_world_pipeline",
        solid_defs=[hello_world_solid],
    )
def single_string_output_pipeline():
    '''Build a one-solid pipeline whose solid returns the string 'foo' as a String output.'''
    string_output = OutputDefinition(String)

    @lambda_solid(output_def=string_output)
    def return_foo():
        return 'foo'

    return PipelineDefinition(
        name='single_string_output_pipeline',
        solid_defs=[return_foo],
    )
def single_int_output_pipeline():
    '''Build a one-solid pipeline whose solid returns the integer 1 as an Int output.'''
    int_output = OutputDefinition(Int)

    @lambda_solid(output_def=int_output)
    def return_one():
        return 1

    return PipelineDefinition(
        name='single_int_output_pipeline',
        solid_defs=[return_one],
    )
Esempio n. 14
0
def define_configurable_hello_pipeline():
    '''Build 'configurable_hello_pipeline' containing only the ``configurable_hello`` solid.'''
    pipeline_solids = [configurable_hello]
    return PipelineDefinition(
        name='configurable_hello_pipeline',
        solids=pipeline_solids,
    )
Esempio n. 15
0
    def get_context(self, solid_config=None, mode_def=None, environment_dict=None):
        '''Get a dagstermill execution context for interactive exploration and development.

        If ``self.context`` already holds a ``DagstermillRuntimeExecutionContext``, that context
        is returned unchanged. Otherwise an ephemeral single-solid pipeline is built, a stub
        ``PipelineRun`` is created for it, and a new ``DagstermillExecutionContext`` is stored on
        ``self.context`` and returned. ``self.in_pipeline``, ``self.solid_def`` and
        ``self.pipeline_def`` are updated as a side effect.

        Args:
            solid_config (Optional[Any]): If specified, this value will be made available on the
                context as its ``solid_config`` property.
            mode_def (Optional[:class:`dagster.ModeDefinition`]): If specified, defines the mode to
                use to construct the context. Specify this if you would like a context constructed
                with specific ``resource_defs`` or ``logger_defs``. By default, an ephemeral mode
                with a console logger will be constructed.
            environment_dict(Optional[dict]): The environment config dict with which to construct
                the context. The dict passed in is not modified; when no ``mode_def`` is given,
                a copy is made and its ``'loggers'`` entry is set to configure the default
                ``'dagstermill'`` logger.

        Returns:
            :py:class:`~dagstermill.DagstermillExecutionContext`
        '''
        check.opt_inst_param(mode_def, 'mode_def', ModeDefinition)
        environment_dict = check.opt_dict_param(environment_dict, 'environment_dict', key_type=str)

        # If we are running non-interactively, and there is already a context reconstituted, return
        # that context rather than overwriting it.
        if self.context is not None and isinstance(
            self.context, DagstermillRuntimeExecutionContext
        ):
            return self.context

        if not mode_def:
            mode_def = ModeDefinition(logger_defs={'dagstermill': colored_console_logger})
            # The validated dict may be the very object the caller passed in, so inject the
            # default logger config into a shallow copy instead of mutating their argument.
            environment_dict = dict(environment_dict)
            environment_dict['loggers'] = {'dagstermill': {}}

        solid_def = SolidDefinition(
            name='this_solid',
            input_defs=[],
            compute_fn=lambda *args, **kwargs: None,
            output_defs=[],
            description='Ephemeral solid constructed by dagstermill.get_context()',
            required_resource_keys=mode_def.resource_key_set,
        )

        pipeline_def = PipelineDefinition(
            [solid_def], mode_defs=[mode_def], name='ephemeral_dagstermill_pipeline'
        )

        run_id = make_new_run_id()

        # construct stubbed PipelineRun for notebook exploration...
        # The actual pipeline run during pipeline execution will be serialized and reconstituted
        # in the `reconstitute_pipeline_context` call
        pipeline_run = PipelineRun(
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            environment_dict=environment_dict,
            mode=mode_def.name,
            selector=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
            tags=None,
        )

        self.in_pipeline = False
        self.solid_def = solid_def
        self.pipeline_def = pipeline_def

        execution_plan = create_execution_plan(self.pipeline_def, environment_dict, pipeline_run)
        with scoped_pipeline_context(
            self.pipeline_def,
            environment_dict,
            pipeline_run,
            DagsterInstance.ephemeral(),
            execution_plan,
            scoped_resources_builder_cm=self._setup_resources,
        ) as pipeline_context:

            self.context = DagstermillExecutionContext(
                pipeline_context=pipeline_context,
                solid_config=solid_config,
                resource_keys_to_init=get_required_resource_keys_to_init(
                    execution_plan, pipeline_context.system_storage_def
                ),
            )

        return self.context
Esempio n. 16
0
def test_solid_def():
    """Exercise solid and handle introspection on a two-solid pipeline definition.

    The pipeline is only defined here, never executed.
    """

    @lambda_solid
    def produce_string():
        return "foo"

    @solid(
        input_defs=[InputDefinition("input_one", String)],
        output_defs=[OutputDefinition(Any)],
        config_schema={"another_field": Int},
    )
    def solid_one(_context, input_one):
        raise Exception("should not execute")

    wiring = {"solid_one": {"input_one": DependencyDefinition("produce_string")}}
    pipeline_def = PipelineDefinition(
        solid_defs=[produce_string, solid_one],
        dependencies=wiring,
    )

    first_solid = pipeline_def.solids[0]
    assert len(first_solid.output_handles()) == 1

    solid_one_solid = pipeline_def.solid_named("solid_one")
    assert isinstance(solid_one_solid, Solid)

    # Input-side introspection.
    assert solid_one_solid.has_input("input_one")
    input_def = solid_one_solid.input_def_named("input_one")
    assert isinstance(input_def, InputDefinition)

    assert len(solid_one_solid.input_dict) == 1
    assert len(solid_one_solid.output_dict) == 1

    # str() and repr() of a handle are expected to render identically.
    expected_input_text = (
        "SolidInputHandle(input_name=\"'input_one'\", solid_name=\"'solid_one'\")"
    )
    input_handle = solid_one_solid.input_handle("input_one")
    assert str(input_handle) == expected_input_text
    assert repr(input_handle) == expected_input_text

    expected_output_text = (
        "SolidOutputHandle(output_name=\"'result'\", solid_name=\"'solid_one'\")"
    )
    output_handle = solid_one_solid.output_handle("result")
    assert str(output_handle) == expected_output_text
    assert repr(output_handle) == expected_output_text

    # Handles compare equal to a freshly built handle for the same solid and output def.
    rebuilt_handle = SolidOutputHandle(solid_one_solid, solid_one_solid.output_dict["result"])
    assert solid_one_solid.output_handle("result") == rebuilt_handle

    # The dependency structure holds exactly the one declared edge.
    structure = pipeline_def.dependency_structure
    assert len(structure.input_to_upstream_outputs_for_solid("solid_one")) == 1
    assert len(structure.output_to_downstream_inputs_for_solid("produce_string")) == 1
    assert len(structure.input_handles()) == 1
    assert len(structure.items()) == 1
def define_pipeline():
    '''Build 'hello_world_pipeline' containing only the ``hello_world`` solid.'''
    pipeline_solids = [hello_world]
    return PipelineDefinition(name='hello_world_pipeline', solids=pipeline_solids)
Esempio n. 18
0
def test_pipeline_execution_graph_diamond():
    '''Run the shared ``_do_test`` checks against the diamond pipeline via the iterator API.'''
    diamond = PipelineDefinition(solids=create_diamond_solids(), dependencies=diamond_deps())

    def iterate_diamond():
        # Deferred so that ``_do_test`` decides when execution actually starts.
        return execute_pipeline_iterator(diamond)

    return _do_test(diamond, iterate_diamond)
Esempio n. 19
0
def test_pipeline_execution_graph_diamond():
    """Run the shared ``_do_test`` checks against the diamond-shaped pipeline."""
    diamond_solids = create_diamond_solids()
    diamond_dependencies = diamond_deps()
    diamond = PipelineDefinition(solid_defs=diamond_solids, dependencies=diamond_dependencies)
    return _do_test(diamond)
Esempio n. 20
0
def test_create_pipeline_with_empty_solids_list():
    """A pipeline built from an empty solids list and no dependencies executes successfully."""
    empty_pipeline = PipelineDefinition(solids=[], dependencies={})

    outcome = execute_pipeline(empty_pipeline)

    assert outcome.success
Esempio n. 21
0
def test_reexecution_fs_storage_with_solid_selection():
    """Check re-execution with filesystem storage, with and without a solid selection.

    The pipeline is ``return_one -> add_one``. Four scenarios are covered:

    1. The parent run had no solid selection; re-execute only ``return_one.compute``.
    2. The parent run selected only ``return_one``; re-execute it in full.
    3. The parent run selected only ``return_one``; re-executing ``add_one.compute`` must
       raise ``DagsterInvalidSubsetError``.
    4. Re-execute ``return_one.compute`` on top of the re-execution from scenario 2.
    """

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={"add_one": {"num": DependencyDefinition("return_one")}},
    )
    run_config = {"storage": {"filesystem": {}}}
    instance = DagsterInstance.ephemeral()
    # Case 1: re-execute a part of a pipeline when the original pipeline doesn't have solid selection
    pipeline_result = execute_pipeline(pipeline_def, run_config, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid("add_one").output_value() == 2

    # This is how this is actually done in dagster_graphql.implementation.pipeline_execution_manager
    reexecution_result_no_solid_selection = reexecute_pipeline(
        pipeline_def,
        parent_run_id=pipeline_result.run_id,
        run_config=run_config,
        step_selection=["return_one.compute"],
        instance=instance,
    )
    assert reexecution_result_no_solid_selection.success
    assert len(reexecution_result_no_solid_selection.solid_result_list) == 2
    assert reexecution_result_no_solid_selection.result_for_solid("add_one").skipped
    assert reexecution_result_no_solid_selection.result_for_solid("return_one").output_value() == 1

    # Case 2: re-execute a pipeline when the original pipeline has solid selection
    pipeline_result_solid_selection = execute_pipeline(
        pipeline_def, run_config=run_config, instance=instance, solid_selection=["return_one"],
    )
    assert pipeline_result_solid_selection.success
    assert len(pipeline_result_solid_selection.solid_result_list) == 1
    with pytest.raises(DagsterInvariantViolationError):
        pipeline_result_solid_selection.result_for_solid("add_one")
    assert pipeline_result_solid_selection.result_for_solid("return_one").output_value() == 1

    reexecution_result_solid_selection = reexecute_pipeline(
        pipeline_def,
        parent_run_id=pipeline_result_solid_selection.run_id,
        run_config=run_config,
        instance=instance,
    )

    assert reexecution_result_solid_selection.success
    assert len(reexecution_result_solid_selection.solid_result_list) == 1
    # Check the *re-execution* result here: the parent result was already verified above, and
    # querying it again would leave the re-execution's solid selection untested.
    with pytest.raises(DagsterInvariantViolationError):
        reexecution_result_solid_selection.result_for_solid("add_one")
    assert reexecution_result_solid_selection.result_for_solid("return_one").output_value() == 1

    # Case 3: re-execute a pipeline partially when the original pipeline has solid selection and
    #   re-execute a step which hasn't been included in the original pipeline
    with pytest.raises(
        DagsterInvalidSubsetError,
        match=re.escape("No qualified steps to execute found for step_selection"),
    ):
        reexecute_pipeline(
            pipeline_def,
            parent_run_id=pipeline_result_solid_selection.run_id,
            run_config=run_config,
            step_selection=["add_one.compute"],
            instance=instance,
        )

    # Case 4: re-execute a pipeline partially when the original pipeline has solid selection and
    #   re-execute a step which has been included in the original pipeline
    re_reexecution_result = reexecute_pipeline(
        pipeline_def,
        parent_run_id=reexecution_result_solid_selection.run_id,
        run_config=run_config,
        instance=instance,
        step_selection=["return_one.compute"],
    )

    assert re_reexecution_result.success
    assert len(re_reexecution_result.solid_result_list) == 1
    assert re_reexecution_result.result_for_solid("return_one").output_value() == 1
Esempio n. 22
0
def test_singleton_pipeline():
    '''A pipeline consisting of a single stub solid executes successfully.'''
    stub_rows = [{'a key': 'a value'}]
    stub_solid = define_stub_solid('stub', stub_rows)

    one_solid_pipeline = PipelineDefinition(solids=[stub_solid], dependencies={})
    outcome = execute_pipeline(one_solid_pipeline)

    assert outcome.success
Esempio n. 23
0
def define_bad_kernel_pipeline():
    '''Build 'bad_kernel_pipeline' containing only ``bad_kernel_solid``.'''
    pipeline_solids = [bad_kernel_solid]
    return PipelineDefinition(name='bad_kernel_pipeline', solid_defs=pipeline_solids)
Esempio n. 24
0
def execute_isolated_solid(solid_def, environment_dict=None):
    '''Execute ``solid_def`` on its own inside a pipeline named 'test'.

    Args:
        solid_def: The solid definition to wrap and run.
        environment_dict: Forwarded unchanged to ``execute_pipeline``.

    Returns:
        Whatever ``execute_pipeline`` returns for the wrapping pipeline.
    '''
    isolated_pipeline = PipelineDefinition(name='test', solid_defs=[solid_def])
    return execute_pipeline(isolated_pipeline, environment_dict=environment_dict)
Esempio n. 25
0
def define_hello_world_with_output_pipeline():
    '''Build a pipeline whose only solid comes from ``define_hello_world_with_output()``.'''
    with_output_solid = define_hello_world_with_output()
    return PipelineDefinition(
        name='hello_world_with_output_pipeline',
        solid_defs=[with_output_solid],
    )
Esempio n. 26
0
def test_optional_and_required_context():
    '''Check required-ness of the environment type for a mode mixing two kinds of resources.

    One resource has only an optional config field, the other has a required one. The
    environment type must mark each level accordingly, and building the environment config
    with only the required resource configured must fill in an empty config for the other.
    '''
    optional_resource = ResourceDefinition(
        lambda: None,
        config_schema={'optional_field': Field(String, is_required=False)},
    )
    required_resource = ResourceDefinition(
        lambda: None,
        config_schema={'required_field': String},
    )
    mixed_mode = ModeDefinition(
        name='mixed',
        resource_defs={
            'optional_resource': optional_resource,
            'required_resource': required_resource,
        },
    )
    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        solid_defs=[],
        mode_defs=[mixed_mode],
    )

    env_type = create_environment_type(pipeline_def)

    for top_level_key in ('solids', 'execution'):
        assert env_type.fields[top_level_key].is_required is False

    assert nested_field(env_type, 'resources').is_required

    # Walk down resources -> optional_resource -> config -> optional_field: none is required.
    optional_path = ('resources', 'optional_resource', 'config', 'optional_field')
    for depth in range(2, len(optional_path) + 1):
        assert nested_field(env_type, *optional_path[:depth]).is_required is False

    # Walk down resources -> required_resource -> config -> required_field: all are required.
    required_path = ('resources', 'required_resource', 'config', 'required_field')
    for depth in range(2, len(required_path) + 1):
        assert nested_field(env_type, *required_path[:depth]).is_required

    supplied_config = {
        'resources': {'required_resource': {'config': {'required_field': 'foo'}}},
    }
    env_obj = EnvironmentConfig.build(pipeline_def, supplied_config)

    expected_resources = {
        'optional_resource': {'config': {}},
        'required_resource': {'config': {'required_field': 'foo'}},
    }
    assert env_obj.resources == expected_resources
Esempio n. 27
0
def define_hello_logging_pipeline():
    '''Build a pipeline whose only solid comes from ``define_hello_logging_solid()``.'''
    logging_solid = define_hello_logging_solid()
    return PipelineDefinition(
        name='hello_logging_pipeline',
        solid_defs=[logging_solid],
    )
Esempio n. 28
0
def test_execute_isolated_solids_with_bad_solid_names():
    '''Supplying inputs for a solid the pipeline does not contain raises an invariant error.'''
    expected_message = 'but that solid was not found'

    with pytest.raises(DagsterInvariantViolationError, match=expected_message):
        empty_pipeline = PipelineDefinition([])
        inputs_for_unknown_solid = {'foo': {'bar': 'baz'}}
        execute_solids_within_pipeline(empty_pipeline, [], inputs_for_unknown_solid)
Esempio n. 29
0
def execute_solid(
    solid_def,
    mode_def=None,
    input_values=None,
    tags=None,
    run_config=None,
    raise_on_error=True,
    environment_dict=None,
):
    '''Run one solid inside a throwaway pipeline and return that solid's result.

    Meant for unit tests: each entry of ``input_values`` is turned into a small solid that
    returns the given value and is wired to the matching input of ``solid_def``, so no
    pipeline has to be written by hand.

    Args:
        solid_def (SolidDefinition): The solid to execute.
        mode_def (Optional[ModeDefinition]): The mode within which to execute the solid. When
            given, it is the only mode of the ephemeral pipeline and is selected by name.
        input_values (Optional[Dict[str, Any]]): Maps input names to the values to feed in
            directly. Configurable inputs may instead be supplied through ``run_config``.
        tags (Optional[Dict[str, Any]]): Arbitrary key-value pairs forwarded to the pipeline
            execution.
        run_config (Optional[dict]): The environment configuration that parameterizes this
            execution, as a dict.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``True``.
        environment_dict (Optional[dict]): Backwards-compatibility counterpart of
            ``run_config``; both are handed to ``canonicalize_run_config``.

    Returns:
        Union[CompositeSolidExecutionResult, SolidExecutionResult]: The result of executing the
        solid.
    '''
    check.inst_param(solid_def, 'solid_def', ISolidDefinition)
    check.opt_inst_param(mode_def, 'mode_def', ModeDefinition)
    input_values = check.opt_dict_param(input_values, 'input_values', key_type=str)
    # backcompat: fold the legacy ``environment_dict`` argument into ``run_config``
    run_config = canonicalize_run_config(run_config, environment_dict)

    def constant_solid(solid_name, value):
        # A zero-input solid named after the input it feeds, returning the fixed value.
        @lambda_solid(name=solid_name)
        def input_solid():
            return value

        return input_solid

    pipeline_solids = [solid_def]
    dependencies = defaultdict(dict)

    for input_name, input_value in input_values.items():
        dependencies[solid_def.name][input_name] = DependencyDefinition(input_name)
        pipeline_solids.append(constant_solid(input_name, input_value))

    if mode_def:
        mode_defs = [mode_def]
    else:
        mode_defs = None

    ephemeral_pipeline = PipelineDefinition(
        name='ephemeral_{}_solid_pipeline'.format(solid_def.name),
        solid_defs=pipeline_solids,
        dependencies=dependencies,
        mode_defs=mode_defs,
    )

    if mode_def:
        mode_name = mode_def.name
    else:
        mode_name = None

    pipeline_result = execute_pipeline(
        ephemeral_pipeline,
        run_config=run_config,
        mode=mode_name,
        tags=tags,
        raise_on_error=raise_on_error,
    )
    return pipeline_result.result_for_handle(solid_def.name)
Esempio n. 30
0
def define_bad_pipeline():
    '''Build the pipeline named 'bad', whose single solid has a mismatched config default.

    The solid's config field is declared as ``Int`` but given the string ``'number'`` as its
    default value -- presumably on purpose, to exercise error handling.
    '''
    mismatched_field = Field(Int, default_value='number')

    @solid(config_field=mismatched_field)
    def bad_context():
        pass

    return PipelineDefinition(name='bad', solids=[bad_context])