Beispiel #1
0
def test_multiple_outputs_only_emit_one():
    '''A solid that yields only one of its two outputs runs one consumer and skips the other.'''
    invocations = {}

    def _emit_only_first(*_args):
        # 'output_two' is declared optional below and is deliberately never yielded.
        yield Output(output_name='output_one', value='foo')

    def _record_call(*_args, **_kwargs):
        invocations['one'] = True

    def _must_not_run(*_args, **_kwargs):
        raise Exception('do not call me')

    emitter = SolidDefinition(
        name='multiple_outputs',
        input_defs=[],
        output_defs=[
            OutputDefinition(name='output_one'),
            OutputDefinition(name='output_two', is_optional=True),
        ],
        compute_fn=_emit_only_first,
    )

    consumer_one = SolidDefinition(
        name='downstream_one',
        input_defs=[InputDefinition('some_input')],
        output_defs=[],
        compute_fn=_record_call,
    )

    consumer_two = SolidDefinition(
        name='downstream_two',
        input_defs=[InputDefinition('some_input')],
        output_defs=[],
        compute_fn=_must_not_run,
    )

    wiring = {
        'downstream_one': {
            'some_input': DependencyDefinition(emitter.name, output='output_one'),
        },
        'downstream_two': {
            'some_input': DependencyDefinition(emitter.name, output='output_two'),
        },
    }
    pipeline_def = PipelineDefinition(
        solid_defs=[emitter, consumer_one, consumer_two],
        dependencies=wiring,
    )

    result = execute_pipeline(pipeline_def)
    assert result.success

    assert invocations['one']
    solid_result = result.result_for_solid('multiple_outputs')
    assert set(solid_result.output_values.keys()) == {'output_one'}

    # Asking for an undeclared output and for a declared-but-unemitted output
    # are both invariant violations, each with its own message.
    for output_name, message in (
        ('not_defined', 'not_defined not defined in solid multiple_outputs'),
        ('output_two', 'Did not find result output_two'),
    ):
        with pytest.raises(DagsterInvariantViolationError, match=message):
            solid_result.output_value(output_name)

    unknown_solid_message = (
        'Try to get result for solid not_present in <<unnamed>>. No such solid.'
    )
    with pytest.raises(DagsterInvariantViolationError, match=unknown_solid_message):
        result.result_for_solid('not_present')

    assert result.result_for_solid('downstream_two').skipped
def test_reexecution_fs_storage_with_solid_selection():
    """Exercise ``reexecute_pipeline`` against parent runs with and without a solid selection.

    Four cases are covered: partial re-execution of an unrestricted run, full
    re-execution of a run restricted by ``solid_selection``, and partial
    re-execution of such a run for a step outside and inside that selection.
    """

    @solid
    def return_one():
        return 1

    @solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        name="test",
        dependencies={"add_one": {
            "num": DependencyDefinition("return_one")
        }},
        mode_defs=[default_mode_def_for_test],
    )
    instance = DagsterInstance.ephemeral()
    # Case 1: re-execute a part of a pipeline when the original pipeline doesn't have solid selection
    pipeline_result = execute_pipeline(pipeline_def, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid("add_one").output_value() == 2

    # This is how this is actually done in dagster_graphql.implementation.pipeline_execution_manager
    reexecution_result_no_solid_selection = reexecute_pipeline(
        pipeline_def,
        parent_run_id=pipeline_result.run_id,
        step_selection=["return_one"],
        instance=instance,
    )
    assert reexecution_result_no_solid_selection.success
    assert len(reexecution_result_no_solid_selection.solid_result_list) == 2
    assert reexecution_result_no_solid_selection.result_for_solid(
        "add_one").skipped
    assert reexecution_result_no_solid_selection.result_for_solid(
        "return_one").output_value() == 1

    # Case 2: re-execute a pipeline when the original pipeline has solid selection
    pipeline_result_solid_selection = execute_pipeline(
        pipeline_def,
        instance=instance,
        solid_selection=["return_one"],
    )
    assert pipeline_result_solid_selection.success
    assert len(pipeline_result_solid_selection.solid_result_list) == 1
    with pytest.raises(DagsterInvariantViolationError):
        pipeline_result_solid_selection.result_for_solid("add_one")
    assert pipeline_result_solid_selection.result_for_solid(
        "return_one").output_value() == 1

    reexecution_result_solid_selection = reexecute_pipeline(
        pipeline_def,
        parent_run_id=pipeline_result_solid_selection.run_id,
        instance=instance,
    )

    assert reexecution_result_solid_selection.success
    assert len(reexecution_result_solid_selection.solid_result_list) == 1
    # Query the re-execution result itself: checking the parent result again
    # (as this test previously did) would pass regardless of what the
    # re-execution produced.
    with pytest.raises(DagsterInvariantViolationError):
        reexecution_result_solid_selection.result_for_solid("add_one")
    assert reexecution_result_solid_selection.result_for_solid(
        "return_one").output_value() == 1

    # Case 3: re-execute a pipeline partially when the original pipeline has solid selection and
    #   re-execute a step which hasn't been included in the original pipeline
    with pytest.raises(
            DagsterExecutionStepNotFoundError,
            match="Step selection refers to unknown step: add_one",
    ):
        reexecute_pipeline(
            pipeline_def,
            parent_run_id=pipeline_result_solid_selection.run_id,
            step_selection=["add_one"],
            instance=instance,
        )

    # Case 4: re-execute a pipeline partially when the original pipeline has solid selection and
    #   re-execute a step which has been included in the original pipeline
    re_reexecution_result = reexecute_pipeline(
        pipeline_def,
        parent_run_id=reexecution_result_solid_selection.run_id,
        instance=instance,
        step_selection=["return_one"],
    )

    assert re_reexecution_result.success
    assert len(re_reexecution_result.solid_result_list) == 1
    assert re_reexecution_result.result_for_solid(
        "return_one").output_value() == 1
Beispiel #3
0

# start_tag_pipeline
@pipeline
def tag_pipeline():
    # add_one is invoked twice: once directly on return_one()'s result, and once
    # through add_one.tag({"my_tag": "my_value"}) on the result of that first call.
    add_one.tag({"my_tag": "my_value"})(add_one(return_one()))


# end_tag_pipeline


# start_pipeline_definition_marker
# Explicitly constructed pipeline: add_one's "number" input depends on return_one.
one_plus_one_pipeline_def = PipelineDefinition(
    name="one_plus_one_pipeline",
    solid_defs=[return_one, add_one],
    dependencies={"add_one": {"number": DependencyDefinition("return_one")}},
)
# end_pipeline_definition_marker


# start_modes_pipeline
# One ModeDefinition per name: "dev", "staging" and "prod".
dev_mode = ModeDefinition("dev")
staging_mode = ModeDefinition("staging")
prod_mode = ModeDefinition("prod")


@pipeline(mode_defs=[dev_mode, staging_mode, prod_mode])
def my_modes_pipeline():
    # Single-solid pipeline declared with all three modes in mode_defs.
    my_solid()

def test_circular_dep():
    '''A solid whose input depends on its own output is rejected as a circular reference.'''
    # 'B' lists itself as the upstream of its own 'b_input'.
    self_referencing = {'A': {}, 'B': {'b_input': DependencyDefinition('B')}}

    with pytest.raises(DagsterInvalidDefinitionError, match='Circular reference'):
        PipelineDefinition(solids=solid_a_b_list(), dependencies=self_referencing)
def test_fan_in_manual():
    """Build a fan-in composite by hand and check the placeholder validation errors."""

    # DSL form of the composite, for comparison with the manual construction below.
    @composite_solid
    def _target_composite_dsl(str_in, none_in):
        num = emit_num()
        return collect([num, str_in, none_in])

    def _manual_composite(stuff_dependency):
        # Every case uses the same solids and mappings; only the wiring of
        # collect's "stuff" input differs.
        return CompositeSolidDefinition(
            name="manual_composite",
            solid_defs=[emit_num, collect],
            input_mappings=[
                InputDefinition("str_in").mapping_to("collect", "stuff", 1),
                InputDefinition("none_in").mapping_to("collect", "stuff", 2),
            ],
            output_mappings=[OutputDefinition().mapping_from("collect")],
            dependencies={"collect": {"stuff": stuff_dependency}},
        )

    # Base case: one placeholder per mapped input index (1 and 2) is accepted.
    _target_composite_manual = _manual_composite(
        MultiDependencyDefinition([
            DependencyDefinition("emit_num"),
            MappedInputPlaceholder,
            MappedInputPlaceholder,
        ]))

    # Index 2 is mapped but the list stops after index 1.
    with pytest.raises(
            DagsterInvalidDefinitionError,
            match="index 2 in the MultiDependencyDefinition is not a MappedInputPlaceholder",
    ):
        _manual_composite(
            MultiDependencyDefinition([
                DependencyDefinition("emit_num"),
                MappedInputPlaceholder,
            ]))

    # Fan-in mappings against a plain single dependency.
    with pytest.raises(DagsterInvalidDefinitionError,
                       match="is not a MultiDependencyDefinition"):
        _manual_composite(DependencyDefinition("emit_num"))

    # One placeholder more than there are input mappings.
    with pytest.raises(
            DagsterInvalidDefinitionError,
            match="Unsatisfied MappedInputPlaceholder at index 3",
    ):
        _manual_composite(
            MultiDependencyDefinition([
                DependencyDefinition("emit_num"),
                MappedInputPlaceholder,
                MappedInputPlaceholder,
                MappedInputPlaceholder,
            ]))
Beispiel #6
0
def define_composites_pipeline():
    '''Return the 'composites_pipeline': add_four feeding div_four, each a nested composite.'''

    @lambda_solid(inputs=[InputDefinition('num', Int)])
    def add_one(num):
        return num + 1

    @lambda_solid(inputs=[InputDefinition('num')])
    def div_two(num):
        return num / 2

    def _apply_twice(composite_name, inner_def, inner_name, aliases, output_type):
        # Composite that chains two aliased invocations of ``inner_def``: the
        # composite's 'num' input feeds the first alias, whose output feeds the
        # second alias, whose output is the composite's output.
        first_alias, second_alias = aliases
        return CompositeSolidDefinition(
            composite_name,
            solid_defs=[inner_def],
            dependencies={
                SolidInvocation(inner_name, first_alias): {},
                SolidInvocation(inner_name, second_alias): {
                    'num': DependencyDefinition(first_alias),
                },
            },
            input_mappings=[
                InputDefinition('num', Int).mapping_to(first_alias, 'num'),
            ],
            output_mappings=[
                OutputDefinition(output_type).mapping_from(second_alias),
            ],
        )

    add_two = _apply_twice('add_two', add_one, 'add_one', ('adder_1', 'adder_2'), Int)
    add_four = _apply_twice('add_four', add_two, 'add_two', ('adder_1', 'adder_2'), Int)
    div_four = _apply_twice('div_four', div_two, 'div_two', ('div_1', 'div_2'), Float)

    top_level_wiring = {'div_four': {'num': DependencyDefinition('add_four')}}
    return PipelineDefinition(
        name='composites_pipeline',
        solid_defs=[add_four, div_four],
        dependencies=top_level_wiring,
    )
def test_reexecution_fs_storage_with_solid_selection():
    '''Exercise ``reexecute_pipeline`` on filesystem storage with and without a solid selection.

    Four cases are covered: partial re-execution of an unrestricted run, full
    re-execution of a run restricted by ``solid_selection``, and partial
    re-execution of such a run for a step outside and inside that selection.
    '''

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={'add_one': {
            'num': DependencyDefinition('return_one')
        }},
    )
    run_config = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.ephemeral()
    # Case 1: re-execute a part of a pipeline when the original pipeline doesn't have solid selection
    pipeline_result = execute_pipeline(pipeline_def,
                                       run_config,
                                       instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').output_value() == 2

    # This is how this is actually done in dagster_graphql.implementation.pipeline_execution_manager
    reexecution_result_no_solid_selection = reexecute_pipeline(
        pipeline_def,
        parent_run_id=pipeline_result.run_id,
        run_config=run_config,
        step_keys_to_execute=['return_one.compute'],
        instance=instance,
    )
    assert reexecution_result_no_solid_selection.success
    assert len(reexecution_result_no_solid_selection.solid_result_list) == 2
    assert reexecution_result_no_solid_selection.result_for_solid(
        'add_one').skipped
    assert reexecution_result_no_solid_selection.result_for_solid(
        'return_one').output_value() == 1

    # Case 2: re-execute a pipeline when the original pipeline has solid selection
    pipeline_result_solid_selection = execute_pipeline(
        pipeline_def,
        run_config=run_config,
        instance=instance,
        solid_selection=['return_one'],
    )
    assert pipeline_result_solid_selection.success
    assert len(pipeline_result_solid_selection.solid_result_list) == 1
    with pytest.raises(DagsterInvariantViolationError):
        pipeline_result_solid_selection.result_for_solid('add_one')
    assert pipeline_result_solid_selection.result_for_solid(
        'return_one').output_value() == 1

    reexecution_result_solid_selection = reexecute_pipeline(
        pipeline_def,
        parent_run_id=pipeline_result_solid_selection.run_id,
        run_config=run_config,
        instance=instance,
    )

    assert reexecution_result_solid_selection.success
    assert len(reexecution_result_solid_selection.solid_result_list) == 1
    # Query the re-execution result itself: checking the parent result again
    # (as this test previously did) would pass regardless of what the
    # re-execution produced.
    with pytest.raises(DagsterInvariantViolationError):
        reexecution_result_solid_selection.result_for_solid('add_one')
    assert reexecution_result_solid_selection.result_for_solid(
        'return_one').output_value() == 1

    # Case 3: re-execute a pipeline partially when the original pipeline has solid selection and
    #   re-execute a step which hasn't been included in the original pipeline
    with pytest.raises(
            DagsterExecutionStepNotFoundError,
            match=re.escape(
                'Execution plan does not contain step: add_one.compute'),
    ):
        reexecute_pipeline(
            pipeline_def,
            parent_run_id=pipeline_result_solid_selection.run_id,
            run_config=run_config,
            step_keys_to_execute=['add_one.compute'],
            instance=instance,
        )

    # Case 4: re-execute a pipeline partially when the original pipeline has solid selection and
    #   re-execute a step which has been included in the original pipeline
    re_reexecution_result = reexecute_pipeline(
        pipeline_def,
        parent_run_id=reexecution_result_solid_selection.run_id,
        run_config=run_config,
        instance=instance,
        step_keys_to_execute=['return_one.compute'],
    )

    assert re_reexecution_result.success
    assert len(re_reexecution_result.solid_result_list) == 1
    assert re_reexecution_result.result_for_solid(
        'return_one').output_value() == 1
Beispiel #8
0
def test_solid_def():
    """Check the handles and dependency structure of a two-solid pipeline definition."""

    @lambda_solid
    def produce_string():
        return "foo"

    @solid(
        input_defs=[InputDefinition("input_one", String)],
        output_defs=[OutputDefinition(Any)],
        config_schema={"another_field": Int},
    )
    def solid_one(_context, input_one):
        # The pipeline is only inspected here, never executed.
        raise Exception("should not execute")

    definition = PipelineDefinition(
        solid_defs=[produce_string, solid_one],
        dependencies={
            "solid_one": {"input_one": DependencyDefinition("produce_string")},
        },
    )

    first_solid = definition.solids[0]
    assert len(first_solid.output_handles()) == 1

    consumer = definition.solid_named("solid_one")
    assert isinstance(consumer, Solid)

    assert consumer.has_input("input_one")
    assert isinstance(consumer.input_def_named("input_one"), InputDefinition)

    assert len(consumer.input_dict) == 1
    assert len(consumer.output_dict) == 1

    # str() and repr() of a handle are expected to render identically.
    expected_input_text = (
        "SolidInputHandle(input_name=\"'input_one'\", solid_name=\"'solid_one'\")"
    )
    input_handle = consumer.input_handle("input_one")
    assert str(input_handle) == expected_input_text
    assert repr(input_handle) == expected_input_text

    expected_output_text = (
        "SolidOutputHandle(output_name=\"'result'\", solid_name=\"'solid_one'\")"
    )
    output_handle = consumer.output_handle("result")
    assert str(output_handle) == expected_output_text
    assert repr(output_handle) == expected_output_text

    rebuilt_handle = SolidOutputHandle(consumer, consumer.output_dict["result"])
    assert consumer.output_handle("result") == rebuilt_handle

    structure = definition.dependency_structure
    assert len(structure.input_to_upstream_outputs_for_solid("solid_one")) == 1
    assert len(structure.output_to_downstream_inputs_for_solid("produce_string")) == 1
    assert len(structure.input_handles()) == 1
    assert len(structure.items()) == 1
Beispiel #9
0
def _define_nothing_dep_pipeline():
    """Build the ``simple_exc`` pipeline, whose solids are linked through ``Nothing`` inputs and outputs."""

    def _after_start():
        # A fresh dependency on start_nothing's "complete" output per consumer.
        return DependencyDefinition("start_nothing", "complete")

    @lambda_solid(output_def=OutputDefinition(Int))
    def emit_value():
        return 1

    @lambda_solid(output_def=OutputDefinition(Nothing, "complete"))
    def start_nothing():
        pass

    add_value_inputs = [
        InputDefinition("on_complete", Nothing),
        InputDefinition("num", Int),
    ]

    # Only "num" appears as a parameter; the Nothing-typed "on_complete" input does not.
    @lambda_solid(input_defs=add_value_inputs, output_def=OutputDefinition(Int))
    def add_value(num):
        return 1 + num

    @solid(
        name="yield_values",
        input_defs=[InputDefinition("on_complete", Nothing)],
        output_defs=[
            OutputDefinition(Int, "num_1"),
            OutputDefinition(Int, "num_2"),
            OutputDefinition(Nothing, "complete"),
        ],
    )
    def yield_values(_context):
        yield Output(1, "num_1")
        yield Output(2, "num_2")
        yield Output(None, "complete")

    end_nothing_inputs = [
        InputDefinition("add_complete", Nothing),
        InputDefinition("yield_complete", Nothing),
    ]

    @lambda_solid(input_defs=end_nothing_inputs)
    def end_nothing():
        pass

    wiring = {
        "add_value": {
            "on_complete": _after_start(),
            "num": DependencyDefinition("emit_value"),
        },
        "yield_values": {
            "on_complete": _after_start(),
        },
        "end_nothing": {
            "add_complete": DependencyDefinition("add_value"),
            "yield_complete": DependencyDefinition("yield_values", "complete"),
        },
    }

    return PipelineDefinition(
        name="simple_exc",
        solid_defs=[emit_value, add_value, start_nothing, end_nothing, yield_values],
        dependencies=wiring,
    )
Beispiel #10
0
def test_templated_sql_solid_pipeline():
    '''Run two chained templated-SQL solids, then re-run only the second one behind a stub.'''
    sum_template = '''CREATE TABLE {{sum_table}} AS
        SELECT num1, num2, num1 + num2 as sum FROM num_table'''

    sum_sq_template = '''CREATE TABLE {{sum_sq_table}} AS
        SELECT num1, num2, sum, sum * sum as sum_sq FROM {{sum_table}}'''

    sum_solid = create_templated_sql_transform_solid(
        name='sum_table',
        sql=sum_template,
        table_arguments=['sum_table'],
    )
    sum_sq_solid = create_templated_sql_transform_solid(
        name='sum_sq_table',
        sql=sum_sq_template,
        table_arguments=['sum_table', 'sum_sq_table'],
        dependant_solids=[sum_solid],
    )

    context = in_mem_context()
    expected_sum_sq_rows = [(1, 2, 3, 9), (3, 4, 7, 49)]

    # First run: both solids, sum_sq_table depending on sum_table.
    full_pipeline = pipeline_test_def(
        solids=[sum_solid, sum_sq_solid],
        context=context,
        dependencies={
            sum_sq_solid.name: {
                sum_solid.name: DependencyDefinition(sum_solid.name),
            },
        },
    )
    first_sum_table = 'first_sum_table'
    first_sum_sq_table = 'first_sum_sq_table'

    first_environment = {
        'solids': {
            'sum_table': {
                'config': {'sum_table': first_sum_table},
            },
            'sum_sq_table': {
                'config': {
                    'sum_table': first_sum_table,
                    'sum_sq_table': first_sum_sq_table,
                },
            },
        },
    }

    first_result = execute_pipeline(full_pipeline, environment_dict=first_environment)
    assert first_result.success

    assert len(first_result.solid_result_list) == 2
    sum_value = first_result.solid_result_list[0].transformed_value()
    assert sum_value == {'sum_table': first_sum_table}
    sum_sq_value = first_result.solid_result_list[1].transformed_value()
    assert sum_sq_value == {
        'sum_table': first_sum_table,
        'sum_sq_table': first_sum_sq_table,
    }

    assert _load_table(context, first_sum_table) == [(1, 2, 3), (3, 4, 7)]
    assert _load_table(context, first_sum_sq_table) == expected_sum_sq_rows

    # Second run: the sum solid is replaced by a stub, so only sum_sq_table's
    # SQL runs, reading the table produced by the first run.
    stubbed_pipeline = pipeline_test_def(
        solids=[define_stub_solid('pass_value', 'TODO'), sum_sq_solid],
        context=context,
        dependencies={
            sum_sq_solid.name: {
                sum_solid.name: DependencyDefinition('pass_value'),
            },
        },
    )

    second_sum_sq_table = 'second_sum_sq_table'
    second_environment = {
        'solids': {
            'sum_sq_table': {
                'config': {
                    'sum_table': first_sum_table,
                    'sum_sq_table': second_sum_sq_table,
                },
            },
        },
    }

    second_result = execute_pipeline(stubbed_pipeline, environment_dict=second_environment)
    assert second_result.success
    assert len(second_result.solid_result_list) == 2
    assert _load_table(context, second_sum_sq_table) == expected_sum_sq_rows
Beispiel #11
0
# pylint: disable=unused-argument

from dagster import DependencyDefinition, InputDefinition, PipelineDefinition, pipeline, solid


@solid
def return_one(context):
    # Source solid: takes no inputs and always produces the constant 1.
    return 1


@solid(input_defs=[InputDefinition("number", int)])
def add_one(context, number):
    # Produces its single declared input, "number", incremented by one.
    return number + 1


@pipeline
def one_plus_one_pipeline():
    # Feed return_one's output into add_one.
    add_one(return_one())


# Explicitly constructed pipeline: add_one's 'number' input depends on return_one.
one_plus_one_pipeline_def = PipelineDefinition(
    name='one_plus_one_pipeline',
    solid_defs=[return_one, add_one],
    dependencies={'add_one': {
        'number': DependencyDefinition('return_one')
    }},
)
def test_pandas_hello_no_library():
    '''Run a hand-built pandas pipeline in memory, then again with a CSV-writing solid.

    The CSV is written into a private temporary directory that is removed when
    the test finishes, rather than to a fixed path under /tmp, so the test does
    not depend on a POSIX temp location, does not collide with concurrent runs,
    and leaves nothing behind.
    '''
    import os
    import shutil
    import tempfile

    def solid_one_transform(_context, inputs):
        num_df = inputs['num_df']
        num_df['sum'] = num_df['num1'] + num_df['num2']
        return num_df

    solid_one = single_output_transform(
        name='solid_one',
        inputs=[InputDefinition(name='num_df')],
        transform_fn=solid_one_transform,
        output=OutputDefinition(),
    )

    def solid_two_transform(_context, inputs):
        sum_df = inputs['sum_df']
        sum_df['sum_sq'] = sum_df['sum'] * sum_df['sum']
        return sum_df

    solid_two = single_output_transform(
        name='solid_two',
        inputs=[InputDefinition(name='sum_df')],
        transform_fn=solid_two_transform,
        output=OutputDefinition(),
    )

    # Both runs are expected to produce the same columns.
    expected_columns = {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
        'sum_sq': [9, 49],
    }

    pipeline = PipelineDefinition(
        solids=[define_read_csv_solid('read_one'), solid_one, solid_two],
        dependencies={
            'solid_one': {
                'num_df': DependencyDefinition('read_one'),
            },
            'solid_two': {
                'sum_df': DependencyDefinition('solid_one'),
            },
        }
    )

    environment = config.Environment(
        solids={
            'read_one': config.Solid({
                'path': script_relative_path('num.csv')
            }),
        }
    )

    execute_pipeline_result = execute_pipeline(
        pipeline,
        environment=environment,
    )

    solid_two_value = execute_pipeline_result.result_for_solid('solid_two').transformed_value()
    assert solid_two_value.to_dict('list') == expected_columns

    output_dir = tempfile.mkdtemp()
    try:
        sum_sq_out_path = os.path.join(output_dir, 'sum_sq.csv')

        environment_two = config.Environment(
            solids={
                'read_one': config.Solid({
                    'path': script_relative_path('num.csv')
                }),
                'write_two': config.Solid({'path': sum_sq_out_path}),
            },
        )

        pipeline_two = PipelineDefinition(
            solids=[
                define_read_csv_solid('read_one'),
                solid_one,
                solid_two,
                define_to_csv_solid('write_two'),
            ],
            dependencies={
                'solid_one': {
                    'num_df': DependencyDefinition('read_one'),
                },
                'solid_two': {
                    'sum_df': DependencyDefinition('solid_one'),
                },
                'write_two': {
                    'df': DependencyDefinition('solid_two'),
                }
            }
        )

        execute_pipeline(pipeline_two, environment=environment_two)

        sum_sq_df = pd.read_csv(sum_sq_out_path)

        assert sum_sq_df.to_dict('list') == expected_columns
    finally:
        # Remove the directory even when an assertion above fails.
        shutil.rmtree(output_dir, ignore_errors=True)
Beispiel #13
0
def add_one(num):
    '''Return ``num`` incremented by one.'''
    incremented = num + 1
    return incremented


@lambda_solid(inputs=[InputDefinition('num')])
def div_two(num):
    # Halves its input with the ``/`` operator.
    return num / 2


# Composite 'add_two': two aliased instances of add_one chained together.
# The composite's 'num' input is mapped to adder_1, adder_1 feeds adder_2's
# 'num', and adder_2's output is mapped out as the composite's output.
add_two = CompositeSolidDefinition(
    'add_two',
    solids=[add_one],
    dependencies={
        SolidInstance('add_one', 'adder_1'): {},
        SolidInstance('add_one', 'adder_2'): {
            'num': DependencyDefinition('adder_1')
        },
    },
    input_mappings=[InputDefinition('num', Int).mapping_to('adder_1', 'num')],
    output_mappings=[OutputDefinition(Int).mapping_from('adder_2')],
)

add_four = CompositeSolidDefinition(
    'add_four',
    solids=[add_two],
    dependencies={
        SolidInstance('add_two', 'adder_1'): {},
        SolidInstance('add_two', 'adder_2'): {
            'num': DependencyDefinition('adder_1')
        },
    },
Beispiel #14
0
def test_failure_propagation():
    '''A failure in D skips its descendants E and F while the A -> B -> C branch succeeds.

    Graph: A feeds B and D; B feeds C; D (fails) feeds E; F takes both C and E.
    '''

    def _fail(_context, inputs):
        check.failed('user error')
        return inputs

    def _pass_through(_context, inputs):
        return inputs

    def _node(name, upstream_names, compute_fn):
        # Single-output solid with one input per upstream, each input named
        # after the solid that feeds it.
        return single_output_solid(
            name=name,
            input_defs=[InputDefinition(name=upstream) for upstream in upstream_names],
            compute_fn=compute_fn,
            output_def=OutputDefinition(),
        )

    solid_a = create_root_success_solid('A')
    solid_b = _node('B', ['A'], _pass_through)
    solid_c = _node('C', ['B'], _pass_through)
    solid_d = _node('D', ['A'], _fail)
    solid_e = _node('E', ['D'], _pass_through)
    solid_f = _node('F', ['C', 'E'], _pass_through)

    wiring = {
        'B': {'A': DependencyDefinition(solid_a.name)},
        'D': {'A': DependencyDefinition(solid_a.name)},
        'C': {'B': DependencyDefinition(solid_b.name)},
        'E': {'D': DependencyDefinition(solid_d.name)},
        'F': {
            'C': DependencyDefinition(solid_c.name),
            'E': DependencyDefinition(solid_e.name),
        },
    }
    pipeline_def = PipelineDefinition(
        solid_defs=[solid_a, solid_b, solid_c, solid_d, solid_e, solid_f],
        dependencies=wiring,
    )

    pipeline_result = execute_pipeline(pipeline_def, raise_on_error=False)

    for succeeded_name in ('A', 'B', 'C'):
        assert pipeline_result.result_for_solid(succeeded_name).success

    failed_result = pipeline_result.result_for_solid('D')
    assert not failed_result.success
    assert failed_result.failure_data.error.cls_name == 'CheckError'

    # Everything downstream of the failure is reported as skipped, not as successful.
    for skipped_name in ('E', 'F'):
        skipped_result = pipeline_result.result_for_solid(skipped_name)
        assert not skipped_result.success
        assert skipped_result.skipped
Beispiel #15
0
def execute_solid(
    solid_def,
    mode_def=None,
    input_values=None,
    environment_dict=None,
    run_config=None,
    raise_on_error=True,
):
    '''Run one solid on its own inside a throwaway pipeline.

    Meant for unit tests: no pipeline has to be supplied, and inputs can be
    handed over directly. Each entry of ``input_values`` becomes a stub solid,
    named after the input, that returns the given value and is wired to the
    matching input of ``solid_def``.

    Args:
        solid_def (SolidDefinition): The solid to execute.
        mode_def (Optional[ModeDefinition]): The mode to execute the solid in, e.g. to supply
            custom resources, loggers, or executors.
        input_values (Optional[Dict[str, Any]]): Mapping of input name to the value to feed
            into that input. Configurable inputs may instead be set through
            ``environment_dict``.
        environment_dict (Optional[dict]): The environment configuration that parameterizes
            this execution, as a dict.
        run_config (Optional[RunConfig]): Additional config options for pipeline execution.
        raise_on_error (Optional[bool]): Whether to raise exceptions when they occur.
            Defaults to ``True``, which is the most useful behavior in tests.

    Returns:
        Union[CompositeSolidExecutionResult, SolidExecutionResult]: The result of executing
        the solid.
    '''
    check.inst_param(solid_def, 'solid_def', ISolidDefinition)
    check.opt_inst_param(mode_def, 'mode_def', ModeDefinition)
    input_values = check.opt_dict_param(input_values, 'input_values', key_type=str)

    def _constant_solid(solid_name, constant):
        # Built in a factory so every stub closes over its own value instead of
        # the loop variable below.
        @lambda_solid(name=solid_name)
        def input_solid():
            return constant

        return input_solid

    dependencies = defaultdict(dict)
    stub_solids = []
    for name, value in input_values.items():
        stub_solids.append(_constant_solid(name, value))
        dependencies[solid_def.name][name] = DependencyDefinition(name)

    ephemeral_pipeline = PipelineDefinition(
        name='ephemeral_{}_solid_pipeline'.format(solid_def.name),
        solid_defs=[solid_def] + stub_solids,
        dependencies=dependencies,
        mode_defs=[mode_def] if mode_def else None,
    )
    pipeline_result = execute_pipeline(
        ephemeral_pipeline,
        environment_dict=environment_dict,
        run_config=run_config,
        raise_on_error=raise_on_error,
    )
    return pipeline_result.result_for_handle(solid_def.name)
Beispiel #16
0
def test_solid_def():
    '''Inspect the structural accessors of a solid and of the pipeline dependency graph.

    A string-producing solid is wired into the single input of ``solid_one``.
    The pipeline is only constructed, never executed, so the body of
    ``solid_one`` must not run.
    '''

    @lambda_solid
    def produce_string():
        return 'foo'

    @solid(
        input_defs=[InputDefinition('input_one', String)],
        output_defs=[OutputDefinition(Any)],
        config={'another_field': Field(Int)},
    )
    def solid_one(_context, input_one):
        raise Exception('should not execute')

    # solid_one.input_one <- produce_string (default output)
    wiring = {'solid_one': {'input_one': DependencyDefinition('produce_string')}}
    pipeline_def = PipelineDefinition(
        solid_defs=[produce_string, solid_one],
        dependencies=wiring,
    )

    first_solid = pipeline_def.solids[0]
    assert len(first_solid.output_handles()) == 1

    assert isinstance(pipeline_def.solid_named('solid_one'), Solid)
    solid_one_solid = pipeline_def.solid_named('solid_one')

    # Input-side accessors.
    assert solid_one_solid.has_input('input_one')
    input_def = solid_one_solid.input_def_named('input_one')
    assert isinstance(input_def, InputDefinition)

    assert len(solid_one_solid.input_dict) == 1
    assert len(solid_one_solid.output_dict) == 1

    # str() and repr() of a handle are expected to render identically.
    expected_input_handle_text = (
        'SolidInputHandle(definition_name="\'solid_one\'", input_name="\'input_one\'", '
        'solid_name="\'solid_one\'")'
    )
    for render in (str, repr):
        assert render(solid_one_solid.input_handle('input_one')) == expected_input_handle_text

    expected_output_handle_text = (
        'SolidOutputHandle(definition_name="\'solid_one\'", output_name="\'result\'", '
        'solid_name="\'solid_one\'")'
    )
    for render in (str, repr):
        assert render(solid_one_solid.output_handle('result')) == expected_output_handle_text

    # A handle built by hand from the same solid and output definition compares equal.
    handmade_handle = SolidOutputHandle(
        solid_one_solid, solid_one_solid.output_dict['result'])
    assert solid_one_solid.output_handle('result') == handmade_handle

    # The dependency structure holds exactly the one edge declared above.
    dep_structure = pipeline_def.dependency_structure
    assert len(dep_structure.deps_of_solid_with_input('solid_one')) == 1
    assert len(dep_structure.depended_by_of_solid('produce_string')) == 1
    assert len(dep_structure.input_handles()) == 1
    assert len(dep_structure.items()) == 1
Beispiel #17
0
        's3': s3_resource,
        'db_info': redshift_db_info_resource,
        'tempfile': tempfile_resource,
    },
)

# Composite solid 'process_on_time_data': four aliased instances of s3_to_df feed
# join_q2_data, whose output is passed to an aliased instance of
# load_data_to_database_from_spark.
process_on_time_data = CompositeSolidDefinition(
    name='process_on_time_data',
    solids=[s3_to_df, join_q2_data, load_data_to_database_from_spark],
    dependencies={
        # The same s3_to_df definition is instantiated once per alias; the empty
        # dicts declare no upstream dependencies for these instances.
        SolidInstance('s3_to_df', alias='april_on_time_s3_to_df'): {},
        SolidInstance('s3_to_df', alias='may_on_time_s3_to_df'): {},
        SolidInstance('s3_to_df', alias='june_on_time_s3_to_df'): {},
        SolidInstance('s3_to_df', alias='master_cord_s3_to_df'): {},
        # Each input of join_q2_data is wired to one of the aliased instances above.
        'join_q2_data': {
            'april_data': DependencyDefinition('april_on_time_s3_to_df'),
            'may_data': DependencyDefinition('may_on_time_s3_to_df'),
            'june_data': DependencyDefinition('june_on_time_s3_to_df'),
            'master_cord_data': DependencyDefinition('master_cord_s3_to_df'),
        },
        # The load step, aliased 'load_q2_on_time_data', receives the join result
        # on its 'data_frame' input.
        SolidInstance('load_data_to_database_from_spark',
                      alias='load_q2_on_time_data'): {
            'data_frame': DependencyDefinition('join_q2_data')
        },
    },
)

sfo_weather_data = CompositeSolidDefinition(
    name='sfo_weather_data',
    solids=[
        download_from_s3_to_bytes,
Beispiel #18
0
def test_deps_equal():
    '''Check that DependencyDefinition equality follows its constructor arguments.'''
    # (left args, right args, whether the two definitions should compare equal)
    cases = [
        (('foo',), ('foo',), True),
        (('foo',), ('bar',), False),
        (('foo', 'bar'), ('foo', 'bar'), True),
        (('foo', 'bar'), ('foo', 'quuz'), False),
    ]
    for left_args, right_args, should_match in cases:
        left = DependencyDefinition(*left_args)
        right = DependencyDefinition(*right_args)
        # Use == and != explicitly so both comparison operators are exercised.
        if should_match:
            assert left == right
        else:
            assert left != right