Ejemplo n.º 1
0
def test_io_error_is_decent():
    """Check that unmapped input/output definitions produce a helpful error.

    Passing a bare ``InputDefinition`` / ``OutputDefinition`` as a mapping is
    expected to raise ``DagsterInvalidDefinitionError`` whose message mentions
    the mapping method that should have been used.
    """
    # Bare input definition: the error message should mention `mapping_to`.
    with pytest.raises(DagsterInvalidDefinitionError, match="mapping_to"):
        bare_input = InputDefinition("should_be_mapped")
        CompositeSolidDefinition(
            name="comp_a_outer",
            solid_defs=[],
            input_mappings=[bare_input],
        )

    # Bare output definition: the error message should mention `mapping_from`.
    with pytest.raises(DagsterInvalidDefinitionError, match="mapping_from"):
        CompositeSolidDefinition(
            name="comp_a_outer",
            solid_defs=[],
            output_mappings=[OutputDefinition()],
        )
Ejemplo n.º 2
0
def test_composite_io_mapping():
    """Execute a pipeline that wires a doubly nested output into a doubly nested input.

    ``comp_a_outer`` re-exports the output of solid ``A`` through two composite
    layers; ``comp_bc_outer`` forwards its ``outer_B_in`` input through two
    layers down to input ``A`` of solid ``B``.
    """
    source_stub = define_stub_solid('A_source', [input_set('A_input')])
    solid_a = create_root_solid('A')
    solid_b = create_solid_with_deps('B', solid_a)
    solid_c = create_solid_with_deps('C', solid_b)

    # Producer side: A (fed by the stub) wrapped twice, output mapped at each level.
    inner_producer = CompositeSolidDefinition(
        name='comp_a_inner',
        solid_defs=[source_stub, solid_a],
        dependencies={'A': {'A_input': DependencyDefinition('A_source')}},
        output_mappings=[OutputDefinition().mapping_from('A')],
    )
    outer_producer = CompositeSolidDefinition(
        name='comp_a_outer',
        solid_defs=[inner_producer],
        output_mappings=[OutputDefinition().mapping_from('comp_a_inner')],
    )

    # Consumer side: B -> C wrapped twice, input mapped at each level.
    inner_input = InputDefinition(name='inner_B_in').mapping_to(
        solid_name='B', input_name='A')
    inner_consumer = CompositeSolidDefinition(
        name='comp_bc_inner',
        solid_defs=[solid_b, solid_c],
        dependencies={'C': {'B': DependencyDefinition('B')}},
        input_mappings=[inner_input],
    )
    outer_input = InputDefinition(name='outer_B_in').mapping_to(
        solid_name='comp_bc_inner', input_name='inner_B_in')
    outer_consumer = CompositeSolidDefinition(
        name='comp_bc_outer',
        solid_defs=[inner_consumer],
        dependencies={},
        input_mappings=[outer_input],
    )

    @pipeline
    def wrapped_io():
        outer_consumer(outer_producer())

    run_result = execute_pipeline(wrapped_io)
    assert run_result.success
Ejemplo n.º 3
0
def test_composite_basic_execution():
    """Execute a diamond-shaped composite in several pipeline arrangements.

    Covers: the composite alone, the composite invoked twice under aliases,
    the composite alongside a composite that wraps it, and an empty composite.
    """
    source_stub = define_stub_solid('A_source', [input_set('A_input')])
    solid_a = create_root_solid('A')
    solid_b = create_solid_with_deps('B', solid_a)
    solid_c = create_solid_with_deps('C', solid_a)
    solid_d = create_solid_with_deps('D', solid_b, solid_c)

    # A fans out to B and C, which both feed D.
    diamond_deps = {
        'A': {'A_input': DependencyDefinition('A_source')},
        'B': {'A': DependencyDefinition('A')},
        'C': {'A': DependencyDefinition('A')},
        'D': {
            'B': DependencyDefinition('B'),
            'C': DependencyDefinition('C'),
        },
    }
    diamond = CompositeSolidDefinition(
        name='diamond_composite',
        solid_defs=[source_stub, solid_a, solid_b, solid_c, solid_d],
        dependencies=diamond_deps,
    )

    # The composite on its own.
    standalone = PipelineDefinition(solid_defs=[diamond])
    assert execute_pipeline(standalone).success

    # The same composite invoked twice under distinct aliases.
    aliased_twice = PipelineDefinition(
        solid_defs=[diamond],
        dependencies={
            SolidInvocation('diamond_composite', alias='D1'): {},
            SolidInvocation('diamond_composite', alias='D2'): {},
        },
    )
    assert execute_pipeline(aliased_twice).success

    # The composite next to another composite that wraps it.
    wrapper = CompositeSolidDefinition(
        name='wrapped_composite', solid_defs=[diamond])
    side_by_side = PipelineDefinition(solid_defs=[diamond, wrapper])
    assert execute_pipeline(side_by_side).success

    # A composite containing no solids at all.
    hollow = CompositeSolidDefinition(name='empty', solid_defs=[])
    assert execute_pipeline(PipelineDefinition(solid_defs=[hollow])).success
Ejemplo n.º 4
0
def test_composite_io_mapping():
    """Run a pipeline connecting nested output mappings to nested input mappings.

    The output of solid ``A`` is surfaced through ``comp_a_inner`` and
    ``comp_a_outer``; it is then passed to ``comp_bc_outer``, whose input is
    routed via ``comp_bc_inner`` to input ``A`` of solid ``B``.
    """
    stub = define_stub_solid("A_source", [input_set("A_input")])
    root = create_root_solid("A")
    after_root = create_solid_with_deps("B", root)
    last = create_solid_with_deps("C", after_root)

    # Two-level wrapper exposing A's output.
    a_inner = CompositeSolidDefinition(
        name="comp_a_inner",
        solid_defs=[stub, root],
        dependencies={"A": {"A_input": DependencyDefinition("A_source")}},
        output_mappings=[OutputDefinition().mapping_from("A")],
    )
    a_outer = CompositeSolidDefinition(
        name="comp_a_outer",
        solid_defs=[a_inner],
        output_mappings=[OutputDefinition().mapping_from("comp_a_inner")],
    )

    # Two-level wrapper accepting an input that ends up at B's "A" input.
    b_input = InputDefinition(name="inner_B_in").mapping_to(
        solid_name="B", input_name="A")
    bc_inner = CompositeSolidDefinition(
        name="comp_bc_inner",
        solid_defs=[after_root, last],
        dependencies={"C": {"B": DependencyDefinition("B")}},
        input_mappings=[b_input],
    )
    bc_input = InputDefinition(name="outer_B_in").mapping_to(
        solid_name="comp_bc_inner", input_name="inner_B_in")
    bc_outer = CompositeSolidDefinition(
        name="comp_bc_outer",
        solid_defs=[bc_inner],
        dependencies={},
        input_mappings=[bc_input],
    )

    @pipeline
    def wrapped_io():
        bc_outer(a_outer())

    outcome = execute_pipeline(wrapped_io)
    assert outcome.success
Ejemplo n.º 5
0
def test_mapped_composite_input_expectations():
    """Check that expectations fire at every level of a mapped composite input.

    The value ``1`` is supplied through config to ``outer_one``, mapped to
    ``inner_one`` and finally to ``node_a``'s ``one`` input. Each of the three
    input definitions carries its own expectation, and all of them (plus the
    solid body) must have run.
    """
    called = {}

    def exp_a(_c, _v):
        called['exp_a'] = True
        return ExpectationResult(True)

    @solid(inputs=[InputDefinition('one', expectations=[ExpectationDefinition('exp_a', exp_a)])])
    def node_a(_context, one):
        called['node_a'] = True
        # Compare by value: `is` on an int literal tests object identity, which
        # only passes by interpreter accident and warns on modern Python.
        assert one == 1

    def inner_exp(_c, _v):
        called['inner_exp'] = True
        return ExpectationResult(True)

    def outer_exp(_c, _v):
        called['outer_exp'] = True
        return ExpectationResult(True)

    inner = CompositeSolidDefinition(
        name='inner',
        solids=[node_a],
        input_mappings=[
            InputDefinition(
                name='inner_one', expectations=[ExpectationDefinition('inner_exp', inner_exp)]
            ).mapping_to('node_a', 'one')
        ],
    )
    outer = CompositeSolidDefinition(
        name='outer',
        solids=[inner],
        input_mappings=[
            InputDefinition(
                'outer_one', expectations=[ExpectationDefinition('outer_exp', outer_exp)]
            ).mapping_to('inner', 'inner_one')
        ],
    )
    # Named `pipe` rather than `pipeline` to avoid shadowing the decorator name.
    pipe = PipelineDefinition(name='composites_pipeline', solids=[outer])

    result = execute_pipeline(
        pipe, {'solids': {'outer': {'inputs': {'outer_one': {'value': 1}}}}}
    )
    assert result.success
    assert called['node_a']
    assert called['exp_a']
    assert called['inner_exp']
    assert called['outer_exp']
Ejemplo n.º 6
0
def test_mapped_composite_config_input():
    """Supply a config value to an outer composite input and see it reach the leaf solid.

    ``outer_one`` is mapped to ``inner_one`` of the ``inner`` composite, which
    passes it to ``node_a``; the solid asserts it received ``1``.
    """
    called = {}

    @solid(input_defs=[InputDefinition("one")])
    def node_a(_context, one):
        called["node_a"] = True
        assert one == 1

    @composite_solid
    def inner(inner_one):
        node_a(inner_one)

    outer_mapping = InputDefinition("outer_one").mapping_to("inner", "inner_one")
    outer = CompositeSolidDefinition(
        name="outer",
        solid_defs=[inner],
        input_mappings=[outer_mapping],
    )
    pipe = PipelineDefinition(name="composites_pipeline", solid_defs=[outer])

    # Feed the literal 1 into the outermost input via run config.
    outer_inputs = {"outer_one": {"value": 1}}
    run_config = {"solids": {"outer": {"inputs": outer_inputs}}}

    result = execute_pipeline(pipe, run_config)
    assert result.success
    assert called["node_a"]
Ejemplo n.º 7
0
def test_types_descent():
    """A type used by a solid nested two composites deep is known to the pipeline."""
    @dagster_type
    class Foo(object):
        pass

    @solid(outputs=[OutputDefinition(Foo)])
    def inner_solid(_context):
        instance = Foo()
        return instance

    # Wrap the solid in two composite layers before building the pipeline.
    middle = CompositeSolidDefinition(name='middle_solid', solid_defs=[inner_solid])
    outer = CompositeSolidDefinition(name='outer_solid', solid_defs=[middle])
    layered = PipelineDefinition(name='layered_types', solid_defs=[outer])

    assert layered.has_runtime_type('Foo')
Ejemplo n.º 8
0
def test_mapped_composite_config_input():
    """Configure the outer composite's input and verify the inner solid sees the value.

    The config value ``1`` enters at ``outer_one``, is mapped onto the
    ``inner`` composite's ``inner_one`` input, and is handed to ``node_a``.
    """
    called = {}

    @solid(input_defs=[InputDefinition('one')])
    def node_a(_context, one):
        called['node_a'] = True
        assert one == 1

    @composite_solid
    def inner(inner_one):
        node_a(inner_one)

    to_inner = InputDefinition('outer_one').mapping_to('inner', 'inner_one')
    outer = CompositeSolidDefinition(
        name='outer',
        solid_defs=[inner],
        input_mappings=[to_inner],
    )
    pipe = PipelineDefinition(name='composites_pipeline', solid_defs=[outer])

    # Run config providing the value for the outermost input.
    environment = {
        'solids': {'outer': {'inputs': {'outer_one': {'value': 1}}}},
    }
    result = execute_pipeline(pipe, environment)

    assert result.success
    assert called['node_a']
Ejemplo n.º 9
0
def define_composite_materialization_pipeline(
    should_require_resources=True, resources_initted=None
):
    """Build a pipeline whose composite output uses a custom type with a materializer.

    Args:
        should_require_resources: When true, the materializer declares resource
            key ``'a'`` as required; otherwise it declares no required keys.
        resources_initted: Optional dict that records ``'a': True`` when the
            resource function runs. A fresh dict is used if omitted.

    Returns:
        The ``output_pipeline`` pipeline definition, which invokes ``wrap_solid``.
    """
    resources_initted = {} if resources_initted is None else resources_initted

    @resource
    def resource_a(_):
        # Record that initialization happened, then provide the resource value.
        resources_initted['a'] = True
        yield 'A'

    materializer_keys = {'a'} if should_require_resources else set()

    @dagster_type_materializer(String, required_resource_keys=materializer_keys)
    def materialize(context, *_args, **_kwargs):
        # The body reads resource `a` regardless of whether it was declared required.
        assert context.resources.a == 'A'
        return AssetMaterialization('hello')

    CustomDagsterType = create_any_type(name='CustomType', materializer=materialize)

    @solid(output_defs=[OutputDefinition(CustomDagsterType)])
    def output_solid(_context):
        return 'hello'

    # Expose output_solid's output as the composite's own output.
    composite_output = OutputDefinition(CustomDagsterType).mapping_from('output_solid')
    wrap_solid = CompositeSolidDefinition(
        name="wrap_solid",
        solid_defs=[output_solid],
        output_mappings=[composite_output],
    )

    mode = ModeDefinition(resource_defs={'a': resource_a})

    @pipeline(mode_defs=[mode])
    def output_pipeline():
        wrap_solid()

    return output_pipeline
Ejemplo n.º 10
0
def define_composites_pipeline():
    """Build ``composites_pipeline``: ``add_four`` feeding ``div_four``.

    ``add_four`` is two chained ``add_two`` composites, each of which is two
    chained ``add_one`` solids; ``div_four`` is two chained ``div_two`` solids.
    """
    @lambda_solid(inputs=[InputDefinition('num', Int)])
    def add_one(num):
        return num + 1

    @lambda_solid(inputs=[InputDefinition('num')])
    def div_two(num):
        return num / 2

    # Two aliased add_one invocations in sequence.
    add_one_chain = {
        SolidInstance('add_one', 'adder_1'): {},
        SolidInstance('add_one', 'adder_2'): {'num': DependencyDefinition('adder_1')},
    }
    add_two = CompositeSolidDefinition(
        'add_two',
        solids=[add_one],
        dependencies=add_one_chain,
        input_mappings=[InputDefinition('num', Int).mapping_to('adder_1', 'num')],
        output_mappings=[OutputDefinition(Int).mapping_from('adder_2')],
    )

    # Two aliased add_two invocations in sequence.
    add_two_chain = {
        SolidInstance('add_two', 'adder_1'): {},
        SolidInstance('add_two', 'adder_2'): {'num': DependencyDefinition('adder_1')},
    }
    add_four = CompositeSolidDefinition(
        'add_four',
        solids=[add_two],
        dependencies=add_two_chain,
        input_mappings=[InputDefinition('num', Int).mapping_to('adder_1', 'num')],
        output_mappings=[OutputDefinition(Int).mapping_from('adder_2')],
    )

    # Two aliased div_two invocations in sequence.
    div_two_chain = {
        SolidInstance('div_two', 'div_1'): {},
        SolidInstance('div_two', 'div_2'): {'num': DependencyDefinition('div_1')},
    }
    div_four = CompositeSolidDefinition(
        'div_four',
        solids=[div_two],
        dependencies=div_two_chain,
        input_mappings=[InputDefinition('num', Int).mapping_to('div_1', 'num')],
        output_mappings=[OutputDefinition(Float).mapping_from('div_2')],
    )

    top_level_deps = {'div_four': {'num': DependencyDefinition('add_four')}}
    return PipelineDefinition(
        name='composites_pipeline',
        solids=[add_four, div_four],
        dependencies=top_level_deps,
    )
Ejemplo n.º 11
0
def test_composite_config():
    """Check that solid config addressed through nested composites reaches the solid.

    The ``configured`` solid sits inside ``inner`` inside ``outer``; the run
    config nests ``solids`` keys accordingly and supplies ``'yes'``.
    """
    called = {}

    @solid(config_field=Field(String))
    def configured(context):
        called['configured'] = True
        # Compare by value: `is` against a str literal depends on interning and
        # is not a reliable equality check.
        assert context.solid_config == 'yes'

    inner = CompositeSolidDefinition(name='inner', solid_defs=[configured])
    outer = CompositeSolidDefinition(name='outer', solid_defs=[inner])
    pipe = PipelineDefinition(name='composites_pipeline', solid_defs=[outer])
    result = execute_pipeline(
        pipe,
        {'solids': {'outer': {'solids': {'inner': {'solids': {'configured': {'config': 'yes'}}}}}}},
    )
    assert result.success
    assert called['configured']
Ejemplo n.º 12
0
def test_composite_config_input():
    """Check that an input value configured through nested composites reaches the solid.

    ``node_a`` sits inside ``inner`` inside ``outer``; the run config nests
    ``solids`` keys accordingly and supplies the value ``1`` for input ``one``.
    """
    called = {}

    @solid(inputs=[InputDefinition('one')])
    def node_a(_context, one):
        called['node_a'] = True
        # Compare by value: `is` on an int literal tests object identity, which
        # only passes by interpreter accident and warns on modern Python.
        assert one == 1

    inner = CompositeSolidDefinition(name='inner', solid_defs=[node_a])
    outer = CompositeSolidDefinition(name='outer', solid_defs=[inner])
    pipe = PipelineDefinition(name='composites_pipeline', solid_defs=[outer])
    result = execute_pipeline(
        pipe,
        {
            'solids': {
                'outer': {
                    'solids': {'inner': {'solids': {'node_a': {'inputs': {'one': {'value': 1}}}}}}
                }
            }
        },
    )
    assert result.success
    assert called['node_a']
Ejemplo n.º 13
0
def test_cycle_detect():
    """Circular dependencies are rejected for both pipelines and composites.

    Two aliased invocations of ``add`` each consume the other's output; both
    ``PipelineDefinition`` and ``CompositeSolidDefinition`` are expected to
    raise ``DagsterInvalidDefinitionError`` mentioning circular dependencies.
    """
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add(a, b):
        return a + b

    def cyclic_dependencies():
        # "first" depends on "second" and "second" depends on "first".
        return {
            NodeInvocation("add", alias="first"): {
                "a": DependencyDefinition("return_one"),
                "b": DependencyDefinition("second"),
            },
            NodeInvocation("add", alias="second"): {
                "a": DependencyDefinition("first"),
                "b": DependencyDefinition("return_one"),
            },
        }

    expected_message = "Circular dependencies exist"

    with pytest.raises(DagsterInvalidDefinitionError, match=expected_message):
        PipelineDefinition(
            solid_defs=[return_one, add],
            name="test",
            dependencies=cyclic_dependencies(),
        )

    with pytest.raises(DagsterInvalidDefinitionError, match=expected_message):
        CompositeSolidDefinition(
            name="circletron",
            solid_defs=[return_one, add],
            dependencies=cyclic_dependencies(),
        )
Ejemplo n.º 14
0
def test_cycle_detect():
    """Definitions containing a dependency cycle must fail to construct.

    The aliases ``first`` and ``second`` of solid ``add`` feed each other. The
    same dependency shape is tried on a pipeline and on a composite, and each
    must raise ``DagsterInvalidDefinitionError``.
    """
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add(a, b):
        return a + b

    def build_cycle():
        # Fresh dict per call so each definition gets its own invocation objects.
        first_inputs = {
            'a': DependencyDefinition('return_one'),
            'b': DependencyDefinition('second'),
        }
        second_inputs = {
            'a': DependencyDefinition('first'),
            'b': DependencyDefinition('return_one'),
        }
        return {
            SolidInvocation('add', alias='first'): first_inputs,
            SolidInvocation('add', alias='second'): second_inputs,
        }

    with pytest.raises(DagsterInvalidDefinitionError,
                       match="Circular dependencies exist"):
        PipelineDefinition(solid_defs=[return_one, add],
                           dependencies=build_cycle())

    with pytest.raises(DagsterInvalidDefinitionError,
                       match="Circular dependencies exist"):
        CompositeSolidDefinition(name='circletron',
                                 solid_defs=[return_one, add],
                                 dependencies=build_cycle())
Ejemplo n.º 15
0
def test_composite_mapping_collision():
    """An input mapping may not target an input already fed by a dependency.

    Input ``a`` of ``add`` is wired to ``return_one`` and is also the target
    of an input mapping; construction is expected to fail.
    """
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add(a, b):
        return a + b

    with pytest.raises(DagsterInvalidDefinitionError, match="already satisfied by output"):
        # Both of add's inputs come from return_one ...
        add_inputs = {
            "a": DependencyDefinition("return_one"),
            "b": DependencyDefinition("return_one"),
        }
        # ... and "a" is additionally claimed by this mapping.
        colliding = InputDefinition("val").mapping_to("add", "a")
        CompositeSolidDefinition(
            name="add_one",
            solid_defs=[return_one, add],
            input_mappings=[colliding],
            dependencies={"add": add_inputs},
        )
Ejemplo n.º 16
0
def test_mapping_errors():
    """Each kind of invalid input/output mapping yields its specific error message.

    Every case builds a composite named ``bad`` around the single ``echo``
    solid and expects ``DagsterInvalidDefinitionError`` with a matching message.
    """
    @lambda_solid
    def echo(foo):
        return foo

    def build_bad(**mappings):
        # Shared constructor call; only the mappings differ between cases.
        return CompositeSolidDefinition(name='bad', solid_defs=[echo], **mappings)

    # --- input mappings -------------------------------------------------

    # Target solid is not part of the composite.
    with pytest.raises(
            DagsterInvalidDefinitionError,
            match="references solid 'inner' which it does not contain"):
        build_bad(input_mappings=[
            InputDefinition('mismatch').mapping_to('inner', 'foo'),
        ])

    # Target solid exists but has no such input.
    with pytest.raises(DagsterInvalidDefinitionError,
                       match="no input named 'bar'"):
        build_bad(input_mappings=[
            InputDefinition('mismatch').mapping_to('echo', 'bar'),
        ])

    # Mapped definition is typed `str`, unlike echo's untyped input.
    with pytest.raises(
            DagsterInvalidDefinitionError,
            match="InputMapping source and destination must have the same type"):
        build_bad(input_mappings=[
            InputDefinition('mismatch', str).mapping_to('echo', 'foo'),
        ])

    # Two mappings share the definition name 'mismatch'.
    with pytest.raises(
            DagsterInvalidDefinitionError,
            match="mappings with same definition name but different definitions"):
        build_bad(input_mappings=[
            InputDefinition('mismatch').mapping_to('echo', 'foo'),
            InputDefinition('mismatch').mapping_to('echo_2', 'foo'),
        ])

    # --- output mappings ------------------------------------------------

    # Source solid is not part of the composite.
    with pytest.raises(
            DagsterInvalidDefinitionError,
            match="references solid 'inner' which it does not contain"):
        build_bad(output_mappings=[
            OutputDefinition().mapping_from('inner', 'result'),
        ])

    # Source solid exists but has no such output.
    with pytest.raises(DagsterInvalidDefinitionError,
                       match="no output named 'return'"):
        build_bad(output_mappings=[
            OutputDefinition().mapping_from('echo', 'return'),
        ])

    # Mapped definition is typed `str`, unlike echo's untyped output.
    with pytest.raises(
            DagsterInvalidDefinitionError,
            match="OutputMapping source and destination must have the same type"):
        build_bad(output_mappings=[
            OutputDefinition(str).mapping_from('echo', 'result'),
        ])
Ejemplo n.º 17
0
                             fraction=context.solid_config['subsample_pct'] /
                             100.0)


# Composite solid chaining three solids:
#   download_from_s3_to_bytes -> unzip_file -> ingest_csv_to_spark
# It exposes two inputs (`bucket_data` for the download step, `archive_member`
# for the unzip step) and surfaces the output of `ingest_csv_to_spark`,
# declared as SparkDataFrameType, as its own output.
s3_to_df = CompositeSolidDefinition(
    name='s3_to_df',
    solids=[download_from_s3_to_bytes, unzip_file, ingest_csv_to_spark],
    dependencies={
        # unzip_file consumes the downloaded bytes as its archive file.
        'unzip_file': {
            'archive_file': DependencyDefinition('download_from_s3_to_bytes')
        },
        # The unzipped file is the CSV input of the Spark ingest step.
        'ingest_csv_to_spark': {
            'input_csv_file': DependencyDefinition('unzip_file')
        },
    },
    input_mappings=[
        InputDefinition('bucket_data',
                        S3BucketData).mapping_to('download_from_s3_to_bytes',
                                                 'bucket_data'),
        InputDefinition('archive_member',
                        String).mapping_to('unzip_file', 'archive_member'),
    ],
    output_mappings=[
        OutputDefinition(SparkDataFrameType).mapping_from(
            'ingest_csv_to_spark')
    ],
)

s3_to_dw_table = CompositeSolidDefinition(
    name='s3_to_dw_table',
    solids=[
Ejemplo n.º 18
0
@lambda_solid(input_defs=[InputDefinition('num', Int)])
def add_one(num):
    # Lambda solid: emit the `num` input increased by one.
    incremented = num + 1
    return incremented


@lambda_solid(input_defs=[InputDefinition('num')])
def div_two(num):
    # Lambda solid: emit the `num` input divided by two.
    halved = num / 2
    return halved


# Composite solid that runs `add_one` twice in sequence under the aliases
# `adder_1` and `adder_2`. Its `num` input is routed to `adder_1`, and the
# output of `adder_2` becomes the composite's Int output.
add_two = CompositeSolidDefinition(
    'add_two',
    solid_defs=[add_one],
    dependencies={
        SolidInvocation('add_one', 'adder_1'): {},
        # adder_2 consumes adder_1's output as its `num` input.
        SolidInvocation('add_one', 'adder_2'): {
            'num': DependencyDefinition('adder_1')
        },
    },
    input_mappings=[InputDefinition('num', Int).mapping_to('adder_1', 'num')],
    output_mappings=[OutputDefinition(Int).mapping_from('adder_2')],
)

add_four = CompositeSolidDefinition(
    'add_four',
    solid_defs=[add_two],
    dependencies={
        SolidInvocation('add_two', 'adder_1'): {},
        SolidInvocation('add_two', 'adder_2'): {
            'num': DependencyDefinition('adder_1')
        },
    },
Ejemplo n.º 19
0
    def build_composite_solid_definition(self,
                                         name,
                                         assets_to_update,
                                         include_nothing_input=False):
        """Build a composite solid definition for the assets in `assets_to_update`.

        By default the composite solid will not accept any inputs. If you need to run this composite
        _after_ other solids have run, pass `include_nothing_input=True`, which will create a
        single input of type `Nothing` to the composite solid, and a mapping to each 'source' asset
        input (i.e. those assets without `compute_fn`s, such as those created by `source_asset` or
        `source_table`).

        Args:
            name: Name given to the returned composite solid definition.
            assets_to_update: Assets to build solids for; forwarded unchanged to
                `self._get_solid_deps_and_defs`.
            include_nothing_input: When true, add a single `Nothing`-typed input named
                "nothing" and map it to every generated solid whose first input is a
                `Nothing`-typed input named "nothing". Also forwarded to
                `self._get_solid_deps_and_defs`.

        Returns:
            A `CompositeSolidDefinition` containing the generated solids and their
            dependencies. `input_mappings` is `None` unless `include_nothing_input` is set.

        Examples:

            .. code-block:: python

                @solid(required_resource_keys={"filesystem", "pyspark"})
                def save_orders(context) -> Nothing:
                    orders = context.resources.pyspark.spark_session.createDataFrame([
                        Row(id=1, name="foo"), Row(id=2, name="bar"), Row(id=3, name="baz"),
                    ])
                    path = context.resources.filesystem.get_fs_path(("orders.csv",))
                    orders.write.format("csv").options(header="true").save(path, mode="overwrite")

                orders_asset = source_asset(path="orders.csv")

                @computed_asset(input_assets=[orders_asset])
                def orders_top1_asset(orders: DataFrame) -> DataFrame:
                    return orders.limit(1)

                run_lakehouse = lakehouse.build_composite_solid_definition(
                    name="lakehouse_solid",
                    assets_to_update=[orders_top1_asset],
                    include_nothing_input=True,
                )

                @pipeline(mode_defs=[mode_def], preset_defs=[preset_def])
                def simple_pipeline():
                    run_lakehouse(save_orders())

                # If you have multiple solids which need to run first:

                @lambda_solid
                def other_side_effect() -> Nothing:
                    # Perhaps this writes to a database or some other required source table.
                    pass

                @lambda_solid(
                    input_defs=[InputDefinition("orders", Nothing), InputDefinition("other", Nothing)]
                )
                def wait_until_complete() -> Nothing:
                    pass

                @pipeline(mode_defs=[mode_def], preset_defs=[preset_def])
                def pipeline_multi_deps():
                    completed = wait_until_complete(orders=save_orders(), other=other_side_effect())
                    run_lakehouse(completed)

        """
        solid_defs, solid_deps = self._get_solid_deps_and_defs(
            assets_to_update, include_nothing_input)

        input_mappings = None
        if include_nothing_input:
            # Map a single `InputDefinition`, of type `Nothing`, to every
            # solid in the solid definitions we just created that have an input
            # named 'nothing'.
            # Ideally we'd do this based on something more explicit than the names and types
            # of solid inputs, but it's vanishingly unlikely that users will return `Nothing`
            # from an asset, since there would be nothing to save to the Lakehouse that way.
            nothing_input = InputDefinition("nothing", Nothing)
            input_mappings = [
                # NOTE(review): the dict keys are joined with "__" to form the target solid
                # name, so they are presumably tuples of path segments - confirm.
                nothing_input.mapping_to("__".join(solid_name), "nothing")
                for solid_name, solid_def in solid_defs.items()
                # Skip solids with no inputs at all instead of raising IndexError on
                # `input_defs[0]`; only the first input is inspected, as before.
                if solid_def.input_defs
                and solid_def.input_defs[0].name == "nothing"
                and solid_def.input_defs[0].dagster_type.is_nothing
            ]

        return CompositeSolidDefinition(
            name=name,
            solid_defs=list(solid_defs.values()),
            dependencies=solid_deps,
            input_mappings=input_mappings,
        )
Ejemplo n.º 20
0
        's3': s3_resource,
        'db_info': redshift_db_info_resource,
        'tempfile': tempfile_resource,
    },
)

# Composite solid that invokes `s3_to_df` four times under distinct aliases
# (April, May, June on-time data plus the master coordinate data), feeds the
# four results into `join_q2_data`, and passes the joined result to an aliased
# invocation of `load_data_to_database_from_spark`.
process_on_time_data = CompositeSolidDefinition(
    name='process_on_time_data',
    solids=[s3_to_df, join_q2_data, load_data_to_database_from_spark],
    dependencies={
        # Four independent aliased invocations of s3_to_df, with no upstream deps.
        SolidInstance('s3_to_df', alias='april_on_time_s3_to_df'): {},
        SolidInstance('s3_to_df', alias='may_on_time_s3_to_df'): {},
        SolidInstance('s3_to_df', alias='june_on_time_s3_to_df'): {},
        SolidInstance('s3_to_df', alias='master_cord_s3_to_df'): {},
        # Each join input is wired to the matching aliased invocation above.
        'join_q2_data': {
            'april_data': DependencyDefinition('april_on_time_s3_to_df'),
            'may_data': DependencyDefinition('may_on_time_s3_to_df'),
            'june_data': DependencyDefinition('june_on_time_s3_to_df'),
            'master_cord_data': DependencyDefinition('master_cord_s3_to_df'),
        },
        # The loader, aliased `load_q2_on_time_data`, consumes the join output.
        SolidInstance('load_data_to_database_from_spark',
                      alias='load_q2_on_time_data'): {
            'data_frame': DependencyDefinition('join_q2_data')
        },
    },
)

sfo_weather_data = CompositeSolidDefinition(
    name='sfo_weather_data',
    solids=[
        download_from_s3_to_bytes,
        ingest_csv_to_spark,