def test_io_error_is_decent():
    """Check that un-mapped definitions passed as composite mappings are rejected.

    A bare ``InputDefinition`` in ``input_mappings`` must raise
    ``DagsterInvalidDefinitionError`` with a message matching ``mapping_to``; a
    bare ``OutputDefinition`` in ``output_mappings`` must raise one matching
    ``mapping_from``.
    """
    composite_name = "comp_a_outer"
    expected_error = DagsterInvalidDefinitionError

    # Input side: the definition was never converted with mapping_to().
    with pytest.raises(expected_error, match="mapping_to"):
        bare_inputs = [InputDefinition("should_be_mapped")]
        CompositeSolidDefinition(
            name=composite_name, solid_defs=[], input_mappings=bare_inputs
        )

    # Output side: the definition was never converted with mapping_from().
    with pytest.raises(expected_error, match="mapping_from"):
        bare_outputs = [OutputDefinition()]
        CompositeSolidDefinition(
            name=composite_name, solid_defs=[], output_mappings=bare_outputs
        )
def test_composite_io_mapping():
    """Execute a pipeline that connects nested output mappings to nested input mappings.

    The producer side exposes the output of solid ``A`` through two composite
    layers (``comp_a_inner`` inside ``comp_a_outer``). The consumer side forwards
    an input through two composite layers (``comp_bc_outer`` wrapping
    ``comp_bc_inner``) to input ``A`` of solid ``B``. Feeding the producer into
    the consumer must run successfully.
    """
    # Leaf solids come from the module's stub helpers.
    source_solid = define_stub_solid('A_source', [input_set('A_input')])
    solid_a = create_root_solid('A')
    solid_b = create_solid_with_deps('B', solid_a)
    solid_c = create_solid_with_deps('C', solid_b)

    # Producer: A reads from A_source; A's output is mapped out of both layers.
    producer_deps = {'A': {'A_input': DependencyDefinition('A_source')}}
    producer_inner = CompositeSolidDefinition(
        name='comp_a_inner',
        solid_defs=[source_solid, solid_a],
        dependencies=producer_deps,
        output_mappings=[OutputDefinition().mapping_from('A')],
    )
    producer_outer = CompositeSolidDefinition(
        name='comp_a_outer',
        solid_defs=[producer_inner],
        output_mappings=[OutputDefinition().mapping_from('comp_a_inner')],
    )

    # Consumer: the outer input is forwarded to the inner input, then to B's "A".
    consumer_deps = {'C': {'B': DependencyDefinition('B')}}
    inner_input = InputDefinition(name='inner_B_in')
    consumer_inner = CompositeSolidDefinition(
        name='comp_bc_inner',
        solid_defs=[solid_b, solid_c],
        dependencies=consumer_deps,
        input_mappings=[inner_input.mapping_to(solid_name='B', input_name='A')],
    )
    outer_input = InputDefinition(name='outer_B_in')
    consumer_outer = CompositeSolidDefinition(
        name='comp_bc_outer',
        solid_defs=[consumer_inner],
        dependencies={},
        input_mappings=[
            outer_input.mapping_to(solid_name='comp_bc_inner', input_name='inner_B_in')
        ],
    )

    @pipeline
    def wrapped_io():
        produced = producer_outer()
        consumer_outer(produced)

    assert execute_pipeline(wrapped_io).success
def test_composite_basic_execution():
    """Execute a diamond-shaped composite in several pipeline arrangements.

    Covers: the composite on its own, the same composite invoked twice under
    aliases ``D1`` and ``D2``, the composite next to a composite that wraps it,
    and an empty composite. Every execution must succeed.
    """
    source_solid = define_stub_solid('A_source', [input_set('A_input')])
    solid_a = create_root_solid('A')
    solid_b = create_solid_with_deps('B', solid_a)
    solid_c = create_solid_with_deps('C', solid_a)
    solid_d = create_solid_with_deps('D', solid_b, solid_c)

    # A fans out to B and C, which both feed D.
    diamond_deps = {
        'A': {'A_input': DependencyDefinition('A_source')},
        'B': {'A': DependencyDefinition('A')},
        'C': {'A': DependencyDefinition('A')},
        'D': {'B': DependencyDefinition('B'), 'C': DependencyDefinition('C')},
    }
    diamond = CompositeSolidDefinition(
        name='diamond_composite',
        solid_defs=[source_solid, solid_a, solid_b, solid_c, solid_d],
        dependencies=diamond_deps,
    )

    # 1) The composite alone.
    single = PipelineDefinition(solid_defs=[diamond])
    assert execute_pipeline(single).success

    # 2) Two aliased invocations of the same composite.
    aliased = PipelineDefinition(
        solid_defs=[diamond],
        dependencies={
            SolidInvocation('diamond_composite', alias='D1'): {},
            SolidInvocation('diamond_composite', alias='D2'): {},
        },
    )
    assert execute_pipeline(aliased).success

    # 3) The composite alongside another composite that contains it.
    wrapper = CompositeSolidDefinition(name='wrapped_composite', solid_defs=[diamond])
    side_by_side = PipelineDefinition(solid_defs=[diamond, wrapper])
    assert execute_pipeline(side_by_side).success

    # 4) A composite with no solids at all.
    hollow = CompositeSolidDefinition(name='empty', solid_defs=[])
    hollow_pipeline = PipelineDefinition(solid_defs=[hollow])
    assert execute_pipeline(hollow_pipeline).success
def test_composite_io_mapping():
    """Verify that input and output mappings compose across two composite levels.

    ``comp_a_outer`` surfaces the output of ``A`` (via ``comp_a_inner``) and
    ``comp_bc_outer`` routes its ``outer_B_in`` input down to input ``A`` of
    ``B`` (via ``comp_bc_inner``). A pipeline chaining the two must succeed.
    """
    stub_source = define_stub_solid("A_source", [input_set("A_input")])
    first = create_root_solid("A")
    second = create_solid_with_deps("B", first)
    third = create_solid_with_deps("C", second)

    # Output side, built inner-first.
    a_inner = CompositeSolidDefinition(
        name="comp_a_inner",
        solid_defs=[stub_source, first],
        dependencies={"A": {"A_input": DependencyDefinition("A_source")}},
        output_mappings=[OutputDefinition().mapping_from("A")],
    )
    a_outer = CompositeSolidDefinition(
        name="comp_a_outer",
        solid_defs=[a_inner],
        output_mappings=[OutputDefinition().mapping_from("comp_a_inner")],
    )

    # Input side, built inner-first.
    to_solid_b = InputDefinition(name="inner_B_in").mapping_to(
        solid_name="B", input_name="A"
    )
    bc_inner = CompositeSolidDefinition(
        name="comp_bc_inner",
        solid_defs=[second, third],
        dependencies={"C": {"B": DependencyDefinition("B")}},
        input_mappings=[to_solid_b],
    )
    to_inner_composite = InputDefinition(name="outer_B_in").mapping_to(
        solid_name="comp_bc_inner", input_name="inner_B_in"
    )
    bc_outer = CompositeSolidDefinition(
        name="comp_bc_outer",
        solid_defs=[bc_inner],
        dependencies={},
        input_mappings=[to_inner_composite],
    )

    @pipeline
    def wrapped_io():
        bc_outer(a_outer())

    run_result = execute_pipeline(wrapped_io)
    assert run_result.success
def test_mapped_composite_input_expectations():
    """Check that expectations at every level of a mapped composite input run.

    Solid ``node_a`` declares an expectation on its input ``one``. The composite
    ``inner`` maps ``inner_one`` onto it with its own expectation, and ``outer``
    maps ``outer_one`` onto ``inner_one`` with a third. After running the
    pipeline with ``outer_one`` configured to ``1``, the solid body and all
    three expectation callbacks must have recorded themselves in ``called``.
    """
    called = {}

    def exp_a(_c, _v):
        called['exp_a'] = True
        return ExpectationResult(True)

    @solid(inputs=[InputDefinition('one', expectations=[ExpectationDefinition('exp_a', exp_a)])])
    def node_a(_context, one):
        called['node_a'] = True
        # Compare by value; `is` against an int literal only works by accident of
        # interpreter caching and triggers a SyntaxWarning on newer Pythons.
        assert one == 1

    def inner_exp(_c, _v):
        called['inner_exp'] = True
        return ExpectationResult(True)

    def outer_exp(_c, _v):
        called['outer_exp'] = True
        return ExpectationResult(True)

    inner = CompositeSolidDefinition(
        name='inner',
        solids=[node_a],
        input_mappings=[
            InputDefinition(
                name='inner_one', expectations=[ExpectationDefinition('inner_exp', inner_exp)]
            ).mapping_to('node_a', 'one')
        ],
    )

    outer = CompositeSolidDefinition(
        name='outer',
        solids=[inner],
        input_mappings=[
            InputDefinition(
                'outer_one', expectations=[ExpectationDefinition('outer_exp', outer_exp)]
            ).mapping_to('inner', 'inner_one')
        ],
    )

    # Named `pipe` (as in the sibling tests) so the local does not shadow any
    # module-level `pipeline` name.
    pipe = PipelineDefinition(name='composites_pipeline', solids=[outer])

    result = execute_pipeline(
        pipe, {'solids': {'outer': {'inputs': {'outer_one': {'value': 1}}}}}
    )

    assert result.success
    assert called['node_a']
    assert called['exp_a']
    assert called['inner_exp']
    assert called['outer_exp']
def test_mapped_composite_config_input():
    """Check that a config-supplied value reaches a solid through mapped inputs.

    ``outer`` maps ``outer_one`` to the ``inner_one`` input of the ``inner``
    composite, which passes it to ``node_a``. Configuring ``outer_one`` as ``1``
    must result in ``node_a`` running with that value.
    """
    called = {}

    @solid(input_defs=[InputDefinition("one")])
    def node_a(_context, one):
        called["node_a"] = True
        assert one == 1

    @composite_solid
    def inner(inner_one):
        node_a(inner_one)

    outer_mapping = InputDefinition("outer_one").mapping_to("inner", "inner_one")
    outer = CompositeSolidDefinition(
        name="outer",
        solid_defs=[inner],
        input_mappings=[outer_mapping],
    )

    # Environment config providing the value for the outermost input.
    outer_inputs = {"outer_one": {"value": 1}}
    environment = {"solids": {"outer": {"inputs": outer_inputs}}}

    pipe = PipelineDefinition(name="composites_pipeline", solid_defs=[outer])
    run_result = execute_pipeline(pipe, environment)

    assert run_result.success
    assert called["node_a"]
def test_types_descent():
    """Check that a pipeline sees a type declared two composite levels down.

    ``Foo`` is only referenced by the output of ``inner_solid``, which sits
    inside ``middle_solid`` inside ``outer_solid``. The pipeline built from
    ``outer_solid`` must still report having the runtime type ``Foo``.
    """

    @dagster_type
    class Foo(object):
        pass

    @solid(outputs=[OutputDefinition(Foo)])
    def inner_solid(_context):
        return Foo()

    # Wrap the solid in two composite layers, innermost first.
    middle_layer = CompositeSolidDefinition(name='middle_solid', solid_defs=[inner_solid])
    outer_layer = CompositeSolidDefinition(name='outer_solid', solid_defs=[middle_layer])

    layered = PipelineDefinition(name='layered_types', solid_defs=[outer_layer])

    assert layered.has_runtime_type('Foo')
def test_mapped_composite_config_input():
    """Verify config for a mapped composite input is delivered to the inner solid.

    The value configured for ``outer_one`` on ``outer`` is mapped to
    ``inner_one`` on ``inner`` and handed to ``node_a``, which asserts it
    received ``1`` and records that it ran.
    """
    called = {}

    @solid(input_defs=[InputDefinition('one')])
    def node_a(_context, one):
        called['node_a'] = True
        assert one == 1

    @composite_solid
    def inner(inner_one):
        node_a(inner_one)

    outer = CompositeSolidDefinition(
        name='outer',
        solid_defs=[inner],
        input_mappings=[InputDefinition('outer_one').mapping_to('inner', 'inner_one')],
    )
    pipe = PipelineDefinition(name='composites_pipeline', solid_defs=[outer])

    # Build the nested run config from the inside out.
    value_config = {'value': 1}
    inputs_config = {'inputs': {'outer_one': value_config}}
    run_config = {'solids': {'outer': inputs_config}}

    outcome = execute_pipeline(pipe, run_config)

    assert outcome.success
    assert called['node_a']
def define_composite_materialization_pipeline(
    should_require_resources=True, resources_initted=None
):
    """Build a pipeline whose composite output uses a custom type with a materializer.

    Args:
        should_require_resources: When true, the type's materializer declares
            resource key ``'a'`` as required; otherwise it declares none.
        resources_initted: Optional dict that resource ``a`` writes
            ``{'a': True}`` into when it is initialized. A fresh dict is used
            when omitted.

    Returns:
        The ``output_pipeline`` pipeline, containing the ``wrap_solid``
        composite that maps out the output of ``output_solid``.
    """
    init_log = {} if resources_initted is None else resources_initted

    @resource
    def resource_a(_):
        # Record initialization so callers can observe whether it happened.
        init_log['a'] = True
        yield 'A'

    if should_require_resources:
        materializer_resource_keys = {'a'}
    else:
        materializer_resource_keys = set()

    @dagster_type_materializer(String, required_resource_keys=materializer_resource_keys)
    def materialize(context, *_args, **_kwargs):
        assert context.resources.a == 'A'
        return AssetMaterialization('hello')

    custom_type = create_any_type(name='CustomType', materializer=materialize)

    @solid(output_defs=[OutputDefinition(custom_type)])
    def output_solid(_context):
        return 'hello'

    mapped_output = OutputDefinition(custom_type).mapping_from('output_solid')
    wrapping_composite = CompositeSolidDefinition(
        name="wrap_solid",
        solid_defs=[output_solid],
        output_mappings=[mapped_output],
    )

    mode = ModeDefinition(resource_defs={'a': resource_a})

    @pipeline(mode_defs=[mode])
    def output_pipeline():
        wrapping_composite()

    return output_pipeline
def define_composites_pipeline():
    """Build the ``composites_pipeline`` made of nested, aliased composites.

    ``add_two`` chains two aliased ``add_one`` instances, ``add_four`` chains
    two aliased ``add_two`` instances, and ``div_four`` chains two aliased
    ``div_two`` instances. The returned pipeline feeds the output of
    ``add_four`` into the ``num`` input of ``div_four``.
    """

    @lambda_solid(inputs=[InputDefinition('num', Int)])
    def add_one(num):
        return num + 1

    @lambda_solid(inputs=[InputDefinition('num')])
    def div_two(num):
        return num / 2

    def _chain_twice(composite_name, inner_def, inner_name, aliases, output_type):
        # Composite running `inner_def` twice in sequence: the composite's `num`
        # input goes to the first alias, whose output feeds the second alias,
        # whose output becomes the composite's output.
        head, tail = aliases
        return CompositeSolidDefinition(
            composite_name,
            solids=[inner_def],
            dependencies={
                SolidInstance(inner_name, head): {},
                SolidInstance(inner_name, tail): {'num': DependencyDefinition(head)},
            },
            input_mappings=[InputDefinition('num', Int).mapping_to(head, 'num')],
            output_mappings=[OutputDefinition(output_type).mapping_from(tail)],
        )

    add_two = _chain_twice('add_two', add_one, 'add_one', ('adder_1', 'adder_2'), Int)
    add_four = _chain_twice('add_four', add_two, 'add_two', ('adder_1', 'adder_2'), Int)
    div_four = _chain_twice('div_four', div_two, 'div_two', ('div_1', 'div_2'), Float)

    top_level_deps = {'div_four': {'num': DependencyDefinition('add_four')}}
    return PipelineDefinition(
        name='composites_pipeline',
        solids=[add_four, div_four],
        dependencies=top_level_deps,
    )
def test_composite_config():
    """Check that solid config can be addressed through two composite layers.

    The ``configured`` solid sits inside ``inner`` inside ``outer``. The run
    config nests a ``solids`` key per layer to reach it; the solid must run and
    observe the configured value ``'yes'``.
    """
    called = {}

    @solid(config_field=Field(String))
    def configured(context):
        called['configured'] = True
        # Compare by value; `is` against a str literal depends on string
        # interning and triggers a SyntaxWarning on newer Pythons.
        assert context.solid_config == 'yes'

    inner = CompositeSolidDefinition(name='inner', solid_defs=[configured])
    outer = CompositeSolidDefinition(name='outer', solid_defs=[inner])
    pipe = PipelineDefinition(name='composites_pipeline', solid_defs=[outer])

    result = execute_pipeline(
        pipe,
        {'solids': {'outer': {'solids': {'inner': {'solids': {'configured': {'config': 'yes'}}}}}}},
    )

    assert result.success
    assert called['configured']
def test_composite_config_input():
    """Check that a solid input can be configured through two composite layers.

    ``node_a`` sits inside ``inner`` inside ``outer`` with no input mappings.
    The run config nests a ``solids`` key per layer to supply the value ``1``
    for its input ``one``; the solid must run and receive that value.
    """
    called = {}

    @solid(inputs=[InputDefinition('one')])
    def node_a(_context, one):
        called['node_a'] = True
        # Compare by value; `is` against an int literal only works by accident of
        # interpreter caching and triggers a SyntaxWarning on newer Pythons.
        assert one == 1

    inner = CompositeSolidDefinition(name='inner', solid_defs=[node_a])
    outer = CompositeSolidDefinition(name='outer', solid_defs=[inner])
    pipe = PipelineDefinition(name='composites_pipeline', solid_defs=[outer])

    result = execute_pipeline(
        pipe,
        {
            'solids': {
                'outer': {
                    'solids': {'inner': {'solids': {'node_a': {'inputs': {'one': {'value': 1}}}}}}
                }
            }
        },
    )

    assert result.success
    assert called['node_a']
def test_cycle_detect():
    """Check that circular dependencies are rejected at definition time.

    Two aliased invocations of ``add`` ("first" and "second") each consume the
    other's output. Building either a ``PipelineDefinition`` or a
    ``CompositeSolidDefinition`` from that graph must raise
    ``DagsterInvalidDefinitionError`` mentioning circular dependencies.
    """

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add(a, b):
        return a + b

    def _cyclic_dependencies():
        # Fresh dict per call: "first" depends on "second" and vice versa.
        first = NodeInvocation("add", alias="first")
        first_inputs = {
            "a": DependencyDefinition("return_one"),
            "b": DependencyDefinition("second"),
        }
        second = NodeInvocation("add", alias="second")
        second_inputs = {
            "a": DependencyDefinition("first"),
            "b": DependencyDefinition("return_one"),
        }
        return {first: first_inputs, second: second_inputs}

    cycle_message = "Circular dependencies exist"

    with pytest.raises(DagsterInvalidDefinitionError, match=cycle_message):
        PipelineDefinition(
            solid_defs=[return_one, add],
            name="test",
            dependencies=_cyclic_dependencies(),
        )

    with pytest.raises(DagsterInvalidDefinitionError, match=cycle_message):
        CompositeSolidDefinition(
            name="circletron",
            solid_defs=[return_one, add],
            dependencies=_cyclic_dependencies(),
        )
def test_cycle_detect():
    """Verify that both definition kinds refuse a cyclic dependency graph.

    The same two-node cycle (aliases ``first`` and ``second`` of ``add`` feeding
    each other) is given first to ``PipelineDefinition`` and then to
    ``CompositeSolidDefinition``; each must raise
    ``DagsterInvalidDefinitionError`` matching "Circular dependencies exist".
    """

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add(a, b):
        return a + b

    # One constructor per definition kind, each taking the dependency dict.
    definition_builders = (
        lambda deps: PipelineDefinition(solid_defs=[return_one, add], dependencies=deps),
        lambda deps: CompositeSolidDefinition(
            name='circletron', solid_defs=[return_one, add], dependencies=deps
        ),
    )

    for build_definition in definition_builders:
        with pytest.raises(DagsterInvalidDefinitionError, match="Circular dependencies exist"):
            build_definition(
                {
                    SolidInvocation('add', alias='first'): {
                        'a': DependencyDefinition('return_one'),
                        'b': DependencyDefinition('second'),
                    },
                    SolidInvocation('add', alias='second'): {
                        'a': DependencyDefinition('first'),
                        'b': DependencyDefinition('return_one'),
                    },
                }
            )
def test_composite_mapping_collision():
    """Check that mapping onto an input that already has a dependency is rejected.

    Input ``a`` of ``add`` is wired to ``return_one`` in ``dependencies`` and is
    also the target of an input mapping. Constructing the composite must raise
    ``DagsterInvalidDefinitionError`` matching "already satisfied by output".
    """

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add(a, b):
        return a + b

    with pytest.raises(DagsterInvalidDefinitionError, match="already satisfied by output"):
        # The mapping targets the same input that the dependency dict fills.
        colliding_mapping = InputDefinition("val").mapping_to("add", "a")
        add_inputs = {
            "a": DependencyDefinition("return_one"),
            "b": DependencyDefinition("return_one"),
        }
        CompositeSolidDefinition(
            name="add_one",
            solid_defs=[return_one, add],
            input_mappings=[colliding_mapping],
            dependencies={"add": add_inputs},
        )
def test_mapping_errors():
    """Check the error raised for each kind of invalid input/output mapping.

    Every case builds a composite named ``bad`` around the single solid ``echo``
    with one faulty mapping and expects ``DagsterInvalidDefinitionError`` whose
    message matches the given pattern.
    """

    @lambda_solid
    def echo(foo):
        return foo

    def _assert_rejected(expected_message, make_mapping_kwargs):
        # The mappings are built inside the `raises` context, then handed to
        # the composite constructor.
        with pytest.raises(DagsterInvalidDefinitionError, match=expected_message):
            CompositeSolidDefinition(name='bad', solid_defs=[echo], **make_mapping_kwargs())

    # --- input mappings ---

    # Target solid is not part of the composite.
    _assert_rejected(
        "references solid 'inner' which it does not contain",
        lambda: {'input_mappings': [InputDefinition('mismatch').mapping_to('inner', 'foo')]},
    )

    # Target input does not exist on the solid.
    _assert_rejected(
        "no input named 'bar'",
        lambda: {'input_mappings': [InputDefinition('mismatch').mapping_to('echo', 'bar')]},
    )

    # Declared type differs from the target input's type.
    _assert_rejected(
        "InputMapping source and destination must have the same type",
        lambda: {'input_mappings': [InputDefinition('mismatch', str).mapping_to('echo', 'foo')]},
    )

    # Two mappings share a definition name but come from distinct definitions.
    _assert_rejected(
        "mappings with same definition name but different definitions",
        lambda: {
            'input_mappings': [
                InputDefinition('mismatch').mapping_to('echo', 'foo'),
                InputDefinition('mismatch').mapping_to('echo_2', 'foo'),
            ]
        },
    )

    # --- output mappings ---

    # Source solid is not part of the composite.
    _assert_rejected(
        "references solid 'inner' which it does not contain",
        lambda: {'output_mappings': [OutputDefinition().mapping_from('inner', 'result')]},
    )

    # Source output does not exist on the solid.
    _assert_rejected(
        "no output named 'return'",
        lambda: {'output_mappings': [OutputDefinition().mapping_from('echo', 'return')]},
    )

    # Declared type differs from the source output's type.
    _assert_rejected(
        "OutputMapping source and destination must have the same type",
        lambda: {'output_mappings': [OutputDefinition(str).mapping_from('echo', 'result')]},
    )
fraction=context.solid_config['subsample_pct'] / 100.0) s3_to_df = CompositeSolidDefinition( name='s3_to_df', solids=[download_from_s3_to_bytes, unzip_file, ingest_csv_to_spark], dependencies={ 'unzip_file': { 'archive_file': DependencyDefinition('download_from_s3_to_bytes') }, 'ingest_csv_to_spark': { 'input_csv_file': DependencyDefinition('unzip_file') }, }, input_mappings=[ InputDefinition('bucket_data', S3BucketData).mapping_to('download_from_s3_to_bytes', 'bucket_data'), InputDefinition('archive_member', String).mapping_to('unzip_file', 'archive_member'), ], output_mappings=[ OutputDefinition(SparkDataFrameType).mapping_from( 'ingest_csv_to_spark') ], ) s3_to_dw_table = CompositeSolidDefinition( name='s3_to_dw_table', solids=[
@lambda_solid(input_defs=[InputDefinition('num', Int)]) def add_one(num): return num + 1 @lambda_solid(input_defs=[InputDefinition('num')]) def div_two(num): return num / 2 add_two = CompositeSolidDefinition( 'add_two', solid_defs=[add_one], dependencies={ SolidInvocation('add_one', 'adder_1'): {}, SolidInvocation('add_one', 'adder_2'): { 'num': DependencyDefinition('adder_1') }, }, input_mappings=[InputDefinition('num', Int).mapping_to('adder_1', 'num')], output_mappings=[OutputDefinition(Int).mapping_from('adder_2')], ) add_four = CompositeSolidDefinition( 'add_four', solid_defs=[add_two], dependencies={ SolidInvocation('add_two', 'adder_1'): {}, SolidInvocation('add_two', 'adder_2'): { 'num': DependencyDefinition('adder_1') }, },
def build_composite_solid_definition(self, name, assets_to_update, include_nothing_input=False):
    """Build a composite solid definition for the assets in `assets_to_update`.

    By default the composite solid will not accept any inputs. If you need to run this
    composite _after_ other solids have run, pass `include_nothing_input=True`, which will
    create a single input of type `Nothing` to the composite solid, and a mapping to each
    'source' asset input (i.e. those assets without `compute_fn`s, such as those created by
    `source_asset` or `source_table`).

    Args:
        name: Name given to the resulting composite solid definition.
        assets_to_update: Assets to build solids for; passed through to
            `_get_solid_deps_and_defs`.
        include_nothing_input: When true, expose one `Nothing`-typed input called
            "nothing" on the composite and map it to every inner solid that declares a
            `Nothing`-typed input with that name.

    Returns:
        A `CompositeSolidDefinition` containing the generated solids and dependencies.

    Examples:

        .. code-block:: python

            @solid(required_resource_keys={"filesystem", "pyspark"})
            def save_orders(context) -> Nothing:
                orders = context.resources.pyspark.spark_session.createDataFrame([
                    Row(id=1, name="foo"), Row(id=2, name="bar"), Row(id=3, name="baz"),
                ])
                path = context.resources.filesystem.get_fs_path(("orders.csv",))
                orders.write.format("csv").options(header="true").save(path, mode="overwrite")

            orders_asset = source_asset(path="orders.csv")

            @computed_asset(input_assets=[orders_asset])
            def orders_top1_asset(orders: DataFrame) -> DataFrame:
                return orders.limit(1)

            run_lakehouse = lakehouse.build_composite_solid_definition(
                name="lakehouse_solid",
                assets_to_update=[orders_top1_asset],
                include_nothing_input=True,
            )

            @pipeline(mode_defs=[mode_def], preset_defs=[preset_def])
            def simple_pipeline():
                run_lakehouse(save_orders())

            # If you have multiple solids which need to run first:

            @lambda_solid
            def other_side_effect() -> Nothing:
                # Perhaps this writes to a database or some other required source table.
                pass

            @lambda_solid(
                input_defs=[InputDefinition("orders", Nothing), InputDefinition("other", Nothing)]
            )
            def wait_until_complete() -> Nothing:
                pass

            @pipeline(mode_defs=[mode_def], preset_defs=[preset_def])
            def pipeline_multi_deps():
                completed = wait_until_complete(orders=save_orders(), other=other_side_effect())
                run_lakehouse(completed)

    """
    solid_defs, solid_deps = self._get_solid_deps_and_defs(
        assets_to_update, include_nothing_input
    )

    if include_nothing_input:
        # Map a single `InputDefinition`, of type `Nothing`, to every
        # solid in the solid definitions we just created that have an input
        # named 'nothing'.
        # Ideally we'd do this based on something more explicit than the names and types
        # of solid inputs, but it's vanishingly unlikely that users will return `Nothing`
        # from an asset, since there would be nothing to save to the Lakehouse that way.
        nothing_input = InputDefinition("nothing", Nothing)
        input_mappings = [
            # The keys of `solid_defs` are joined with "__" to form the solid name
            # (presumably they are asset path tuples; verify against
            # `_get_solid_deps_and_defs`).
            nothing_input.mapping_to("__".join(solid_name), "nothing")
            for solid_name, solid_def in solid_defs.items()
            # Inspect every input rather than only the first one: this avoids an
            # IndexError for solids without inputs and matches the rule stated above.
            if any(
                input_def.name == "nothing" and input_def.dagster_type.is_nothing
                for input_def in solid_def.input_defs
            )
        ]
    else:
        input_mappings = None

    return CompositeSolidDefinition(
        name=name,
        solid_defs=list(solid_defs.values()),
        dependencies=solid_deps,
        input_mappings=input_mappings,
    )
's3': s3_resource, 'db_info': redshift_db_info_resource, 'tempfile': tempfile_resource, }, ) process_on_time_data = CompositeSolidDefinition( name='process_on_time_data', solids=[s3_to_df, join_q2_data, load_data_to_database_from_spark], dependencies={ SolidInstance('s3_to_df', alias='april_on_time_s3_to_df'): {}, SolidInstance('s3_to_df', alias='may_on_time_s3_to_df'): {}, SolidInstance('s3_to_df', alias='june_on_time_s3_to_df'): {}, SolidInstance('s3_to_df', alias='master_cord_s3_to_df'): {}, 'join_q2_data': { 'april_data': DependencyDefinition('april_on_time_s3_to_df'), 'may_data': DependencyDefinition('may_on_time_s3_to_df'), 'june_data': DependencyDefinition('june_on_time_s3_to_df'), 'master_cord_data': DependencyDefinition('master_cord_s3_to_df'), }, SolidInstance('load_data_to_database_from_spark', alias='load_q2_on_time_data'): { 'data_frame': DependencyDefinition('join_q2_data') }, }, ) sfo_weather_data = CompositeSolidDefinition( name='sfo_weather_data', solids=[ download_from_s3_to_bytes, ingest_csv_to_spark,