def test_pipeline_types():
    """Check which types a pipeline exposes through ``has_type`` / ``type_named``.

    The pipeline is only constructed, never executed. Types referenced by its
    solids' inputs, outputs and config, and by its context config, must be
    reported; unrelated types must not.
    """
    context_config_type = types.ConfigDictionary(
        'ContextOneConfigDict',
        {'field_one': Field(types.String)},
    )
    solid_config_type = types.ConfigDictionary(
        'SolidOneConfigDict',
        {'another_field': Field(types.Int)},
    )

    @lambda_solid
    def produce_string():
        return 'foo'

    # This test never executes the pipeline, so the body below must not run.
    @solid(
        inputs=[InputDefinition('input_one', types.String)],
        outputs=[OutputDefinition(types.Any)],
        config_def=ConfigDefinition(solid_config_type),
    )
    def solid_one(_info, input_one):
        raise Exception('should not execute')

    wiring = {
        'solid_one': {'input_one': DependencyDefinition('produce_string')},
    }
    contexts = {
        'context_one': PipelineContextDefinition(
            context_fn=lambda: None,
            config_def=ConfigDefinition(context_config_type),
        ),
    }
    pipeline_def = PipelineDefinition(
        solids=[produce_string, solid_one],
        dependencies=wiring,
        context_definitions=contexts,
    )

    expected_types = [
        solid_config_type,
        context_config_type,
        types.String,
        types.Any,
        types.Int,
    ]
    for type_name in [expected.name for expected in expected_types]:
        assert pipeline_def.has_type(type_name)
        assert pipeline_def.type_named(type_name).name == type_name

    unexpected_types = [
        types.Bool,
        types.Dict,
        types.PythonObjectType('Duisjdfke', dict),
    ]
    for type_name in [unexpected.name for unexpected in unexpected_types]:
        assert not pipeline_def.has_type(type_name)
def test_aliased_configs():
    """Run one solid definition under two aliases, each with its own config.

    ``load_constant`` is instantiated as ``load_a`` and ``load_b``; each alias
    is expected to return the integer configured for it (2 and 3).
    """
    @solid(inputs=[], config_def=ConfigDefinition(types.Int))
    def load_constant(info):
        return info.config

    aliased_instances = {
        SolidInstance(load_constant.name, 'load_a'): {},
        SolidInstance(load_constant.name, 'load_b'): {},
    }
    pipeline = PipelineDefinition(
        solids=[load_constant],
        dependencies=aliased_instances,
    )

    environment = config.Environment(
        solids={
            'load_a': config.Solid(2),
            'load_b': config.Solid(3),
        },
    )
    result = execute_pipeline(pipeline, environment)

    assert result.success
    for alias, expected_value in (('load_a', 2), ('load_b', 3)):
        assert result.result_for_solid(alias).transformed_value() == expected_value
def define_pandas_source_test_solid():
    """Build the dagstermill solid backed by the ``pandas_source_test`` notebook.

    The solid takes no inputs, declares a single ``DataFrame`` output and is
    configured with a string.
    """
    solid_name = 'pandas_source_test'
    string_config = ConfigDefinition(types.String)
    return dm.define_dagstermill_solid(
        name=solid_name,
        notebook_path=nb_test_path(solid_name),
        inputs=[],
        outputs=[OutputDefinition(DataFrame)],
        config_def=string_config,
    )
def _nested_optional_config_with_no_default():
    """Return a config definition with a nested, optional, default-less int.

    Shape: ``{'nested': {'int_field': <optional Int>}}``. Both dictionaries are
    created without an explicit type name.
    """
    optional_int = Field(
        types.Int,
        is_optional=True,
    )
    inner_type = types.ConfigDictionary({'int_field': optional_int})
    outer_type = types.ConfigDictionary({
        'nested': Field(dagster_type=inner_type),
    })
    return ConfigDefinition(config_type=outer_type)
def define_solid_with_stuff():
    """Return a ``stuff`` solid definition that is not meant to be executed.

    It declares one Int input (``foo``), one Int output (``bar``), Int config
    and notebook metadata; its transform fails via ``check.failed`` if called.
    """
    def _refuse_to_run(*args, **kwargs):
        # Any invocation is an error for this definition-only solid.
        return check.failed('do not execute')

    foo_input = InputDefinition('foo', types.Int)
    bar_output = OutputDefinition(name='bar', dagster_type=types.Int)

    return SolidDefinition(
        name='stuff',
        inputs=[foo_input],
        outputs=[bar_output],
        config_def=ConfigDefinition(types.Int),
        transform_fn=_refuse_to_run,
        metadata={'notebook_path': 'unused.ipynb'},
    )
def define_contextful_solids():
    """Return four Int solids that record their values in the context's store.

    ``injest_a`` / ``injest_b`` emit their Int config; ``add_ints`` /
    ``mult_ints`` combine ``num_one`` and ``num_two``. Each solid records its
    value under its own key through ``info.context.resources.store`` before
    returning it.

    Returns:
        ``[injest_a, injest_b, add_ints, mult_ints]``
    """
    def _record(info, key, value):
        # Single place that talks to the store resource.
        info.context.resources.store.record_value(info.context, key, value)

    def _number_pair():
        # Built per solid so no definition objects are shared between solids.
        return [
            InputDefinition('num_one', types.Int),
            InputDefinition('num_two', types.Int),
        ]

    @solid(
        config_def=ConfigDefinition(types.Int),
        outputs=[OutputDefinition(types.Int)],
    )
    def injest_a(info):
        _record(info, 'a', info.config)
        return info.config

    @solid(
        config_def=ConfigDefinition(types.Int),
        outputs=[OutputDefinition(types.Int)],
    )
    def injest_b(info):
        _record(info, 'b', info.config)
        return info.config

    @solid(
        inputs=_number_pair(),
        outputs=[OutputDefinition(types.Int)],
    )
    def add_ints(info, num_one, num_two):
        total = num_one + num_two
        _record(info, 'add', total)
        return total

    @solid(
        inputs=_number_pair(),
        outputs=[OutputDefinition(types.Int)],
    )
    def mult_ints(info, num_one, num_two):
        product = num_one * num_two
        _record(info, 'mult', product)
        return product

    return [injest_a, injest_b, add_ints, mult_ints]
def test_double_type():
    """Two solids declaring separate config types with one name must be rejected.

    Each solid gets its own ``ConfigDictionary`` instance named
    ``SomeTypeName``; building a pipeline from both is expected to raise
    ``DagsterInvalidDefinitionError``.
    """
    def _same_named_config():
        # A fresh type object on every call, always under the same name.
        return ConfigDefinition(
            types.ConfigDictionary(
                'SomeTypeName',
                {'some_field': Field(types.String)},
            ),
        )

    @solid(config_def=_same_named_config())
    def solid_one(_info):
        raise Exception('should not execute')

    @solid(config_def=_same_named_config())
    def solid_two(_info):
        raise Exception('should not execute')

    conflicting_solids = [solid_one, solid_two]
    with pytest.raises(DagsterInvalidDefinitionError, match='Type names must be unique.'):
        PipelineDefinition(solids=conflicting_solids)
def to_parquet_solid(name):
    """Create a solid that writes its ``df`` input to a parquet file.

    Args:
        name: Name given to the resulting solid definition.

    Returns:
        A ``SolidDefinition`` with one ``DataFrame`` input, no outputs, and
        config described by ``WriteDataFrameConfigDict`` (the transform reads
        the ``path`` key from it).
    """
    def _write_parquet(info, inputs):
        frame = inputs['df']
        destination = info.config['path']
        frame.to_parquet(destination)

    frame_input = InputDefinition('df', DataFrame)
    return SolidDefinition(
        name=name,
        inputs=[frame_input],
        outputs=[],
        config_def=ConfigDefinition(WriteDataFrameConfigDict),
        transform_fn=_write_parquet,
    )
def load_csv_solid(name):
    """Create a solid that loads a CSV file into a ``DataFrame`` output.

    Args:
        name: Name of the solid; validated as a string.

    Returns:
        A ``SolidDefinition`` with no inputs and one ``DataFrame`` output. The
        transform reads the file at ``info.config['path']`` with
        ``pd.read_csv`` and yields it wrapped in a ``Result``.
    """
    check.str_param(name, 'name')

    def _read_csv(info, _inputs):
        csv_path = info.config['path']
        frame = pd.read_csv(csv_path)
        yield Result(frame)

    frame_output = OutputDefinition(DataFrame)
    return SolidDefinition(
        name=name,
        inputs=[],
        outputs=[frame_output],
        transform_fn=_read_csv,
        config_def=ConfigDefinition(LoadDataFrameConfigDict),
    )
def _single_nested_config():
    """Return a config definition with one required Int nested one level deep.

    Shape: ``ParentType {'nested': NestedType {'int_field': Int}}``.
    """
    nested_type = types.ConfigDictionary(
        'NestedType',
        {'int_field': Field(types.Int)},
    )
    parent_type = types.ConfigDictionary(
        'ParentType',
        {'nested': Field(dagster_type=nested_type)},
    )
    return ConfigDefinition(config_type=parent_type)
def define_contextless_solids():
    """Return four Int solids that never touch the execution context.

    ``injest_a`` / ``injest_b`` emit their Int config; ``add_ints`` /
    ``mult_ints`` are lambda solids combining ``num_one`` and ``num_two``.

    Returns:
        ``[injest_a, injest_b, add_ints, mult_ints]``
    """
    def _number_pair():
        # Built per solid so no definition objects are shared between solids.
        return [
            InputDefinition('num_one', types.Int),
            InputDefinition('num_two', types.Int),
        ]

    @solid(
        config_def=ConfigDefinition(types.Int),
        outputs=[OutputDefinition(types.Int)],
    )
    def injest_a(info):
        return info.config

    @solid(
        config_def=ConfigDefinition(types.Int),
        outputs=[OutputDefinition(types.Int)],
    )
    def injest_b(info):
        return info.config

    @lambda_solid(inputs=_number_pair(), output=OutputDefinition(types.Int))
    def add_ints(num_one, num_two):
        return num_one + num_two

    @lambda_solid(inputs=_number_pair(), output=OutputDefinition(types.Int))
    def mult_ints(num_one, num_two):
        return num_one * num_two

    return [injest_a, injest_b, add_ints, mult_ints]
def define_pass_value_solid(name, description=None):
    """Create a solid that emits the ``value`` entry of its config.

    Args:
        name: Name of the solid; validated as a string.
        description: Optional description; validated as an optional string.

    Returns:
        A ``SolidDefinition`` with no inputs and one String output, configured
        by ``SingleValueDict``.
    """
    check.str_param(name, 'name')
    check.opt_str_param(description, 'description')

    def _emit_configured_value(info, _inputs):
        configured_value = info.config['value']
        yield Result(configured_value)

    string_output = OutputDefinition(types.String)
    return SolidDefinition(
        name=name,
        description=description,
        inputs=[],
        outputs=[string_output],
        config_def=ConfigDefinition(SingleValueDict),
        transform_fn=_emit_configured_value,
    )
def _nested_optional_config_with_no_default():
    """Return a config definition with a nested, optional, default-less int.

    Shape: ``ParentType {'nested': NestedType {'int_field': <optional Int>}}``.
    """
    return ConfigDefinition(
        config_type=types.ConfigDictionary(
            'ParentType',
            {
                'nested': Field(
                    dagster_type=types.ConfigDictionary(
                        'NestedType',
                        {'int_field': Field(types.Int, is_optional=True)},
                    ),
                ),
            },
        ),
    )
def test_any_config_definition():
    """Check that a solid with a bare ``ConfigDefinition()`` receives its config.

    The solid is configured with the integer 234 and asserts that
    ``info.config`` equals it. A flag in ``called`` proves the transform body
    actually ran, so the test cannot pass vacuously.
    """
    called = {}
    conf_value = 234

    @solid(config_def=ConfigDefinition())
    def hello_world(info):
        assert info.config == conf_value
        called['yup'] = True

    # The return value was previously bound to an unused local; only the side
    # effect recorded in `called` is inspected.
    execute_single_solid(
        create_test_context(),
        hello_world,
        environment=config.Environment(
            solids={'hello_world': config.Solid(conf_value)},
        ),
    )

    # .get() makes a transform that never ran fail as a readable assertion
    # instead of an unrelated KeyError.
    assert called.get('yup'), 'hello_world transform was never invoked'
def define_create_table_solid(name):
    """Create a solid that materializes a SQL expression as a new table.

    Args:
        name: Name given to the resulting solid definition.

    Returns:
        A ``SolidDefinition`` with one input (``expr``), no outputs, and config
        described by ``CreateTableConfigDict``. The transform reads the
        ``table_name`` config entry and runs ``CREATE TABLE <name> AS <query>``
        through ``info.context.resources.sa.engine``.
    """
    def _materialization_fn(info, inputs):
        sql_expr = inputs['expr']
        check.inst(sql_expr, DagsterSqlExpression)

        output_table_name = check.str_elem(info.config, 'table_name')

        # NOTE(review): the table name and query text are interpolated straight
        # into the statement; identifiers cannot be bound as parameters, so
        # both values must come from trusted configuration.
        total_sql = '''CREATE TABLE {output_table_name} AS {query_text}'''.format(
            output_table_name=output_table_name,
            query_text=sql_expr.query_text,
        )

        # Close the connection explicitly rather than leaving it to garbage
        # collection. Presumably `engine` is a SQLAlchemy Engine whose
        # connections expose close() -- confirm against the `sa` resource.
        connection = info.context.resources.sa.engine.connect()
        try:
            connection.execute(total_sql)
        finally:
            connection.close()

    return SolidDefinition(
        name=name,
        inputs=[InputDefinition('expr')],
        outputs=[],
        transform_fn=_materialization_fn,
        config_def=ConfigDefinition(CreateTableConfigDict),
    )
def _nested_optional_config_with_default():
    """Return a config definition with a nested optional int defaulting to 3.

    Shape: ``ParentType {'nested': NestedType {'int_field': <optional Int,
    default 3>}}``.
    """
    defaulted_int = Field(
        types.Int,
        is_optional=True,
        default_value=3,
    )
    nested_type = types.ConfigDictionary(
        'NestedType',
        {'int_field': defaulted_int},
    )
    parent_type = types.ConfigDictionary(
        'ParentType',
        {'nested': Field(dagster_type=nested_type)},
    )
    return ConfigDefinition(config_type=parent_type)
def test_hello_world_config():
    """Execute the ``hello_world_with_config`` notebook solid with string config.

    The solid is configured with the script-relative path of ``num.csv``; the
    pipeline must succeed and the solid's transformed value must equal 100.
    """
    solid_name = 'with_config'
    with_config_solid = dm.define_dagstermill_solid(
        solid_name,
        nb_test_path('hello_world_with_config'),
        [],
        [OutputDefinition()],
        config_def=ConfigDefinition(types.String),
    )
    pipeline = PipelineDefinition(solids=[with_config_solid])

    environment = config.Environment(
        solids={
            'with_config': config.Solid(script_relative_path('num.csv')),
        },
    )
    pipeline_result = execute_pipeline(pipeline, environment)

    assert pipeline_result.success
    solid_result = pipeline_result.result_for_solid('with_config')
    assert solid_result.transformed_value() == 100
def define_part_nine_final():
    """Assemble the ``part_nine_final`` pipeline with two selectable contexts.

    Solids come from ``define_contextful_solids()``; ``add_ints`` and
    ``mult_ints`` both consume ``injest_a`` and ``injest_b``.

    Contexts:
        local: console logging at ``DEBUG`` with an ``InMemoryStore``.
        cloud: console logging with a ``PublicCloudStore`` built from the
            ``credentials`` config entry (``user`` / ``pass`` strings).
    """
    def _ingest_wiring():
        # Fresh DependencyDefinition objects for each consuming solid.
        return {
            'num_one': DependencyDefinition('injest_a'),
            'num_two': DependencyDefinition('injest_b'),
        }

    def _local_context(*_args):
        return ExecutionContext.console_logging(
            log_level=DEBUG,
            resources=PartNineResources(InMemoryStore()),
        )

    def _cloud_context(info):
        cloud_store = PublicCloudStore(info.config['credentials'])
        return ExecutionContext.console_logging(
            resources=PartNineResources(cloud_store),
        )

    pipeline_solids = define_contextful_solids()
    solid_wiring = {
        'add_ints': _ingest_wiring(),
        'mult_ints': _ingest_wiring(),
    }

    local_definition = PipelineContextDefinition(context_fn=_local_context)

    credentials_type = types.ConfigDictionary(
        name='CredentialsConfigDict',
        fields={
            'user': Field(types.String),
            'pass': Field(types.String),
        },
    )
    cloud_config_type = types.ConfigDictionary(
        name='CloudConfigDict',
        fields={'credentials': Field(credentials_type)},
    )
    cloud_definition = PipelineContextDefinition(
        context_fn=_cloud_context,
        config_def=ConfigDefinition(config_type=cloud_config_type),
    )

    return PipelineDefinition(
        name='part_nine_final',
        solids=pipeline_solids,
        dependencies=solid_wiring,
        context_definitions={
            'local': local_definition,
            'cloud': cloud_definition,
        },
    )
from dagster import ( ConfigDefinition, DependencyDefinition, InputDefinition, OutputDefinition, PipelineDefinition, SolidInstance, config, execute_pipeline, lambda_solid, solid, types, ) @solid(config_def=ConfigDefinition(types.Int), outputs=[OutputDefinition(types.Int)]) def load_a(info): return info.config @solid(config_def=ConfigDefinition(types.Int), outputs=[OutputDefinition(types.Int)]) def load_b(info): return info.config @lambda_solid( inputs=[ InputDefinition('a', types.Int), InputDefinition('b', types.Int), ], output=OutputDefinition(types.Int),
def test_noop_config():
    """A ``ConfigDefinition`` over ``types.Any`` can be built and is truthy."""
    any_config = ConfigDefinition(types.Any)
    assert any_config
ConfigDefinition, DependencyDefinition, InputDefinition, OutputDefinition, PipelineDefinition, SolidInstance, define_stub_solid, execute_pipeline, lambda_solid, solid, types, ) @solid( config_def=ConfigDefinition(types.Int), outputs=[OutputDefinition(types.Int)], ) def load_number(info): return info.config @lambda_solid( inputs=[ InputDefinition('num1', types.Int), InputDefinition('num2', types.Int), ], output=OutputDefinition(types.Int), ) def adder(num1, num2): return num1 + num2