def test_custom_contexts():
    """Two custom contexts with identical config schemas; each run must surface
    its context config as the solid's resources."""

    @solid(inputs=[], outputs=[OutputDefinition()])
    def custom_context_transform(info):
        # Whichever context is selected, its config dict becomes resources.
        assert info.context.resources == {'field_one': 'value_two'}

    def _make_context_def():
        # Both contexts share the same schema and context_fn shape.
        return PipelineContextDefinition(
            config_def=ConfigDefinition.config_dict(
                {'field_one': Field(dagster_type=types.String)}
            ),
            context_fn=lambda info: ExecutionContext(resources=info.config),
        )

    pipeline = PipelineDefinition(
        solids=[custom_context_transform],
        context_definitions={
            'custom_one': _make_context_def(),
            'custom_two': _make_context_def(),
        },
    )

    # Execute once per context; the solid asserts on the resources it sees.
    for context_name in ('custom_one', 'custom_two'):
        environment = config.Environment(
            context=config.Context(context_name, {'field_one': 'value_two'})
        )
        execute_pipeline(pipeline, environment=environment)
def test_pipeline_types():
    """Pipeline type registry: every type reachable from solids/contexts is
    queryable by name via ``has_type``/``type_named``; unrelated types are not.

    Note: no solid here ever executes — the test only inspects definitions.
    """
    ContextOneConfigDict = types.ConfigDictionary(
        'ContextOneConfigDict',
        {'field_one': Field(types.String)},
    )

    SolidOneConfigDict = types.ConfigDictionary(
        'SolidOneConfigDict',
        {'another_field': Field(types.Int)},
    )

    @lambda_solid
    def produce_string():
        return 'foo'

    @solid(
        inputs=[InputDefinition('input_one', types.String)],
        outputs=[OutputDefinition(types.Any)],
        config_def=ConfigDefinition(SolidOneConfigDict),
    )
    def solid_one(_info, input_one):
        raise Exception('should not execute')

    pipeline_def = PipelineDefinition(
        solids=[produce_string, solid_one],
        dependencies={
            'solid_one': {
                'input_one': DependencyDefinition('produce_string'),
            }
        },
        context_definitions={
            'context_one': PipelineContextDefinition(
                # BUG FIX: context_fn is invoked with an info argument
                # (see every other context_fn in this suite); the original
                # zero-arg `lambda: None` would raise TypeError if this
                # context were ever used. Behavior-neutral here since the
                # pipeline is never executed.
                context_fn=lambda _info: None,
                config_def=ConfigDefinition(ContextOneConfigDict),
            )
        },
    )

    # Every type wired into the pipeline must be registered under its name.
    present_types = [
        SolidOneConfigDict,
        ContextOneConfigDict,
        types.String,
        types.Any,
        types.Int,
    ]

    for present_type in present_types:
        name = present_type.name
        assert pipeline_def.has_type(name)
        assert pipeline_def.type_named(name).name == name

    # Types never referenced by the pipeline must not appear in the registry.
    not_present_types = [
        types.Bool,
        types.Dict,
        types.PythonObjectType('Duisjdfke', dict),
    ]

    for not_present_type in not_present_types:
        assert not pipeline_def.has_type(not_present_type.name)
def _multiple_required_fields_config_dict():
    """Build a named config dict with two required string fields."""
    fields = {
        'field_one': Field(types.String),
        'field_two': Field(types.String),
    }
    return ConfigDefinition.config_dict('MultipleRequiredFields', fields)
def test_default_value():
    """An optional context config field with a default must flow into
    resources when the environment supplies no value for it."""

    def _get_config_test_solid(config_key, config_value):
        @solid(inputs=[], outputs=[OutputDefinition()])
        def config_test(info):
            assert info.context.resources == {config_key: config_value}

        return config_test

    default_field = Field(
        dagster_type=types.String,
        is_optional=True,
        default_value='heyo',
    )

    pipeline = PipelineDefinition(
        solids=[_get_config_test_solid('field_one', 'heyo')],
        context_definitions={
            'custom_one': PipelineContextDefinition(
                config_def=ConfigDefinition.config_dict(
                    {'field_one': default_field}
                ),
                context_fn=lambda info: ExecutionContext(resources=info.config),
            ),
        },
    )

    # Empty context config: the default 'heyo' must be applied.
    execute_pipeline(
        pipeline,
        environment=config.Environment(context=config.Context('custom_one', {})),
    )
def test_default_arg():
    """Evaluating an empty dict against an optional int field yields the
    field's default value."""
    config_def = ConfigDefinition.config_dict(
        {'int_field': Field(types.Int, default_value=2, is_optional=True)}
    )
    evaluated = config_def.config_type.evaluate_value({})
    assert evaluated == {'int_field': 2}
def test_basic_solid_with_config():
    """A solid's per-solid environment config must be delivered to its
    transform via ``info.config``."""
    did_get = {}

    def _t_fn(info, _inputs):
        # Capture the config the transform actually received.
        did_get['yep'] = info.config

    solid = SolidDefinition(
        name='solid_with_context',
        inputs=[],
        outputs=[],
        config_def=ConfigDefinition.config_dict(
            {'some_config': Field(types.String)}
        ),
        transform_fn=_t_fn,
    )

    environment = config.Environment(
        solids={'solid_with_context': config.Solid({'some_config': 'foo'})}
    )
    execute_pipeline(PipelineDefinition(solids=[solid]), environment)

    assert 'yep' in did_get
    assert 'some_config' in did_get['yep']
def _single_optional_string_field_config_dict_with_default():
    """Config dict with one optional string field defaulting to 'some_default'."""
    return ConfigDefinition.config_dict({
        'optional_field': Field(
            types.String,
            is_optional=True,
            default_value='some_default',
        ),
    })
def test_aliased_configs():
    """Two aliases of one solid definition must each receive their own
    per-alias config value."""

    @solid(
        inputs=[],
        config_def=ConfigDefinition(types.Int),
    )
    def load_constant(info):
        return info.config

    pipeline = PipelineDefinition(
        solids=[load_constant],
        dependencies={
            # Same definition, two instances with distinct names.
            SolidInstance(load_constant.name, 'load_a'): {},
            SolidInstance(load_constant.name, 'load_b'): {},
        },
    )

    environment = config.Environment(
        solids={
            'load_a': config.Solid(2),
            'load_b': config.Solid(3),
        }
    )
    result = execute_pipeline(pipeline, environment)

    assert result.success
    # Each alias echoes back its own configured constant.
    assert result.result_for_solid('load_a').transformed_value() == 2
    assert result.result_for_solid('load_b').transformed_value() == 3
def test_yield_context():
    """A generator-style context_fn: setup runs before the transform, teardown
    after, and values pushed via ``context.value`` are visible in the solid."""
    events = []

    @solid(inputs=[], outputs=[OutputDefinition()])
    def custom_context_transform(info):
        assert info.context.resources == {'field_one': 'value_two'}
        # Reach into the context's internal dict to confirm the pushed value.
        assert info.context._context_dict['foo'] == 'bar'  # pylint: disable=W0212
        events.append('during')

    def _yield_context(info):
        events.append('before')
        context = ExecutionContext(resources=info.config)
        with context.value('foo', 'bar'):
            yield context
        events.append('after')

    pipeline = PipelineDefinition(
        solids=[custom_context_transform],
        context_definitions={
            'custom_one': PipelineContextDefinition(
                config_def=ConfigDefinition.config_dict(
                    {'field_one': Field(dagster_type=types.String)}
                ),
                context_fn=_yield_context,
            ),
        },
    )

    execute_pipeline(
        pipeline,
        environment=config.Environment(
            context=config.Context('custom_one', {'field_one': 'value_two'})
        ),
    )

    # Lifecycle ordering: setup, transform, teardown.
    assert events == ['before', 'during', 'after']
def define_pandas_source_test_solid():
    """Dagstermill solid backed by the 'pandas_source_test' notebook; takes a
    string config and emits a DataFrame."""
    return dm.define_dagstermill_solid(
        name='pandas_source_test',
        notebook_path=nb_test_path('pandas_source_test'),
        inputs=[],
        outputs=[OutputDefinition(DataFrame)],
        config_def=ConfigDefinition(types.String),
    )
def test_int_field():
    """A required int field evaluates a matching dict to itself."""
    config_def = ConfigDefinition.config_dict({'int_field': Field(types.Int)})
    evaluated = config_def.config_type.evaluate_value({'int_field': 1})
    assert evaluated == {'int_field': 1}
def test_int_field():
    """A named config dict with one required int field evaluates a matching
    dict to itself."""
    config_def = ConfigDefinition.config_dict(
        'SingleRequiredInt',
        {'int_field': Field(types.Int)},
    )
    evaluated = config_def.config_type.evaluate_value({'int_field': 1})
    assert evaluated == {'int_field': 1}
def define_solid_with_stuff():
    """Int-in/int-out solid whose transform must never run; carries notebook
    metadata for tooling tests."""
    return SolidDefinition(
        name='stuff',
        inputs=[InputDefinition('foo', types.Int)],
        outputs=[OutputDefinition(name='bar', dagster_type=types.Int)],
        config_def=ConfigDefinition(types.Int),
        # Guard: executing this solid is a test failure by construction.
        transform_fn=lambda *args, **kwargs: check.failed('do not execute'),
        metadata={'notebook_path': 'unused.ipynb'},
    )
def _nested_optional_config_with_no_default():
    """Config dict with a required 'nested' dict whose 'int_field' is optional
    and has no default."""
    inner = types.ConfigDictionary({
        'int_field': Field(types.Int, is_optional=True),
    })
    return ConfigDefinition(
        config_type=types.ConfigDictionary({
            'nested': Field(dagster_type=inner),
        })
    )
def define_contextful_solids():
    """Four solids that record every value they produce into the context's
    store resource: two int loaders plus add and multiply combiners."""

    @solid(
        config_def=ConfigDefinition(types.Int),
        outputs=[OutputDefinition(types.Int)],
    )
    def injest_a(info):
        # Persist the configured value under key 'a' before emitting it.
        info.context.resources.store.record_value(info.context, 'a', info.config)
        return info.config

    @solid(
        config_def=ConfigDefinition(types.Int),
        outputs=[OutputDefinition(types.Int)],
    )
    def injest_b(info):
        info.context.resources.store.record_value(info.context, 'b', info.config)
        return info.config

    int_pair_inputs = [
        InputDefinition('num_one', types.Int),
        InputDefinition('num_two', types.Int),
    ]

    @solid(
        inputs=int_pair_inputs,
        outputs=[OutputDefinition(types.Int)],
    )
    def add_ints(info, num_one, num_two):
        result = num_one + num_two
        info.context.resources.store.record_value(info.context, 'add', result)
        return result

    @solid(
        inputs=int_pair_inputs,
        outputs=[OutputDefinition(types.Int)],
    )
    def mult_ints(info, num_one, num_two):
        result = num_one * num_two
        info.context.resources.store.record_value(info.context, 'mult', result)
        return result

    return [injest_a, injest_b, add_ints, mult_ints]
def test_int_fails():
    """Int field evaluation rejects non-int values — including bools, which
    are deliberately not accepted despite being an int subclass in Python."""
    config_def = ConfigDefinition.config_dict({'int_field': Field(types.Int)})

    for bad_value in ('fjkdj', True):
        with pytest.raises(DagsterEvaluateValueError):
            config_def.config_type.evaluate_value({'int_field': bad_value})
def test_double_type():
    """Two distinct ConfigDictionary instances sharing the name 'SomeTypeName'
    must be rejected when combined into one pipeline."""

    def _conflicting_config_def():
        # Fresh type object each call, but the same registered name.
        return ConfigDefinition(
            types.ConfigDictionary(
                'SomeTypeName',
                {'some_field': Field(types.String)},
            ),
        )

    @solid(config_def=_conflicting_config_def())
    def solid_one(_info):
        raise Exception('should not execute')

    @solid(config_def=_conflicting_config_def())
    def solid_two(_info):
        raise Exception('should not execute')

    with pytest.raises(DagsterInvalidDefinitionError, match='Type names must be unique.'):
        PipelineDefinition(solids=[solid_one, solid_two])
def to_parquet_solid(name):
    """Sink solid: writes its 'df' input to the parquet path given in config."""

    def _t_fn(info, inputs):
        # Path comes from the WriteDataFrameConfigDict config.
        inputs['df'].to_parquet(info.config['path'])

    return SolidDefinition(
        name=name,
        inputs=[InputDefinition('df', DataFrame)],
        outputs=[],
        config_def=ConfigDefinition(WriteDataFrameConfigDict),
        transform_fn=_t_fn,
    )
def _mixed_required_optional_string_config_dict_with_default():
    """Named config dict mixing a defaulted optional field, a required field,
    and an optional field without a default."""
    fields = {
        'optional_arg': Field(
            types.String,
            is_optional=True,
            default_value='some_default',
        ),
        'required_arg': Field(types.String, is_optional=False),
        'optional_arg_no_default': Field(types.String, is_optional=True),
    }
    return ConfigDefinition.config_dict('MixedRequired', fields)
def test_invalid_context():
    """Invalid context selection or context config must fail execution with
    the appropriate dagster error.

    NOTE(review): pytest.raises(message=...) only customizes the failure
    message and does not assert on the exception text; it was deprecated in
    later pytest versions — consider migrating to match= (verify the actual
    exception messages first).
    """

    @lambda_solid
    def never_transform():
        raise Exception('should never execute')

    default_context_pipeline = PipelineDefinition(solids=[never_transform])

    # Unknown context name.
    environment_context_not_found = config.Environment(
        context=config.Context('not_found', {})
    )
    with pytest.raises(DagsterInvariantViolationError,
                       message='Context not_found not found'):
        execute_pipeline(
            default_context_pipeline,
            environment=environment_context_not_found,
            throw_on_error=True,
        )

    # Default context given a config field it does not declare.
    environment_field_name_mismatch = config.Environment(
        context=config.Context(config={'unexpected': 'value'})
    )
    with pytest.raises(DagsterTypeError,
                       message='Argument mismatch in context default'):
        execute_pipeline(
            default_context_pipeline,
            environment=environment_field_name_mismatch,
            throw_on_error=True,
        )

    # A pipeline whose default context requires a string_field.
    with_argful_context_pipeline = PipelineDefinition(
        solids=[never_transform],
        context_definitions={
            'default': PipelineContextDefinition(
                config_def=ConfigDefinition.config_dict(
                    {'string_field': Field(types.String)}
                ),
                context_fn=lambda info: info.config,
            )
        },
    )

    # Required field omitted entirely.
    environment_no_config_error = config.Environment(
        context=config.Context(config={})
    )
    with pytest.raises(DagsterTypeError,
                       message='Argument mismatch in context default'):
        execute_pipeline(
            with_argful_context_pipeline,
            environment=environment_no_config_error,
            throw_on_error=True,
        )

    # Required field present but with the wrong type.
    environment_type_mismatch_error = config.Environment(
        context=config.Context(config={'string_field': 1})
    )
    with pytest.raises(DagsterTypeError,
                       message='Argument mismatch in context default'):
        execute_pipeline(
            with_argful_context_pipeline,
            environment=environment_type_mismatch_error,
            throw_on_error=True,
        )
def define_read_csv_solid(name):
    """Source solid: reads a CSV from the configured path and yields it as a
    single Result."""

    def _t_fn(info, _inputs):
        yield Result(pd.read_csv(info.config['path']))

    return SolidDefinition(
        name=name,
        inputs=[],
        outputs=[OutputDefinition()],
        config_def=ConfigDefinition.config_dict({'path': Field(types.Path)}),
        transform_fn=_t_fn,
    )
def define_to_csv_solid(name):
    """Sink solid: writes its 'df' input to the configured CSV path
    (without the index column)."""

    def _t_fn(info, inputs):
        inputs['df'].to_csv(info.config['path'], index=False)

    return SolidDefinition(
        name=name,
        inputs=[InputDefinition('df')],
        outputs=[],
        config_def=ConfigDefinition.config_dict({'path': Field(types.Path)}),
        transform_fn=_t_fn,
    )
def to_parquet_solid(name):
    """Sink solid: writes its 'df' DataFrame input to the configured parquet
    path."""

    def _t_fn(info, inputs):
        inputs['df'].to_parquet(info.config['path'])

    return SolidDefinition(
        name=name,
        inputs=[InputDefinition('df', DataFrame)],
        outputs=[],
        config_def=ConfigDefinition.config_dict({'path': Field(types.Path)}),
        transform_fn=_t_fn,
    )
def load_csv_solid(name):
    """Source solid: loads the CSV at the configured path and emits it as a
    DataFrame result."""
    check.str_param(name, 'name')

    def _t_fn(info, _inputs):
        yield Result(pd.read_csv(info.config['path']))

    return SolidDefinition(
        name=name,
        inputs=[],
        outputs=[OutputDefinition(DataFrame)],
        transform_fn=_t_fn,
        config_def=ConfigDefinition(LoadDataFrameConfigDict),
    )
def _single_nested_config():
    """Named parent config dict with a required nested dict holding one
    required int field."""
    nested_type = types.ConfigDictionary(
        'NestedType',
        {'int_field': Field(types.Int)},
    )
    parent_type = types.ConfigDictionary(
        'ParentType',
        {'nested': Field(dagster_type=nested_type)},
    )
    return ConfigDefinition(config_type=parent_type)
def define_contextless_solids():
    """Context-free variants of the arithmetic solids: two int loaders plus
    add and multiply combiners, none of which touch the context."""

    @solid(
        config_def=ConfigDefinition(types.Int),
        outputs=[OutputDefinition(types.Int)],
    )
    def injest_a(info):
        return info.config

    @solid(
        config_def=ConfigDefinition(types.Int),
        outputs=[OutputDefinition(types.Int)],
    )
    def injest_b(info):
        return info.config

    int_pair_inputs = [
        InputDefinition('num_one', types.Int),
        InputDefinition('num_two', types.Int),
    ]

    @lambda_solid(
        inputs=int_pair_inputs,
        output=OutputDefinition(types.Int),
    )
    def add_ints(num_one, num_two):
        return num_one + num_two

    @lambda_solid(
        inputs=int_pair_inputs,
        output=OutputDefinition(types.Int),
    )
    def mult_ints(num_one, num_two):
        return num_one * num_two

    return [injest_a, injest_b, add_ints, mult_ints]
def define_pass_value_solid(name, description=None):
    """Solid that simply emits the configured 'value' string as its output."""
    check.str_param(name, 'name')
    check.opt_str_param(description, 'description')

    def _value_t_fn(info, _inputs):
        yield Result(info.config['value'])

    return SolidDefinition(
        name=name,
        description=description,
        inputs=[],
        outputs=[OutputDefinition(types.String)],
        config_def=ConfigDefinition(SingleValueDict),
        transform_fn=_value_t_fn,
    )
def test_any_config_definition():
    """A bare ConfigDefinition() (Any-typed) passes the raw config value
    straight through to the solid."""
    called = {}
    conf_value = 234

    @solid(config_def=ConfigDefinition())
    def hello_world(info):
        assert info.config == conf_value
        called['yup'] = True

    result = execute_single_solid(
        create_test_context(),
        hello_world,
        environment=config.Environment(
            solids={'hello_world': config.Solid(conf_value)}
        ),
    )

    # The transform's internal assertion ran and passed.
    assert called['yup']
def _nested_optional_config_with_no_default():
    """Named parent/nested config dicts where the nested int field is optional
    with no default value."""
    nested_type = types.ConfigDictionary(
        'NestedType',
        {'int_field': Field(types.Int, is_optional=True)},
    )
    parent_type = types.ConfigDictionary(
        'ParentType',
        {'nested': Field(dagster_type=nested_type)},
    )
    return ConfigDefinition(config_type=parent_type)
def define_create_table_solid(name):
    """Materialization solid: runs CREATE TABLE AS over its SQL-expression
    input using the context's SQLAlchemy engine.

    NOTE(review): the SQL is built by string formatting; table_name comes from
    trusted test config here, but this pattern is not safe for untrusted input.
    """

    def _materialization_fn(info, inputs):
        sql_expr = inputs['expr']
        check.inst(sql_expr, DagsterSqlExpression)

        output_table_name = check.str_elem(info.config, 'table_name')
        total_sql = '''CREATE TABLE {output_table_name} AS {query_text}'''.format(
            output_table_name=output_table_name,
            query_text=sql_expr.query_text,
        )
        info.context.resources.sa.engine.connect().execute(total_sql)

    return SolidDefinition(
        name=name,
        inputs=[InputDefinition('expr')],
        outputs=[],
        transform_fn=_materialization_fn,
        config_def=ConfigDefinition(CreateTableConfigDict),
    )