def test_stacked_resource_cleanup():
    """Check the creation/teardown order of two generator-style resources.

    Both resources log on either side of their ``yield``; the final assertion
    expects the teardown entries in the reverse order of the creation entries.
    """
    events = []

    def _tracked_resource(on_create, on_cleanup):
        # Build a generator resource fn that records both sides of its yield.
        def _resource_fn(_init_context):
            events.append(on_create)
            yield True
            events.append(on_cleanup)

        return _resource_fn

    @solid(required_resource_keys={"resource_with_cleanup_1", "resource_with_cleanup_2"})
    def check_resource_created(context):
        events.append("solid")
        assert context.resources.resource_with_cleanup_1 is True
        assert context.resources.resource_with_cleanup_2 is True

    resource_defs = {
        "resource_with_cleanup_1": ResourceDefinition(
            _tracked_resource("creation_1", "cleanup_1")
        ),
        "resource_with_cleanup_2": ResourceDefinition(
            _tracked_resource("creation_2", "cleanup_2")
        ),
    }
    stacked_pipeline = PipelineDefinition(
        name="test_resource_cleanup",
        solid_defs=[check_resource_created],
        mode_defs=[ModeDefinition(resource_defs=resource_defs)],
    )

    execute_pipeline(stacked_pipeline)

    expected_order = ["creation_1", "creation_2", "solid", "cleanup_2", "cleanup_1"]
    assert events == expected_order
def test_stacked_resource_cleanup():
    """Check the creation/teardown order of two generator-style resources.

    Each resource appends a marker before and after its ``yield``; the test
    expects the cleanup markers in the reverse order of the creation markers.
    """
    log = []

    def _logging_resource(created_marker, cleaned_marker):
        # Produce a generator resource fn that records setup and teardown.
        def _resource_fn(_init_context):
            log.append(created_marker)
            yield True
            log.append(cleaned_marker)

        return _resource_fn

    @solid(required_resource_keys={'resource_with_cleanup_1', 'resource_with_cleanup_2'})
    def check_resource_created(context):
        log.append('solid')
        assert context.resources.resource_with_cleanup_1 is True
        assert context.resources.resource_with_cleanup_2 is True

    mode = ModeDefinition(
        resource_defs={
            'resource_with_cleanup_1': ResourceDefinition(
                _logging_resource('creation_1', 'cleanup_1')
            ),
            'resource_with_cleanup_2': ResourceDefinition(
                _logging_resource('creation_2', 'cleanup_2')
            ),
        }
    )
    stacked_pipeline = PipelineDefinition(
        name='test_resource_cleanup',
        solid_defs=[check_resource_created],
        mode_defs=[mode],
    )

    execute_pipeline(stacked_pipeline)

    assert log == ['creation_1', 'creation_2', 'solid', 'cleanup_2', 'cleanup_1']
def test_mixed_multiple_resources():
    """Run one solid against a returned resource and a yielded resource together.

    Both resources take a string config and hand it back as the resource value;
    the yielded one additionally records an entry after its ``yield``.
    """
    solid_ran = {}
    trace = []

    def _do_return_resource(init_context):
        trace.append('before return ' + init_context.resource_config)
        return init_context.resource_config

    def _do_yield_resource(init_context):
        trace.append('before yield ' + init_context.resource_config)
        yield init_context.resource_config
        trace.append('after yield ' + init_context.resource_config)

    @solid(required_resource_keys={'returned_string', 'yielded_string'})
    def a_solid(context):
        solid_ran['yup'] = True
        assert context.resources.returned_string == 'foo'
        assert context.resources.yielded_string == 'bar'

    resource_defs = {
        'yielded_string': ResourceDefinition(
            config_field=Field(String), resource_fn=_do_yield_resource
        ),
        'returned_string': ResourceDefinition(
            config_field=Field(String), resource_fn=_do_return_resource
        ),
    }
    pipeline_def = PipelineDefinition(
        name='with_a_yield_resource',
        solid_defs=[a_solid],
        mode_defs=[ModeDefinition(resource_defs=resource_defs)],
    )
    environment = {
        'resources': {
            'returned_string': {'config': 'foo'},
            'yielded_string': {'config': 'bar'},
        }
    }

    result = execute_pipeline(pipeline_def, environment)

    assert result.success
    assert solid_ran['yup']

    # could be processed in any order in python 2
    for position in (0, 1):
        assert 'before yield bar' in trace[position] or 'before return foo' in trace[position]

    assert 'after yield bar' in trace[2]
def test_mixed_multiple_resources():
    """Run one solid against a returned resource and a yielded resource together.

    Both resources take a string config and hand it back as the resource value;
    the yielded one additionally records an entry after its ``yield``.
    """
    solid_ran = {}
    trace = []

    def _do_return_resource(init_context):
        trace.append("before return " + init_context.resource_config)
        return init_context.resource_config

    def _do_yield_resource(init_context):
        trace.append("before yield " + init_context.resource_config)
        yield init_context.resource_config
        trace.append("after yield " + init_context.resource_config)

    @solid(required_resource_keys={"returned_string", "yielded_string"})
    def a_solid(context):
        solid_ran["yup"] = True
        assert context.resources.returned_string == "foo"
        assert context.resources.yielded_string == "bar"

    resource_defs = {
        "yielded_string": ResourceDefinition(
            config_schema=String, resource_fn=_do_yield_resource
        ),
        "returned_string": ResourceDefinition(
            config_schema=String, resource_fn=_do_return_resource
        ),
    }
    pipeline_def = PipelineDefinition(
        name="with_a_yield_resource",
        solid_defs=[a_solid],
        mode_defs=[ModeDefinition(resource_defs=resource_defs)],
    )
    run_config = {
        "resources": {
            "returned_string": {"config": "foo"},
            "yielded_string": {"config": "bar"},
        }
    }

    result = execute_pipeline(pipeline_def, run_config)

    assert result.success
    assert solid_ran["yup"]

    # could be processed in any order in python 2
    for position in (0, 1):
        assert "before yield bar" in trace[position] or "before return foo" in trace[position]

    assert "after yield bar" in trace[2]
def test_optional_and_required_context():
    """Check requiredness of the environment type for a mode mixing resource kinds.

    One resource has only an optional config field and the other has a required
    one; the test inspects ``is_required`` along each path and then the
    resources produced by ``EnvironmentConfig.build``.
    """

    @solid(required_resource_keys={"required_resource"})
    def needs_resource(_):
        pass

    optional_resource = ResourceDefinition(
        lambda _: None,
        config_schema={"optional_field": Field(String, is_required=False)},
    )
    required_resource = ResourceDefinition(
        lambda _: None,
        config_schema={"required_field": String},
    )
    mixed_mode = ModeDefinition(
        name="mixed",
        resource_defs={
            "optional_resource": optional_resource,
            "required_resource": required_resource,
        },
    )
    pipeline_def = PipelineDefinition(
        name="some_pipeline",
        solid_defs=[needs_resource],
        mode_defs=[mixed_mode],
    )

    env_type = create_environment_type(pipeline_def)

    assert env_type.fields["solids"].is_required is False
    assert env_type.fields["execution"].is_required is False

    assert nested_field(env_type, "resources").is_required

    # Every level of the optional resource's path is expected to be optional.
    optional_path = ("resources", "optional_resource")
    assert nested_field(env_type, *optional_path).is_required is False
    assert nested_field(env_type, *(optional_path + ("config",))).is_required is False
    assert (
        nested_field(env_type, *(optional_path + ("config", "optional_field"))).is_required
        is False
    )

    # Every level of the required resource's path is expected to be required.
    required_path = ("resources", "required_resource")
    assert nested_field(env_type, *required_path).is_required
    assert nested_field(env_type, *(required_path + ("config",))).is_required
    assert nested_field(env_type, *(required_path + ("config", "required_field"))).is_required

    run_config = {"resources": {"required_resource": {"config": {"required_field": "foo"}}}}
    env_obj = EnvironmentConfig.build(pipeline_def, run_config)

    expected_resources = {
        "optional_resource": ResourceConfig(None),
        "required_resource": ResourceConfig({"required_field": "foo"}),
        "io_manager": ResourceConfig(None),
    }
    assert env_obj.resources == expected_resources
def test_optional_and_required_context():
    """Check optionality of the environment type for a mode mixing resource kinds.

    One resource's config dict has only an optional field and the other's has a
    required field. The test inspects ``is_optional`` / ``is_required`` along
    each path of the generated environment type, then evaluates a config that
    supplies only the required resource and checks the resulting resources.
    """
    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        solid_defs=[],
        mode_defs=[
            ModeDefinition(
                name='mixed',
                resource_defs={
                    'optional_resource': ResourceDefinition(
                        # Resource fns are invoked with an init context, so the
                        # stub accepts (and ignores) one positional argument.
                        lambda _: None,
                        config_field=Field(
                            dagster_type=Dict(
                                fields={'optional_field': Field(String, is_optional=True)}
                            )
                        ),
                    ),
                    'required_resource': ResourceDefinition(
                        lambda _: None,
                        config_field=Field(
                            dagster_type=Dict(fields={'required_field': Field(String)})
                        ),
                    ),
                },
            )
        ],
    )

    env_type = create_environment_type(pipeline_def)

    assert env_type.fields['solids'].is_optional
    assert env_type.fields['execution'].is_optional
    assert env_type.fields['expectations'].is_optional

    assert nested_field(env_type, 'resources').is_required

    assert nested_field(env_type, 'resources', 'optional_resource').is_optional
    assert nested_field(env_type, 'resources', 'optional_resource', 'config').is_optional
    assert nested_field(
        env_type, 'resources', 'optional_resource', 'config', 'optional_field'
    ).is_optional

    assert nested_field(env_type, 'resources', 'required_resource').is_required
    assert nested_field(env_type, 'resources', 'required_resource', 'config').is_required
    assert nested_field(
        env_type, 'resources', 'required_resource', 'config', 'required_field'
    ).is_required

    env_obj = EnvironmentConfig.from_dict(
        throwing_evaluate_config_value(
            env_type, {'resources': {'required_resource': {'config': {'required_field': 'foo'}}}}
        )
    )

    assert env_obj.resources == {
        'optional_resource': {'config': {}},
        'required_resource': {'config': {'required_field': 'foo'}},
    }
def test_optional_and_required_context():
    """Check requiredness of the environment type for a mode mixing resource kinds.

    One resource has only an optional config field and the other has a required
    one; the test inspects ``is_required`` along each path and then the
    resources produced by ``EnvironmentConfig.build``.
    """
    optional_resource = ResourceDefinition(
        lambda _: None,
        config_schema={'optional_field': Field(String, is_required=False)},
    )
    required_resource = ResourceDefinition(
        lambda _: None,
        config_schema={'required_field': String},
    )
    mixed_mode = ModeDefinition(
        name='mixed',
        resource_defs={
            'optional_resource': optional_resource,
            'required_resource': required_resource,
        },
    )
    pipeline_def = PipelineDefinition(
        name='some_pipeline', solid_defs=[], mode_defs=[mixed_mode]
    )

    env_type = create_environment_type(pipeline_def)

    assert env_type.fields['solids'].is_required is False
    assert env_type.fields['execution'].is_required is False

    assert nested_field(env_type, 'resources').is_required

    # Every level of the optional resource's path is expected to be optional.
    optional_path = ('resources', 'optional_resource')
    assert nested_field(env_type, *optional_path).is_required is False
    assert nested_field(env_type, *(optional_path + ('config',))).is_required is False
    assert (
        nested_field(env_type, *(optional_path + ('config', 'optional_field'))).is_required
        is False
    )

    # Every level of the required resource's path is expected to be required.
    required_path = ('resources', 'required_resource')
    assert nested_field(env_type, *required_path).is_required
    assert nested_field(env_type, *(required_path + ('config',))).is_required
    assert nested_field(env_type, *(required_path + ('config', 'required_field'))).is_required

    environment = {'resources': {'required_resource': {'config': {'required_field': 'foo'}}}}
    env_obj = EnvironmentConfig.build(pipeline_def, environment)

    expected_resources = {
        'optional_resource': {'config': {}},
        'required_resource': {'config': {'required_field': 'foo'}},
    }
    assert env_obj.resources == expected_resources
def test_pipeline_init_failure():
    """Execute a pipeline whose only resource raises while being created.

    With ``raise_on_error=False`` the run is expected to fail and to report
    exactly one event, of type ``PIPELINE_INIT_FAILURE``.
    """

    def failing_resource_fn(*args, **kwargs):
        raise Exception()

    failing_mode = ModeDefinition(
        resources={'failing': ResourceDefinition(resource_fn=failing_resource_fn)}
    )
    pipeline_def = PipelineDefinition(
        [define_stub_solid('stub', None)],
        'failing_init_pipeline',
        mode_definitions=[failing_mode],
    )

    env_config = {}
    quiet_run_config = RunConfig(
        executor_config=InProcessExecutorConfig(raise_on_error=False)
    )
    result = execute_pipeline(
        pipeline_def,
        environment_dict=env_config,
        run_config=quiet_run_config,
    )

    assert result.success is False

    events = result.event_list
    assert len(events) == 1
    init_failure = events[0]
    assert init_failure.event_type_value == 'PIPELINE_INIT_FAILURE'
    assert init_failure.pipeline_init_failure_data
def test_all_optional_field_on_single_resource():
    """Check that a resource with only an optional config field is optional throughout.

    The test expects ``is_required`` to be ``False`` for the top-level sections
    and for every level of the path down to the optional field.
    """
    with_optional = ResourceDefinition(
        resource_fn=lambda _: None,
        config_schema={'optional_field': Field(String, is_required=False)},
    )
    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        solid_defs=[],
        mode_defs=[ModeDefinition(resource_defs={'with_optional': with_optional})],
    )

    env_type = create_environment_type(pipeline_def)

    for section in ('solids', 'execution', 'resources'):
        assert env_type.fields[section].is_required is False

    resource_path = ('resources', 'with_optional')
    assert nested_field(env_type, *resource_path).is_required is False
    assert nested_field(env_type, *(resource_path + ('config',))).is_required is False
    assert (
        nested_field(env_type, *(resource_path + ('config', 'optional_field'))).is_required
        is False
    )
def test_pipeline_init_failure():
    """Execute a pipeline whose required resource raises while being created.

    With ``raise_on_error=False`` the run is expected to fail and its last
    event is expected to be a ``PIPELINE_INIT_FAILURE``.
    """

    def failing_resource_fn(*args, **kwargs):
        raise Exception()

    @solid(required_resource_keys={"failing"})
    def stub_solid(_):
        return None

    failing_mode = ModeDefinition(
        resource_defs={"failing": ResourceDefinition(resource_fn=failing_resource_fn)}
    )

    @pipeline(mode_defs=[failing_mode])
    def failing_init_pipeline():
        stub_solid()

    env_config = {}
    result = execute_pipeline(
        failing_init_pipeline,
        run_config=dict(env_config),
        raise_on_error=False,
    )

    assert result.success is False

    last_event = result.event_list[-1]
    assert last_event.event_type_value == "PIPELINE_INIT_FAILURE"
    assert last_event.pipeline_init_failure_data
def define_postgres_db_info_resource():
    """Return a ``ResourceDefinition`` whose resource fn builds a Postgres ``DbInfo``.

    The resource is configured through ``PostgresConfigData``.
    """

    def _create_postgres_db_info(info):
        config = info.config
        connection_args = (
            config['postgres_username'],
            config['postgres_password'],
            config['postgres_hostname'],
            config['postgres_db_name'],
        )

        # The same connection details are rendered twice: once with the URL
        # helper's default and once with jdbc=False.
        jdbc_url = create_postgres_db_url(*connection_args)
        plain_url = create_postgres_db_url(*connection_args, jdbc=False)

        def _do_load(data_frame, table_name):
            writer = data_frame.write.option('driver', 'org.postgresql.Driver')
            writer.mode('overwrite').jdbc(jdbc_url, table_name)

        return DbInfo(
            url=plain_url,
            jdbc_url=jdbc_url,
            engine=create_postgres_engine(plain_url),
            dialect='postgres',
            load_table=_do_load,
        )

    return ResourceDefinition(
        resource_fn=_create_postgres_db_info,
        config_field=Field(PostgresConfigData),
    )
def define_errorable_resource():
    """Return a ``ResourceDefinition`` wrapping ``resource_init``.

    Its config schema has a single optional boolean,
    ``throw_on_resource_init``, which defaults to ``False``.
    """
    schema = {
        'throw_on_resource_init': Field(bool, is_required=False, default_value=False),
    }
    return ResourceDefinition(resource_fn=resource_init, config_schema=schema)
def test_resource_cleanup():
    """Check that a generator resource runs its setup, the solid, and its teardown.

    The resource fn flags the code before and after its ``yield`` and the solid
    flags its own execution; all three flags must be set after the run.
    """
    flags = {}

    def _cleanup_resource_fn(_init_context):
        flags['creation'] = True
        yield True
        flags['cleanup'] = True

    @solid(required_resource_keys={'resource_with_cleanup'})
    def check_resource_created(context):
        flags['solid'] = True
        assert context.resources.resource_with_cleanup is True

    cleanup_mode = ModeDefinition(
        resource_defs={'resource_with_cleanup': ResourceDefinition(_cleanup_resource_fn)}
    )
    cleanup_pipeline = PipelineDefinition(
        name='test_resource_cleanup',
        solid_defs=[check_resource_created],
        mode_defs=[cleanup_mode],
    )

    execute_pipeline(cleanup_pipeline)

    for stage in ('creation', 'solid', 'cleanup'):
        assert flags[stage] is True
def test_all_optional_field_on_single_resource():
    """Check that a resource with only an optional config field is optional throughout.

    The test expects ``is_optional`` to hold for the top-level sections of the
    generated environment type and for every level of the path down to the
    resource's single optional field.
    """
    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        solid_defs=[],
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    'with_optional': ResourceDefinition(
                        # Resource fns are invoked with an init context, so the
                        # stub accepts (and ignores) one positional argument.
                        resource_fn=lambda _: None,
                        config_field=Field(
                            Dict({'optional_field': Field(String, is_optional=True)})
                        ),
                    )
                }
            )
        ],
    )

    env_type = create_environment_type(pipeline_def)

    assert env_type.fields['solids'].is_optional
    assert env_type.fields['execution'].is_optional
    assert env_type.fields['expectations'].is_optional
    assert env_type.fields['resources'].is_optional

    assert nested_field(env_type, 'resources', 'with_optional').is_optional
    assert nested_field(env_type, 'resources', 'with_optional', 'config').is_optional
    assert nested_field(
        env_type, 'resources', 'with_optional', 'config', 'optional_field'
    ).is_optional
def test_provided_default_on_resources_config():
    """Check that a resource config field's default propagates up the environment type.

    The resource has one optional int field with a default; the test expects
    that default to appear on the ``config`` field, on the resource field, and
    in the resources built from an empty environment dict.
    """

    @solid(name='some_solid', input_defs=[], output_defs=[])
    def some_solid(_):
        return None

    some_resource = ResourceDefinition(
        resource_fn=lambda _: None,
        config_schema={
            'with_default_int': Field(Int, is_required=False, default_value=23434)
        },
    )
    some_mode = ModeDefinition(
        name='some_mode',
        resource_defs={'some_resource': some_resource},
    )

    @pipeline(mode_defs=[some_mode])
    def pipeline_def():
        some_solid()

    env_type = create_environment_type(pipeline_def)

    resources_type = env_type.fields['resources'].config_type
    some_resource_field = resources_type.fields['some_resource']
    assert some_resource_field.is_required is False

    some_resource_config_field = some_resource_field.config_type.fields['config']
    assert some_resource_config_field.is_required is False
    assert some_resource_config_field.default_value == {'with_default_int': 23434}

    assert some_resource_field.default_value == {'config': {'with_default_int': 23434}}

    built = EnvironmentConfig.build(pipeline_def, {})
    assert built.resources == {'some_resource': {'config': {'with_default_int': 23434}}}
def test_resource_cleanup():
    """Check that a generator resource runs its setup, the solid, and its teardown.

    The resource fn flags the code before and after its ``yield`` and the solid
    flags its own execution; all three flags must be set after the run.
    """
    flags = {}

    def _cleanup_resource_fn(_init_context):
        flags["creation"] = True
        yield True
        flags["cleanup"] = True

    @solid(required_resource_keys={"resource_with_cleanup"})
    def check_resource_created(context):
        flags["solid"] = True
        assert context.resources.resource_with_cleanup is True

    cleanup_mode = ModeDefinition(
        resource_defs={"resource_with_cleanup": ResourceDefinition(_cleanup_resource_fn)}
    )
    cleanup_pipeline = PipelineDefinition(
        name="test_resource_cleanup",
        solid_defs=[check_resource_created],
        mode_defs=[cleanup_mode],
    )

    execute_pipeline(cleanup_pipeline)

    for stage in ("creation", "solid", "cleanup"):
        assert flags[stage] is True
def test_required_resource_with_required_subfield():
    """Check that a required config field makes the whole resources path required.

    The solid requires the resource and the resource's schema has one required
    string field; ``solids`` and ``execution`` are still expected to be optional.
    """

    @solid(required_resource_keys={"with_required"})
    def needs_resource(_):
        pass

    with_required = ResourceDefinition(
        resource_fn=lambda _: None,
        config_schema={"required_field": String},
    )
    pipeline_def = PipelineDefinition(
        name="some_pipeline",
        solid_defs=[needs_resource],
        mode_defs=[ModeDefinition(resource_defs={"with_required": with_required})],
    )

    env_type = create_environment_type(pipeline_def)

    for section in ("solids", "execution"):
        assert env_type.fields[section].is_required is False
    assert env_type.fields["resources"].is_required

    resource_path = ("resources", "with_required")
    assert nested_field(env_type, *resource_path).is_required
    assert nested_field(env_type, *(resource_path + ("config",))).is_required
    assert nested_field(env_type, *(resource_path + ("config", "required_field"))).is_required
def test_all_types_provided():
    """Check that the run config schema exposes the resource's config shape once.

    Exactly one strict-shape config type carrying the ``with_default_int``
    field is expected among all the schema's config types.
    """
    some_resource = ResourceDefinition(
        lambda _: None,
        config_schema={
            "with_default_int": Field(Int, is_required=False, default_value=23434)
        },
    )
    some_mode = ModeDefinition(
        name="SomeMode",
        resource_defs={"some_resource": some_resource},
    )
    pipeline_def = PipelineDefinition(name="pipeline", solid_defs=[], mode_defs=[some_mode])

    run_config_schema = create_run_config_schema(pipeline_def)
    all_types = list(run_config_schema.all_config_types())

    def _is_target_shape(config_type):
        # Test the kind first so ``fields`` is only read on strict shapes.
        if config_type.kind != ConfigTypeKind.STRICT_SHAPE:
            return False
        return "with_default_int" in config_type.fields.keys()

    matching_types = [config_type for config_type in all_types if _is_target_shape(config_type)]
    assert len(matching_types) == 1
def test_all_types_provided():
    """Check that the environment schema exposes the expected named config types.

    The resource's config is a ``NamedDict``; the test expects that name, plus
    the generated environment and resource type names for mode ``SomeMode``,
    among the names of all config types in the schema.
    """
    pipeline_def = PipelineDefinition(
        name='pipeline',
        solid_defs=[],
        mode_defs=[
            ModeDefinition(
                name='SomeMode',
                resource_defs={
                    'some_resource': ResourceDefinition(
                        # Resource fns are invoked with an init context, so the
                        # stub accepts (and ignores) one positional argument.
                        lambda _: None,
                        config_field=Field(
                            NamedDict(
                                'SomeModeNamedDict',
                                {
                                    'with_default_int': Field(
                                        Int, is_optional=True, default_value=23434
                                    )
                                },
                            )
                        ),
                    )
                },
            )
        ],
    )

    environment_schema = create_environment_schema(pipeline_def)

    all_types = list(environment_schema.all_config_types())
    type_names = {config_type.name for config_type in all_types}

    assert 'SomeModeNamedDict' in type_names
    assert 'Pipeline.Mode.SomeMode.Environment' in type_names
    assert 'Pipeline.Mode.SomeMode.Resources.SomeResource' in type_names
def test_yield_resource():
    """Check that a solid sees the value yielded by a generator resource fn.

    The resource yields its own string config, so the solid is expected to
    read back the configured ``"foo"``.
    """
    solid_ran = {}

    def _do_resource(init_context):
        yield init_context.resource_config

    @solid(required_resource_keys={"a_string"})
    def a_solid(context):
        solid_ran["yup"] = True
        assert context.resources.a_string == "foo"

    yield_string_resource = ResourceDefinition(config_schema=String, resource_fn=_do_resource)
    string_mode = ModeDefinition(resource_defs={"a_string": yield_string_resource})
    pipeline_def = PipelineDefinition(
        name="with_a_yield_resource",
        solid_defs=[a_solid],
        mode_defs=[string_mode],
    )

    run_config = {"resources": {"a_string": {"config": "foo"}}}
    result = execute_pipeline(pipeline_def, run_config)

    assert result.success
    assert solid_ran["yup"]
def test_all_optional_field_on_single_resource():
    """Check that a resource with only an optional config field is optional throughout.

    The test expects ``is_required`` to be ``False`` for the top-level sections
    and for every level of the path down to the optional field.
    """
    with_optional = ResourceDefinition(
        resource_fn=lambda _: None,
        config_schema={"optional_field": Field(String, is_required=False)},
    )
    pipeline_def = PipelineDefinition(
        name="some_pipeline",
        solid_defs=[],
        mode_defs=[ModeDefinition(resource_defs={"with_optional": with_optional})],
    )

    env_type = create_environment_type(pipeline_def)

    for section in ("solids", "execution", "resources"):
        assert env_type.fields[section].is_required is False

    resource_path = ("resources", "with_optional")
    assert nested_field(env_type, *resource_path).is_required is False
    assert nested_field(env_type, *(resource_path + ("config",))).is_required is False
    assert (
        nested_field(env_type, *(resource_path + ("config", "optional_field"))).is_required
        is False
    )
def define_in_memory_store_resource():
    """Return a config-free ``ResourceDefinition`` that creates an ``InMemoryStore``."""
    description = ''' An in-memory key value store that requires no configuration. Useful for unittesting. '''

    def _create_store(_):
        # The init context is ignored: the store takes no arguments.
        return InMemoryStore()

    return ResourceDefinition(resource_fn=_create_store, description=description)
def test_no_config_resource_definition():
    """Check that a resource defined without any config is created and usable.

    Both the resource fn and the solid flag their execution, and the solid
    checks that it received the value the resource fn returned.
    """
    ran = {}

    def _return_thing_resource_fn(_init_context):
        ran["resource"] = True
        return "thing"

    @solid(required_resource_keys={"return_thing"})
    def check_thing(context):
        ran["solid"] = True
        assert context.resources.return_thing == "thing"

    thing_mode = ModeDefinition(
        resource_defs={"return_thing": ResourceDefinition(_return_thing_resource_fn)}
    )
    no_config_pipeline = PipelineDefinition(
        name="test_no_config_resource",
        solid_defs=[check_thing],
        mode_defs=[thing_mode],
    )

    execute_pipeline(no_config_pipeline)

    assert ran["resource"]
    assert ran["solid"]
def test_pipeline_init_failure():
    """Execute a pipeline whose only resource raises while being created.

    With ``raise_on_error=False`` the run is expected to fail and to report
    exactly one event, of type ``PIPELINE_INIT_FAILURE``.
    """

    def failing_resource_fn(*args, **kwargs):
        raise Exception()

    stub_solid = define_stub_solid('stub', None)
    failing_mode = ModeDefinition(
        resource_defs={'failing': ResourceDefinition(resource_fn=failing_resource_fn)}
    )

    @pipeline(mode_defs=[failing_mode])
    def failing_init_pipeline():
        stub_solid()

    env_config = {}
    result = execute_pipeline(
        failing_init_pipeline,
        environment_dict=dict(env_config),
        raise_on_error=False,
    )

    assert result.success is False

    events = result.event_list
    assert len(events) == 1
    init_failure = events[0]
    assert init_failure.event_type_value == 'PIPELINE_INIT_FAILURE'
    assert init_failure.pipeline_init_failure_data
def test_pass_unrelated_type_to_field_error_resource_definition():
    """Check the error raised when a resource is given an unresolvable ``config``.

    Passing the plain string ``'wut'`` as ``config`` is expected to raise
    ``DagsterInvalidConfigDefinitionError`` with a message naming that value.
    """
    expected_prefix = (
        "Error defining config. Original value passed: 'wut'. 'wut' cannot be resolved."
    )

    with pytest.raises(DagsterInvalidConfigDefinitionError) as exc_info:
        ResourceDefinition(resource_fn=lambda: None, config='wut')

    message = str(exc_info.value)
    assert message.startswith(expected_prefix)
def test_get_out_of_pipeline_context():
    """Check the context ``dagstermill.get_context`` builds from a bare mode definition.

    The context is expected to carry the ephemeral dagstermill pipeline name
    and to expose the ``list`` resource as an empty list.
    """
    list_resource = ResourceDefinition(lambda _: [])
    mode_def = ModeDefinition(resource_defs={'list': list_resource})

    context = dagstermill.get_context(mode_def=mode_def)

    assert context.pipeline_def.name == 'ephemeral_dagstermill_pipeline'
    assert context.resources.list == []
def test_no_config_resource_definition():
    """Check that a resource defined without any config is created and usable.

    Both the resource fn and the solid flag their execution, and the solid
    checks that it received the value the resource fn returned.
    """
    ran = {}

    def _return_thing_resource_fn(_init_context):
        ran['resource'] = True
        return 'thing'

    @solid(required_resource_keys={'return_thing'})
    def check_thing(context):
        ran['solid'] = True
        assert context.resources.return_thing == 'thing'

    thing_mode = ModeDefinition(
        resource_defs={'return_thing': ResourceDefinition(_return_thing_resource_fn)}
    )
    no_config_pipeline = PipelineDefinition(
        name='test_no_config_resource',
        solid_defs=[check_thing],
        mode_defs=[thing_mode],
    )

    execute_pipeline(no_config_pipeline)

    assert ran['resource']
    assert ran['solid']
def define_redshift_db_info_resource():
    """Return a ``ResourceDefinition`` whose resource fn builds a Redshift ``DbInfo``.

    The resource is configured through ``RedshiftConfigData``.
    """

    def _create_redshift_db_info(info):
        config = info.config
        connection_args = (
            config['redshift_username'],
            config['redshift_password'],
            config['redshift_hostname'],
            config['redshift_db_name'],
        )

        # The same connection details are rendered twice: once with the URL
        # helper's default and once with jdbc=False.
        jdbc_url = create_redshift_db_url(*connection_args)
        plain_url = create_redshift_db_url(*connection_args, jdbc=False)

        s3_temp_dir = config['s3_temp_dir']

        def _do_load(data_frame, table_name):
            writer = data_frame.write.format('com.databricks.spark.redshift')
            writer = writer.option('tempdir', s3_temp_dir).mode('overwrite')
            writer.jdbc(jdbc_url, table_name)

        return DbInfo(
            url=plain_url,
            jdbc_url=jdbc_url,
            engine=create_redshift_engine(plain_url),
            dialect='redshift',
            load_table=_do_load,
        )

    return ResourceDefinition(
        resource_fn=_create_redshift_db_info,
        config_field=Field(RedshiftConfigData),
    )
def test_yield_resource():
    """Check that a solid sees the value yielded by a generator resource fn.

    The resource yields its own string config, so the solid is expected to
    read back the configured ``'foo'``.
    """
    solid_ran = {}

    def _do_resource(init_context):
        yield init_context.resource_config

    @solid(required_resource_keys={'a_string'})
    def a_solid(context):
        solid_ran['yup'] = True
        assert context.resources.a_string == 'foo'

    yield_string_resource = ResourceDefinition(
        config_field=Field(String), resource_fn=_do_resource
    )
    string_mode = ModeDefinition(resource_defs={'a_string': yield_string_resource})
    pipeline_def = PipelineDefinition(
        name='with_a_yield_resource',
        solid_defs=[a_solid],
        mode_defs=[string_mode],
    )

    environment = {'resources': {'a_string': {'config': 'foo'}}}
    result = execute_pipeline(pipeline_def, environment)

    assert result.success
    assert solid_ran['yup']
def test_required_resource_with_required_subfield():
    """Check that a required config field makes the whole resources path required.

    The resource's schema has one required string field, so ``resources`` and
    every level beneath it are expected to be required, while ``solids`` and
    ``execution`` are expected to be optional.
    """
    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        solid_defs=[],
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    'with_required': ResourceDefinition(
                        # Resource fns are invoked with an init context, so the
                        # stub accepts (and ignores) one positional argument.
                        resource_fn=lambda _: None,
                        config_schema={'required_field': String},
                    )
                }
            )
        ],
    )

    env_type = create_environment_type(pipeline_def)

    assert env_type.fields['solids'].is_required is False
    assert env_type.fields['execution'].is_required is False
    assert env_type.fields['resources'].is_required

    assert nested_field(env_type, 'resources', 'with_required').is_required
    assert nested_field(env_type, 'resources', 'with_required', 'config').is_required
    assert nested_field(
        env_type, 'resources', 'with_required', 'config', 'required_field'
    ).is_required