def test_two_contexts():
    '''Check the config scaffolded for a pipeline that defines two contexts.

    With default arguments the scaffold is expected to hold only an empty
    ``context`` entry; with ``skip_optional=False`` every section is expected
    to be present, with placeholder values for the context config fields.
    '''
    string_context = PipelineContextDefinition(
        context_fn=lambda *args: fail_me(),
        config_field=Field(Dict({'context_one_field': Field(String)})),
    )
    int_context = PipelineContextDefinition(
        context_fn=lambda *args: fail_me(),
        config_field=Field(Dict({'context_two_field': Field(Int)})),
    )
    two_context_pipeline = PipelineDefinition(
        name='TwoContextsPipeline',
        solids=[],
        context_definitions={
            'context_one': string_context,
            'context_two': int_context,
        },
    )

    minimal_scaffold = scaffold_pipeline_config(two_context_pipeline)
    assert minimal_scaffold == {'context': {}}

    full_scaffold = scaffold_pipeline_config(two_context_pipeline, skip_optional=False)
    assert full_scaffold == {
        'context': {
            'context_one': {'config': {'context_one_field': ''}, 'resources': {}},
            'context_two': {'config': {'context_two_field': 0}, 'resources': {}},
        },
        'solids': {},
        'expectations': {'evaluate': True},
        'execution': {},
    }
def test_select_context():
    '''Evaluate context selection against a two-context config type.

    A valid selection must evaluate to the matching ``ContextConfig``; a
    config value of the wrong type for the selected context must raise
    ``DagsterEvaluateConfigValueError``.
    '''
    context_defs = {
        'int_context': PipelineContextDefinition(
            config_field=Field(Int), context_fn=lambda *args: ExecutionContext()
        ),
        'string_context': PipelineContextDefinition(
            config_field=Field(String), context_fn=lambda *args: ExecutionContext()
        ),
    }

    context_config_type = define_context_context_cls('something', context_defs).inst()

    assert construct_context_config(
        throwing_evaluate_config_value(context_config_type, {'int_context': {'config': 1}})
    ) == ContextConfig(name='int_context', config=1)

    assert construct_context_config(
        throwing_evaluate_config_value(context_config_type, {'string_context': {'config': 'bar'}})
    ) == ContextConfig(name='string_context', config='bar')

    # Field type mismatch: a string supplied where an Int is declared.
    # The call is made bare (no ``assert``) so that a missing exception is
    # always reported by pytest.raises itself rather than by an unrelated
    # AssertionError on the return value.
    with pytest.raises(DagsterEvaluateConfigValueError):
        throwing_evaluate_config_value(context_config_type, {'int_context': {'config': 'bar'}})

    # Field type mismatch: an int supplied where a String is declared.
    with pytest.raises(DagsterEvaluateConfigValueError):
        throwing_evaluate_config_value(context_config_type, {'string_context': {'config': 1}})
def test_custom_contexts():
    '''Run one pipeline under each of two identically configured custom contexts.

    Both contexts hand their config to the solid as ``resources``; the solid
    asserts it sees ``{'field_one': 'value_two'}`` in either case.
    '''

    @solid(inputs=[], outputs=[OutputDefinition()])
    def custom_context_transform(info):
        assert info.context.resources == {'field_one': 'value_two'}

    def _config_as_resources_context():
        # Builds a fresh definition per call so each context name gets its
        # own config definition and context function.
        return PipelineContextDefinition(
            config_def=ConfigDefinition.config_dict(
                {'field_one': Field(dagster_type=types.String)}
            ),
            context_fn=lambda info: ExecutionContext(resources=info.config),
        )

    pipeline = PipelineDefinition(
        solids=[custom_context_transform],
        context_definitions={
            'custom_one': _config_as_resources_context(),
            'custom_two': _config_as_resources_context(),
        },
    )

    for context_name in ('custom_one', 'custom_two'):
        selected_environment = config.Environment(
            context=config.Context(context_name, {'field_one': 'value_two'})
        )
        execute_pipeline(pipeline, environment=selected_environment)
def define_resource_test_pipeline():
    '''Return the ``resource_test_pipeline`` definition.

    The pipeline has the single solid ``add_ints`` and two contexts, ``local``
    and ``cloud``, each providing a ``store`` resource built by a different
    resource factory.
    '''
    local_context = PipelineContextDefinition(
        resources={'store': define_in_memory_store_resource()}
    )
    cloud_context = PipelineContextDefinition(
        resources={'store': define_cloud_store_resource()}
    )
    return PipelineDefinition(
        name='resource_test_pipeline',
        solids=[add_ints],
        context_definitions={'local': local_context, 'cloud': cloud_context},
    )
def create_mem_sql_pipeline_context_tuple(solids, dependencies=None):
    '''Build a pipeline over ``solids`` with ``default`` and ``persisted`` contexts.

    Despite the name, the return value is a single ``PipelineDefinition``.

    Args:
        solids: Solid definitions, passed through to the pipeline.
        dependencies: Optional dependency mapping, passed through unchanged.

    Returns:
        A ``PipelineDefinition`` whose ``default`` context is created by
        ``in_mem_context()`` and whose ``persisted`` context is created by
        ``create_persisted_context()``.
    '''

    def _make_in_memory(_info):
        return in_mem_context()

    def _make_persisted(_info):
        return create_persisted_context()

    return PipelineDefinition(
        solids=solids,
        dependencies=dependencies,
        context_definitions={
            'default': PipelineContextDefinition(context_fn=_make_in_memory),
            'persisted': PipelineContextDefinition(context_fn=_make_persisted),
        },
    )
def test_wrong_context():
    '''Selecting an undefined context name must fail config evaluation.

    The pipeline defines ``context_one`` and ``context_two``; the environment
    asks for ``nope`` and the error message is expected to name that field.
    '''
    available_contexts = {
        'context_one': PipelineContextDefinition(context_fn=lambda *_args: fail_me()),
        'context_two': PipelineContextDefinition(context_fn=lambda *_args: fail_me()),
    }
    multi_context_pipeline = PipelineDefinition(
        name='pipeline_test_multiple_context',
        context_definitions=available_contexts,
        solids=[],
    )

    expected_message = 'Undefined field "nope" at path root:context'
    with pytest.raises(PipelineConfigEvaluationError, match=expected_message):
        execute_pipeline(multi_context_pipeline, {'context': {'nope': {}}})
def test_optional_and_required_context():
    '''Inspect optionality of environment fields for a two-context pipeline.

    One context has only an optional config field, the other a required one.
    The test expects the top-level ``context`` field to be required, both
    individual context fields to be optional, and an explicit selection of
    ``optional_field_context`` to round-trip through environment
    construction.
    '''
    optional_config_field = Field(
        dagster_type=Dict(fields={'optional_field': Field(String, is_optional=True)})
    )
    required_config_field = Field(
        dagster_type=Dict(fields={'required_field': Field(String)})
    )

    pipeline = PipelineDefinition(
        name='some_pipeline',
        solids=[],
        context_definitions={
            'optional_field_context': PipelineContextDefinition(
                context_fn=lambda *args: None,
                config_field=optional_config_field,
            ),
            'required_field_context': PipelineContextDefinition(
                context_fn=lambda *args: None,
                config_field=required_config_field,
            ),
        },
    )

    environment_type = pipeline.environment_type
    assert environment_type.fields['solids'].is_optional
    assert environment_type.fields['context'].is_optional is False

    context_selector_type = environment_type.fields['context'].config_type
    assert context_selector_type.fields['optional_field_context'].is_optional
    assert context_selector_type.fields['required_field_context'].is_optional

    assert environment_type.fields['execution'].is_optional
    assert environment_type.fields['expectations'].is_optional

    selection = {
        'context': {
            'optional_field_context': {'config': {'optional_field': 'foobar'}}
        }
    }
    evaluated = throwing_evaluate_config_value(pipeline.environment_type, selection)
    environment_config = construct_environment_config(evaluated)

    assert environment_config.context.name == 'optional_field_context'
    assert environment_config.context.config == {'optional_field': 'foobar'}
def test_user_injected_context_stack():
    '''Context values from reentrant info and from the context_fn are both visible.

    ``foo`` is supplied through ``ReentrantInfo`` and ``quux`` through the
    ``ExecutionContext`` returned by the context function; the solid asserts
    it can read both.
    '''
    observed = {}

    @solid
    def check_context_stack(info):
        assert info.context.get_context_value('foo') == 'bar'
        assert info.context.get_context_value('quux') == 'baaz'
        observed['yup'] = True

    def _context_with_quux(_info):
        return ExecutionContext(context_stack={'quux': 'baaz'})

    default_context = PipelineContextDefinition(context_fn=_context_with_quux)
    pipeline = PipelineDefinition(
        name='injected_run_id',
        solids=[check_context_stack],
        context_definitions={'default': default_context},
    )

    injected = ReentrantInfo(context_stack={'foo': 'bar'})
    result = execute_pipeline(pipeline, reentrant_info=injected)

    assert result.success
    assert observed['yup']
def test_user_injected_context_stack_collision():
    '''The same context-stack key from both sources must be rejected.

    ``foo`` is supplied both by ``ReentrantInfo`` and by the context
    function; execution is expected to raise
    ``DagsterInvariantViolationError`` with a message matching
    ``You have specified``.
    '''
    observed = {}

    @solid
    def check_context_stack(info):
        assert info.context.get_context_value('foo') == 'bar'
        assert info.context.get_context_value('quux') == 'baaz'
        observed['yup'] = True

    def _context_with_conflicting_foo(_info):
        return ExecutionContext(context_stack={'foo': 'baaz'})

    default_context = PipelineContextDefinition(context_fn=_context_with_conflicting_foo)
    pipeline = PipelineDefinition(
        name='injected_run_id',
        solids=[check_context_stack],
        context_definitions={'default': default_context},
    )

    with pytest.raises(DagsterInvariantViolationError, match='You have specified'):
        execute_pipeline(pipeline, reentrant_info=ReentrantInfo(context_stack={'foo': 'bar'}))
def test_context_selector_none_given():
    '''Passing ``None`` for the context selector must be a selector error.

    The only context requires an Int config, so an environment of
    ``{'context': None}`` is expected to raise a config evaluation error
    whose first entry has reason ``SELECTOR_FIELD_ERROR``.
    '''
    # NOTE(review): another function with this same name appears later in
    # this file; if both live in one module the later definition replaces
    # this one - confirm which should be kept.

    @solid
    def check_context(_info):
        assert False

    int_context = PipelineContextDefinition(
        context_fn=lambda info: ExecutionContext(resources=info.config),
        config_field=Field(Int),
    )
    pipeline = PipelineDefinition(
        name='context_selector_none_given',
        solids=[check_context],
        context_definitions={'context_required_int': int_context},
    )

    with pytest.raises(PipelineConfigEvaluationError) as raised:
        execute_pipeline(pipeline, environment={'context': None})

    evaluation_error = raised.value
    first_error = evaluation_error.errors[0]
    assert first_error.reason == DagsterEvaluationErrorReason.SELECTOR_FIELD_ERROR
    assert 'You specified no fields at path "root:context"' in str(evaluation_error)
def test_single_solid_with_context_config():
    '''Execute a single solid with explicit and with defaulted context config.

    The context's Int config (default 2) is handed to the solid as
    ``resources``. The solid is run once with the value given explicitly and
    once with no environment at all; a counter confirms it ran both times.
    '''
    run_counts = {'check_context_config_for_two': 0}

    @solid
    def check_context_config_for_two(info):
        assert info.resources == 2
        run_counts['check_context_config_for_two'] += 1

    int_context = PipelineContextDefinition(
        config_field=Field(Int, is_optional=True, default_value=2),
        context_fn=lambda info: ExecutionContext(resources=info.config),
    )
    pipeline = PipelineDefinition(
        solids=[check_context_config_for_two],
        context_definitions={'test_context': int_context},
    )

    explicit_environment = {'context': {'test_context': {'config': 2}}}
    explicit_result = execute_solid(
        pipeline,
        'check_context_config_for_two',
        environment=explicit_environment,
    )
    assert explicit_result.success
    assert run_counts['check_context_config_for_two'] == 1

    defaulted_result = execute_solid(pipeline, 'check_context_config_for_two')
    assert defaulted_result.success
    assert run_counts['check_context_config_for_two'] == 2
def _tempfile_context():
    '''Return a context-definitions mapping containing one ``test`` context.

    The context is built with
    ``ExecutionContext.console_logging(log_level=logging.DEBUG)`` and carries
    a ``tempfile`` resource from ``define_tempfile_resource()``.
    '''

    def _debug_console_context(info):
        return ExecutionContext.console_logging(log_level=logging.DEBUG)

    test_context = PipelineContextDefinition(
        context_fn=_debug_console_context,
        resources={'tempfile': define_tempfile_resource()},
    )
    return {'test': test_context}
def test_default_value():
    '''An omitted optional context field falls back to its default value.

    The context declares ``field_one`` as optional with default ``'heyo'``
    and exposes its config as resources. The pipeline is executed once with
    an empty dict and once with ``None`` for the context; the solid asserts
    the default is what it sees.
    '''
    # NOTE(review): another function with this same name appears later in
    # this file; if both live in one module the later definition replaces
    # this one - confirm which should be kept.

    def _get_config_test_solid(config_key, config_value):
        @solid(inputs=[], outputs=[OutputDefinition()])
        def config_test(context):
            assert context.resources == {config_key: config_value}

        return config_test

    defaulted_field = Field(dagster_type=String, is_optional=True, default_value='heyo')
    custom_context = PipelineContextDefinition(
        config_field=Field(Dict({'field_one': defaulted_field})),
        context_fn=lambda init_context: ExecutionContext(
            resources=init_context.context_config
        ),
    )
    pipeline = PipelineDefinition(
        solids=[_get_config_test_solid('field_one', 'heyo')],
        context_definitions={'custom_one': custom_context},
    )

    for omitted_config in ({}, None):
        execute_pipeline(
            pipeline,
            environment_dict={'context': {'custom_one': omitted_config}},
        )
def test_all_types_provided():
    '''``all_config_types`` must include the context-related config types.

    Checks for the user-named dict type plus the two generated type names
    for the ``some_context`` definition and its resources.
    '''
    named_config = NamedDict(
        'SomeContextNamedDict',
        {'with_default_int': Field(Int, is_optional=True, default_value=23434)},
    )
    pipeline = PipelineDefinition(
        name='pipeline',
        solids=[],
        context_definitions={
            'some_context': PipelineContextDefinition(
                config_field=Field(named_config),
                context_fn=lambda *args: None,
            )
        },
    )

    reported_names = {config_type.name for config_type in pipeline.all_config_types()}

    assert 'SomeContextNamedDict' in reported_names
    assert 'Pipeline.ContextDefinitionConfig.SomeContext' in reported_names
    assert 'Pipeline.ContextDefinitionConfig.SomeContext.Resources' in reported_names
def test_provided_default_config():
    '''A context whose config is fully defaulted needs no explicit environment.

    Verifies that the ``some_context`` field and its ``config`` sub-field are
    optional and carry the expected default values, that an empty
    environment evaluates to a selection of ``some_context``, and that the
    environment type is flagged as system config.
    '''
    defaulted_context = PipelineContextDefinition(
        config_field=Field(
            Dict({'with_default_int': Field(Int, is_optional=True, default_value=23434)})
        ),
        context_fn=lambda *args: None,
    )
    noop_solid = SolidDefinition(
        name='some_solid', inputs=[], outputs=[], transform_fn=lambda *args: None
    )
    pipeline = PipelineDefinition(
        context_definitions={'some_context': defaulted_context},
        solids=[noop_solid],
    )

    environment_type = pipeline.environment_type
    context_field = environment_type.fields['context'].config_type.fields['some_context']
    assert context_field.is_optional

    config_field = context_field.config_type.fields['config']
    assert config_field.is_optional
    assert config_field.default_value == {'with_default_int': 23434}
    assert context_field.default_value == {
        'config': {'with_default_int': 23434},
        'resources': {},
    }

    evaluated = throwing_evaluate_config_value(pipeline.environment_type, {})
    environment_config = construct_environment_config(evaluated)
    assert environment_config.context.name == 'some_context'

    assert environment_type.type_attributes.is_system_config
def test_context_selector_wrong_name():
    '''Selecting a context name the pipeline does not define is a config error.

    The pipeline only defines ``context_required_int``; the environment asks
    for ``wrong_name``. The test expects a config evaluation error whose
    first entry has reason ``FIELD_NOT_DEFINED`` and whose text names the
    undefined field.
    '''

    @solid
    def check_context(_context):
        # The test expects execution to fail before any solid runs.
        assert False

    pipeline_def = PipelineDefinition(
        name='context_selector_wrong_name',
        solids=[check_context],
        context_definitions={
            'context_required_int': PipelineContextDefinition(
                # Expose the context's own config value as the resources
                # object, consistent with the other context functions that
                # take ``init_context`` in this file.
                context_fn=lambda init_context: ExecutionContext(
                    resources=init_context.context_config
                ),
                config_field=Field(Int),
            )
        },
    )

    with pytest.raises(PipelineConfigEvaluationError) as pe_info:
        execute_pipeline(
            pipeline_def,
            environment_dict={'context': {'wrong_name': {'config': None}}},
        )

    pe = pe_info.value
    cse = pe.errors[0]
    assert cse.reason == DagsterEvaluationErrorReason.FIELD_NOT_DEFINED
    assert 'Undefined field "wrong_name" at path root:context' in str(pe)
def test_context_selector_none_given():
    '''Passing ``None`` for the context selector must be a selector error.

    The only context requires an Int config, so an environment of
    ``{'context': None}`` is expected to raise a config evaluation error
    whose first entry has reason ``SELECTOR_FIELD_ERROR`` and an exact
    message listing the defined fields.
    '''

    @solid
    def check_context(_context):
        # The test expects execution to fail before any solid runs.
        assert False

    pipeline_def = PipelineDefinition(
        name='context_selector_none_given',
        solids=[check_context],
        context_definitions={
            'context_required_int': PipelineContextDefinition(
                # Expose the context's own config value as the resources
                # object, consistent with the other context functions that
                # take ``init_context`` in this file.
                context_fn=lambda init_context: ExecutionContext(
                    resources=init_context.context_config
                ),
                config_field=Field(Int),
            )
        },
    )

    with pytest.raises(PipelineConfigEvaluationError) as pe_info:
        execute_pipeline(pipeline_def, environment_dict={'context': None})

    pe = pe_info.value
    cse = pe.errors[0]
    assert cse.reason == DagsterEvaluationErrorReason.SELECTOR_FIELD_ERROR
    assert cse.message == (
        '''Must specify the required field at path root:context. Defined '''
        '''fields: ['context_required_int']'''
    )
def test_context_selector_working():
    '''A correctly named context with valid Int config reaches the solid.

    The context exposes its config value as ``resources``; the solid asserts
    it receives ``32`` and records that it ran.
    '''
    observed = {}

    @solid
    def check_context(context):
        assert context.resources == 32
        observed['yup'] = True

    int_context = PipelineContextDefinition(
        context_fn=lambda init_context: ExecutionContext(
            resources=init_context.context_config
        ),
        config_field=Field(Int),
    )
    pipeline = PipelineDefinition(
        name='context_selector_working',
        solids=[check_context],
        context_definitions={'context_required_int': int_context},
    )

    selection = {'context': {'context_required_int': {'config': 32}}}
    result = execute_pipeline(pipeline, environment_dict=selection)

    assert result.success
    assert observed['yup']
def test_default_value():
    '''An omitted optional context field falls back to its default value.

    The context declares ``field_one`` as optional with default ``'heyo'``
    and exposes its config as resources; the pipeline is run with an empty
    config dict and the solid asserts it sees the default.
    '''

    def _get_config_test_solid(config_key, config_value):
        @solid(inputs=[], outputs=[OutputDefinition()])
        def config_test(info):
            assert info.context.resources == {config_key: config_value}

        return config_test

    defaulted_field = Field(
        dagster_type=types.String,
        is_optional=True,
        default_value='heyo',
    )
    custom_context = PipelineContextDefinition(
        config_def=ConfigDefinition.config_dict({'field_one': defaulted_field}),
        context_fn=lambda info: ExecutionContext(resources=info.config),
    )
    pipeline = PipelineDefinition(
        solids=[_get_config_test_solid('field_one', 'heyo')],
        context_definitions={'custom_one': custom_context},
    )

    empty_config_environment = config.Environment(
        context=config.Context('custom_one', {})
    )
    execute_pipeline(pipeline, environment=empty_config_environment)
def test_pipeline_types():
    '''``has_config_type`` reports the config types used by a pipeline.

    The pipeline is only defined, never executed: a solid config uses Int, a
    context config uses String, and the test checks both are reported while
    an unrelated name is not.
    '''

    @lambda_solid
    def produce_string():
        return 'foo'

    @solid(
        inputs=[InputDefinition('input_one', types.String)],
        outputs=[OutputDefinition(types.Any)],
        config_field=Field(Dict({'another_field': Field(types.Int)})),
    )
    def solid_one(_context, input_one):
        raise Exception('should not execute')

    pipeline_def = PipelineDefinition(
        solids=[produce_string, solid_one],
        dependencies={'solid_one': {'input_one': DependencyDefinition('produce_string')}},
        context_definitions={
            'context_one': PipelineContextDefinition(
                # Accept the init argument like every other context_fn in
                # this file, so the definition stays callable if this
                # pipeline is ever executed.
                context_fn=lambda *_args: None,
                config_field=Field(Dict({'field_one': Field(String)})),
            )
        },
    )

    assert pipeline_def.has_config_type('String')
    assert pipeline_def.has_config_type('Int')
    assert not pipeline_def.has_config_type('SomeName')
def define_pipeline():
    '''Return the ``pyspark_pagerank`` pipeline definition.

    Four solids are chained: ``parse_pagerank_data`` -> ``compute_links`` ->
    ``calculate_ranks`` -> ``log_ranks``. A single ``local`` context supplies
    the ``spark`` resource.
    '''
    local_context = PipelineContextDefinition(
        resources={'spark': spark_session_resource}
    )
    solid_wiring = {
        'compute_links': {'urls': DependencyDefinition('parse_pagerank_data')},
        'calculate_ranks': {'links': DependencyDefinition('compute_links')},
        # log_ranks consumes the named 'ranks' output of calculate_ranks.
        'log_ranks': {'ranks': DependencyDefinition('calculate_ranks', 'ranks')},
    }
    return PipelineDefinition(
        name='pyspark_pagerank',
        context_definitions={'local': local_context},
        solids=[parse_pagerank_data, compute_links, calculate_ranks, log_ranks],
        dependencies=solid_wiring,
    )
def test_no_config_resource_bare_no_arg():
    '''A resource declared with bare ``@resource`` and no config is usable.

    Both the resource function and the solid record that they ran; the solid
    also asserts the resource value it receives.
    '''
    invoked = {}

    @resource
    def return_thing(_init_context):
        invoked['resource'] = True
        return 'thing'

    @solid
    def check_thing(context):
        invoked['solid'] = True
        assert context.resources.return_thing == 'thing'

    default_context = PipelineContextDefinition(resources={'return_thing': return_thing})
    pipeline = PipelineDefinition(
        name='test_no_config_resource',
        solids=[check_thing],
        context_definitions={'default': default_context},
    )

    execute_pipeline(pipeline)

    assert invoked['resource']
    assert invoked['solid']
def execute_single_solid_in_isolation(
    context_params, solid_def, environment=None, throw_on_user_error=True
):
    '''
    Deprecated: prefer execute_solid in dagster.utils.test.

    Runs ``solid_def`` by itself, wrapped in a throwaway one-solid pipeline
    whose context definitions pass through the already-created
    ``context_params``.

    Args:
        context_params: An ``ExecutionContext`` instance to pass through.
        solid_def: The ``SolidDefinition`` to execute.
        environment: Optional dict. Its ``expectations`` and ``context``
            entries are forwarded, along with the ``solids`` entry for this
            solid only (``None`` when the solid has no entry).
        throw_on_user_error: Forwarded to ``execute_pipeline``.

    Returns:
        Whatever ``execute_pipeline`` returns for the one-solid pipeline.
    '''
    check.inst_param(context_params, 'context_params', ExecutionContext)
    check.inst_param(solid_def, 'solid_def', SolidDefinition)
    environment = check.opt_dict_param(environment, 'environment')
    check.bool_param(throw_on_user_error, 'throw_on_user_error')

    expectations_config = environment.get('expectations')
    context_config = environment.get('context')

    # Keep only this solid's config; any other solids' entries are dropped.
    if solid_def.name in environment.get('solids', {}):
        solids_config = {solid_def.name: environment['solids'][solid_def.name]}
    else:
        solids_config = None

    isolated_environment = {
        'expectations': expectations_config,
        'context': context_config,
        'solids': solids_config,
    }

    one_solid_pipeline = PipelineDefinition(
        solids=[solid_def],
        context_definitions=PipelineContextDefinition.passthrough_context_definition(
            context_params
        ),
    )

    return execute_pipeline(
        one_solid_pipeline,
        environment_dict=isolated_environment,
        throw_on_user_error=throw_on_user_error,
    )
def test_basic_resource():
    '''A configured string resource is visible to the solid via the context.

    The ``a_string`` resource is configured with ``'foo'``; the solid asserts
    that value and records that it ran.
    '''
    observed = {}

    @solid
    def a_solid(info):
        observed['yup'] = True
        assert info.context.resources.a_string == 'foo'

    default_context = PipelineContextDefinition(
        resources={'a_string': define_string_resource()}
    )
    pipeline = PipelineDefinition(
        name='with_a_resource',
        solids=[a_solid],
        context_definitions={'default': default_context},
    )

    resource_environment = {
        'context': {'default': {'resources': {'a_string': {'config': 'foo'}}}}
    }
    result = execute_pipeline(pipeline, resource_environment)

    assert result.success
    assert observed['yup']
def test_yield_resource():
    '''A resource function written as a generator supplies its yielded value.

    The resource function yields its own config (``'foo'``); the solid
    asserts it sees that value as ``context.resources.a_string``.
    '''
    observed = {}

    @solid
    def a_solid(context):
        observed['yup'] = True
        assert context.resources.a_string == 'foo'

    def _yield_config(init_context):
        yield init_context.resource_config

    string_resource = ResourceDefinition(
        config_field=Field(String),
        resource_fn=_yield_config,
    )
    default_context = PipelineContextDefinition(resources={'a_string': string_resource})
    pipeline = PipelineDefinition(
        name='with_a_yield_resource',
        solids=[a_solid],
        context_definitions={'default': default_context},
    )

    resource_environment = {
        'context': {'default': {'resources': {'a_string': {'config': 'foo'}}}}
    }
    result = execute_pipeline(pipeline, resource_environment)

    assert result.success
    assert observed['yup']
def test_string_resource():
    '''``ResourceDefinition.string_resource()`` passes its config to the solid.

    The ``test_string`` resource is configured with ``'foo'``; the solid
    asserts that value and records that it ran.
    '''
    observed = {}

    @solid
    def solid_test_string(info):
        assert info.context.resources.test_string == 'foo'
        observed['yup'] = True

    default_context = PipelineContextDefinition(
        resources={'test_string': ResourceDefinition.string_resource()}
    )
    string_pipeline = PipelineDefinition(
        name='test_string_resource',
        solids=[solid_test_string],
        context_definitions={'default': default_context},
    )

    resource_environment = {
        'context': {'default': {'resources': {'test_string': {'config': 'foo'}}}}
    }
    result = execute_pipeline(string_pipeline, resource_environment)

    assert result.success
    assert observed['yup']
def _spark_context():
    '''Return a context-definitions mapping containing one ``test`` context.

    The context is built with
    ``ExecutionContext.console_logging(log_level=logging.DEBUG)`` and carries
    a ``spark`` resource wrapping ``create_spark_session_local``.
    '''

    def _debug_console_context(info):
        return ExecutionContext.console_logging(log_level=logging.DEBUG)

    test_context = PipelineContextDefinition(
        context_fn=_debug_console_context,
        resources={'spark': define_lambda_resource(create_spark_session_local)},
    )
    return {'test': test_context}
def test_yield_context():
    '''A generator context_fn runs setup, then the solid, then teardown.

    The context function records ``before``, yields the context inside a
    ``context.value('foo', 'bar')`` block, and records ``after``; the solid
    records ``during``. The recorded order is asserted at the end.
    '''
    timeline = []

    @solid(inputs=[], outputs=[OutputDefinition()])
    def custom_context_transform(info):
        assert info.context.resources == {'field_one': 'value_two'}
        assert info.context._context_dict['foo'] == 'bar'  # pylint: disable=W0212
        timeline.append('during')

    def _yield_context(info):
        timeline.append('before')
        yielded_context = ExecutionContext(resources=info.config)
        with yielded_context.value('foo', 'bar'):
            yield yielded_context
        timeline.append('after')

    custom_context = PipelineContextDefinition(
        config_def=ConfigDefinition.config_dict(
            {'field_one': Field(dagster_type=types.String)}
        ),
        context_fn=_yield_context,
    )
    pipeline = PipelineDefinition(
        solids=[custom_context_transform],
        context_definitions={'custom_one': custom_context},
    )

    selected_environment = config.Environment(
        context=config.Context('custom_one', {'field_one': 'value_two'})
    )
    execute_pipeline(pipeline, environment=selected_environment)

    assert timeline == ['before', 'during', 'after']
def test_user_injected_tags_collision():
    '''The same tag key from both sources must be rejected.

    ``foo`` is supplied both by ``ExecutionMetadata`` and by the context
    function; execution is expected to raise
    ``DagsterInvariantViolationError`` with a message matching
    ``You have specified``.
    '''
    observed = {}

    @solid
    def check_tags(context):
        assert context.get_tag('foo') == 'bar'
        assert context.get_tag('quux') == 'baaz'
        observed['yup'] = True

    def _context_with_conflicting_foo(_context):
        return ExecutionContext(tags={'foo': 'baaz'})

    default_context = PipelineContextDefinition(context_fn=_context_with_conflicting_foo)
    pipeline = PipelineDefinition(
        name='injected_run_id',
        solids=[check_tags],
        context_definitions={'default': default_context},
    )

    with pytest.raises(DagsterInvariantViolationError, match='You have specified'):
        execute_pipeline(pipeline, execution_metadata=ExecutionMetadata(tags={'foo': 'bar'}))
def test_no_config_resource_definition():
    '''A ``ResourceDefinition`` built from a plain function needs no config.

    Both the resource function and the solid record that they ran; the solid
    also asserts the resource value it receives.
    '''
    invoked = {}

    def _return_thing_resource_fn(_info):
        invoked['resource'] = True
        return 'thing'

    @solid
    def check_thing(info):
        invoked['solid'] = True
        assert info.resources.return_thing == 'thing'

    default_context = PipelineContextDefinition(
        resources={'return_thing': ResourceDefinition(_return_thing_resource_fn)}
    )
    pipeline = PipelineDefinition(
        name='test_no_config_resource',
        solids=[check_thing],
        context_definitions={'default': default_context},
    )

    execute_pipeline(pipeline)

    assert invoked['resource']
    assert invoked['solid']