def test_two_contexts():
    """Scaffolding a two-context pipeline: bare by default, placeholders when
    optional fields are included."""
    two_context_pipeline = PipelineDefinition(
        name='TwoContextsPipeline',
        solids=[],
        context_definitions={
            'context_one': PipelineContextDefinition(
                context_fn=lambda *args: fail_me(),
                config_field=Field(Dict({'context_one_field': Field(String)})),
            ),
            'context_two': PipelineContextDefinition(
                context_fn=lambda *args: fail_me(),
                config_field=Field(Dict({'context_two_field': Field(Int)})),
            ),
        },
    )

    # Skipping optional fields (the default) leaves only the bare context key.
    assert scaffold_pipeline_config(two_context_pipeline) == {'context': {}}

    # Including optional fields produces type-appropriate placeholder values.
    expected_full_scaffold = {
        'context': {
            'context_one': {'config': {'context_one_field': ''}, 'resources': {}},
            'context_two': {'config': {'context_two_field': 0}, 'resources': {}},
        },
        'solids': {},
        'expectations': {'evaluate': True},
        'execution': {},
    }
    assert (
        scaffold_pipeline_config(two_context_pipeline, skip_optional=False)
        == expected_full_scaffold
    )
def define_more_complicated_nested_config():
    """Pipeline with one solid whose config schema is a multi-layered Dict."""
    nested_config = Dict(
        {
            'field_four_str': Field(String),
            'field_five_int': Field(Int),
            'field_six_nullable_int_list': Field(List(Nullable(Int)), is_optional=True),
        }
    )
    solid_config = Dict(
        {
            'field_one': Field(String),
            'field_two': Field(String, is_optional=True),
            'field_three': Field(String, is_optional=True, default_value='some_value'),
            'nested_field': Field(nested_config),
        }
    )
    return PipelineDefinition(
        name='more_complicated_nested_config',
        solids=[
            SolidDefinition(
                name='a_solid_with_multilayered_config',
                inputs=[],
                outputs=[],
                transform_fn=lambda *_args: None,
                config_field=Field(solid_config),
            )
        ],
    )
def test_pipeline_types():
    """Config types used by solids and contexts are registered on the pipeline."""

    @lambda_solid
    def produce_string():
        return 'foo'

    @solid(
        inputs=[InputDefinition('input_one', types.String)],
        outputs=[OutputDefinition(types.Any)],
        config_field=Field(Dict({'another_field': Field(types.Int)})),
    )
    def solid_one(_context, input_one):
        raise Exception('should not execute')

    pipeline_def = PipelineDefinition(
        solids=[produce_string, solid_one],
        dependencies={'solid_one': {'input_one': DependencyDefinition('produce_string')}},
        context_definitions={
            'context_one': PipelineContextDefinition(
                context_fn=lambda: None,
                config_field=Field(Dict({'field_one': Field(String)})),
            )
        },
    )

    assert pipeline_def.has_config_type('String')
    assert pipeline_def.has_config_type('Int')
    assert not pipeline_def.has_config_type('SomeName')
def test_kitchen_sink():
    """A deeply nested config type survives a meta serialize/deserialize round trip."""
    selector_type = Selector(
        {'some_field': Field(int), 'set': Field(Optional[Set[bool]])}
    )
    nested_dict_type = Dict(
        {
            'list_list': Field(List[List[int]]),
            'nested_selector': Field(selector_type),
        }
    )
    kitchen_sink = List[
        Dict(
            {
                'opt_list_of_int': Field(List[int], is_optional=True),
                'tuple_of_things': Field(Tuple[int, str]),
                'nested_dict': Field(nested_dict_type),
            }
        )
    ]

    kitchen_sink_meta = meta_from_dagster_type(kitchen_sink)
    serialized = serialize_dagster_namedtuple(kitchen_sink_meta)
    rehydrated_meta = deserialize_json_to_dagster_namedtuple(serialized)
    assert kitchen_sink_meta == rehydrated_meta
def define_test_type_pipeline():
    """Pipeline with one solid per representative config type."""
    simple_dict = Dict({'int_field': Field(Int), 'string_field': Field(String)})
    dict_with_optional_field = Dict(
        {
            'nullable_int_field': Field(Nullable(Int)),
            'optional_int_field': Field(Int, is_optional=True),
            'string_list_field': Field(List(String)),
        }
    )
    nested_dict = Dict({'nested': Field(Dict({'int_field': Field(Int)}))})

    # (solid name, config type) pairs, one solid per case.
    type_cases = [
        ('int_config', Int),
        ('list_of_int_config', List(Int)),
        ('nullable_list_of_int_config', Nullable(List(Int))),
        ('list_of_nullable_int_config', List(Nullable(Int))),
        ('nullable_list_of_nullable_int_config', Nullable(List(Nullable(Int)))),
        ('simple_dict', simple_dict),
        ('dict_with_optional_field', dict_with_optional_field),
        ('nested_dict', nested_dict),
    ]
    return PipelineDefinition(
        name='test_type_pipeline',
        solids=[
            define_solid_for_test_type(solid_name, config_type)
            for solid_name, config_type in type_cases
        ],
    )
def more_complicated_nested_config():
    """Invoke a solid whose config schema is a multi-layered Dict."""
    inner_config = Dict(
        {
            'field_four_str': Field(String),
            'field_five_int': Field(Int),
            'field_six_nullable_int_list': Field(List[Optional[Int]], is_optional=True),
        }
    )

    @solid(
        name='a_solid_with_multilayered_config',
        input_defs=[],
        output_defs=[],
        config_field=Field(
            Dict(
                {
                    'field_one': Field(String),
                    'field_two': Field(String, is_optional=True),
                    'field_three': Field(String, is_optional=True, default_value='some_value'),
                    'nested_field': Field(inner_config),
                }
            )
        ),
    )
    def a_solid_with_multilayered_config(_):
        return None

    return a_solid_with_multilayered_config()
def test_nullable_dict():
    """Nullability of a dict and nullability of its fields are independent."""
    evaluate = eval_config_value_from_dagster_type

    # Required dict, required int field: only a fully populated value passes.
    dict_with_int = Dict({'int_field': Field(Int)})
    assert not evaluate(dict_with_int, None).success
    assert not evaluate(dict_with_int, {}).success
    assert not evaluate(dict_with_int, {'int_field': None}).success
    assert evaluate(dict_with_int, {'int_field': 1}).success

    # Nullable dict: None is allowed, but a present dict still needs its field.
    nullable_dict_with_int = Optional[Dict({'int_field': Field(Int)})]
    assert evaluate(nullable_dict_with_int, None).success
    assert not evaluate(nullable_dict_with_int, {}).success
    assert not evaluate(nullable_dict_with_int, {'int_field': None}).success
    assert evaluate(nullable_dict_with_int, {'int_field': 1}).success

    # Required dict, nullable field: field may be None but must be present.
    dict_with_nullable_int = Dict({'int_field': Field(Optional[Int])})
    assert not evaluate(dict_with_nullable_int, None).success
    assert not evaluate(dict_with_nullable_int, {}).success
    assert evaluate(dict_with_nullable_int, {'int_field': None}).success
    assert evaluate(dict_with_nullable_int, {'int_field': 1}).success

    # Nullable dict with nullable field: None dict or None field both pass.
    nullable_dict_with_nullable_int = Optional[Dict({'int_field': Field(Optional[Int])})]
    assert evaluate(nullable_dict_with_nullable_int, None).success
    assert not evaluate(nullable_dict_with_nullable_int, {}).success
    assert evaluate(nullable_dict_with_nullable_int, {'int_field': None}).success
    assert evaluate(nullable_dict_with_nullable_int, {'int_field': 1}).success
def _define_bootstrap_actions():
    """Optional config field: a list of EMR bootstrap-action specifications."""
    path_field = Field(
        String,
        description='''Location of the script to run during a bootstrap action. Can be either a location in Amazon S3 or on a local file system.''',
        is_optional=False,
    )
    args_field = Field(
        List[String],
        description='A list of command line arguments to pass to the bootstrap action script.',
        is_optional=True,
    )
    bootstrap_action = Dict(
        fields={
            'Name': Field(
                String, description='The name of the bootstrap action.', is_optional=False
            ),
            'ScriptBootstrapAction': Field(
                Dict(fields={'Path': path_field, 'Args': args_field}),
                description='The script run by the bootstrap action.',
                is_optional=False,
            ),
        }
    )
    return Field(
        List[bootstrap_action],
        description='''A list of bootstrap actions to run before Hadoop starts on the cluster nodes.''',
        is_optional=True,
    )
def test_construct_same_dicts():
    """Dict construction is memoized: equal field specs yield the same object."""
    first = Dict(fields={'an_int': Field(int)})
    second = Dict(fields={'an_int': Field(int)})

    # Identity, not mere equality.
    assert first is second
    # Consequently the instantiated config types share a key.
    assert first.inst().key == second.inst().key
def test_field_order_irrelevant():
    """Dict memoization ignores the order in which fields are declared."""
    forward = Dict(fields={'an_int': Field(int), 'another_int': Field(int)})
    backward = Dict(fields={'another_int': Field(int), 'an_int': Field(int)})

    # Identity, not mere equality.
    assert forward is backward
    # Consequently the instantiated config types share a key.
    assert forward.inst().key == backward.inst().key
def _nested_optional_config_with_default():
    """Field: outer dict -> 'nested' dict -> optional int defaulting to 3."""
    inner = Dict({'int_field': Field(Int, is_optional=True, default_value=3)})
    return Field(Dict({'nested': Field(inner)}))
def test_optional_and_required_context():
    """Resource-config optionality propagates through the environment type."""
    optional_resource = ResourceDefinition(
        lambda: None,
        config_field=Field(
            dagster_type=Dict(fields={'optional_field': Field(String, is_optional=True)})
        ),
    )
    required_resource = ResourceDefinition(
        lambda: None,
        config_field=Field(dagster_type=Dict(fields={'required_field': Field(String)})),
    )
    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        solid_defs=[],
        mode_defs=[
            ModeDefinition(
                name='mixed',
                resource_defs={
                    'optional_resource': optional_resource,
                    'required_resource': required_resource,
                },
            )
        ],
    )

    env_type = create_environment_type(pipeline_def)

    # Sections unrelated to resources stay optional.
    assert env_type.fields['solids'].is_optional
    assert env_type.fields['execution'].is_optional
    assert env_type.fields['expectations'].is_optional

    # One resource with required config makes the resources tree required.
    assert nested_field(env_type, 'resources').is_required
    assert nested_field(env_type, 'resources', 'optional_resource').is_optional
    assert nested_field(env_type, 'resources', 'optional_resource', 'config').is_optional
    assert nested_field(
        env_type, 'resources', 'optional_resource', 'config', 'optional_field'
    ).is_optional
    assert nested_field(env_type, 'resources', 'required_resource').is_required
    assert nested_field(env_type, 'resources', 'required_resource', 'config').is_required
    assert nested_field(
        env_type, 'resources', 'required_resource', 'config', 'required_field'
    ).is_required

    # Evaluating with only the required resource fills the optional one in empty.
    env_obj = EnvironmentConfig.from_dict(
        throwing_evaluate_config_value(
            env_type,
            {'resources': {'required_resource': {'config': {'required_field': 'foo'}}}},
        )
    )
    assert env_obj.resources == {
        'optional_resource': {'config': {}},
        'required_resource': {'config': {'required_field': 'foo'}},
    }
def test_build_optionality():
    """Field optionality is reflected on the built config type's fields."""
    built_type = Field(
        Dict(
            {
                'required': Field(Dict({'value': Field(String)})),
                'optional': Field(Dict({'value': Field(String, is_optional=True)})),
            }
        )
    ).config_type

    assert built_type.fields['required'].is_optional is False
    assert built_type.fields['optional'].is_optional is True
def _define_steps():
    """Config field: the list of EMR steps to run on the cluster."""
    properties_field = Field(
        List[Dict(fields={'Key': Field(String), 'Value': Field(String)})],
        description='''A list of Java properties that are set when the step runs. You can use these properties to pass key value pairs to your main function.''',
        is_optional=True,
    )
    hadoop_jar_step_field = Field(
        Dict(
            fields={
                'Properties': properties_field,
                'Jar': Field(
                    String,
                    description='A path to a JAR file run during the step.',
                    is_optional=False,
                ),
                'MainClass': Field(
                    String,
                    description='''The name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file.''',
                    is_optional=True,
                ),
                'Args': Field(
                    List[String],
                    description='''A list of command line arguments passed to the JAR file's main function when executed.''',
                    is_optional=True,
                ),
            }
        ),
        description='The JAR file used for the step.',
    )
    step_type = Dict(
        fields={
            'Name': Field(String, description='The name of the step.', is_optional=False),
            'ActionOnFailure': Field(
                EmrActionOnFailure,
                description='''The action to take when the cluster step fails. Possible values are TERMINATE_CLUSTER, CANCEL_AND_WAIT, and CONTINUE. TERMINATE_JOB_FLOW is provided for backward compatibility. We recommend using TERMINATE_CLUSTER instead.''',
                is_optional=True,
            ),
            'HadoopJarStep': hadoop_jar_step_field,
        }
    )
    return Field(List[step_type], description='A list of steps to run.')
def test_nested_dict():
    """Printing a dict nested inside a dict renders both levels."""
    inner_type = Dict({'int_field': Field(Int)})
    outer_type = Dict({'nested': Field(inner_type)})

    assert_inner_types(outer_type, Int, inner_type)

    # NOTE(review): the exact line layout of this literal was reconstructed from
    # a whitespace-mangled source — confirm against print_type_to_string output.
    expected = '''{
  nested: {
    int_field: Int
  }
}'''
    assert print_type_to_string(outer_type) == expected
def define_multi_mode_with_loggers_pipeline():
    """Pipeline with two modes wiring capturing loggers.

    Returns a tuple of (pipeline, foo capture list, bar capture list); each
    logger appends (level, msg) tuples to its list instead of emitting records.
    """
    foo_logger_captured_results = []
    bar_logger_captured_results = []

    def _capturing_logger(name, sink, init_context):
        # Logger whose .log appends (level, msg) to `sink`; level comes from config.
        logger_ = logging.Logger(name)
        logger_.log = lambda level, msg, **kwargs: sink.append((level, msg))
        logger_.setLevel(coerce_valid_log_level(init_context.logger_config['log_level']))
        return logger_

    @logger(
        config_field=Field(
            Dict({'log_level': Field(String, is_optional=True, default_value='INFO')})
        )
    )
    def foo_logger(init_context):
        return _capturing_logger('foo', foo_logger_captured_results, init_context)

    @logger(
        config_field=Field(
            Dict({'log_level': Field(String, is_optional=True, default_value='INFO')})
        )
    )
    def bar_logger(init_context):
        return _capturing_logger('bar', bar_logger_captured_results, init_context)

    @solid
    def return_six(context):
        context.log.critical('Here we are')
        return 6

    pipeline = PipelineDefinition(
        name='multi_mode',
        solid_defs=[return_six],
        mode_defs=[
            ModeDefinition(name='foo_mode', logger_defs={'foo': foo_logger}),
            ModeDefinition(
                name='foo_bar_mode', logger_defs={'foo': foo_logger, 'bar': bar_logger}
            ),
        ],
    )
    return pipeline, foo_logger_captured_results, bar_logger_captured_results
def test_optional_and_required_context():
    """Context entries are individually optional; evaluation picks the configured one."""
    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        solids=[],
        context_definitions={
            'optional_field_context': PipelineContextDefinition(
                context_fn=lambda *args: None,
                config_field=Field(
                    dagster_type=Dict(
                        fields={'optional_field': Field(String, is_optional=True)}
                    )
                ),
            ),
            'required_field_context': PipelineContextDefinition(
                context_fn=lambda *args: None,
                config_field=Field(
                    dagster_type=Dict(fields={'required_field': Field(String)})
                ),
            ),
        },
    )

    env_type = pipeline_def.environment_type
    assert env_type.fields['solids'].is_optional
    # The context section itself is required...
    assert env_type.fields['context'].is_optional is False
    # ...but each individual context entry within it is optional.
    context_type = env_type.fields['context'].config_type
    assert context_type.fields['optional_field_context'].is_optional
    assert context_type.fields['required_field_context'].is_optional
    assert env_type.fields['execution'].is_optional
    assert env_type.fields['expectations'].is_optional

    env_obj = construct_environment_config(
        throwing_evaluate_config_value(
            pipeline_def.environment_type,
            {'context': {'optional_field_context': {'config': {'optional_field': 'foobar'}}}},
        )
    )
    assert env_obj.context.name == 'optional_field_context'
    assert env_obj.context.config == {'optional_field': 'foobar'}
def test_pipeline_types():
    """The environment schema registers config types used by the pipeline's solids."""

    @lambda_solid
    def produce_string():
        return 'foo'

    @solid(
        inputs=[InputDefinition('input_one', String)],
        outputs=[OutputDefinition(Any)],
        config_field=Field(Dict({'another_field': Field(Int)})),
    )
    def solid_one(_context, input_one):
        raise Exception('should not execute')

    pipeline_def = PipelineDefinition(
        solids=[produce_string, solid_one],
        dependencies={'solid_one': {'input_one': DependencyDefinition('produce_string')}},
    )

    environment_schema = create_environment_schema(pipeline_def)
    assert environment_schema.has_config_type('String')
    assert environment_schema.has_config_type('Int')
    assert not environment_schema.has_config_type('SomeName')
def test_config_list_in_dict():
    """A List[Int] nested inside a Dict evaluates successfully and round-trips."""
    config_type = Dict({'nested_list': Field(List[Int])})
    config_value = {'nested_list': [1, 2, 3]}

    result = eval_config_value_from_dagster_type(config_type, config_value)
    assert result.success
    assert result.value == config_value
def test_secret_field():
    """is_secret on a Field is surfaced on the environment schema's config type."""

    @solid(
        config_field=Field(
            Dict({'password': Field(String, is_secret=True), 'notpassword': Field(String)})
        )
    )
    def solid_with_secret(_context):
        pass

    pipeline_def = PipelineDefinition(name='secret_pipeline', solid_defs=[solid_with_secret])
    environment_schema = create_environment_schema(pipeline_def)

    config_type = environment_schema.config_type_named(
        'SecretPipeline.SolidConfig.SolidWithSecret'
    )
    assert config_type

    solid_config_fields = config_type.fields['config'].config_type.fields
    assert solid_config_fields['password'].is_secret
    assert not solid_config_fields['notpassword'].is_secret
def test_invalid_dict_call():
    """Calling Dict(...) in solid config raises TypeError (pre-0.7.0 allowed it)."""
    expected_message = re.escape("'DagsterDictApi' object is not callable")
    with pytest.raises(TypeError, match=expected_message):

        @solid(config=Dict({'foo': int}))  # pylint: disable=not-callable
        def _solid(_):
            pass
def test_all_optional_field_on_single_resource():
    """A mode whose only resource has fully optional config leaves every section optional."""
    fully_optional_resource = ResourceDefinition(
        resource_fn=lambda: None,
        config_field=Field(Dict({'optional_field': Field(String, is_optional=True)})),
    )
    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        solid_defs=[],
        mode_defs=[ModeDefinition(resource_defs={'with_optional': fully_optional_resource})],
    )

    env_type = create_environment_type(pipeline_def)
    assert env_type.fields['solids'].is_optional
    assert env_type.fields['execution'].is_optional
    assert env_type.fields['expectations'].is_optional
    # Nothing is required, so optionality holds all the way down the tree.
    assert env_type.fields['resources'].is_optional
    assert nested_field(env_type, 'resources', 'with_optional').is_optional
    assert nested_field(env_type, 'resources', 'with_optional', 'config').is_optional
    assert nested_field(
        env_type, 'resources', 'with_optional', 'config', 'optional_field'
    ).is_optional
def test_provided_default_config():
    """A context field's default_value propagates up into the context entry's default."""
    pipeline_def = PipelineDefinition(
        context_definitions={
            'some_context': PipelineContextDefinition(
                config_field=Field(
                    Dict(
                        {'with_default_int': Field(Int, is_optional=True, default_value=23434)}
                    )
                ),
                context_fn=lambda *args: None,
            )
        },
        solids=[
            SolidDefinition(
                name='some_solid', inputs=[], outputs=[], transform_fn=lambda *args: None
            )
        ],
    )

    env_type = pipeline_def.environment_type
    some_context_field = env_type.fields['context'].config_type.fields['some_context']
    assert some_context_field.is_optional

    # The inner config field picks up the member field's default...
    some_context_config_field = some_context_field.config_type.fields['config']
    assert some_context_config_field.is_optional
    assert some_context_config_field.default_value == {'with_default_int': 23434}

    # ...and the whole context entry defaults accordingly.
    assert some_context_field.default_value == {
        'config': {'with_default_int': 23434},
        'resources': {},
    }

    # Evaluating an empty environment selects the sole context by default.
    value = construct_environment_config(
        throwing_evaluate_config_value(pipeline_def.environment_type, {})
    )
    assert value.context.name == 'some_context'
    assert env_type.type_attributes.is_system_config
def test_execution_plan_create_metadata():
    """step_metadata_fn sees the evaluated environment config while building the plan."""
    solid_def = SolidDefinition(
        name='solid_metadata_creation',
        inputs=[],
        outputs=[],
        compute_fn=lambda *args, **kwargs: None,
        config_field=Field(Dict({'str_value': Field(String)})),
        # Metadata derives from the solid's own evaluated config.
        step_metadata_fn=lambda env_config: {
            'computed': env_config.solids['solid_metadata_creation'].config['str_value'] + '1'
        },
    )
    pipeline_def = PipelineDefinition(name='test_metadata', solids=[solid_def])

    environment = {'solids': {'solid_metadata_creation': {'config': {'str_value': 'foobar'}}}}
    execution_plan = create_execution_plan(pipeline_def, environment_dict=environment)

    compute_step = execution_plan.get_step_by_key('solid_metadata_creation.compute')
    assert compute_step
    assert compute_step.metadata == {'computed': 'foobar1'}
def test_basic_solid_with_config():
    """Solid config is delivered on context.solid_config during execution."""
    observed = {}

    def _record_config(context, _inputs):
        observed['yep'] = context.solid_config

    configured_solid = SolidDefinition(
        name='solid_with_context',
        inputs=[],
        outputs=[],
        config_field=Field(Dict({'some_config': Field(String)})),
        compute_fn=_record_config,
    )

    execute_pipeline(
        PipelineDefinition(solids=[configured_solid]),
        {'solids': {'solid_with_context': {'config': {'some_config': 'foo'}}}},
    )

    assert 'yep' in observed
    assert 'some_config' in observed['yep']
def test_required_resource_with_required_subfield():
    """One resource with a required config field makes the resources tree required."""
    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        solid_defs=[],
        mode_definitions=[
            ModeDefinition(
                resources={
                    'with_required': ResourceDefinition(
                        resource_fn=lambda: None,
                        config_field=Field(Dict({'required_field': Field(String)})),
                    )
                }
            )
        ],
    )

    env_type = create_environment_type(pipeline_def)
    assert env_type.fields['solids'].is_optional
    assert env_type.fields['execution'].is_optional
    assert env_type.fields['expectations'].is_optional
    # Requiredness bubbles up from the field through resource and section.
    assert env_type.fields['resources'].is_required
    assert nested_field(env_type, 'resources', 'with_required').is_required
    assert nested_field(env_type, 'resources', 'with_required', 'config').is_required
    assert nested_field(
        env_type, 'resources', 'with_required', 'config', 'required_field'
    ).is_required
def test_default_value():
    """A context-config default applies whether the config is {} or None."""

    def _get_config_test_solid(config_key, config_value):
        # Solid that asserts the resources it receives match the expectation.
        @solid(inputs=[], outputs=[OutputDefinition()])
        def config_test(context):
            assert context.resources == {config_key: config_value}

        return config_test

    pipeline = PipelineDefinition(
        solids=[_get_config_test_solid('field_one', 'heyo')],
        context_definitions={
            'custom_one': PipelineContextDefinition(
                config_field=Field(
                    Dict(
                        {
                            'field_one': Field(
                                dagster_type=String, is_optional=True, default_value='heyo'
                            )
                        }
                    )
                ),
                context_fn=lambda init_context: ExecutionContext(
                    resources=init_context.context_config
                ),
            )
        },
    )

    # Empty dict and omitted (None) config both pick up the default.
    execute_pipeline(pipeline, environment_dict={'context': {'custom_one': {}}})
    execute_pipeline(pipeline, environment_dict={'context': {'custom_one': None}})
def create_templated_sql_transform_solid(name, sql, table_arguments, dependant_solids=None):
    """Build a solid that renders templated SQL with per-table config.

    Args:
        name (str): Name for the new solid.
        sql (str): Templated SQL text for the transform.
        table_arguments (List[str]): Config field names; each becomes a String field.
        dependant_solids (Optional[List[SolidDefinition]]): Upstream solids; each
            contributes an input named after the solid.

    Returns:
        SolidDefinition: Solid emitting 'result' (Any) and 'sql_text' outputs.
    """
    check.str_param(name, 'name')
    check.str_param(sql, 'sql')
    check.list_param(table_arguments, 'table_arguments', of_type=str)
    dependant_solids = check.opt_list_param(
        dependant_solids, 'dependant_solids', of_type=SolidDefinition
    )

    table_fields = {table: Field(String) for table in table_arguments}

    return SolidDefinition(
        name=name,
        inputs=[InputDefinition(dep.name) for dep in dependant_solids],
        config_field=Field(Dict(table_fields)),
        transform_fn=_create_templated_sql_transform_with_output(sql),
        outputs=[
            OutputDefinition(name='result', dagster_type=Any),
            OutputDefinition(name='sql_text', dagster_type=SqlTextType),
        ],
    )
def define_bigquery_delete_dataset_config():
    """Config field for the BigQuery delete-dataset operation."""
    config_fields = {
        'dataset': Field(Dataset, description='A dataset to delete.', is_optional=False),
        'delete_contents': Field(
            Bool,
            description='''If True, delete all the tables in the dataset. If False and the dataset contains tables, the request will fail. Default is False.''',
            is_optional=True,
        ),
        'not_found_ok': Field(
            Bool,
            description='''Defaults to False. If True, ignore "not found" errors when deleting the dataset.''',
            is_optional=True,
        ),
    }
    return Field(
        Dict(fields=config_fields),
        description='BigQuery delete dataset configuration',
    )
def test_field_things():
    """FieldMeta reflects optionality, defaults, and descriptions of Dict fields."""
    dict_meta = meta_from_dagster_type(
        Dict(
            {
                'req': Field(int),
                'opt': Field(int, is_optional=True),
                'opt_with_default': Field(int, is_optional=True, default_value=2),
                'req_with_desc': Field(int, description='A desc'),
            }
        )
    )
    assert dict_meta.fields and len(dict_meta.fields) == 4

    metas_by_name = {field_meta.name: field_meta for field_meta in dict_meta.fields}

    assert metas_by_name['req'].is_optional is False
    assert metas_by_name['req'].description is None

    assert metas_by_name['opt'].is_optional is True

    assert metas_by_name['opt_with_default'].is_optional is True
    assert metas_by_name['opt_with_default'].default_provided is True
    assert metas_by_name['opt_with_default'].default_value_as_str == '2'

    assert metas_by_name['req_with_desc'].is_optional is False
    assert metas_by_name['req_with_desc'].description == 'A desc'