def test_pipeline_types():
    ContextOneConfigDict = types.ConfigDictionary(
        'ContextOneConfigDict',
        {'field_one': Field(types.String)},
    )

    SolidOneConfigDict = types.ConfigDictionary(
        'SolidOneConfigDict',
        {'another_field': Field(types.Int)},
    )

    @lambda_solid
    def produce_string():
        return 'foo'

    @solid(
        inputs=[InputDefinition('input_one', types.String)],
        outputs=[OutputDefinition(types.Any)],
        config_def=ConfigDefinition(SolidOneConfigDict),
    )
    def solid_one(_info, input_one):
        raise Exception('should not execute')

    pipeline_def = PipelineDefinition(
        solids=[produce_string, solid_one],
        dependencies={
            'solid_one': {
                'input_one': DependencyDefinition('produce_string'),
            }
        },
        context_definitions={
            'context_one': PipelineContextDefinition(
                context_fn=lambda: None,
                config_def=ConfigDefinition(ContextOneConfigDict),
            )
        },
    )

    present_types = [
        SolidOneConfigDict,
        ContextOneConfigDict,
        types.String,
        types.Any,
        types.Int,
    ]

    for present_type in present_types:
        name = present_type.name
        assert pipeline_def.has_type(name)
        assert pipeline_def.type_named(name).name == name

    not_present_types = [
        types.Bool,
        types.Dict,
        types.PythonObjectType('Duisjdfke', dict),
    ]

    for not_present_type in not_present_types:
        assert not pipeline_def.has_type(not_present_type.name)

def _nested_optional_config_with_no_default():
    return ConfigDefinition(
        config_type=types.ConfigDictionary({
            'nested': Field(
                dagster_type=types.ConfigDictionary({
                    'int_field': Field(
                        types.Int,
                        is_optional=True,
                    ),
                })
            ),
        })
    )

def _single_nested_config():
    return ConfigDefinition(
        config_type=types.ConfigDictionary(
            'ParentType',
            {
                'nested': Field(
                    dagster_type=types.ConfigDictionary(
                        'NestedType',
                        {'int_field': Field(types.Int)},
                    )
                ),
            },
        )
    )

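# For reference, a value satisfying the 'ParentType' schema above is a nested dict whose
# inner 'int_field' holds an int. This is a sketch of the config shape only; how it is
# wrapped into a full pipeline environment depends on the dagster version in use, and the
# variable name below is illustrative.
_example_single_nested_value = {
    'nested': {
        'int_field': 3,
    },
}
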
def _nested_optional_config_with_no_default():
    nested_type = types.ConfigDictionary(
        'NestedType',
        {
            'int_field': Field(
                types.Int,
                is_optional=True,
            ),
        },
    )
    return ConfigDefinition(
        config_type=types.ConfigDictionary(
            'ParentType',
            {'nested': Field(dagster_type=nested_type)},
        )
    )

def _nested_optional_config_with_default():
    return ConfigDefinition(
        config_type=types.ConfigDictionary(
            'ParentType',
            {
                'nested': Field(
                    dagster_type=types.ConfigDictionary(
                        'NestedType',
                        {
                            'int_field': Field(
                                types.Int,
                                is_optional=True,
                                default_value=3,
                            )
                        },
                    )
                ),
            },
        )
    )

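# Because 'int_field' above is optional with default_value=3, a config value may omit it
# entirely. A sketch of the shape, with an illustrative variable name; the exact
# evaluation behavior depends on the dagster version in use.
_example_nested_value_with_default = {'nested': {}}  # expected to resolve to {'nested': {'int_field': 3}}
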
def test_double_type():
    @solid(
        config_def=ConfigDefinition(
            types.ConfigDictionary(
                'SomeTypeName',
                {'some_field': Field(types.String)},
            ),
        )
    )
    def solid_one(_info):
        raise Exception('should not execute')

    @solid(
        config_def=ConfigDefinition(
            types.ConfigDictionary(
                'SomeTypeName',
                {'some_field': Field(types.String)},
            ),
        )
    )
    def solid_two(_info):
        raise Exception('should not execute')

    with pytest.raises(DagsterInvalidDefinitionError, match='Type names must be unique.'):
        PipelineDefinition(solids=[solid_one, solid_two])

def define_part_nine_final():
    return PipelineDefinition(
        name='part_nine_final',
        solids=define_contextful_solids(),
        dependencies={
            'add_ints': {
                'num_one': DependencyDefinition('injest_a'),
                'num_two': DependencyDefinition('injest_b'),
            },
            'mult_ints': {
                'num_one': DependencyDefinition('injest_a'),
                'num_two': DependencyDefinition('injest_b'),
            },
        },
        context_definitions={
            'local': PipelineContextDefinition(
                context_fn=lambda *_args: ExecutionContext.console_logging(
                    log_level=DEBUG,
                    resources=PartNineResources(InMemoryStore()),
                )
            ),
            'cloud': PipelineContextDefinition(
                context_fn=lambda info: ExecutionContext.console_logging(
                    resources=PartNineResources(
                        PublicCloudStore(info.config['credentials'])
                    )
                ),
                config_def=ConfigDefinition(
                    config_type=types.ConfigDictionary(
                        name='CloudConfigDict',
                        fields={
                            'credentials': Field(
                                types.ConfigDictionary(
                                    name='CredentialsConfigDict',
                                    fields={
                                        'user': Field(types.String),
                                        'pass': Field(types.String),
                                    },
                                )
                            ),
                        },
                    )
                ),
            ),
        },
    )

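# A sketch of the context config expected by the 'cloud' context defined above:
# CloudConfigDict requires a 'credentials' dict with 'user' and 'pass' strings, which
# PublicCloudStore receives via info.config['credentials']. The variable name and the
# values below are illustrative, not part of the tutorial code.
_example_cloud_context_config = {
    'credentials': {
        'user': 'some_user',
        'pass': 'some_password',
    },
}
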
from dagster import (
    ConfigDefinition,
    DependencyDefinition,
    Field,
    InputDefinition,
    OutputDefinition,
    PipelineDefinition,
    Result,
    SolidDefinition,
    check,
    config,
    execute_pipeline,
    types,
)

from dagster.core.test_utils import single_output_transform

SingleValueDict = types.ConfigDictionary('SingleValueDict', {'value': Field(types.String)})


def define_pass_value_solid(name, description=None):
    check.str_param(name, 'name')
    check.opt_str_param(description, 'description')

    def _value_t_fn(info, _inputs):
        yield Result(info.config['value'])

    return SolidDefinition(
        name=name,
        description=description,
        inputs=[],
        outputs=[OutputDefinition(types.String)],
        config_def=ConfigDefinition(SingleValueDict),
        transform_fn=_value_t_fn,
    )

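# Example usage (a sketch): the factory above returns a no-input solid whose config schema
# is SingleValueDict, so its per-solid config is simply {'value': <some string>}. The
# variable name below is illustrative.
pass_value_solid = define_pass_value_solid('pass_value')
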
class DagsterSqlTableExpression(DagsterSqlExpression):
    def __init__(self, table_name):
        super(DagsterSqlTableExpression, self).__init__()
        self._table_name = check.str_param(table_name, 'table_name')

    @property
    def query_text(self):
        check.not_implemented('table cannot be a standalone query')

    @property
    def from_target(self):
        return self._table_name


CreateTableConfigDict = types.ConfigDictionary(
    'CreateTableConfigDict',
    {
        'table_name': Field(types.String),
    },
)


def define_create_table_solid(name):
    def _materialization_fn(info, inputs):
        sql_expr = inputs['expr']
        check.inst(sql_expr, DagsterSqlExpression)

        output_table_name = check.str_elem(info.config, 'table_name')
        total_sql = '''CREATE TABLE {output_table_name} AS {query_text}'''.format(
            output_table_name=output_table_name,
            query_text=sql_expr.query_text,
        )
        info.context.resources.sa.engine.connect().execute(total_sql)

    return SolidDefinition(

def _create_dataframe_type():
    return types.PythonObjectType(
        name='PandasDataFrame',
        python_type=pd.DataFrame,
        description='''Two-dimensional size-mutable, potentially heterogeneous tabular
data structure with labeled axes (rows and columns).
See http://pandas.pydata.org/''',
    )


DataFrame = _create_dataframe_type()

LoadDataFrameConfigDict = types.ConfigDictionary(
    'LoadDataFrameConfigDict',
    {
        'path': Field(types.Path),
    },
)

WriteDataFrameConfigDict = types.ConfigDictionary(
    'WriteDataFrameConfigDict',
    {
        'path': Field(types.Path),
    },
)


def load_csv_solid(name):
    check.str_param(name, 'name')

    def _t_fn(info, _inputs):