def define_context_context_cls(pipeline_name, context_definitions):
    '''Build the selector type used to choose a pipeline context in config.

    Args:
        pipeline_name (str): Prefix of the generated type name.
        context_definitions (Dict[str, PipelineContextDefinition]): Context
            definitions keyed by context name.

    Returns:
        SystemNamedSelector: Selector named ``<pipeline_name>.ContextConfig``
        holding one field per context definition.
    '''
    check.str_param(pipeline_name, 'pipeline_name')
    check.dict_param(
        context_definitions,
        'context_definitions',
        key_type=str,
        value_type=PipelineContextDefinition,
    )

    selector_name = '{pipeline_name}.ContextConfig'.format(pipeline_name=pipeline_name)

    if len(context_definitions) == 1:
        # A lone context gets a field built without ``is_optional``.
        sole_name, sole_definition = single_item(context_definitions)
        selector_fields = {
            sole_name: Field(
                define_specific_context_cls(pipeline_name, sole_name, sole_definition)
            )
        }
    else:
        # With zero or several contexts every field is explicitly optional.
        selector_fields = {
            ctx_name: Field(
                define_specific_context_cls(pipeline_name, ctx_name, ctx_definition),
                is_optional=True,
            )
            for ctx_name, ctx_definition in context_definitions.items()
        }

    return SystemNamedSelector(selector_name, selector_fields)
def get_inputs_field(solid, handle, dependency_structure, pipeline_name):
    '''Build the ``inputs`` config field for a solid, if it needs one.

    Args:
        solid (Solid): Solid whose inputs are inspected.
        handle (SolidHandle): Handle used to name the generated type.
        dependency_structure (DependencyStructure): Consulted to find out
            which inputs already have dependencies.
        pipeline_name (str): Pipeline name used to name the generated type.

    Returns:
        Optional[Field]: A field wrapping a ``SystemNamedDict`` with one entry
        per input that has to be supplied through config, or ``None`` when the
        solid has no configurable inputs or no input qualifies.
    '''
    check.inst_param(solid, 'solid', Solid)
    check.inst_param(handle, 'handle', SolidHandle)
    check.inst_param(dependency_structure, 'dependency_structure', DependencyStructure)
    check.str_param(pipeline_name, 'pipeline_name')

    if not solid.definition.has_configurable_inputs:
        return None

    config_fields = {}
    for input_name, input_def in solid.definition.input_dict.items():
        hydration_config = input_def.runtime_type.input_hydration_config
        if not hydration_config:
            continue

        input_handle = SolidInputHandle(solid, input_def)
        # An input only has to come from config when nothing else feeds it:
        # neither a dependency nor a mapping from the containing solid.
        if dependency_structure.has_deps(input_handle) or solid.container_maps_input(
            input_name
        ):
            continue

        config_fields[input_name] = Field(hydration_config.schema_type)

    if not config_fields:
        return None

    inputs_type_name = '{pipeline_name}.{solid_handle}.Inputs'.format(
        pipeline_name=camelcase(pipeline_name), solid_handle=handle.camelcase()
    )
    return Field(SystemNamedDict(inputs_type_name, config_fields))
def define_environment_cls(creation_data):
    '''Build the top-level environment config type for a pipeline.

    Args:
        creation_data (EnvironmentClassCreationData): Source of the pipeline
            name and context definitions; also forwarded to the solid
            dictionary builder.

    Returns:
        SystemNamedDict: Type named ``<PipelineName>.Environment`` with the
        fields ``context``, ``solids``, ``expectations``, ``storage``
        (optional) and ``execution``.
    '''
    check.inst_param(creation_data, 'creation_data', EnvironmentClassCreationData)

    pipeline_name = camelcase(creation_data.pipeline_name)
    environment_name = '{pipeline_name}.Environment'.format(pipeline_name=pipeline_name)

    context_field = define_maybe_optional_selector_field(
        define_context_context_cls(pipeline_name, creation_data.context_definitions)
    )
    solids_field = Field(
        define_solid_dictionary_cls(
            '{pipeline_name}.SolidsConfigDictionary'.format(pipeline_name=pipeline_name),
            creation_data,
        )
    )
    expectations_field = Field(
        define_expectations_config_cls(
            '{pipeline_name}.ExpectationsConfig'.format(pipeline_name=pipeline_name)
        )
    )
    storage_field = Field(
        define_storage_config_cls(
            '{pipeline_name}.StorageConfig'.format(pipeline_name=pipeline_name)
        ),
        is_optional=True,
    )
    execution_field = Field(
        define_execution_config_cls(
            '{pipeline_name}.ExecutionConfig'.format(pipeline_name=pipeline_name)
        )
    )

    return SystemNamedDict(
        name=environment_name,
        fields={
            'context': context_field,
            'solids': solids_field,
            'expectations': expectations_field,
            'storage': storage_field,
            'execution': execution_field,
        },
    )
def get_outputs_field(creation_data, solid):
    '''Build the optional ``outputs`` config field for a solid.

    Args:
        creation_data (EnvironmentClassCreationData): Supplies the pipeline
            name used in the generated type name.
        solid (Solid): Solid whose output definitions are inspected.

    Returns:
        Optional[Field]: An optional field wrapping a ``List`` of a
        ``SystemNamedDict`` with one optional entry per output that has an
        output schema, or ``None`` when the solid has no configurable outputs.
    '''
    check.inst_param(creation_data, 'creation_data', EnvironmentClassCreationData)
    check.inst_param(solid, 'solid', Solid)

    solid_def = solid.definition
    if not solid_has_configurable_outputs(solid_def):
        return None

    outputs_with_schema = [
        output_def for output_def in solid_def.output_defs if output_def.runtime_type.output_schema
    ]

    # NOTE(review): the field is built from ``type(schema_type)``, i.e. the
    # class of the schema type object -- confirm this is what Field expects.
    output_dict_fields = {
        output_def.name: Field(
            type(output_def.runtime_type.output_schema.schema_type), is_optional=True
        )
        for output_def in outputs_with_schema
    }

    outputs_type_name = '{pipeline_name}.{solid_name}.Outputs'.format(
        pipeline_name=camelcase(creation_data.pipeline_name), solid_name=camelcase(solid.name)
    )
    output_entry_dict = SystemNamedDict(outputs_type_name, output_dict_fields)

    return Field(List(output_entry_dict), is_optional=True)
def define_storage_config_cls(name):
    '''Build the selector type listing the built-in storage options.

    Args:
        name (str): Type name of the selector; also used as the prefix of each
            option's own type name.

    Returns:
        SystemNamedSelector: Selector with the optional fields ``in_memory``,
        ``filesystem`` (optional ``base_dir`` string) and ``s3``
        (``s3_bucket`` string).
    '''
    check.str_param(name, 'name')

    in_memory_type = SystemNamedDict('{parent_name}.InMem'.format(parent_name=name), {})
    filesystem_type = SystemNamedDict(
        '{parent_name}.Files'.format(parent_name=name),
        {'base_dir': Field(String, is_optional=True)},
    )
    s3_type = SystemNamedDict(
        '{parent_name}.S3'.format(parent_name=name), {'s3_bucket': Field(String)}
    )

    return SystemNamedSelector(
        name,
        {
            'in_memory': Field(in_memory_type, is_optional=True),
            'filesystem': Field(filesystem_type, is_optional=True),
            's3': Field(s3_type, is_optional=True),
        },
    )
def get_outputs_field(solid, handle, pipeline_name):
    '''Build the optional ``outputs`` config field for a solid.

    Args:
        solid (Solid): Solid whose output definitions are inspected.
        handle (SolidHandle): Handle used to name the generated type.
        pipeline_name (str): Pipeline name used to name the generated type.

    Returns:
        Optional[Field]: An optional field wrapping a ``List`` of a
        ``SystemNamedDict`` with one optional entry per output that has an
        output schema, or ``None`` when the definition reports no
        configurable outputs.
    '''
    check.inst_param(solid, 'solid', Solid)
    check.inst_param(handle, 'handle', SolidHandle)
    check.str_param(pipeline_name, 'pipeline_name')

    solid_def = solid.definition
    if not solid_def.has_configurable_outputs:
        return None

    # NOTE(review): the field is built from ``type(schema_type)``, i.e. the
    # class of the schema type object -- confirm this is what Field expects.
    output_dict_fields = {
        output_name: Field(
            type(output_def.runtime_type.output_schema.schema_type), is_optional=True
        )
        for output_name, output_def in solid_def.output_dict.items()
        if output_def.runtime_type.output_schema
    }

    outputs_type_name = '{pipeline_name}.{solid_handle}.Outputs'.format(
        pipeline_name=camelcase(pipeline_name), solid_handle=handle.camelcase()
    )
    output_entry_dict = SystemNamedDict(outputs_type_name, output_dict_fields)

    return Field(List(output_entry_dict), is_optional=True)
def define_environment_cls(pipeline_def):
    '''Build the top-level environment config type for a pipeline definition.

    Args:
        pipeline_def (PipelineDefinition): Supplies the pipeline name and the
            context definitions; also forwarded to the solid dictionary
            builder.

    Returns:
        SystemNamedDict: Type named ``<PipelineName>.Environment`` with the
        fields ``context``, ``solids``, ``expectations`` and ``execution``.
    '''
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)

    pipeline_name = camelcase(pipeline_def.name)
    environment_name = '{pipeline_name}.Environment'.format(pipeline_name=pipeline_name)

    context_field = define_maybe_optional_selector_field(
        define_context_context_cls(pipeline_name, pipeline_def.context_definitions)
    )
    solids_field = Field(
        define_solid_dictionary_cls(
            '{pipeline_name}.SolidsConfigDictionary'.format(pipeline_name=pipeline_name),
            pipeline_def,
        )
    )
    expectations_field = Field(
        define_expectations_config_cls(
            '{pipeline_name}.ExpectationsConfig'.format(pipeline_name=pipeline_name)
        )
    )
    execution_field = Field(
        define_execution_config_cls(
            '{pipeline_name}.ExecutionConfig'.format(pipeline_name=pipeline_name)
        )
    )

    return SystemNamedDict(
        name=environment_name,
        fields={
            'context': context_field,
            'solids': solids_field,
            'expectations': expectations_field,
            'execution': execution_field,
        },
    )
def define_maybe_optional_selector_field(config_cls):
    '''Wrap a selector type in a field that is optional when possible.

    Args:
        config_cls: Config type exposing ``inst()``; its instance is handed to
            ``_is_selector_field_optional`` to decide optionality.

    Returns:
        Field: A non-optional field when the selector is not optional;
        otherwise an optional field whose default value comes from
        ``apply_default_values(config_cls.inst(), None)``.
    '''
    is_optional = _is_selector_field_optional(config_cls.inst())

    if not is_optional:
        return Field(config_cls, is_optional=False)

    # Only an optional selector carries a precomputed default value.
    return Field(
        config_cls,
        is_optional=is_optional,
        default_value=apply_default_values(config_cls.inst(), None),
    )
def config_field_for_configurable_class(name, **field_opts):
    '''Build a field describing a class reference plus its config.

    Args:
        name: Name given to the generated ``SystemNamedDict``.
        **field_opts: Extra keyword arguments forwarded unchanged to ``Field``.

    Returns:
        Field: Field over a dict type with string entries ``module`` and
        ``class`` and a permissive-dict entry ``config``.
    '''
    class_reference_fields = {
        'module': Field(String),
        'class': Field(String),
        'config': Field(PermissiveDict()),
    }
    class_reference_type = SystemNamedDict(name, class_reference_fields)
    return Field(class_reference_type, **field_opts)
def config_type(cls):
    '''Return the config schema named ``S3ComputeLogManagerConfig``.

    The schema has a string ``bucket`` entry and an optional string
    ``local_dir`` entry. ``cls`` is not used.
    '''
    # NOTE(review): takes ``cls`` so presumably a classmethod whose decorator
    # is outside this view -- confirm.
    schema_fields = {
        'bucket': Field(String),
        'local_dir': Field(String, is_optional=True),
    }
    return SystemNamedDict('S3ComputeLogManagerConfig', schema_fields)
def define_environment_cls(creation_data):
    '''Build the top-level, mode-aware environment config type.

    Args:
        creation_data (EnvironmentClassCreationData): Supplies the pipeline
            name, solids, dependency structure and mode definition; also
            forwarded to the logger dictionary builder.

    Returns:
        SolidContainerConfigDict: Type with the fields ``solids``,
        ``storage`` (optional), ``execution`` (optional), ``loggers`` and
        ``resources``, after passing through ``remove_none_entries``. It is
        named ``<PipelineName>.Mode.<ModeName>.Environment`` when a mode
        definition is present, else ``<PipelineName>.Environment``.
    '''
    check.inst_param(creation_data, 'creation_data', EnvironmentClassCreationData)

    pipeline_name = camelcase(creation_data.pipeline_name)
    mode_def = creation_data.mode_definition

    if mode_def:
        environment_name = '{pipeline_name}.Mode.{mode_name}.Environment'.format(
            pipeline_name=pipeline_name, mode_name=camelcase(mode_def.name)
        )
    else:
        environment_name = '{pipeline_name}.Environment'.format(pipeline_name=pipeline_name)

    solids_field = Field(
        define_solid_dictionary_cls(
            '{pipeline_name}.SolidsConfigDictionary'.format(pipeline_name=pipeline_name),
            creation_data.solids,
            creation_data.dependency_structure,
            creation_data.pipeline_name,
        )
    )

    # NOTE(review): unlike the name above, the storage and execution entries
    # dereference the mode definition unconditionally -- confirm a missing
    # mode definition is never expected here.
    storage_field = Field(
        define_storage_config_cls(
            '{pipeline_name}.{mode_name}.StorageConfig'.format(
                pipeline_name=pipeline_name, mode_name=camelcase(mode_def.name)
            ),
            mode_def,
        ),
        is_optional=True,
    )
    execution_field = Field(
        define_executor_config_cls(
            '{pipeline_name}.{mode_name}.ExecutionConfig'.format(
                pipeline_name=pipeline_name, mode_name=camelcase(mode_def.name)
            ),
            mode_def,
        ),
        is_optional=True,
    )

    loggers_field = Field(
        define_logger_dictionary_cls(
            '{pipeline_name}.LoggerConfig'.format(pipeline_name=pipeline_name),
            creation_data,
        )
    )
    resources_field = Field(define_mode_resources_dictionary_cls(pipeline_name, mode_def))

    environment_fields = remove_none_entries(
        {
            'solids': solids_field,
            'storage': storage_field,
            'execution': execution_field,
            'loggers': loggers_field,
            'resources': resources_field,
        }
    )

    return SolidContainerConfigDict(name=environment_name, fields=environment_fields)
def define_isolid_field(solid, handle, dependency_structure, pipeline_name):
    '''Build the config field for a single solid, composite or leaf.

    Args:
        solid (Solid): Solid to build config for.
        handle (SolidHandle): Handle used to name the generated types.
        dependency_structure: Forwarded to ``get_inputs_field`` to decide
            which inputs must come from config.
        pipeline_name (str): Pipeline name used to name the generated types.

    Returns:
        Field: For a ``CompositeSolidDefinition``, a field over a
        ``SolidContainerConfigDict`` holding ``inputs``/``outputs`` plus
        either ``config`` (when a config mapping exists) or ``solids``.
        For a ``SolidDefinition``, a field over the type returned by
        ``define_solid_config_cls``.

    Raises:
        The error raised by a failed ``check.invariant`` when the solid's
        definition is neither of the two supported kinds.
    '''
    check.inst_param(solid, 'solid', Solid)
    check.inst_param(handle, 'handle', SolidHandle)
    check.str_param(pipeline_name, 'pipeline_name')

    if isinstance(solid.definition, CompositeSolidDefinition):
        composite_def = solid.definition
        child_solids_config_field = Field(
            define_solid_dictionary_cls(
                '{pipeline_name}.CompositeSolidsDict.{solid_handle}'.format(
                    pipeline_name=camelcase(pipeline_name), solid_handle=handle.camelcase()
                ),
                composite_def.solids,
                composite_def.dependency_structure,
                pipeline_name,
                handle,
            )
        )

        composite_config_dict = {
            'inputs': get_inputs_field(solid, handle, dependency_structure, pipeline_name),
            'outputs': get_outputs_field(solid, handle, pipeline_name),
        }

        # Mask solid config for solids beneath this level if config mapping is provided
        if composite_def.has_config_mapping:
            composite_config_dict['config'] = composite_def.config_mapping.config_field
        else:
            composite_config_dict['solids'] = child_solids_config_field

        # The child solids field is always handed to the container type, even
        # when it is masked out of the visible fields above.
        return Field(
            SolidContainerConfigDict(
                '{name}CompositeSolidConfig'.format(name=str(handle)),
                remove_none_entries(composite_config_dict),
                handle=handle,
                child_solids_config_field=child_solids_config_field,
            )
        )

    elif isinstance(solid.definition, SolidDefinition):
        solid_config_type = define_solid_config_cls(
            '{pipeline_name}.SolidConfig.{solid_handle}'.format(
                pipeline_name=camelcase(pipeline_name), solid_handle=handle.camelcase()
            ),
            solid.definition.config_field,
            inputs_field=get_inputs_field(solid, handle, dependency_structure, pipeline_name),
            outputs_field=get_outputs_field(solid, handle, pipeline_name),
        )
        return Field(solid_config_type)

    else:
        # The message must be the description, not the condition: a non-empty
        # string is always truthy, so passing it first never fails the check
        # and the function would silently return None.
        check.invariant(
            False,
            'Unexpected ISolidDefinition type {type}'.format(type=type(solid.definition)),
        )
def logger(config_field=None, config=None, description=None):
    '''Decorator that turns a function into a ``LoggerDefinition``.

    The decorated function becomes the ``logger_fn`` of the definition.

    Args:
        config_field (Field): Used in the rare case of a top level config type
            other than a dictionary. Only one of ``config`` or
            ``config_field`` can be provided.
        config (Dict[str, Field]): The schema for the configuration data made
            available to the logger_fn.
        description (str)

    Returns:
        A ``LoggerDefinition`` when applied bare (``@logger``); otherwise a
        decorator that produces one (``@logger(...)``).
    '''
    # Bare usage (@logger rather than @logger()): the first positional
    # argument is the decorated function itself.
    if callable(config_field):
        return LoggerDefinition(logger_fn=config_field, config_field=Field(Dict({})))

    resolved_config_field = resolve_config_field(config_field, config, '@logger')

    def _wrap(logger_fn):
        return LoggerDefinition(logger_fn, resolved_config_field, description)

    return _wrap
def get_inputs_field(creation_data, solid):
    '''Build the ``inputs`` config field for a solid, if it needs one.

    Args:
        creation_data (EnvironmentClassCreationData): Supplies the dependency
            structure and the pipeline name.
        solid (Solid): Solid whose input definitions are inspected.

    Returns:
        Optional[Field]: A field wrapping a ``SystemNamedDict`` with one entry
        per schema-bearing input that has no dependency, or ``None`` when the
        solid has no configurable inputs or no input qualifies.
    '''
    check.inst_param(creation_data, 'creation_data', EnvironmentClassCreationData)
    check.inst_param(solid, 'solid', Solid)

    if not solid_has_configurable_inputs(solid.definition):
        return None

    inputs_with_schema = [
        input_def
        for input_def in solid.definition.input_defs
        if input_def.runtime_type.input_schema
    ]

    inputs_field_fields = {}
    for input_def in inputs_with_schema:
        input_handle = SolidInputHandle(solid, input_def)
        # Inputs already satisfied by a dependency are not configurable;
        # everything else must be provided via config.
        if creation_data.dependency_structure.has_dep(input_handle):
            continue
        inputs_field_fields[input_def.name] = FieldImpl(
            input_def.runtime_type.input_schema.schema_type
        )

    if not inputs_field_fields:
        return None

    inputs_type_name = '{pipeline_name}.{solid_name}.Inputs'.format(
        pipeline_name=camelcase(creation_data.pipeline_name),
        solid_name=camelcase(solid.name),
    )
    return Field(SystemNamedDict(inputs_type_name, inputs_field_fields))
def define_isolid_field(solid, handle, dependency_structure, pipeline_name):
    '''Build the config field for a single solid, composite or leaf.

    Args:
        solid (Solid): Solid to build config for.
        handle (SolidHandle): Handle used to name the generated types.
        dependency_structure: Forwarded to ``get_inputs_field`` to decide
            which inputs must come from config.
        pipeline_name (str): Pipeline name used to name the generated types.

    Returns:
        Field: For a ``CompositeSolidDefinition``, a field over a
        ``SystemNamedDict`` holding ``solids``, ``inputs`` and ``outputs``
        (entries removed by ``remove_none_entries`` are left out). For a
        ``SolidDefinition``, a field over the type returned by
        ``define_solid_config_cls``.

    Raises:
        The error raised by a failed ``check.invariant`` when the solid's
        definition is neither of the two supported kinds.
    '''
    check.inst_param(solid, 'solid', Solid)
    check.inst_param(handle, 'handle', SolidHandle)
    check.str_param(pipeline_name, 'pipeline_name')

    if isinstance(solid.definition, CompositeSolidDefinition):
        composite_def = solid.definition
        solid_cfg = Field(
            define_solid_dictionary_cls(
                '{pipeline_name}.CompositeSolidsDict.{solid_handle}'.format(
                    pipeline_name=camelcase(pipeline_name), solid_handle=handle.camelcase()
                ),
                composite_def.solids,
                composite_def.dependency_structure,
                pipeline_name,
                handle,
            )
        )
        return Field(
            SystemNamedDict(
                '{name}CompositeSolidConfig'.format(name=str(handle)),
                remove_none_entries(
                    {
                        'solids': solid_cfg,
                        'inputs': get_inputs_field(
                            solid, handle, dependency_structure, pipeline_name
                        ),
                        'outputs': get_outputs_field(solid, handle, pipeline_name),
                    }
                ),
            )
        )

    elif isinstance(solid.definition, SolidDefinition):
        solid_config_type = define_solid_config_cls(
            '{pipeline_name}.SolidConfig.{solid_handle}'.format(
                pipeline_name=camelcase(pipeline_name), solid_handle=handle.camelcase()
            ),
            solid.definition.config_field,
            inputs_field=get_inputs_field(solid, handle, dependency_structure, pipeline_name),
            outputs_field=get_outputs_field(solid, handle, pipeline_name),
        )
        return Field(solid_config_type)

    else:
        # The message must be the description, not the condition: a non-empty
        # string is always truthy, so passing it first never fails the check
        # and the function would silently return None.
        check.invariant(
            False,
            'Unexpected ISolidDefinition type {type}'.format(type=type(solid.definition)),
        )
def define_resource_dictionary_cls(name, resource_defs):
    '''Build the dict type holding config for every configurable resource.

    Args:
        name (str): Name of the generated type; also passed to
            ``define_resource_cls`` for each resource.
        resource_defs (Dict[str, ResourceDefinition]): Resource definitions
            keyed by resource name.

    Returns:
        SystemNamedDict: One field per resource whose ``config_field`` is
        truthy; resources without one are left out.
    '''
    check.str_param(name, 'name')
    check.dict_param(resource_defs, 'resource_defs', key_type=str, value_type=ResourceDefinition)

    configurable_resource_fields = {
        resource_name: Field(define_resource_cls(name, resource_name, resource_def))
        for resource_name, resource_def in resource_defs.items()
        if resource_def.config_field
    }

    return SystemNamedDict(name=name, fields=configurable_resource_fields)
def define_specific_context_config_cls(name, config_field, resources):
    '''Build the config type for one specific pipeline context.

    Args:
        name (str): Name of the generated type and prefix of its nested types.
        config_field: Optional field for the context's own config; validated
            by ``check_opt_field_param``.
        resources (Dict[str, ResourceDefinition]): Resource definitions keyed
            by resource name.

    Returns:
        SystemNamedDict: Type with the entries ``config``, ``resources`` and
        ``persistence`` after passing through ``remove_none_entries``.
    '''
    check.str_param(name, 'name')
    check_opt_field_param(config_field, 'config_field')
    check.dict_param(resources, 'resources', key_type=str, value_type=ResourceDefinition)

    resources_field = Field(
        define_resource_dictionary_cls('{name}.Resources'.format(name=name), resources)
    )
    persistence_field = Field(
        SystemNamedSelector('{name}.Persistence'.format(name=name), {'file': Field(Dict({}))})
    )

    context_fields = remove_none_entries(
        {
            'config': config_field,
            'resources': resources_field,
            'persistence': persistence_field,
        }
    )

    return SystemNamedDict(name, fields=context_fields)
def define_dagster_config_cls():
    '''Return the config schema named ``DagsterInstanceConfig``.

    All three entries are optional: ``features`` is a permissive dict, while
    ``root_storage`` and ``compute_logs`` are configurable-class fields.
    '''
    features_field = Field(PermissiveDict(), is_optional=True)
    root_storage_field = config_field_for_configurable_class(
        'DagsterInstanceRootStorageConfig', is_optional=True
    )
    compute_logs_field = config_field_for_configurable_class(
        'DagsterInstanceComputeLogsConfig', is_optional=True
    )

    return SystemNamedDict(
        'DagsterInstanceConfig',
        {
            'features': features_field,
            'root_storage': root_storage_field,
            'compute_logs': compute_logs_field,
        },
    )
def resolve_config_field(config_field, config, source):
    '''Collapse the ``config_field`` / ``config`` argument pair into one field.

    Args:
        config_field: An already built field, or ``None``.
        config: A dict of fields to wrap as ``Field(Dict(config))``, or
            ``None``.
        source: Label of the calling API (e.g. ``'@logger'``), interpolated
            into the error message.

    Returns:
        ``config_field`` when it is truthy; otherwise ``Field(Dict(config))``
        when ``config`` is truthy; otherwise ``None``. An empty ``config``
        dict therefore also yields ``None``.

    Raises:
        DagsterInvalidDefinitionError: If both arguments are not ``None``.
    '''
    if config_field is not None and config is not None:
        # The trailing space keeps the two sentences apart; without it the
        # adjacent literals rendered as "...in <source>.Using the config...".
        raise DagsterInvalidDefinitionError(
            'Must only provide one of config_field or config but not both in {}. '
            'Using the config arg is equivalent to config_field=Field(Dict(...)).'.format(
                source
            )
        )

    if config_field:
        return config_field

    if config:
        return Field(Dict(config))

    return None
def logger(config_field=None, description=None):
    '''Decorator that turns a function into a ``LoggerDefinition``.

    The decorated function becomes the ``logger_fn`` of the definition.

    Returns:
        A ``LoggerDefinition`` when applied bare (``@logger``); otherwise a
        decorator that produces one (``@logger(...)``).
    '''

    def _wrap(logger_fn):
        return LoggerDefinition(logger_fn, config_field, description)

    # Parameterised usage (@logger() rather than @logger): hand back the
    # decorator so it can be applied to the function.
    if not callable(config_field):
        return _wrap

    # Bare usage: the first positional argument is the decorated function.
    return LoggerDefinition(logger_fn=config_field, config_field=Field(Dict({})))
def define_executor_config_cls(type_name, mode_definition):
    '''Build the selector type over the executors of a mode.

    Args:
        type_name (str): Name of the selector and prefix of each executor's
            own type name.
        mode_definition (ModeDefinition): Mode whose ``executor_defs`` are
            listed.

    Returns:
        SystemNamedSelector: One field per executor, keyed by executor name.
        Each wraps a dict type with a single ``config`` entry when the
        executor has a truthy ``config_field`` and no entries otherwise.
    '''
    check.str_param(type_name, 'type_name')
    check.inst_param(mode_definition, 'mode_definition', ModeDefinition)

    selector_fields = {}
    for executor_def in mode_definition.executor_defs:
        executor_type_name = '{type_name}.{executor_name}'.format(
            type_name=type_name, executor_name=camelcase(executor_def.name)
        )

        if executor_def.config_field:
            executor_fields = {'config': executor_def.config_field}
        else:
            executor_fields = {}

        selector_fields[executor_def.name] = Field(
            SystemNamedDict(name=executor_type_name, fields=executor_fields)
        )

    return SystemNamedSelector(type_name, selector_fields)
def define_storage_config_cls(type_name, mode_definition):
    '''Build the selector type over the system storages of a mode.

    Args:
        type_name (str): Name of the selector and prefix of each storage's
            own type name.
        mode_definition (ModeDefinition): Mode whose ``system_storage_defs``
            are listed.

    Returns:
        SystemNamedSelector: One field per storage definition, keyed by its
        name. Each wraps a dict type with a single ``config`` entry when the
        storage has a truthy ``config_field`` and no entries otherwise.
    '''
    check.str_param(type_name, 'type_name')
    check.inst_param(mode_definition, 'mode_definition', ModeDefinition)

    selector_fields = {}
    for storage_def in mode_definition.system_storage_defs:
        storage_type_name = '{type_name}.{storage_name}'.format(
            type_name=type_name, storage_name=camelcase(storage_def.name)
        )

        if storage_def.config_field:
            storage_fields = {'config': storage_def.config_field}
        else:
            storage_fields = {}

        selector_fields[storage_def.name] = Field(
            SystemNamedDict(name=storage_type_name, fields=storage_fields)
        )

    return SystemNamedSelector(type_name, selector_fields)
def define_solid_dictionary_cls(name, pipeline_def):
    '''Build the dict type holding per-solid config for a pipeline.

    Args:
        name (str): Name of the generated type.
        pipeline_def (PipelineDefinition): Pipeline whose solids are listed.

    Returns:
        SystemNamedDict: One field per solid for which
        ``solid_has_config_entry`` is truthy, keyed by solid name.
    '''
    check.str_param(name, 'name')
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)

    solid_fields = {}
    for solid in pipeline_def.solids:
        # Solids with nothing to configure get no entry at all.
        if not solid_has_config_entry(solid.definition):
            continue

        config_type_name = '{pipeline_name}.SolidConfig.{solid_name}'.format(
            pipeline_name=camelcase(pipeline_def.name), solid_name=camelcase(solid.name)
        )
        solid_fields[solid.name] = Field(
            define_solid_config_cls(
                config_type_name,
                solid.definition.config_field,
                inputs_field=get_inputs_field(pipeline_def, solid),
                outputs_field=get_outputs_field(pipeline_def, solid),
            )
        )

    return SystemNamedDict(name, solid_fields)
def define_solid_dictionary_cls(name, creation_data):
    '''Build the dict type holding per-solid config for a pipeline.

    Args:
        name (str): Name of the generated type.
        creation_data (EnvironmentClassCreationData): Supplies the solids and
            the pipeline name; also forwarded to the inputs/outputs builders.

    Returns:
        SystemNamedDict: One field per solid for which
        ``solid_has_config_entry`` is truthy, keyed by solid name.
    '''
    check.str_param(name, 'name')
    check.inst_param(creation_data, 'creation_data', EnvironmentClassCreationData)

    solid_fields = {}
    for solid in creation_data.solids:
        # Solids with nothing to configure get no entry at all.
        if not solid_has_config_entry(solid.definition):
            continue

        config_type_name = '{pipeline_name}.SolidConfig.{solid_name}'.format(
            pipeline_name=camelcase(creation_data.pipeline_name),
            solid_name=camelcase(solid.name),
        )
        solid_fields[solid.name] = Field(
            define_solid_config_cls(
                config_type_name,
                solid.definition.config_field,
                inputs_field=get_inputs_field(creation_data, solid),
                outputs_field=get_outputs_field(creation_data, solid),
            )
        )

    return SystemNamedDict(name, solid_fields)
def define_logger_dictionary_cls(name, creation_data):
    '''Build the dict type holding config for every logger.

    Args:
        name (str): Name of the generated type.
        creation_data (EnvironmentClassCreationData): Supplies ``logger_defs``
            and the pipeline name.

    Returns:
        SystemNamedDict: One optional field per logger, keyed by logger name.
        Each wraps a dict type built from the logger's ``config_field`` after
        ``remove_none_entries``.
    '''
    check.str_param(name, 'name')
    check.inst_param(creation_data, 'creation_data', EnvironmentClassCreationData)

    logger_fields = {}
    for logger_name, logger_definition in creation_data.logger_defs.items():
        logger_type_name = '{pipeline_name}.LoggerConfig.{logger_name}'.format(
            pipeline_name=camelcase(creation_data.pipeline_name),
            logger_name=camelcase(logger_name),
        )
        logger_type = SystemNamedDict(
            logger_type_name,
            remove_none_entries({'config': logger_definition.config_field}),
        )
        logger_fields[logger_name] = Field(logger_type, is_optional=True)

    return SystemNamedDict(name, logger_fields)
def define_dagster_config_cls():
    '''Return the config schema named ``DagsterInstanceConfig``.

    Every entry is optional: ``features`` is a permissive dict, and
    ``local_artifact_storage``, ``compute_logs``, ``run_storage`` and
    ``event_log_storage`` are configurable-class fields.
    '''
    features_field = Field(PermissiveDict(), is_optional=True)
    local_artifact_storage_field = config_field_for_configurable_class(
        'DagsterInstanceLocalArtifactStorageConfig', is_optional=True
    )
    compute_logs_field = config_field_for_configurable_class(
        'DagsterInstanceComputeLogsConfig', is_optional=True
    )
    run_storage_field = config_field_for_configurable_class(
        'DagsterInstanceRunStorageConfig', is_optional=True
    )
    event_log_storage_field = config_field_for_configurable_class(
        'DagsterInstanceEventLogStorageConfig', is_optional=True
    )

    return SystemNamedDict(
        'DagsterInstanceConfig',
        {
            'features': features_field,
            'local_artifact_storage': local_artifact_storage_field,
            'compute_logs': compute_logs_field,
            'run_storage': run_storage_field,
            'event_log_storage': event_log_storage_field,
        },
    )
def _default_config_field():
    '''Return a dict field with one optional ``log_level`` entry.

    ``log_level`` is typed as ``LogLevelEnum`` and defaults to ``'INFO'``.
    '''
    log_level_field = Field(LogLevelEnum, is_optional=True, default_value='INFO')
    return Field(Dict({'log_level': log_level_field}))
def create_mem_system_storage_data(init_context):
    '''Assemble ``SystemStorageData`` backed by in-memory components.

    Args:
        init_context: Object exposing ``run_config.run_id``, used to obtain
            the local file manager.

    Returns:
        SystemStorageData: Built from an ``InMemoryRunStorage``, an
        ``InMemoryIntermediatesManager`` and a ``LocalFileManager`` for the
        run id.
    '''
    run_id = init_context.run_config.run_id
    return SystemStorageData(
        run_storage=InMemoryRunStorage(),
        intermediates_manager=InMemoryIntermediatesManager(),
        file_manager=LocalFileManager.for_run_id(run_id),
    )


@system_storage(name='in_memory', is_persistent=False)
def mem_system_storage(init_context):
    '''System storage registered as ``in_memory`` and marked non-persistent.'''
    return create_mem_system_storage_data(init_context)


@system_storage(
    name='filesystem', is_persistent=True, config={'base_dir': Field(String, is_optional=True)}
)
def fs_system_storage(init_context):
    '''System storage registered as ``filesystem`` and marked persistent.

    Reads the optional ``base_dir`` entry from the storage config and passes
    it to both the run storage and the intermediate store.
    '''
    # ``.get`` yields None when ``base_dir`` is absent from the config.
    base_dir = init_context.system_storage_config.get('base_dir')

    file_manager = LocalFileManager.for_run_id(init_context.run_config.run_id)
    run_storage = FileSystemRunStorage(base_dir=base_dir)
    intermediate_store = FileSystemIntermediateStore(
        run_id=init_context.run_config.run_id,
        type_storage_plugin_registry=init_context.type_storage_plugin_registry,
        base_dir=base_dir,
    )

    return SystemStorageData(
        file_manager=file_manager,
        run_storage=run_storage,
        intermediates_manager=IntermediateStoreIntermediatesManager(intermediate_store),
    )
def config_type(cls):
    '''Return the config schema named ``SqliteEventLogStorageConfig``.

    The schema has a single string ``base_dir`` entry. ``cls`` is not used.
    '''
    # NOTE(review): takes ``cls`` so presumably a classmethod whose decorator
    # is outside this view -- confirm.
    schema_fields = {'base_dir': Field(String)}
    return SystemNamedDict('SqliteEventLogStorageConfig', schema_fields)
def config_type(cls):
    '''Return the config schema named ``LocalArtifactStorageConfig``.

    The schema has a single string ``base_dir`` entry. ``cls`` is not used.
    '''
    # NOTE(review): takes ``cls`` so presumably a classmethod whose decorator
    # is outside this view -- confirm.
    schema_fields = {'base_dir': Field(String)}
    return SystemNamedDict('LocalArtifactStorageConfig', schema_fields)