def define_solid_dictionary_cls(
    solids,
    ignored_solids,
    dependency_structure,
    parent_handle=None,
):
    """Build the ``Shape`` that maps solid names to their config fields.

    Args:
        solids: List of ``Solid`` instances; their fields are built with
            ``ignored=False``.
        ignored_solids: Optional list of ``Solid`` instances; their fields are
            built with ``ignored=True``.
        dependency_structure: ``DependencyStructure`` forwarded to every field.
        parent_handle: Optional ``SolidHandle`` used as the parent of each
            per-solid handle.

    Returns:
        A ``Shape`` keyed by solid name. Solids whose definition reports no
        config entry are skipped.
    """
    check.list_param(solids, "solids", of_type=Solid)
    ignored_solids = check.opt_list_param(ignored_solids, "ignored_solids", of_type=Solid)
    check.inst_param(dependency_structure, "dependency_structure", DependencyStructure)
    check.opt_inst_param(parent_handle, "parent_handle", SolidHandle)

    fields = {}
    # Regular solids are processed first, so an ignored solid sharing a name
    # replaces the earlier entry.
    for group, is_ignored in ((solids, False), (ignored_solids, True)):
        for solid in group:
            if not solid.definition.has_config_entry:
                continue
            fields[solid.name] = define_isolid_field(
                solid,
                SolidHandle(solid.name, parent_handle),
                dependency_structure,
                ignored=is_ignored,
            )
    return Shape(fields)
def __new__(
    cls,
    pipeline_name,
    solids,
    dependency_structure,
    mode_definition,
    logger_defs,
    ignored_solids,
):
    """Type-check every argument and create the ``EnvironmentClassCreationData``.

    Each argument is validated with the matching ``check`` helper before being
    handed to the parent ``__new__`` under the same keyword.
    """
    # The checks run in the same order as the keyword arguments below.
    checked_pipeline_name = check.str_param(pipeline_name, "pipeline_name")
    checked_solids = check.list_param(solids, "solids", of_type=Solid)
    checked_dependency_structure = check.inst_param(
        dependency_structure, "dependency_structure", DependencyStructure
    )
    checked_mode_definition = check.inst_param(
        mode_definition, "mode_definition", ModeDefinition
    )
    checked_logger_defs = check.dict_param(
        logger_defs, "logger_defs", key_type=str, value_type=LoggerDefinition
    )
    checked_ignored_solids = check.list_param(ignored_solids, "ignored_solids", of_type=Solid)

    return super(EnvironmentClassCreationData, cls).__new__(
        cls,
        pipeline_name=checked_pipeline_name,
        solids=checked_solids,
        dependency_structure=checked_dependency_structure,
        mode_definition=checked_mode_definition,
        logger_defs=checked_logger_defs,
        ignored_solids=checked_ignored_solids,
    )
def define_solid_dictionary_cls(
    solids,
    ignored_solids,
    dependency_structure,
    resource_defs,
    parent_handle=None,
):
    """Build the ``Shape`` that maps solid names to their config fields.

    Args:
        solids: List of ``Solid`` instances; their fields are built with
            ``ignored=False``.
        ignored_solids: Optional list of ``Solid`` instances; their fields are
            built with ``ignored=True``.
        dependency_structure: ``DependencyStructure`` forwarded to every field.
        resource_defs: Passed through unchanged to ``define_isolid_field``.
        parent_handle: Optional ``SolidHandle`` used as the parent of each
            per-solid handle.

    Returns:
        A ``Shape`` keyed by solid name. A solid is only included when
        ``define_isolid_field`` returns a truthy field for it.
    """
    check.list_param(solids, "solids", of_type=Solid)
    ignored_solids = check.opt_list_param(ignored_solids, "ignored_solids", of_type=Solid)
    check.inst_param(dependency_structure, "dependency_structure", DependencyStructure)
    check.opt_inst_param(parent_handle, "parent_handle", SolidHandle)

    fields = {}
    # Regular solids are processed first, so an ignored solid sharing a name
    # replaces the earlier entry.
    for group, is_ignored in ((solids, False), (ignored_solids, True)):
        for solid in group:
            solid_field = define_isolid_field(
                solid,
                SolidHandle(solid.name, parent_handle),
                dependency_structure,
                resource_defs,
                ignored=is_ignored,
            )
            if solid_field:
                fields[solid.name] = solid_field
    return Shape(fields)
def _is_selector_field_optional(config_type):
    """Tell whether the given config type can be left out.

    A type with more than one field is never optional. Otherwise the answer
    is the ``is_optional`` flag of its single field.
    """
    check.inst_param(config_type, 'config_type', ConfigType)

    if len(config_type.fields) <= 1:
        _name, only_field = single_item(config_type.fields)
        return only_field.is_optional
    return False
def define_isolid_field(solid, handle, dependency_structure):
    """Build the config field for one solid, dispatching on its definition type.

    Every solid gets the same inputs and outputs config; only one extra
    element of configuration varies:

    1) Plain solid definition: a 'config' key holding its config_schema.
    2) Composite with a config mapping: a 'config' key holding the
       config_schema of that mapping.
    3) Composite without a config mapping: a 'solids' key holding a
       recursively defined solids dictionary.
    """
    check.inst_param(solid, 'solid', Solid)
    check.inst_param(handle, 'handle', SolidHandle)

    definition = solid.definition

    # Case 1: plain solid.
    if isinstance(definition, SolidDefinition):
        return construct_leaf_solid_config(
            solid, handle, dependency_structure, definition.config_schema
        )

    composite_def = check.inst(definition, CompositeSolidDefinition)

    # Case 2: the mapping's schema stands in for the whole composite.
    if composite_def.has_config_mapping:
        return construct_leaf_solid_config(
            solid, handle, dependency_structure, composite_def.config_mapping.config_schema
        )

    # Case 3: expose the child solids through a nested solids dictionary.
    inputs_field = get_inputs_field(solid, handle, dependency_structure)
    outputs_field = get_outputs_field(solid, handle)
    child_solids_field = Field(
        define_solid_dictionary_cls(
            composite_def.solids,
            composite_def.dependency_structure,
            handle,
        )
    )
    return filtered_system_dict(
        {
            'inputs': inputs_field,
            'outputs': outputs_field,
            'solids': child_solids_field,
        }
    )
def _is_selector_field_optional(config_type):
    """Tell whether the given config type can be left out.

    A type with more than one field is never optional. Otherwise it is
    optional exactly when its single field is not required.
    """
    check.inst_param(config_type, "config_type", ConfigType)

    if len(config_type.fields) <= 1:
        _name, only_field = ensure_single_item(config_type.fields)
        return not only_field.is_required
    return False
def construct_config_type_dictionary(node_defs, environment_type):
    """Index every reachable config type by given name and by key.

    Both dictionaries start from ``ALL_CONFIG_BUILTINS`` and are then extended
    with the types gathered from ``node_defs`` and ``environment_type``.

    Returns:
        A ``(type_dict_by_name, type_dict_by_key)`` tuple.

    Raises:
        DagsterInvalidDefinitionError: if two types of different Python
            classes share the same given name.
    """
    check.list_param(node_defs, "node_defs", NodeDefinition)
    check.inst_param(environment_type, "environment_type", ConfigType)

    type_dict_by_name = {
        builtin.given_name: builtin for builtin in ALL_CONFIG_BUILTINS if builtin.given_name
    }
    type_dict_by_key = {builtin.key: builtin for builtin in ALL_CONFIG_BUILTINS}

    all_types = list(_gather_all_config_types(node_defs, environment_type))
    all_types.extend(_gather_all_schemas(node_defs))

    for config_type in all_types:
        name = config_type.given_name
        if name:
            if name not in type_dict_by_name:
                type_dict_by_name[name] = config_type
            elif type(config_type) is not type(type_dict_by_name[name]):
                # Same name, different class: the definitions conflict.
                raise DagsterInvalidDefinitionError(
                    (
                        "Type names must be unique. You have constructed two different "
                        'instances of types with the same name "{name}".'
                    ).format(name=name)
                )

        # Keys are registered unconditionally; a later type replaces an earlier one.
        type_dict_by_key[config_type.key] = config_type

    return type_dict_by_name, type_dict_by_key
def get_outputs_field(solid, handle, resource_defs):
    """Build the 'outputs' config field for a solid, or return ``None``.

    Output-manager fields take precedence: if any output yields one, the
    result is a ``Field`` over a ``Shape`` of those and type materializers are
    ignored. Otherwise any type-materializer fields are wrapped in an optional
    ``Field`` over an ``Array`` of ``Shape``. With neither, ``None`` is returned.
    """
    check.inst_param(solid, "solid", Solid)
    check.inst_param(handle, "handle", SolidHandle)
    check.dict_param(resource_defs, "resource_defs", key_type=str, value_type=ResourceDefinition)

    # First choice: configurable output managers.
    manager_candidates = {
        name: get_output_manager_output_field(solid, output_def, resource_defs)
        for name, output_def in solid.definition.output_dict.items()
    }
    output_manager_fields = {
        name: candidate for name, candidate in manager_candidates.items() if candidate
    }
    if output_manager_fields:
        return Field(Shape(output_manager_fields))

    # Fallback: type materializers.
    materializer_candidates = {
        name: get_type_output_field(output_def)
        for name, output_def in solid.definition.output_dict.items()
    }
    type_materializer_fields = {
        name: candidate for name, candidate in materializer_candidates.items() if candidate
    }
    if type_materializer_fields:
        return Field(Array(Shape(type_materializer_fields)), is_required=False)

    return None
def construct_config_type_dictionary(solid_defs, environment_type):
    """Index every reachable config type by name and by key.

    Both dictionaries start from ``ALL_CONFIG_BUILTINS`` and are then extended
    with the types gathered from ``solid_defs`` and ``environment_type``.

    Returns:
        A ``(type_dict_by_name, type_dict_by_key)`` tuple.

    Raises:
        DagsterInvalidDefinitionError: if two types of different Python
            classes share the same name or the same key.
    """
    check.list_param(solid_defs, 'solid_defs', ISolidDefinition)
    check.inst_param(environment_type, 'environment_type', ConfigType)

    type_dict_by_name = {builtin.name: builtin for builtin in ALL_CONFIG_BUILTINS}
    type_dict_by_key = {builtin.key: builtin for builtin in ALL_CONFIG_BUILTINS}

    all_types = list(_gather_all_config_types(solid_defs, environment_type))
    all_types.extend(_gather_all_schemas(solid_defs))

    for config_type in all_types:
        name = config_type.name
        if name:
            if name not in type_dict_by_name:
                type_dict_by_name[name] = config_type
            elif type(config_type) is not type(type_dict_by_name[name]):
                raise DagsterInvalidDefinitionError(
                    (
                        'Type names must be unique. You have constructed two different '
                        'instances of types with the same name "{name}".'
                    ).format(name=name)
                )

        key = config_type.key
        if key not in type_dict_by_key:
            type_dict_by_key[key] = config_type
        elif type(config_type) is not type(type_dict_by_key[key]):
            raise DagsterInvalidDefinitionError(
                (
                    'Type keys must be unique. You have constructed two different '
                    'instances of types with the same key "{key}".'
                ).format(key=key)
            )

    return type_dict_by_name, type_dict_by_key
def get_outputs_field(solid, handle, pipeline_name):
    """Build the optional 'outputs' config field for a solid, or return ``None``.

    ``None`` is returned when the solid definition has no configurable
    outputs. Otherwise each output whose runtime type carries an output
    materialization config contributes one optional field, and the result is
    an optional ``Field`` over a ``List`` of the named dict of those fields.
    """
    check.inst_param(solid, 'solid', Solid)
    check.inst_param(handle, 'handle', SolidHandle)
    check.str_param(pipeline_name, 'pipeline_name')

    solid_def = solid.definition
    if not solid_def.has_configurable_outputs:
        return None

    output_dict_fields = {
        name: Field(
            type(out.runtime_type.output_materialization_config.schema_type), is_optional=True
        )
        for name, out in solid_def.output_dict.items()
        if out.runtime_type.output_materialization_config
    }

    outputs_type_name = '{pipeline_name}.{solid_handle}.Outputs'.format(
        pipeline_name=camelcase(pipeline_name), solid_handle=handle.camelcase()
    )
    output_entry_dict = SystemNamedDict(outputs_type_name, output_dict_fields)

    return Field(List[output_entry_dict], is_optional=True)
def define_environment_cls(creation_data):
    """Assemble the top-level environment config ``Shape``.

    The shape has the keys 'solids', 'storage', 'intermediate_storage',
    'execution', 'loggers' and 'resources'; the storage, intermediate storage
    and execution entries are not required. ``None`` entries are removed
    before the ``Shape`` is created.
    """
    check.inst_param(creation_data, 'creation_data', EnvironmentClassCreationData)

    solids_field = Field(
        define_solid_dictionary_cls(
            creation_data.solids,
            creation_data.dependency_structure,
        )
    )
    storage_field = Field(
        define_storage_config_cls(creation_data.mode_definition),
        is_required=False,
    )
    intermediate_storage_field = Field(
        define_intermediate_storage_config_cls(creation_data.mode_definition),
        is_required=False,
    )
    execution_field = Field(
        define_executor_config_cls(creation_data.mode_definition),
        is_required=False,
    )
    loggers_field = Field(define_logger_dictionary_cls(creation_data))
    resources_field = Field(
        define_resource_dictionary_cls(creation_data.mode_definition.resource_defs)
    )

    environment_fields = {
        'solids': solids_field,
        'storage': storage_field,
        'intermediate_storage': intermediate_storage_field,
        'execution': execution_field,
        'loggers': loggers_field,
        'resources': resources_field,
    }
    return Shape(fields=remove_none_entries(environment_fields))
def _gather_all_config_types(node_defs, environment_type):
    """Yield the config types of every node definition, then of the environment type.

    This is a generator, so the parameter checks run when iteration starts,
    not when the function is called.
    """
    check.list_param(node_defs, "node_defs", NodeDefinition)
    check.inst_param(environment_type, "environment_type", ConfigType)

    for node_def in node_defs:
        for config_type in iterate_node_def_config_types(node_def):
            yield config_type

    for config_type in iterate_config_types(environment_type):
        yield config_type
def def_config_field(configurable_def, is_required=None):
    """Wrap a configurable definition's config field in a ``Field`` over a ``Shape``.

    The shape has a single 'config' entry when the definition has a config
    field, and is empty otherwise. ``is_required`` is forwarded to ``Field``.
    """
    check.inst_param(configurable_def, "configurable_def", ConfigurableDefinition)

    if configurable_def.has_config_field:
        shape_fields = {"config": configurable_def.config_field}
    else:
        shape_fields = {}

    return Field(Shape(shape_fields), is_required=is_required)
def define_environment_cls(creation_data):
    """Assemble the named top-level environment config type.

    The result is a ``SolidContainerConfigDict`` with the keys 'solids',
    'storage', 'execution', 'loggers' and 'resources'; storage and execution
    are optional. Each nested type gets a name derived from the camel-cased
    pipeline name (and mode name where applicable). ``None`` entries are
    removed from the field dictionary.
    """
    check.inst_param(creation_data, 'creation_data', EnvironmentClassCreationData)
    pipeline_name = camelcase(creation_data.pipeline_name)

    # The environment name includes the mode only when a mode definition is present.
    if creation_data.mode_definition:
        environment_name = '{pipeline_name}.Mode.{mode_name}.Environment'.format(
            pipeline_name=pipeline_name,
            mode_name=camelcase(creation_data.mode_definition.name),
        )
    else:
        environment_name = '{pipeline_name}.Environment'.format(pipeline_name=pipeline_name)

    solids_field = Field(
        define_solid_dictionary_cls(
            '{pipeline_name}.SolidsConfigDictionary'.format(pipeline_name=pipeline_name),
            creation_data.solids,
            creation_data.dependency_structure,
            creation_data.pipeline_name,
        )
    )

    storage_type_name = '{pipeline_name}.{mode_name}.StorageConfig'.format(
        pipeline_name=pipeline_name,
        mode_name=camelcase(creation_data.mode_definition.name),
    )
    storage_field = Field(
        define_storage_config_cls(storage_type_name, creation_data.mode_definition),
        is_optional=True,
    )

    execution_type_name = '{pipeline_name}.{mode_name}.ExecutionConfig'.format(
        pipeline_name=pipeline_name,
        mode_name=camelcase(creation_data.mode_definition.name),
    )
    execution_field = Field(
        define_executor_config_cls(execution_type_name, creation_data.mode_definition),
        is_optional=True,
    )

    loggers_field = Field(
        define_logger_dictionary_cls(
            '{pipeline_name}.LoggerConfig'.format(pipeline_name=pipeline_name),
            creation_data,
        )
    )

    resources_field = Field(
        define_mode_resources_dictionary_cls(pipeline_name, creation_data.mode_definition)
    )

    environment_fields = {
        'solids': solids_field,
        'storage': storage_field,
        'execution': execution_field,
        'loggers': loggers_field,
        'resources': resources_field,
    }
    return SolidContainerConfigDict(
        name=environment_name,
        fields=remove_none_entries(environment_fields),
    )
def define_mode_resources_dictionary_cls(pipeline_name, mode_definition):
    """Build the resources config dictionary for one mode of a pipeline.

    The type name is '<pipeline_name>.Mode.<CamelCasedModeName>.Resources';
    the mode's ``resource_defs`` supply the contents.
    """
    check.str_param(pipeline_name, 'pipeline_name')
    check.inst_param(mode_definition, 'mode_definition', ModeDefinition)

    resources_type_name = '{pipeline_name}.Mode.{mode}.Resources'.format(
        pipeline_name=pipeline_name, mode=camelcase(mode_definition.name)
    )
    return define_resource_dictionary_cls(resources_type_name, mode_definition.resource_defs)
def define_isolid_field(solid, handle, dependency_structure, resource_defs, ignored):
    """Build the config field for one solid, dispatching on its definition type.

    Every solid gets the same inputs and outputs config; only one extra
    element of configuration varies:

    1) Plain solid definition: a 'config' key holding its config_schema.
    2) Composite with a config mapping: a 'config' key holding the
       config_schema of the mapping (via ``config_schema`` on the definition).
    3) Composite without a config mapping: a 'solids' key holding a
       recursively defined solids dictionary.
    4) ``configured`` composite with a config mapping: a 'config' key holding
       the config_schema supplied when ``configured`` was called (also via
       ``config_schema`` on the definition).
    """
    check.inst_param(solid, "solid", Solid)
    check.inst_param(handle, "handle", SolidHandle)

    definition = solid.definition

    # Case 1.
    if isinstance(definition, SolidDefinition):
        return construct_leaf_solid_config(
            solid,
            handle,
            dependency_structure,
            definition.config_schema,
            resource_defs,
            ignored,
        )

    graph_def = check.inst(definition, GraphDefinition)

    # Cases 2 and 4: only config-mapped composites can be `configured`, and in
    # both cases `config_schema` exposes the right schema for the 'config' key.
    # No 'solids' key is produced here, so the user cannot stub config, inputs
    # or outputs for the inner (child) solids.
    if graph_def.has_config_mapping:
        return construct_leaf_solid_config(
            solid,
            handle,
            dependency_structure,
            graph_def.config_schema,
            resource_defs,
            ignored,
        )

    # Case 3: recurse into the child solids.
    inputs_field = get_inputs_field(solid, handle, dependency_structure, resource_defs)
    outputs_field = get_outputs_field(solid, handle, resource_defs)
    child_solids_field = Field(
        define_solid_dictionary_cls(
            solids=graph_def.solids,
            ignored_solids=None,
            dependency_structure=graph_def.dependency_structure,
            parent_handle=handle,
            resource_defs=resource_defs,
        )
    )
    return solid_config_field(
        {
            "inputs": inputs_field,
            "outputs": outputs_field,
            "solids": child_solids_field,
        },
        ignored=ignored,
    )
def define_logger_dictionary_cls(creation_data):
    """Build the ``Shape`` with one non-required config field per logger.

    Keys are the logger names from ``creation_data.logger_defs``; each value
    comes from ``def_config_field`` with ``is_required=False``.
    """
    check.inst_param(creation_data, "creation_data", EnvironmentClassCreationData)

    logger_fields = {}
    for logger_name, logger_definition in creation_data.logger_defs.items():
        logger_fields[logger_name] = def_config_field(logger_definition, is_required=False)
    return Shape(logger_fields)
def define_isolid_field(solid, handle, dependency_structure, pipeline_name):
    """Build the config field for one solid, dispatching on its definition type.

    Composite solids get a ``SolidContainerConfigDict`` with 'inputs' and
    'outputs' plus either 'config' (when a config mapping is provided) or
    'solids' (the child solids dictionary). Plain solids get the type built by
    ``define_solid_config_cls``.

    Args:
        solid: The ``Solid`` to build the field for.
        handle: The ``SolidHandle`` identifying the solid.
        dependency_structure: Forwarded to ``get_inputs_field``.
        pipeline_name: Pipeline name used in the generated type names.

    Returns:
        A ``Field`` wrapping the solid's config type.

    Raises:
        A check failure if the solid's definition is neither a
        ``CompositeSolidDefinition`` nor a ``SolidDefinition``.
    """
    check.inst_param(solid, 'solid', Solid)
    check.inst_param(handle, 'handle', SolidHandle)
    check.str_param(pipeline_name, 'pipeline_name')

    if isinstance(solid.definition, CompositeSolidDefinition):
        composite_def = solid.definition
        # Built even when a config mapping masks it: the container type below
        # always receives it as `child_solids_config_field`.
        child_solids_config_field = Field(
            define_solid_dictionary_cls(
                '{pipeline_name}.CompositeSolidsDict.{solid_handle}'.format(
                    pipeline_name=camelcase(pipeline_name), solid_handle=handle.camelcase()
                ),
                composite_def.solids,
                composite_def.dependency_structure,
                pipeline_name,
                handle,
            )
        )

        composite_config_dict = {
            'inputs': get_inputs_field(solid, handle, dependency_structure, pipeline_name),
            'outputs': get_outputs_field(solid, handle, pipeline_name),
        }

        # Mask solid config for solids beneath this level if config mapping is provided
        if composite_def.has_config_mapping:
            composite_config_dict['config'] = composite_def.config_mapping.config_field
        else:
            composite_config_dict['solids'] = child_solids_config_field

        return Field(
            SolidContainerConfigDict(
                '{name}CompositeSolidConfig'.format(name=str(handle)),
                remove_none_entries(composite_config_dict),
                handle=handle,
                child_solids_config_field=child_solids_config_field,
            )
        )

    elif isinstance(solid.definition, SolidDefinition):
        solid_config_type = define_solid_config_cls(
            '{pipeline_name}.SolidConfig.{solid_handle}'.format(
                pipeline_name=camelcase(pipeline_name), solid_handle=handle.camelcase()
            ),
            solid.definition.config_field,
            inputs_field=get_inputs_field(solid, handle, dependency_structure, pipeline_name),
            outputs_field=get_outputs_field(solid, handle, pipeline_name),
        )
        return Field(solid_config_type)

    else:
        # `check.invariant` takes the condition first; handing it only the
        # message made the condition a truthy string, so it never fired and
        # the function silently returned None. `check.failed` always raises.
        check.failed(
            'Unexpected ISolidDefinition type {type}'.format(type=type(solid.definition))
        )
def _gather_all_config_types(solid_defs, environment_type):
    """Yield the config types of every solid definition, then of the environment type.

    This is a generator, so the parameter checks run when iteration starts,
    not when the function is called.
    """
    check.list_param(solid_defs, "solid_defs", ISolidDefinition)
    check.inst_param(environment_type, "environment_type", ConfigType)

    for solid_def in solid_defs:
        yield from iterate_solid_def_config_types(solid_def)

    yield from iterate_config_types(environment_type)
def _gather_all_config_types(solid_defs, environment_type):
    """Yield the types of every solid definition, then of the environment type.

    Per-solid types come from ``iterate_solid_def_types``; the environment's
    types come from ``iterate_config_types``. This is a generator, so the
    parameter checks run when iteration starts, not when the function is called.
    """
    check.list_param(solid_defs, 'solid_defs', ISolidDefinition)
    check.inst_param(environment_type, 'environment_type', ConfigType)

    for solid_def in solid_defs:
        yield from iterate_solid_def_types(solid_def)

    yield from iterate_config_types(environment_type)
def define_storage_config_cls(mode_definition):
    """Build a ``SystemSelector`` over the mode's system storage definitions.

    Each storage definition contributes one entry keyed by its name. The
    entry's dict has a 'config' key only when the definition has a truthy
    ``config_field``.
    """
    check.inst_param(mode_definition, 'mode_definition', ModeDefinition)

    selector_fields = {}
    for storage_def in mode_definition.system_storage_defs:
        if storage_def.config_field:
            inner_fields = {'config': storage_def.config_field}
        else:
            inner_fields = {}
        selector_fields[storage_def.name] = Field(SystemDict(fields=inner_fields))

    return SystemSelector(selector_fields)
def define_executor_config_cls(mode_definition):
    """Build a ``SystemSelector`` over the mode's executor definitions.

    Each executor definition contributes one entry keyed by its name. The
    entry's dict has a 'config' key only when the definition has a truthy
    ``config_field``.
    """
    check.inst_param(mode_definition, 'mode_definition', ModeDefinition)

    selector_fields = {}
    for executor_def in mode_definition.executor_defs:
        if executor_def.config_field:
            inner_fields = {'config': executor_def.config_field}
        else:
            inner_fields = {}
        selector_fields[executor_def.name] = Field(SystemDict(fields=inner_fields))

    return SystemSelector(selector_fields)
def define_executor_config_cls(mode_definition):
    """Build a ``Selector`` over the mode's executor definitions.

    Each executor definition contributes one entry keyed by its name. The
    entry's shape has a 'config' key only when the definition has a truthy
    ``config_schema``.
    """
    check.inst_param(mode_definition, "mode_definition", ModeDefinition)

    selector_fields = {}
    for executor_def in mode_definition.executor_defs:
        if executor_def.config_schema:
            inner_fields = {"config": executor_def.config_schema}
        else:
            inner_fields = {}
        selector_fields[executor_def.name] = Field(Shape(fields=inner_fields))

    return Selector(selector_fields)
def define_intermediate_storage_config_cls(mode_definition):
    """Build a ``Selector`` over the mode's intermediate storage definitions.

    Each definition contributes one entry keyed by its name. The entry's
    shape has a 'config' key only when the definition has a truthy
    ``config_schema``.
    """
    check.inst_param(mode_definition, "mode_definition", ModeDefinition)

    selector_fields = {}
    for storage_def in mode_definition.intermediate_storage_defs:
        if storage_def.config_schema:
            inner_fields = {"config": storage_def.config_schema}
        else:
            inner_fields = {}
        selector_fields[storage_def.name] = Field(Shape(fields=inner_fields))

    return Selector(selector_fields)
def define_environment_cls(creation_data):
    """Assemble the top-level environment config ``Shape``.

    The shape has the keys 'solids', 'storage', 'intermediate_storage',
    'execution', 'loggers' and 'resources'. The 'storage' entry is only
    built when the intermediate storage field is neither required nor has a
    default provided; otherwise it is ``None`` and gets removed along with
    any other ``None`` entries.
    """
    check.inst_param(creation_data, "creation_data", EnvironmentClassCreationData)
    mode_definition = creation_data.mode_definition

    intermediate_storage_field = define_storage_field(
        selector_for_named_defs(mode_definition.intermediate_storage_defs),
        storage_names=[dfn.name for dfn in mode_definition.intermediate_storage_defs],
        defaults={storage.name for storage in default_intermediate_storage_defs},
    )

    if intermediate_storage_field.is_required or intermediate_storage_field.default_provided:
        storage_field = None
    else:
        storage_field = define_storage_field(
            selector_for_named_defs(mode_definition.system_storage_defs),
            storage_names=[dfn.name for dfn in mode_definition.system_storage_defs],
            defaults={storage.name for storage in default_system_storage_defs},
        )

    solids_field = Field(
        define_solid_dictionary_cls(
            solids=creation_data.solids,
            ignored_solids=creation_data.ignored_solids,
            dependency_structure=creation_data.dependency_structure,
        )
    )
    execution_field = Field(
        selector_for_named_defs(mode_definition.executor_defs),
        is_required=False,
    )
    loggers_field = Field(define_logger_dictionary_cls(creation_data))
    resources_field = Field(define_resource_dictionary_cls(mode_definition.resource_defs))

    environment_fields = {
        "solids": solids_field,
        "storage": storage_field,
        "intermediate_storage": intermediate_storage_field,
        "execution": execution_field,
        "loggers": loggers_field,
        "resources": resources_field,
    }
    return Shape(fields=remove_none_entries(environment_fields))
def define_logger_dictionary_cls(creation_data):
    """Build the ``Shape`` with one non-required config field per logger.

    Keys are the logger names from ``creation_data.logger_defs``. Each value
    is a ``Field`` over a ``Shape`` holding the logger's ``config_schema``
    under 'config', after ``remove_none_entries`` has filtered the dict.
    """
    check.inst_param(creation_data, 'creation_data', EnvironmentClassCreationData)

    return Shape(
        {
            logger_name: Field(
                Shape(remove_none_entries({'config': logger_definition.config_schema})),
                is_required=False,
            )
            for logger_name, logger_definition in creation_data.logger_defs.items()
        }
    )
def iterate_node_def_config_types(node_def):
    """Yield every config type reachable from a node definition.

    For a ``SolidDefinition`` with a config field, this yields the types of
    that field's config type. For a ``GraphDefinition``, it recurses into the
    definition of each contained solid.

    Args:
        node_def: The ``NodeDefinition`` to inspect.

    Raises:
        A check failure if ``node_def`` is neither a ``SolidDefinition`` nor a
        ``GraphDefinition``. As this is a generator, checks run when
        iteration starts.
    """
    check.inst_param(node_def, "node_def", NodeDefinition)

    if isinstance(node_def, SolidDefinition):
        if node_def.has_config_field:
            yield from iterate_config_types(node_def.config_field.config_type)
    elif isinstance(node_def, GraphDefinition):
        for solid in node_def.solids:
            yield from iterate_node_def_config_types(solid.definition)
    else:
        # `check.invariant` takes the condition first; handing it only the
        # message made the condition a truthy string, so it never fired.
        # `check.failed` always raises.
        check.failed("Unexpected NodeDefinition type {type}".format(type=type(node_def)))
def define_solid_dictionary_cls(solids, dependency_structure, parent_handle=None):
    """Build the ``Shape`` that maps solid names to their config fields.

    Args:
        solids: List of ``Solid`` instances to build fields for.
        dependency_structure: ``DependencyStructure`` forwarded to every field.
        parent_handle: Optional ``SolidHandle`` used as the parent of each
            per-solid handle.

    Returns:
        A ``Shape`` keyed by solid name. Solids whose definition reports no
        config entry are skipped.
    """
    check.list_param(solids, 'solids', of_type=Solid)
    check.inst_param(dependency_structure, 'dependency_structure', DependencyStructure)
    check.opt_inst_param(parent_handle, 'parent_handle', SolidHandle)

    return Shape(
        {
            solid.name: define_isolid_field(
                solid,
                SolidHandle(solid.name, parent_handle),
                dependency_structure,
            )
            for solid in solids
            if solid.definition.has_config_entry
        }
    )
def __new__(cls, pipeline_name, solids, dependency_structure, mode_definition, logger_defs):
    """Type-check every argument and create the ``EnvironmentClassCreationData``.

    Each argument is validated with the matching ``check`` helper before being
    handed to the parent ``__new__`` under the same keyword.
    """
    # The checks run in the same order as the keyword arguments below.
    checked_pipeline_name = check.str_param(pipeline_name, 'pipeline_name')
    checked_solids = check.list_param(solids, 'solids', of_type=Solid)
    checked_dependency_structure = check.inst_param(
        dependency_structure, 'dependency_structure', DependencyStructure
    )
    checked_mode_definition = check.inst_param(
        mode_definition, 'mode_definition', ModeDefinition
    )
    checked_logger_defs = check.dict_param(
        logger_defs, 'logger_defs', key_type=str, value_type=LoggerDefinition
    )

    return super(EnvironmentClassCreationData, cls).__new__(
        cls,
        pipeline_name=checked_pipeline_name,
        solids=checked_solids,
        dependency_structure=checked_dependency_structure,
        mode_definition=checked_mode_definition,
        logger_defs=checked_logger_defs,
    )
def define_environment_cls(creation_data):
    """Assemble the top-level environment config ``Shape``.

    The shape has the keys 'solids', 'storage', 'intermediate_storage',
    'execution', 'loggers' and 'resources'. ``None`` entries are removed
    before the ``Shape`` is created.
    """
    check.inst_param(creation_data, "creation_data", EnvironmentClassCreationData)
    mode_definition = creation_data.mode_definition

    intermediate_storage_field = define_storage_field(
        selector_for_named_defs(mode_definition.intermediate_storage_defs),
        storage_names=[dfn.name for dfn in mode_definition.intermediate_storage_defs],
        defaults={storage.name for storage in default_intermediate_storage_defs},
    )

    # TODO: remove "storage" entry in run_config as part of system storage removal
    # currently we treat "storage" as an alias to "intermediate_storage" and storage field is
    # optional
    # tracking https://github.com/dagster-io/dagster/issues/3280
    storage_field = Field(
        selector_for_named_defs(mode_definition.intermediate_storage_defs),
        is_required=False,
    )

    solids_field = Field(
        define_solid_dictionary_cls(
            solids=creation_data.solids,
            ignored_solids=creation_data.ignored_solids,
            dependency_structure=creation_data.dependency_structure,
            resource_defs=mode_definition.resource_defs,
        )
    )
    execution_field = Field(
        selector_for_named_defs(mode_definition.executor_defs),
        is_required=False,
    )
    loggers_field = Field(define_logger_dictionary_cls(creation_data))
    resources_field = Field(define_resource_dictionary_cls(mode_definition.resource_defs))

    environment_fields = {
        "solids": solids_field,
        "storage": storage_field,
        "intermediate_storage": intermediate_storage_field,
        "execution": execution_field,
        "loggers": loggers_field,
        "resources": resources_field,
    }
    return Shape(fields=remove_none_entries(environment_fields))