def get_inputs_field(solid, handle, dependency_structure): check.inst_param(solid, "solid", Solid) check.inst_param(handle, "handle", SolidHandle) check.inst_param(dependency_structure, "dependency_structure", DependencyStructure) if not solid.definition.has_configurable_inputs: return None inputs_field_fields = {} for name, inp in solid.definition.input_dict.items(): if inp.dagster_type.loader: inp_handle = SolidInputHandle(solid, inp) # If this input is not satisfied by a dependency you must # provide it via config if not dependency_structure.has_deps( inp_handle) and not solid.container_maps_input(name): inputs_field_fields[name] = Field( inp.dagster_type.loader.schema_type, is_required=(not solid.definition.input_has_default(name)), ) if not inputs_field_fields: return None return Field(Shape(inputs_field_fields))
def get_inputs_field(
    solid: Node,
    dependency_structure: DependencyStructure,
    resource_defs: Dict[str, ResourceDefinition],
    solid_ignored: bool,
):
    inputs_field_fields = {}
    for name, inp in solid.definition.input_dict.items():
        inp_handle = SolidInputHandle(solid, inp)
        has_upstream = input_has_upstream(dependency_structure, inp_handle, solid, name)
        if inp.root_manager_key and not has_upstream:
            input_field = get_input_manager_input_field(solid, inp, resource_defs)
        elif inp.dagster_type.loader and not has_upstream:
            input_field = get_type_loader_input_field(solid, name, inp)
        else:
            input_field = None

        if input_field:
            inputs_field_fields[name] = input_field

    if not inputs_field_fields:
        return None

    if solid_ignored:
        return Field(
            Shape(inputs_field_fields),
            is_required=False,
            description="This solid is not present in the current solid selection; "
            "the input config values are allowed but ignored.",
        )
    else:
        return Field(Shape(inputs_field_fields))
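# Illustrative sketch (not from the source): the field built above surfaces
# unconnected, loadable inputs under an "inputs" key in run config. The solid
# and input names here are hypothetical; a String-typed input can be supplied
# directly via its dagster_type loader.
EXAMPLE_RUN_CONFIG = {
    "solids": {
        "my_solid": {
            "inputs": {
                "raw_path": "s3://bucket/data.csv",
            }
        }
    }
}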
def _get_target_config():
    return {
        "python_file": ScalarUnion(
            scalar_type=str,
            non_scalar_schema={
                "relative_path": StringSource,
                "attribute": Field(StringSource, is_required=False),
                "location_name": Field(StringSource, is_required=False),
                "working_directory": Field(StringSource, is_required=False),
                "executable_path": Field(StringSource, is_required=False),
            },
        ),
        "python_module": ScalarUnion(
            scalar_type=str,
            non_scalar_schema={
                "module_name": StringSource,
                "attribute": Field(StringSource, is_required=False),
                "location_name": Field(StringSource, is_required=False),
                "executable_path": Field(StringSource, is_required=False),
            },
        ),
        "python_package": ScalarUnion(
            scalar_type=str,
            non_scalar_schema={
                "package_name": StringSource,
                "attribute": Field(StringSource, is_required=False),
                "location_name": Field(StringSource, is_required=False),
                "executable_path": Field(StringSource, is_required=False),
            },
        ),
    }
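# Illustrative sketch (not from the source): ScalarUnion accepts either the bare
# scalar or the expanded dict form, so both of these values satisfy the
# "python_file" target above. The file path and attribute names are hypothetical.
EXAMPLE_TARGET_SCALAR = {"python_file": "repo.py"}
EXAMPLE_TARGET_EXPANDED = {
    "python_file": {
        "relative_path": "repo.py",
        "attribute": "my_repository",
        "working_directory": ".",
    }
}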
def config_field_for_configurable_class(): return Field({ "module": str, "class": str, "config": Field(Permissive()) }, is_required=False)
def config_type(cls): return { "max_concurrent_runs": Field(config=IntSource, is_required=False), "tag_concurrency_limits": Field( config=Noneable( Array( Shape({ "key": String, "value": Field( ScalarUnion( scalar_type=String, non_scalar_schema=Shape( {"applyLimitPerUniqueValue": Bool}), ), is_required=False, ), "limit": Field(int), }))), is_required=False, ), "dequeue_interval_seconds": Field(config=IntSource, is_required=False), }
def define_run_config_schema_type(creation_data: RunConfigSchemaCreationData) -> ConfigType:
    execution_field = (
        define_execution_field(creation_data.mode_definition.executor_defs)
        if not creation_data.is_using_graph_job_op_apis
        else define_single_execution_field(creation_data.mode_definition.executor_defs[0])
    )

    top_level_node = Node(
        name=creation_data.graph_def.name,
        definition=creation_data.graph_def,
        graph_definition=creation_data.graph_def,
    )

    fields = {
        "execution": execution_field,
        "loggers": Field(define_logger_dictionary_cls(creation_data)),
        "resources": Field(
            define_resource_dictionary_cls(
                creation_data.mode_definition.resource_defs,
                creation_data.required_resources,
            )
        ),
        "inputs": get_inputs_field(
            solid=top_level_node,
            dependency_structure=creation_data.dependency_structure,
            resource_defs=creation_data.mode_definition.resource_defs,
            solid_ignored=False,
        ),
    }

    if creation_data.graph_def.has_config_mapping:
        config_schema = cast(IDefinitionConfigSchema, creation_data.graph_def.config_schema)
        nodes_field = Field({"config": config_schema.as_field()})
    else:
        nodes_field = Field(
            define_solid_dictionary_cls(
                solids=creation_data.solids,
                ignored_solids=creation_data.ignored_solids,
                dependency_structure=creation_data.dependency_structure,
                resource_defs=creation_data.mode_definition.resource_defs,
                is_using_graph_job_op_apis=creation_data.is_using_graph_job_op_apis,
            )
        )

    if creation_data.is_using_graph_job_op_apis:
        fields["ops"] = nodes_field
        field_aliases = {"ops": "solids"}
    else:
        fields["solids"] = nodes_field
        field_aliases = {"solids": "ops"}

    return Shape(
        fields=remove_none_entries(fields),
        field_aliases=field_aliases,
    )
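# Illustrative sketch (not from the source): a top-level run config matching the
# schema built above. With the graph/job/op APIs the node config lives under
# "ops", and "solids" is accepted as an alias (and vice versa for the legacy
# APIs). All names are hypothetical.
EXAMPLE_JOB_RUN_CONFIG = {
    "loggers": {"console": {"config": {"log_level": "INFO"}}},
    "resources": {"io_manager": {"config": {"base_dir": "/tmp/dagster"}}},
    "ops": {"my_op": {"config": {"fast": True}}},
}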
def get_outputs_field(solid, handle, resource_defs):
    check.inst_param(solid, "solid", Solid)
    check.inst_param(handle, "handle", SolidHandle)
    check.dict_param(resource_defs, "resource_defs", key_type=str, value_type=ResourceDefinition)

    # if any outputs have configurable output managers, use those for the schema and ignore all type
    # materializers
    output_manager_fields = {}
    for name, output_def in solid.definition.output_dict.items():
        output_manager_output_field = get_output_manager_output_field(solid, output_def, resource_defs)
        if output_manager_output_field:
            output_manager_fields[name] = output_manager_output_field

    if output_manager_fields:
        return Field(Shape(output_manager_fields))

    # otherwise, use any type materializers for the schema
    type_materializer_fields = {}
    for name, output_def in solid.definition.output_dict.items():
        type_output_field = get_type_output_field(output_def)
        if type_output_field:
            type_materializer_fields[name] = type_output_field

    if type_materializer_fields:
        return Field(Array(Shape(type_materializer_fields)), is_required=False)

    return None
def get_outputs_field(
    solid: Node,
    resource_defs: Dict[str, ResourceDefinition],
) -> Optional[Field]:
    # if any outputs have configurable output managers, use those for the schema and ignore all type
    # materializers
    output_manager_fields = {}
    for name, output_def in solid.definition.output_dict.items():
        output_manager_output_field = get_output_manager_output_field(solid, output_def, resource_defs)
        if output_manager_output_field:
            output_manager_fields[name] = output_manager_output_field

    if output_manager_fields:
        return Field(Shape(output_manager_fields))

    # otherwise, use any type materializers for the schema
    type_materializer_fields = {}
    for name, output_def in solid.definition.output_dict.items():
        type_output_field = get_type_output_field(output_def)
        if type_output_field:
            type_materializer_fields[name] = type_output_field

    if type_materializer_fields:
        return Field(Array(Shape(type_materializer_fields)), is_required=False)

    return None
def config_field_for_configurable_class():
    return Field(
        {'module': str, 'class': str, 'config': Field(Permissive())},
        is_optional=True,
    )
def define_dagster_config_cls():
    return {
        'local_artifact_storage': config_field_for_configurable_class(),
        'compute_logs': config_field_for_configurable_class(),
        'run_storage': config_field_for_configurable_class(),
        'event_log_storage': config_field_for_configurable_class(),
        'schedule_storage': config_field_for_configurable_class(),
        'scheduler': config_field_for_configurable_class(),
        'run_launcher': config_field_for_configurable_class(),
        'dagit': Field(
            {'execution_manager': Field({'max_concurrent_runs': int}, is_required=False)},
            is_required=False,
        ),
    }
def config_type(cls): return { "max_catchup_runs": Field( IntSource, is_required=False, default_value=DEFAULT_MAX_CATCHUP_RUNS, description= """For partitioned schedules, controls the maximum number of past partitions for each schedule that will be considered when looking for missing runs . Generally this parameter will only come into play if the scheduler falls behind or launches after experiencing downtime. This parameter will not be checked for schedules without partition sets (for example, schedules created using the @schedule decorator) - only the most recent execution time will be considered for those schedules. Note that no matter what this value is, the scheduler will never launch a run from a time before the schedule was turned on (even if the start_date on the schedule is earlier) - if you want to launch runs for earlier partitions, launch a backfill. """, ), "max_tick_retries": Field( IntSource, default_value=0, is_required=False, description= "For each schedule tick that raises an error, how many times to retry that tick", ), }
def define_environment_cls(creation_data):
    check.inst_param(creation_data, 'creation_data', EnvironmentClassCreationData)
    return Shape(
        fields=remove_none_entries(
            {
                'solids': Field(
                    define_solid_dictionary_cls(
                        creation_data.solids,
                        creation_data.dependency_structure,
                    )
                ),
                'storage': Field(
                    define_storage_config_cls(creation_data.mode_definition),
                    is_optional=True,
                ),
                'execution': Field(
                    define_executor_config_cls(creation_data.mode_definition),
                    is_optional=True,
                ),
                'loggers': Field(define_logger_dictionary_cls(creation_data)),
                'resources': Field(
                    define_resource_dictionary_cls(creation_data.mode_definition.resource_defs)
                ),
            }
        ),
    )
def get_inputs_field(solid, handle, dependency_structure):
    check.inst_param(solid, 'solid', Solid)
    check.inst_param(handle, 'handle', SolidHandle)
    check.inst_param(dependency_structure, 'dependency_structure', DependencyStructure)

    if not solid.definition.has_configurable_inputs:
        return None

    inputs_field_fields = {}
    for name, inp in solid.definition.input_dict.items():
        if inp.runtime_type.input_hydration_config:
            inp_handle = SolidInputHandle(solid, inp)
            # If this input is not satisfied by a dependency you must
            # provide it via config
            if not dependency_structure.has_deps(inp_handle) and not solid.container_maps_input(name):
                inputs_field_fields[name] = Field(
                    inp.runtime_type.input_hydration_config.schema_type
                )

    if not inputs_field_fields:
        return None

    return Field(Shape(inputs_field_fields))
def config_field_for_configurable_class():
    return Field(
        {'module': str, 'class': str, 'config': Field(Permissive())},
        is_required=False,
    )
def define_environment_cls(creation_data): check.inst_param(creation_data, "creation_data", EnvironmentClassCreationData) intermediate_storage_field = define_storage_field( selector_for_named_defs( creation_data.mode_definition.intermediate_storage_defs), storage_names=[ dfn.name for dfn in creation_data.mode_definition.intermediate_storage_defs ], defaults=set( [storage.name for storage in default_intermediate_storage_defs]), ) if not (intermediate_storage_field.is_required or intermediate_storage_field.default_provided): storage_field = define_storage_field( selector_for_named_defs( creation_data.mode_definition.system_storage_defs), storage_names=[ dfn.name for dfn in creation_data.mode_definition.system_storage_defs ], defaults=set( [storage.name for storage in default_system_storage_defs]), ) else: storage_field = None return Shape(fields=remove_none_entries({ "solids": Field( define_solid_dictionary_cls( solids=creation_data.solids, ignored_solids=creation_data.ignored_solids, dependency_structure=creation_data.dependency_structure, )), "storage": storage_field, "intermediate_storage": intermediate_storage_field, "execution": Field( selector_for_named_defs( creation_data.mode_definition.executor_defs), is_required=False, ), "loggers": Field(define_logger_dictionary_cls(creation_data)), "resources": Field( define_resource_dictionary_cls( creation_data.mode_definition.resource_defs)), }), )
def __call__(self, fn: Callable) -> AssetsDefinition:
    asset_name = self.name or fn.__name__

    ins_by_input_names: Mapping[str, In] = build_asset_ins(
        fn, self.namespace, self.ins or {}, self.non_argument_deps
    )

    partition_fn: Optional[Callable] = None
    if self.partitions_def:

        def partition_fn(context):  # pylint: disable=function-redefined
            return [context.partition_key]

    out_asset_key = AssetKey(list(filter(None, [*(self.namespace or []), asset_name])))
    out = Out(
        asset_key=out_asset_key,
        metadata=self.metadata or {},
        io_manager_key=self.io_manager_key,
        dagster_type=self.dagster_type,
        asset_partitions_def=self.partitions_def,
        asset_partitions=partition_fn,
    )
    op = _Op(
        name=asset_name,
        description=self.description,
        # convert Mapping object to dict
        ins={input_name: in_def for input_name, in_def in ins_by_input_names.items()},
        out=out,
        required_resource_keys=self.required_resource_keys,
        tags={"kind": self.compute_kind} if self.compute_kind else None,
        config_schema={
            "assets": {
                "input_partitions": Field(dict, is_required=False),
                "output_partitions": Field(dict, is_required=False),
            }
        },
    )(fn)

    return AssetsDefinition(
        input_names_by_asset_key={
            in_def.asset_key: input_name for input_name, in_def in ins_by_input_names.items()
        },
        output_names_by_asset_key={out_asset_key: "result"},
        op=op,
        partitions_def=self.partitions_def,
        partition_mappings={
            ins_by_input_names[input_name].asset_key: partition_mapping
            for input_name, partition_mapping in self.partition_mappings.items()
        }
        if self.partition_mappings
        else None,
    )
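# Illustrative sketch (not from the source): this __call__ is what backs the
# public @asset decorator, so a namespaced asset definition like the one below
# ends up keyed as AssetKey(["raw", "events"]). Assumes a Dagster version in
# which @asset accepts namespace and compute_kind; the names are hypothetical.
from dagster import asset

@asset(namespace=["raw"], compute_kind="pandas")
def events():
    return []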
def solid_config_field(fields, ignored):
    if ignored:
        return Field(
            Shape(remove_none_entries(fields)),
            is_required=False,
            description="This solid is not present in the current solid selection; "
            "the config values are allowed but ignored.",
        )
    else:
        return Field(Shape(remove_none_entries(fields)))
def dagster_instance_config_schema():
    return {
        "local_artifact_storage": config_field_for_configurable_class(),
        "compute_logs": config_field_for_configurable_class(),
        "run_storage": config_field_for_configurable_class(),
        "event_log_storage": config_field_for_configurable_class(),
        "schedule_storage": config_field_for_configurable_class(),
        "scheduler": config_field_for_configurable_class(),
        "run_launcher": config_field_for_configurable_class(),
        "telemetry": Field({"enabled": Field(Bool, is_required=False)}),
        "opt_in": Field({"local_servers": Field(Bool, is_required=False)}),
    }
def define_execution_field(executor_defs: List[ExecutorDefinition]) -> Field:
    default_in_process = False
    for executor_def in executor_defs:
        if executor_def == in_process_executor:  # pylint: disable=comparison-with-callable
            default_in_process = True

    selector = selector_for_named_defs(executor_defs)

    if default_in_process:
        return Field(selector, default_value={in_process_executor.name: {}})

    return Field(selector)
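# Illustrative sketch (not from the source): when in_process_executor is among
# the available executors, omitting "execution" from run config is equivalent
# to selecting it explicitly with empty config:
EXAMPLE_EXECUTION_DEFAULT = {"in_process": {}}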
def dagster_instance_config_schema():
    return {
        "local_artifact_storage": config_field_for_configurable_class(),
        "compute_logs": config_field_for_configurable_class(),
        "run_storage": config_field_for_configurable_class(),
        "event_log_storage": config_field_for_configurable_class(),
        "schedule_storage": config_field_for_configurable_class(),
        "scheduler": config_field_for_configurable_class(),
        "run_coordinator": config_field_for_configurable_class(),
        "run_launcher": config_field_for_configurable_class(),
        "telemetry": Field({"enabled": Field(Bool, is_required=False)}),
        "sensor_settings": Field({"interval_seconds": Field(int, is_required=False)}),
        "custom_instance_class": Field({"module": str, "class": str}, is_required=False),
    }
def define_storage_field(storage_selector, storage_names, defaults):
    """Define the storage field. The field is optional when only the default storage
    options are available; otherwise, default to the first storage option if its
    config is fully optional."""
    # If no custom storage options have been provided,
    # then users do not need to provide any configuration.
    if set(storage_names) == defaults:
        return Field(storage_selector, is_required=False)
    else:
        default_storage = FIELD_NO_DEFAULT_PROVIDED
        if len(storage_names) > 0:
            def_key = list(storage_names)[0]
            possible_default = storage_selector.fields[def_key]
            if all_optional_type(possible_default.config_type):
                default_storage = {def_key: {}}
        return Field(storage_selector, default_value=default_storage)
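# Illustrative sketch (not from the source): a storage selector value as a user
# would supply it in run config; exactly one storage name may be chosen. The
# "filesystem" name and base_dir value are assumptions for illustration.
EXAMPLE_STORAGE_CONFIG = {"filesystem": {"config": {"base_dir": "/tmp/dagster"}}}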
def dagster_instance_config_schema():
    return {
        "local_artifact_storage": config_field_for_configurable_class(),
        "compute_logs": config_field_for_configurable_class(),
        "run_storage": config_field_for_configurable_class(),
        "event_log_storage": config_field_for_configurable_class(),
        "schedule_storage": config_field_for_configurable_class(),
        "scheduler": config_field_for_configurable_class(),
        "run_coordinator": config_field_for_configurable_class(),
        "run_launcher": config_field_for_configurable_class(),
        "telemetry": Field({"enabled": Field(Bool, is_required=False)}),
        "custom_instance_class": config_field_for_configurable_class(),
        "backfill": Field({"daemon_enabled": Field(Bool, is_required=False)}),
    }
def _config_mapping_with_default_value(
    inner_schema: ConfigType,
    default_config: Dict[str, Any],
    job_name: str,
    graph_name: str,
) -> ConfigMapping:
    if not isinstance(inner_schema, Shape):
        check.failed("Only Shape (dictionary) config_schema allowed on Job ConfigMapping")

    def config_fn(x):
        return x

    updated_fields = {}
    field_aliases = inner_schema.field_aliases
    for name, field in inner_schema.fields.items():
        if name in default_config:
            updated_fields[name] = Field(
                config=field.config_type,
                default_value=default_config[name],
                description=field.description,
            )
        elif name in field_aliases and field_aliases[name] in default_config:
            updated_fields[name] = Field(
                config=field.config_type,
                default_value=default_config[field_aliases[name]],
                description=field.description,
            )
        else:
            updated_fields[name] = field

    config_schema = Shape(
        fields=updated_fields,
        description="run config schema with default values from default_config",
        field_aliases=inner_schema.field_aliases,
    )

    config_evr = validate_config(config_schema, default_config)
    if not config_evr.success:
        raise DagsterInvalidConfigError(
            f"Error in config when building job '{job_name}' from graph '{graph_name}'",
            config_evr.errors,
            default_config,
        )

    return ConfigMapping(
        config_fn=config_fn,
        config_schema=config_schema,
        receive_processed_config_values=False,
    )
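# Illustrative sketch (not from the source): validate_config, as used above,
# returns an evaluation result with .success and .errors. Assumes it is
# importable from dagster.config.validate in this version of the codebase; the
# schema and values are hypothetical.
from dagster import Field, Shape
from dagster.config.validate import validate_config

schema = Shape({"threshold": Field(int, default_value=10)})
result = validate_config(schema, {"threshold": 5})
assert result.success
bad = validate_config(schema, {"threshold": "five"})
assert not bad.success  # bad.errors describes the type mismatch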
def define_environment_cls(creation_data):
    check.inst_param(creation_data, "creation_data", EnvironmentClassCreationData)

    intermediate_storage_field = define_storage_field(
        selector_for_named_defs(creation_data.mode_definition.intermediate_storage_defs),
        storage_names=[dfn.name for dfn in creation_data.mode_definition.intermediate_storage_defs],
        defaults=set([storage.name for storage in default_intermediate_storage_defs]),
    )
    # TODO: remove "storage" entry in run_config as part of system storage removal
    # currently we treat "storage" as an alias to "intermediate_storage" and storage field is optional
    # tracking https://github.com/dagster-io/dagster/issues/3280
    storage_field = Field(
        selector_for_named_defs(creation_data.mode_definition.intermediate_storage_defs),
        is_required=False,
    )

    return Shape(
        fields=remove_none_entries(
            {
                "solids": Field(
                    define_solid_dictionary_cls(
                        solids=creation_data.solids,
                        ignored_solids=creation_data.ignored_solids,
                        dependency_structure=creation_data.dependency_structure,
                        resource_defs=creation_data.mode_definition.resource_defs,
                    )
                ),
                "storage": storage_field,
                "intermediate_storage": intermediate_storage_field,
                "execution": Field(
                    selector_for_named_defs(creation_data.mode_definition.executor_defs),
                    is_required=False,
                ),
                "loggers": Field(define_logger_dictionary_cls(creation_data)),
                "resources": Field(
                    define_resource_dictionary_cls(creation_data.mode_definition.resource_defs)
                ),
            }
        ),
    )
def _maybe_include_executable_path(config_dict, include_executable_path):
    return merge_dicts(
        config_dict,
        ({"executable_path": Field(StringSource, is_required=False)} if include_executable_path else {}),
    )
def solid_config_field(fields: Dict[str, Optional[Field]], ignored: bool) -> Optional[Field]:
    trimmed_fields = remove_none_entries(fields)
    if trimmed_fields:
        if ignored:
            return Field(
                Shape(trimmed_fields),
                is_required=False,
                description="This solid is not present in the current solid selection; "
                "the config values are allowed but ignored.",
            )
        else:
            return Field(Shape(trimmed_fields))
    else:
        return None
def get_type_loader_input_field(solid: Node, input_name: str, input_def: InputDefinition) -> Field:
    return Field(
        input_def.dagster_type.loader.schema_type,
        is_required=(
            not solid.definition.input_has_default(input_name) and not input_def.root_manager_key
        ),
    )
def _get_host_mode_executor(recon_pipeline, run_config, executor_defs, instance):
    execution_config = run_config.get("execution", {})
    execution_config_type = Field(
        selector_for_named_defs(executor_defs),
        default_value={executor_defs[0].name: {}},
    ).config_type

    config_evr = process_config(execution_config_type, execution_config)
    if not config_evr.success:
        raise DagsterInvalidConfigError(
            "Error processing execution config {}".format(execution_config),
            config_evr.errors,
            execution_config,
        )

    execution_config_value = config_evr.value

    executor_name, executor_config = ensure_single_item(execution_config_value)

    executor_defs_by_name = {executor_def.name: executor_def for executor_def in executor_defs}
    executor_def = executor_defs_by_name[executor_name]

    init_context = InitExecutorContext(
        job=recon_pipeline,
        executor_def=executor_def,
        executor_config=executor_config["config"],
        instance=instance,
    )
    check_cross_process_constraints(init_context)
    return executor_def.executor_creation_fn(init_context)
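# Illustrative sketch (not from the source): the "execution" entry is a
# selector, so exactly one executor name may be chosen; when the key is absent,
# the first executor definition is used with empty config. The executor name
# and its config here are assumptions for illustration.
EXAMPLE_HOST_MODE_RUN_CONFIG = {
    "execution": {"multiprocess": {"config": {"max_concurrent": 4}}}
}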
def def_config_field(configurable_def: ConfigurableDefinition, is_required: Optional[bool] = None) -> Field:
    return Field(
        Shape({"config": configurable_def.config_field} if configurable_def.has_config_field else {}),
        is_required=is_required,
    )
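# Illustrative sketch (not from the source): the wrapper above is why resource,
# logger, and executor config always sits under a nested "config" key in run
# config. The connection string value is hypothetical.
EXAMPLE_RESOURCE_CONFIG = {"config": {"connection_string": "postgresql://localhost/dagster"}}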
def get_inputs_field(solid, handle, dependency_structure, resource_defs):
    check.inst_param(solid, "solid", Solid)
    check.inst_param(handle, "handle", SolidHandle)
    check.inst_param(dependency_structure, "dependency_structure", DependencyStructure)

    inputs_field_fields = {}
    for name, inp in solid.definition.input_dict.items():
        inp_handle = SolidInputHandle(solid, inp)
        has_upstream = input_has_upstream(dependency_structure, inp_handle, solid, name)
        if inp.root_manager_key and not has_upstream:
            input_field = get_input_manager_input_field(solid, inp, resource_defs)
        elif inp.dagster_type.loader and not has_upstream:
            input_field = get_type_loader_input_field(solid, name, inp)
        else:
            input_field = None

        if input_field:
            inputs_field_fields[name] = input_field

    if not inputs_field_fields:
        return None

    return Field(Shape(inputs_field_fields))