Example #1
0
def get_inputs_field(solid, handle, dependency_structure):
    """Build the config field for the inputs of ``solid`` that must come from config.

    An input gets an entry when its dagster type has a loader,
    ``dependency_structure.has_deps`` is false for its input handle, and
    ``solid.container_maps_input`` is false for its name. The entry is
    required unless the solid definition reports a default for that input.

    Returns:
        A ``Field`` wrapping a ``Shape`` keyed by input name, or ``None`` when
        the definition has no configurable inputs or no input qualifies.
    """
    check.inst_param(solid, "solid", Solid)
    check.inst_param(handle, "handle", SolidHandle)
    check.inst_param(dependency_structure, "dependency_structure",
                     DependencyStructure)

    definition = solid.definition
    if not definition.has_configurable_inputs:
        return None

    config_fields = {}
    for input_name, input_def in definition.input_dict.items():
        loader = input_def.dagster_type.loader
        if not loader:
            continue

        # An input satisfied by a dependency (or mapped by the container)
        # does not need to be provided via config.
        input_handle = SolidInputHandle(solid, input_def)
        if dependency_structure.has_deps(input_handle):
            continue
        if solid.container_maps_input(input_name):
            continue

        config_fields[input_name] = Field(
            loader.schema_type,
            is_required=not definition.input_has_default(input_name),
        )

    return Field(Shape(config_fields)) if config_fields else None
Example #2
0
def get_inputs_field(
    solid: Node,
    dependency_structure: DependencyStructure,
    resource_defs: Dict[str, ResourceDefinition],
    solid_ignored: bool,
):
    """Build the config field describing the inputs of ``solid``.

    For each input for which ``input_has_upstream`` is false, the field comes
    from the input manager (when the input has a ``root_manager_key``) or from
    the dagster type loader (when one exists). Falsy fields are skipped.

    Returns:
        ``None`` when no input contributes a field. Otherwise a ``Field`` over
        a ``Shape`` of the collected fields; when ``solid_ignored`` is true the
        field is optional and carries an explanatory description.
    """
    config_fields = {}
    for input_name, input_def in solid.definition.input_dict.items():
        input_handle = SolidInputHandle(solid, input_def)
        # Inputs with an upstream never receive a config entry.
        if input_has_upstream(dependency_structure, input_handle, solid,
                              input_name):
            continue

        if input_def.root_manager_key:
            field = get_input_manager_input_field(solid, input_def,
                                                  resource_defs)
        elif input_def.dagster_type.loader:
            field = get_type_loader_input_field(solid, input_name, input_def)
        else:
            field = None

        if field:
            config_fields[input_name] = field

    if not config_fields:
        return None

    inputs_shape = Shape(config_fields)
    if not solid_ignored:
        return Field(inputs_shape)

    return Field(
        inputs_shape,
        is_required=False,
        description=
        "This solid is not present in the current solid selection, "
        "the input config values are allowed but ignored.",
    )
Example #3
0
def _get_target_config():
    """Return the config schema for the ``python_file``, ``python_module`` and ``python_package`` targets.

    Each target is a ``ScalarUnion`` accepting either a plain string or a
    dictionary with one mandatory ``StringSource`` key plus optional
    ``StringSource`` keys.
    """

    def optional_source():
        # A fresh Field per key, exactly as if each were written inline.
        return Field(StringSource, is_required=False)

    python_file = ScalarUnion(
        scalar_type=str,
        non_scalar_schema={
            "relative_path": StringSource,
            "attribute": optional_source(),
            "location_name": optional_source(),
            "working_directory": optional_source(),
            "executable_path": optional_source(),
        },
    )
    python_module = ScalarUnion(
        scalar_type=str,
        non_scalar_schema={
            "module_name": StringSource,
            "attribute": optional_source(),
            "location_name": optional_source(),
            "executable_path": optional_source(),
        },
    )
    python_package = ScalarUnion(
        scalar_type=str,
        non_scalar_schema={
            "package_name": StringSource,
            "attribute": optional_source(),
            "location_name": optional_source(),
            "executable_path": optional_source(),
        },
    )

    return {
        "python_file": python_file,
        "python_module": python_module,
        "python_package": python_package,
    }
Example #4
0
def config_field_for_configurable_class():
    """Return an optional ``Field`` with ``module``, ``class`` and a ``Permissive`` ``config`` entry."""
    schema = {
        "module": str,
        "class": str,
        "config": Field(Permissive()),
    }
    return Field(schema, is_required=False)
Example #5
0
 def config_type(cls):
     """Return the config schema: three optional entries keyed by setting name.

     ``max_concurrent_runs`` and ``dequeue_interval_seconds`` are optional
     ``IntSource`` fields. ``tag_concurrency_limits`` is an optional,
     ``Noneable`` array of shapes with ``key``, optional ``value`` and
     ``limit`` entries.
     """
     max_concurrent_runs = Field(config=IntSource, is_required=False)

     # ``value`` is either a plain string or a shape with
     # ``applyLimitPerUniqueValue``.
     limit_value = Field(
         ScalarUnion(
             scalar_type=String,
             non_scalar_schema=Shape({"applyLimitPerUniqueValue": Bool}),
         ),
         is_required=False,
     )
     limit_entry = Shape({
         "key": String,
         "value": limit_value,
         "limit": Field(int),
     })
     tag_concurrency_limits = Field(
         config=Noneable(Array(limit_entry)),
         is_required=False,
     )

     dequeue_interval_seconds = Field(config=IntSource, is_required=False)

     return {
         "max_concurrent_runs": max_concurrent_runs,
         "tag_concurrency_limits": tag_concurrency_limits,
         "dequeue_interval_seconds": dequeue_interval_seconds,
     }
Example #6
0
def define_run_config_schema_type(
        creation_data: RunConfigSchemaCreationData) -> ConfigType:
    """Assemble the top-level run config ``Shape`` from ``creation_data``.

    The shape holds ``execution``, ``loggers``, ``resources`` and ``inputs``
    entries plus a per-node entry. The per-node entry is keyed ``ops`` (alias
    ``solids``) when ``is_using_graph_job_op_apis`` is true, and ``solids``
    (alias ``ops``) otherwise. Entries whose value is ``None`` are dropped
    via ``remove_none_entries``.
    """
    mode_def = creation_data.mode_definition
    graph_def = creation_data.graph_def
    uses_op_apis = creation_data.is_using_graph_job_op_apis

    if uses_op_apis:
        execution_field = define_single_execution_field(
            mode_def.executor_defs[0])
    else:
        execution_field = define_execution_field(mode_def.executor_defs)

    # The graph itself is wrapped in a Node so its inputs can be described.
    top_level_node = Node(
        name=graph_def.name,
        definition=graph_def,
        graph_definition=graph_def,
    )

    loggers_field = Field(define_logger_dictionary_cls(creation_data))
    resources_field = Field(
        define_resource_dictionary_cls(
            mode_def.resource_defs,
            creation_data.required_resources,
        ))
    inputs_field = get_inputs_field(
        solid=top_level_node,
        dependency_structure=creation_data.dependency_structure,
        resource_defs=mode_def.resource_defs,
        solid_ignored=False,
    )

    fields = {
        "execution": execution_field,
        "loggers": loggers_field,
        "resources": resources_field,
        "inputs": inputs_field,
    }

    if graph_def.has_config_mapping:
        mapped_schema = cast(IDefinitionConfigSchema, graph_def.config_schema)
        nodes_field = Field({"config": mapped_schema.as_field()})
    else:
        nodes_field = Field(
            define_solid_dictionary_cls(
                solids=creation_data.solids,
                ignored_solids=creation_data.ignored_solids,
                dependency_structure=creation_data.dependency_structure,
                resource_defs=mode_def.resource_defs,
                is_using_graph_job_op_apis=uses_op_apis,
            ))

    if uses_op_apis:
        nodes_key, alias_key = "ops", "solids"
    else:
        nodes_key, alias_key = "solids", "ops"
    fields[nodes_key] = nodes_field

    return Shape(
        fields=remove_none_entries(fields),
        field_aliases={nodes_key: alias_key},
    )
Example #7
0
def get_outputs_field(solid, handle, resource_defs):
    """Build the config field for the outputs of ``solid``.

    Output-manager fields take precedence: if any output yields one, the
    result is a required ``Field`` over a ``Shape`` of those fields.
    Otherwise type output fields are used and the result is an optional
    ``Field`` over an ``Array`` of a ``Shape``. Returns ``None`` when neither
    source yields a field.
    """
    check.inst_param(solid, "solid", Solid)
    check.inst_param(handle, "handle", SolidHandle)
    check.dict_param(resource_defs,
                     "resource_defs",
                     key_type=str,
                     value_type=ResourceDefinition)

    output_defs = solid.definition.output_dict

    # Configurable output managers win over all type materializers.
    manager_fields = {}
    for output_name, output_def in output_defs.items():
        manager_field = get_output_manager_output_field(
            solid, output_def, resource_defs)
        if manager_field:
            manager_fields[output_name] = manager_field
    if manager_fields:
        return Field(Shape(manager_fields))

    # Fall back to the type materializers.
    materializer_fields = {}
    for output_name, output_def in output_defs.items():
        materializer_field = get_type_output_field(output_def)
        if materializer_field:
            materializer_fields[output_name] = materializer_field
    if not materializer_fields:
        return None

    return Field(Array(Shape(materializer_fields)), is_required=False)
Example #8
0
def get_outputs_field(
    solid: Node,
    resource_defs: Dict[str, ResourceDefinition],
) -> Optional[Field]:
    """Build the config field for the outputs of ``solid``.

    Returns:
        A required ``Field`` over a ``Shape`` of output-manager fields when
        any output yields one. Otherwise an optional ``Field`` over an
        ``Array`` of a ``Shape`` of type output fields. ``None`` when
        neither source yields a field.
    """
    named_outputs = list(solid.definition.output_dict.items())

    # First pass: configurable output managers. If any exist they define the
    # schema and the type materializers are ignored.
    from_managers = {}
    for output_name, output_def in named_outputs:
        candidate = get_output_manager_output_field(solid, output_def,
                                                    resource_defs)
        if candidate:
            from_managers[output_name] = candidate

    if from_managers:
        return Field(Shape(from_managers))

    # Second pass: type materializers.
    from_types = {}
    for output_name, output_def in named_outputs:
        candidate = get_type_output_field(output_def)
        if candidate:
            from_types[output_name] = candidate

    if from_types:
        return Field(Array(Shape(from_types)), is_required=False)
    return None
Example #9
0
def config_field_for_configurable_class():
    """Return a ``Field`` (``is_optional=True``) with ``module``, ``class`` and a ``Permissive`` ``config`` entry."""
    class_schema = {
        'module': str,
        'class': str,
        'config': Field(Permissive()),
    }
    return Field(class_schema, is_optional=True)
Example #10
0
def define_dagster_config_cls():
    """Return the config schema dictionary.

    Seven entries share the configurable-class field. The ``dagit`` entry is
    an optional field holding an optional ``execution_manager`` with an
    integer ``max_concurrent_runs``.
    """
    configurable_keys = (
        'local_artifact_storage',
        'compute_logs',
        'run_storage',
        'event_log_storage',
        'schedule_storage',
        'scheduler',
        'run_launcher',
    )
    # One fresh field per key, in the listed order.
    schema = {
        key: config_field_for_configurable_class()
        for key in configurable_keys
    }

    execution_manager = Field({'max_concurrent_runs': int}, is_required=False)
    schema['dagit'] = Field(
        {'execution_manager': execution_manager},
        is_required=False,
    )
    return schema
Example #11
0
    def config_type(cls):
        """Return the config schema with optional ``max_catchup_runs`` and ``max_tick_retries`` entries.

        Both are ``IntSource`` fields with a default value
        (``DEFAULT_MAX_CATCHUP_RUNS`` and ``0`` respectively).
        """
        max_catchup_runs = Field(
            IntSource,
            is_required=False,
            default_value=DEFAULT_MAX_CATCHUP_RUNS,
            description="""For partitioned schedules, controls the maximum number of past
            partitions for each schedule that will be considered when looking for missing
            runs . Generally this parameter will only come into play if the scheduler
            falls behind or launches after experiencing downtime. This parameter will not be checked for
            schedules without partition sets (for example, schedules created using the @schedule
            decorator) - only the most recent execution time will be considered for those schedules.

            Note that no matter what this value is, the scheduler will never launch a run from a time
            before the schedule was turned on (even if the start_date on the schedule is earlier) - if
            you want to launch runs for earlier partitions, launch a backfill.
            """,
        )
        max_tick_retries = Field(
            IntSource,
            default_value=0,
            is_required=False,
            description=
            "For each schedule tick that raises an error, how many times to retry that tick",
        )
        return {
            "max_catchup_runs": max_catchup_runs,
            "max_tick_retries": max_tick_retries,
        }
Example #12
0
def define_environment_cls(creation_data):
    """Assemble the environment config ``Shape`` from ``creation_data``.

    The shape has ``solids``, ``storage``, ``execution``, ``loggers`` and
    ``resources`` entries. ``storage`` and ``execution`` are optional. The
    field dictionary is passed through ``remove_none_entries``.
    """
    check.inst_param(creation_data, 'creation_data',
                     EnvironmentClassCreationData)

    mode_definition = creation_data.mode_definition

    solids_field = Field(
        define_solid_dictionary_cls(
            creation_data.solids,
            creation_data.dependency_structure,
        ))
    storage_field = Field(
        define_storage_config_cls(mode_definition),
        is_optional=True,
    )
    execution_field = Field(
        define_executor_config_cls(mode_definition),
        is_optional=True,
    )
    loggers_field = Field(define_logger_dictionary_cls(creation_data))
    resources_field = Field(
        define_resource_dictionary_cls(mode_definition.resource_defs))

    environment_fields = {
        'solids': solids_field,
        'storage': storage_field,
        'execution': execution_field,
        'loggers': loggers_field,
        'resources': resources_field,
    }
    return Shape(fields=remove_none_entries(environment_fields))
Example #13
0
def get_inputs_field(solid, handle, dependency_structure):
    """Build the config field for the inputs of ``solid`` that must come from config.

    An input gets an entry when its runtime type has an input hydration
    config, ``dependency_structure.has_deps`` is false for its input handle,
    and ``solid.container_maps_input`` is false for its name.

    Returns:
        A ``Field`` wrapping a ``Shape`` keyed by input name, or ``None`` when
        the definition has no configurable inputs or no input qualifies.
    """
    check.inst_param(solid, 'solid', Solid)
    check.inst_param(handle, 'handle', SolidHandle)
    check.inst_param(dependency_structure, 'dependency_structure', DependencyStructure)

    if not solid.definition.has_configurable_inputs:
        return None

    config_fields = {}
    for input_name, input_def in solid.definition.input_dict.items():
        hydration_config = input_def.runtime_type.input_hydration_config
        if not hydration_config:
            continue

        # Inputs fed by a dependency, or mapped by the container, are not
        # provided via config.
        input_handle = SolidInputHandle(solid, input_def)
        if dependency_structure.has_deps(input_handle):
            continue
        if solid.container_maps_input(input_name):
            continue

        config_fields[input_name] = Field(hydration_config.schema_type)

    if config_fields:
        return Field(Shape(config_fields))
    return None
Example #14
0
def config_field_for_configurable_class():
    """Return an optional ``Field`` with ``module``, ``class`` and a ``Permissive`` ``config`` entry."""
    inner_config = Field(Permissive())
    return Field(
        {'module': str, 'class': str, 'config': inner_config},
        is_required=False,
    )
Example #15
0
def define_environment_cls(creation_data):
    """Assemble the environment config ``Shape`` from ``creation_data``.

    The ``storage`` entry is only built when the intermediate storage field
    is neither required nor has a default provided. Otherwise it is ``None``
    and is removed by ``remove_none_entries``.
    """
    check.inst_param(creation_data, "creation_data",
                     EnvironmentClassCreationData)

    mode_def = creation_data.mode_definition

    intermediate_defs = mode_def.intermediate_storage_defs
    intermediate_storage_field = define_storage_field(
        selector_for_named_defs(intermediate_defs),
        storage_names=[dfn.name for dfn in intermediate_defs],
        defaults={storage.name for storage in default_intermediate_storage_defs},
    )

    storage_field = None
    if (not intermediate_storage_field.is_required
            and not intermediate_storage_field.default_provided):
        system_defs = mode_def.system_storage_defs
        storage_field = define_storage_field(
            selector_for_named_defs(system_defs),
            storage_names=[dfn.name for dfn in system_defs],
            defaults={storage.name for storage in default_system_storage_defs},
        )

    solids_field = Field(
        define_solid_dictionary_cls(
            solids=creation_data.solids,
            ignored_solids=creation_data.ignored_solids,
            dependency_structure=creation_data.dependency_structure,
        ))
    execution_field = Field(
        selector_for_named_defs(mode_def.executor_defs),
        is_required=False,
    )
    loggers_field = Field(define_logger_dictionary_cls(creation_data))
    resources_field = Field(
        define_resource_dictionary_cls(mode_def.resource_defs))

    environment_fields = {
        "solids": solids_field,
        "storage": storage_field,
        "intermediate_storage": intermediate_storage_field,
        "execution": execution_field,
        "loggers": loggers_field,
        "resources": resources_field,
    }
    return Shape(fields=remove_none_entries(environment_fields))
Example #16
0
    def __call__(self, fn: Callable) -> AssetsDefinition:
        """Wrap ``fn`` in an op and return the ``AssetsDefinition`` describing it.

        The asset name is ``self.name`` or, failing that, ``fn.__name__``.
        The single output is keyed by the namespace parts followed by the
        asset name, with falsy parts dropped.
        """
        asset_name = self.name or fn.__name__

        ins_by_input_names: Mapping[str, In] = build_asset_ins(
            fn, self.namespace, self.ins or {}, self.non_argument_deps)

        if self.partitions_def:

            def partition_fn(context):
                return [context.partition_key]

        else:
            partition_fn = None

        key_parts = [*(self.namespace or []), asset_name]
        out_asset_key = AssetKey([part for part in key_parts if part])

        out = Out(
            asset_key=out_asset_key,
            metadata=self.metadata or {},
            io_manager_key=self.io_manager_key,
            dagster_type=self.dagster_type,
            asset_partitions_def=self.partitions_def,
            asset_partitions=partition_fn,
        )

        assets_config_schema = {
            "assets": {
                "input_partitions": Field(dict, is_required=False),
                "output_partitions": Field(dict, is_required=False),
            }
        }
        op_decorator = _Op(
            name=asset_name,
            description=self.description,
            # convert Mapping object to dict
            ins=dict(ins_by_input_names.items()),
            out=out,
            required_resource_keys=self.required_resource_keys,
            tags={"kind": self.compute_kind} if self.compute_kind else None,
            config_schema=assets_config_schema,
        )
        op = op_decorator(fn)

        input_names_by_asset_key = {
            in_def.asset_key: input_name
            for input_name, in_def in ins_by_input_names.items()
        }

        if self.partition_mappings:
            partition_mappings = {
                ins_by_input_names[input_name].asset_key: mapping
                for input_name, mapping in self.partition_mappings.items()
            }
        else:
            partition_mappings = None

        return AssetsDefinition(
            input_names_by_asset_key=input_names_by_asset_key,
            output_names_by_asset_key={out_asset_key: "result"},
            op=op,
            partitions_def=self.partitions_def,
            partition_mappings=partition_mappings,
        )
Example #17
0
def solid_config_field(fields, ignored):
    """Wrap ``fields`` (minus ``None`` entries) in a ``Shape`` field.

    When ``ignored`` is true the field is optional and carries a description
    stating that the config is allowed but ignored.
    """
    shape = Shape(remove_none_entries(fields))
    if not ignored:
        return Field(shape)

    return Field(
        shape,
        is_required=False,
        description=
        "This solid is not present in the current solid selection, "
        "the config values are allowed but ignored.",
    )
Example #18
0
def dagster_instance_config_schema():
    """Return the instance config schema dictionary.

    Seven entries share the configurable-class field. ``telemetry`` and
    ``opt_in`` each hold a single optional boolean.
    """
    configurable_keys = (
        "local_artifact_storage",
        "compute_logs",
        "run_storage",
        "event_log_storage",
        "schedule_storage",
        "scheduler",
        "run_launcher",
    )
    # One fresh field per key, in the listed order.
    schema = {
        key: config_field_for_configurable_class()
        for key in configurable_keys
    }
    schema["telemetry"] = Field({"enabled": Field(Bool, is_required=False)})
    schema["opt_in"] = Field({"local_servers": Field(Bool, is_required=False)})
    return schema
Example #19
0
def define_execution_field(executor_defs: List[ExecutorDefinition]) -> Field:
    """Return the execution ``Field`` built from a selector over ``executor_defs``.

    When ``in_process_executor`` is among ``executor_defs`` the field
    defaults to that executor with an empty config. Otherwise no default
    is set.
    """
    has_in_process = any(
        executor_def == in_process_executor  # pylint: disable=comparison-with-callable
        for executor_def in executor_defs)

    selector = selector_for_named_defs(executor_defs)

    if not has_in_process:
        return Field(selector)
    return Field(selector, default_value={in_process_executor.name: {}})
Example #20
0
def dagster_instance_config_schema():
    """Return the instance config schema dictionary.

    Eight entries share the configurable-class field. ``telemetry`` holds an
    optional boolean, ``sensor_settings`` an optional integer, and
    ``custom_instance_class`` is an optional ``module``/``class`` pair.
    """
    configurable_keys = (
        "local_artifact_storage",
        "compute_logs",
        "run_storage",
        "event_log_storage",
        "schedule_storage",
        "scheduler",
        "run_coordinator",
        "run_launcher",
    )
    # One fresh field per key, in the listed order.
    schema = {
        key: config_field_for_configurable_class()
        for key in configurable_keys
    }
    schema["telemetry"] = Field({"enabled": Field(Bool, is_required=False)})
    schema["sensor_settings"] = Field(
        {"interval_seconds": Field(int, is_required=False)})
    schema["custom_instance_class"] = Field(
        {"module": str, "class": str},
        is_required=False,
    )
    return schema
Example #21
0
def define_storage_field(storage_selector, storage_names, defaults):
    """Return the storage ``Field`` for ``storage_selector``.

    When the set of ``storage_names`` equals ``defaults`` the field is
    optional. Otherwise its default value is the first storage name with an
    empty config, provided ``all_optional_type`` accepts that storage's
    config type. Failing that, the default is ``FIELD_NO_DEFAULT_PROVIDED``.
    """
    # If no custom storage options have been provided,
    # then users do not need to provide any configuration.
    if set(storage_names) == defaults:
        return Field(storage_selector, is_required=False)

    default_storage = FIELD_NO_DEFAULT_PROVIDED
    if len(storage_names) > 0:
        first_name = list(storage_names)[0]
        first_field = storage_selector.fields[first_name]
        if all_optional_type(first_field.config_type):
            default_storage = {first_name: {}}

    return Field(storage_selector, default_value=default_storage)
Example #22
0
def dagster_instance_config_schema():
    """Return the instance config schema dictionary.

    Nine entries, including ``custom_instance_class``, share the
    configurable-class field. ``telemetry`` and ``backfill`` each hold a
    single optional boolean.
    """
    leading_configurable_keys = (
        "local_artifact_storage",
        "compute_logs",
        "run_storage",
        "event_log_storage",
        "schedule_storage",
        "scheduler",
        "run_coordinator",
        "run_launcher",
    )
    # One fresh field per key, in the listed order.
    schema = {
        key: config_field_for_configurable_class()
        for key in leading_configurable_keys
    }
    schema["telemetry"] = Field({"enabled": Field(Bool, is_required=False)})
    schema["custom_instance_class"] = config_field_for_configurable_class()
    schema["backfill"] = Field(
        {"daemon_enabled": Field(Bool, is_required=False)})
    return schema
Example #23
0
def _config_mapping_with_default_value(
    inner_schema: ConfigType,
    default_config: Dict[str, Any],
    job_name: str,
    graph_name: str,
) -> ConfigMapping:
    """Build a pass-through ``ConfigMapping`` whose schema carries defaults from ``default_config``.

    Each field of ``inner_schema`` whose name, or whose alias, appears in
    ``default_config`` is rebuilt with that value as its default. All other
    fields are reused as-is. ``default_config`` is then validated against the
    resulting schema.

    Raises:
        DagsterInvalidConfigError: if ``default_config`` fails validation.
    """
    if not isinstance(inner_schema, Shape):
        check.failed(
            "Only Shape (dictionary) config_schema allowed on Job ConfigMapping"
        )

    def config_fn(x):
        return x

    aliases = inner_schema.field_aliases
    updated_fields = {}
    for name, field in inner_schema.fields.items():
        # Find the default_config key supplying this field's default: the
        # field name itself first, then its alias.
        if name in default_config:
            default_key = name
        elif name in aliases and aliases[name] in default_config:
            default_key = aliases[name]
        else:
            updated_fields[name] = field
            continue

        updated_fields[name] = Field(
            config=field.config_type,
            default_value=default_config[default_key],
            description=field.description,
        )

    config_schema = Shape(
        fields=updated_fields,
        description="run config schema with default values from default_config",
        field_aliases=inner_schema.field_aliases,
    )

    validation = validate_config(config_schema, default_config)
    if not validation.success:
        raise DagsterInvalidConfigError(
            f"Error in config when building job '{job_name}' from graph '{graph_name}' ",
            validation.errors,
            default_config,
        )

    return ConfigMapping(
        config_fn=config_fn,
        config_schema=config_schema,
        receive_processed_config_values=False,
    )
Example #24
0
def define_environment_cls(creation_data):
    """Assemble the environment config ``Shape`` from ``creation_data``.

    Both ``storage`` and ``intermediate_storage`` are built from the mode's
    intermediate storage definitions. ``storage`` is always optional.
    """
    check.inst_param(creation_data, "creation_data",
                     EnvironmentClassCreationData)

    mode_def = creation_data.mode_definition

    intermediate_storage_field = define_storage_field(
        selector_for_named_defs(mode_def.intermediate_storage_defs),
        storage_names=[
            dfn.name for dfn in mode_def.intermediate_storage_defs
        ],
        defaults={storage.name for storage in default_intermediate_storage_defs},
    )
    # TODO: remove "storage" entry in run_config as part of system storage removal
    # currently we treat "storage" as an alias to "intermediate_storage" and storage field is optional
    # tracking https://github.com/dagster-io/dagster/issues/3280
    storage_field = Field(
        selector_for_named_defs(mode_def.intermediate_storage_defs),
        is_required=False,
    )

    solids_field = Field(
        define_solid_dictionary_cls(
            solids=creation_data.solids,
            ignored_solids=creation_data.ignored_solids,
            dependency_structure=creation_data.dependency_structure,
            resource_defs=mode_def.resource_defs,
        ))
    execution_field = Field(
        selector_for_named_defs(mode_def.executor_defs),
        is_required=False,
    )
    loggers_field = Field(define_logger_dictionary_cls(creation_data))
    resources_field = Field(
        define_resource_dictionary_cls(mode_def.resource_defs))

    environment_fields = {
        "solids": solids_field,
        "storage": storage_field,
        "intermediate_storage": intermediate_storage_field,
        "execution": execution_field,
        "loggers": loggers_field,
        "resources": resources_field,
    }
    return Shape(fields=remove_none_entries(environment_fields))
Example #25
0
def _maybe_include_executable_path(config_dict, include_executable_path):
    """Merge an optional ``executable_path`` entry into ``config_dict`` when requested.

    Returns the result of ``merge_dicts``. When ``include_executable_path``
    is falsy, ``config_dict`` is merged with an empty dictionary.
    """
    extra_fields = {}
    if include_executable_path:
        extra_fields["executable_path"] = Field(StringSource,
                                                is_required=False)
    return merge_dicts(config_dict, extra_fields)
Example #26
0
def solid_config_field(fields: Dict[str, Optional[Field]],
                       ignored: bool) -> Optional[Field]:
    """Wrap the non-``None`` entries of ``fields`` in a ``Shape`` field.

    Returns:
        ``None`` when nothing is left after dropping ``None`` entries.
        Otherwise a ``Field`` over the ``Shape``; when ``ignored`` is true it
        is optional and carries a description stating that the config is
        allowed but ignored.
    """
    trimmed_fields = remove_none_entries(fields)
    if not trimmed_fields:
        return None

    shape = Shape(trimmed_fields)
    if not ignored:
        return Field(shape)

    return Field(
        shape,
        is_required=False,
        description=
        "This solid is not present in the current solid selection, "
        "the config values are allowed but ignored.",
    )
Example #27
0
def get_type_loader_input_field(solid: Node, input_name: str,
                                input_def: InputDefinition) -> Field:
    """Return the config ``Field`` for an input, using its dagster type loader's schema type.

    The field is required only when the solid definition reports no default
    for ``input_name`` and the input definition has no ``root_manager_key``.
    """
    schema_type = input_def.dagster_type.loader.schema_type
    required = (not solid.definition.input_has_default(input_name)
                and not input_def.root_manager_key)
    return Field(schema_type, is_required=required)
Example #28
0
def _get_host_mode_executor(recon_pipeline, run_config, executor_defs,
                            instance):
    """Create the executor selected by the ``execution`` entry of ``run_config``.

    The entry (an empty dict when absent) is processed against a selector
    over ``executor_defs`` whose default is the first definition with an
    empty config. The chosen definition's ``executor_creation_fn`` is then
    called with an ``InitExecutorContext``.

    Raises:
        DagsterInvalidConfigError: if the execution config fails processing.
    """
    execution_config = run_config.get("execution", {})

    execution_field = Field(
        selector_for_named_defs(executor_defs),
        default_value={executor_defs[0].name: {}},
    )
    processed = process_config(execution_field.config_type, execution_config)
    if not processed.success:
        raise DagsterInvalidConfigError(
            "Error processing execution config {}".format(execution_config),
            processed.errors,
            execution_config,
        )

    # The processed value holds exactly one executor name -> config pair.
    executor_name, executor_config = ensure_single_item(processed.value)

    defs_by_name = {
        candidate.name: candidate
        for candidate in executor_defs
    }
    executor_def = defs_by_name[executor_name]

    init_context = InitExecutorContext(
        job=recon_pipeline,
        executor_def=executor_def,
        executor_config=executor_config["config"],
        instance=instance,
    )
    check_cross_process_constraints(init_context)
    return executor_def.executor_creation_fn(init_context)
Example #29
0
def def_config_field(configurable_def: ConfigurableDefinition,
                     is_required: bool = None) -> Field:
    """Return a ``Field`` over a ``Shape`` for ``configurable_def``.

    The shape has a single ``config`` entry when the definition has a config
    field and is empty otherwise. ``is_required`` is forwarded unchanged.
    """
    if configurable_def.has_config_field:
        shape_fields = {"config": configurable_def.config_field}
    else:
        shape_fields = {}
    return Field(Shape(shape_fields), is_required=is_required)
Example #30
0
def get_inputs_field(solid, handle, dependency_structure, resource_defs):
    """Build the config field describing the inputs of ``solid``.

    For each input for which ``input_has_upstream`` is false, the field comes
    from the input manager (when the input has a ``root_manager_key``) or from
    the dagster type loader (when one exists). Falsy fields are skipped.

    Returns:
        A ``Field`` over a ``Shape`` keyed by input name, or ``None`` when no
        input contributes a field.
    """
    check.inst_param(solid, "solid", Solid)
    check.inst_param(handle, "handle", SolidHandle)
    check.inst_param(dependency_structure, "dependency_structure",
                     DependencyStructure)

    config_fields = {}
    for input_name, input_def in solid.definition.input_dict.items():
        input_handle = SolidInputHandle(solid, input_def)
        # Inputs with an upstream never receive a config entry.
        if input_has_upstream(dependency_structure, input_handle, solid,
                              input_name):
            continue

        if input_def.root_manager_key:
            field = get_input_manager_input_field(solid, input_def,
                                                  resource_defs)
        elif input_def.dagster_type.loader:
            field = get_type_loader_input_field(solid, input_name, input_def)
        else:
            field = None

        if field:
            config_fields[input_name] = field

    return Field(Shape(config_fields)) if config_fields else None