def test_intermediate_storage_def_to_io_manager_def():
    called = {}

    @intermediate_storage()
    def no_config_intermediate_storage(init_context):
        called["ran"] = True
        object_store = InMemoryObjectStore()
        return build_intermediate_storage_from_object_store(
            object_store=object_store, init_context=init_context
        )

    @solid
    def return_one(_):
        return 1

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "io_manager": io_manager_from_intermediate_storage(
                        no_config_intermediate_storage
                    )
                }
            )
        ]
    )
    def foo():
        return_one()

    assert execute_pipeline(foo).success
def _get_config_schema(
    self,
    resource_defs: Optional[Dict[str, ResourceDefinition]],
    executor_def: "ExecutorDefinition",
) -> ConfigType:
    from .pipeline import PipelineDefinition

    return (
        PipelineDefinition(
            name=self.name,
            graph_def=self,
            mode_defs=[
                ModeDefinition(resource_defs=resource_defs, executor_defs=[executor_def])
            ],
        )
        .get_run_config_schema("default")
        .run_config_schema_type
    )
def _get_config_schema(
    self,
    resource_defs: Optional[Dict[str, ResourceDefinition]],
    executor_def: "ExecutorDefinition",
    logger_defs: Optional[Dict[str, LoggerDefinition]],
) -> ConfigType:
    from .job_definition import JobDefinition

    return (
        JobDefinition(
            name=self.name,
            graph_def=self,
            mode_def=ModeDefinition(
                resource_defs=resource_defs,
                executor_defs=[executor_def],
                logger_defs=logger_defs,
            ),
        )
        .get_run_config_schema("default")
        .run_config_schema_type
    )
from dagster.core.storage.fs_io_manager import fs_io_manager
from dagster.core.test_utils import create_run_for_test, instance_for_test
from dagster.grpc.types import ExecuteStepArgs
from dagster_k8s.executor import K8sStepHandler, k8s_job_executor
from dagster_k8s.job import DagsterK8sJobConfig, UserDefinedDagsterK8sConfig


@solid
def foo():
    return 1


@pipeline(
    mode_defs=[
        ModeDefinition(
            executor_defs=[k8s_job_executor], resource_defs={"io_manager": fs_io_manager}
        )
    ]
)
def bar():
    foo()


def test_requires_k8s_launcher_fail():
    with instance_for_test() as instance:
        with pytest.raises(
            DagsterUnmetExecutorRequirementsError,
            match="This engine is only compatible with a K8sRunLauncher",
        ):
            execute_pipeline(reconstructable(bar), instance=instance)
)


@solid
def bar_solid(_):
    return "bar"


@solid(tags={"foo": "bar"})
def baz_solid(_, bar):
    return bar * 2


@pipeline(
    mode_defs=[
        ModeDefinition(
            executor_defs=[test_step_delegating_executor],
            resource_defs={"io_manager": fs_io_manager},
        )
    ]
)
def foo_pipline():
    baz_solid(bar_solid())
    bar_solid()


def test_execute():
    TestStepHandler.reset()
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(foo_pipline),
            instance=instance,
            run_config={
                "execution": {
    materialize,
    get_materialization_lock,
    get_materialize_sensor_lock,
    lock_materialization_process,
    release_materialization_process,
)


@discord_message_on_failure
@discord_message_on_success
@pipeline(
    mode_defs=[
        ModeDefinition(
            "dev",
            resource_defs={
                "discord_webhook": discord_webhook,
                "timezone_config": timezone_config,
            },
        ),
    ],
    tags={
        "pipeline": "update_managed_materialized_views",
        "dagster-k8s/config": {
            "container_config": {
                "resources": {
                    "requests": {"cpu": "50m", "memory": "100Mi"},
                    "limits": {
                        "cpu": "500m",
def execute_in_process(
    self,
    run_config: Any = None,
    instance: Optional["DagsterInstance"] = None,
    resources: Optional[Dict[str, Any]] = None,
    raise_on_error: bool = True,
    op_selection: Optional[List[str]] = None,
) -> "ExecuteInProcessResult":
    """
    Execute this graph in-process, collecting results in-memory.

    Args:
        run_config (Optional[Dict[str, Any]]):
            Run config to provide to execution. The configuration for the underlying graph
            should exist under the "ops" key.
        instance (Optional[DagsterInstance]):
            The instance to execute against. An ephemeral one will be used if none is provided.
        resources (Optional[Dict[str, Any]]):
            The resources needed for execution, if any are required. Either resource instances
            or resource definitions may be provided.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``True``.
        op_selection (Optional[List[str]]): A list of op selection queries (including single op
            names) to execute. For example:

            * ``['some_op']``: selects ``some_op`` itself.
            * ``['*some_op']``: select ``some_op`` and all its ancestors (upstream dependencies).
            * ``['*some_op+++']``: select ``some_op``, all its ancestors, and its descendants
              (downstream dependencies) within 3 levels down.
            * ``['*some_op', 'other_op_a', 'other_op_b+']``: select ``some_op`` and all its
              ancestors, ``other_op_a`` itself, and ``other_op_b`` and its direct child ops.

    Returns:
        :py:class:`~dagster.ExecuteInProcessResult`
    """
    from dagster.core.execution.build_resources import wrap_resources_for_execution
    from dagster.core.execution.execute_in_process import core_execute_in_process
    from dagster.core.instance import DagsterInstance

    from .job_definition import JobDefinition
    from .executor_definition import execute_in_process_executor

    instance = check.opt_inst_param(instance, "instance", DagsterInstance)
    resources = check.opt_dict_param(resources, "resources", key_type=str)

    resource_defs = wrap_resources_for_execution(resources)
    in_proc_mode = ModeDefinition(
        executor_defs=[execute_in_process_executor], resource_defs=resource_defs
    )
    ephemeral_job = JobDefinition(
        name=self._name, graph_def=self, mode_def=in_proc_mode
    ).get_job_def_for_op_selection(op_selection)

    run_config = run_config if run_config is not None else {}
    op_selection = check.opt_list_param(op_selection, "op_selection", str)

    return core_execute_in_process(
        node=self,
        ephemeral_pipeline=ephemeral_job,
        run_config=run_config,
        instance=instance,
        output_capturing_enabled=True,
        raise_on_error=raise_on_error,
    )
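# A minimal usage sketch of execute_in_process as documented above. The graph and op names
# ("example_graph", "emit_value", "add_one") are hypothetical and exist only for illustration;
# the call pattern (op_selection queries, config under the "ops" key) follows the docstring.
from dagster import graph, op


@op
def emit_value():
    return 1


@op
def add_one(value):
    return value + 1


@graph
def example_graph():
    add_one(emit_value())


def example_execute_in_process():
    # Select emit_value plus its direct downstream ops; any run_config supplied here would
    # live under the "ops" key.
    result = example_graph.execute_in_process(op_selection=["emit_value+"])
    assert result.success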
def to_job(
    self,
    name: Optional[str] = None,
    description: Optional[str] = None,
    resource_defs: Optional[Dict[str, ResourceDefinition]] = None,
    config: Union[ConfigMapping, Dict[str, Any], "PartitionedConfig"] = None,
    tags: Optional[Dict[str, Any]] = None,
    logger_defs: Optional[Dict[str, LoggerDefinition]] = None,
    executor_def: Optional["ExecutorDefinition"] = None,
    hooks: Optional[AbstractSet[HookDefinition]] = None,
    op_retry_policy: Optional[RetryPolicy] = None,
    version_strategy: Optional[VersionStrategy] = None,
    op_selection: Optional[List[str]] = None,
    partitions_def: Optional["PartitionsDefinition"] = None,
) -> "JobDefinition":
    """
    Make this graph into an executable Job by providing the remaining components required for
    execution.

    Args:
        name (Optional[str]): The name for the Job. Defaults to the name of this graph.
        resource_defs (Optional[Dict[str, ResourceDefinition]]):
            Resources that are required by this graph for execution.
            If not defined, `io_manager` will default to filesystem.
        config: Describes how the job is parameterized at runtime.
            If no value is provided, then the schema for the job's run config is a standard
            format based on its solids and resources.
            If a dictionary is provided, then it must conform to the standard config schema, and
            it will be used as the job's run config whenever the job is executed.
            The values provided will be viewable and editable in the Dagit playground, so be
            careful with secrets.
            If a :py:class:`ConfigMapping` object is provided, then the schema for the job's run
            config is determined by the config mapping, which maps the values it receives to
            configuration in the standard format for the job.
            If a :py:class:`PartitionedConfig` object is provided, then it defines a discrete
            set of config values that can parameterize the job, as well as a function for
            mapping those values to the base config. The values provided will be viewable and
            editable in the Dagit playground, so be careful with secrets.
        tags (Optional[Dict[str, Any]]): Arbitrary metadata for any execution of the Job.
            Values that are not strings will be json encoded and must meet the criteria that
            `json.loads(json.dumps(value)) == value`. These tag values may be overwritten by
            tag values provided at invocation time.
        logger_defs (Optional[Dict[str, LoggerDefinition]]):
            A dictionary of string logger identifiers to their implementations.
        executor_def (Optional[ExecutorDefinition]):
            How this Job will be executed. Defaults to :py:class:`multi_or_in_process_executor`,
            which can be switched between multi-process and in-process modes of execution. The
            default mode of execution is multi-process.
        op_retry_policy (Optional[RetryPolicy]): The default retry policy for all ops in this
            job. Only used if retry policy is not defined on the op definition or op invocation.
        version_strategy (Optional[VersionStrategy]):
            Defines how each solid (and optionally, resource) in the job can be versioned. If
            provided, memoization will be enabled for this job.
        partitions_def (Optional[PartitionsDefinition]): Defines a discrete set of partition
            keys that can parameterize the job. If this argument is supplied, the config
            argument can't also be supplied.

    Returns:
        JobDefinition
    """
    from .job_definition import JobDefinition
    from .partition import PartitionedConfig, PartitionsDefinition
    from .executor_definition import ExecutorDefinition, multi_or_in_process_executor

    job_name = check_valid_name(name or self.name)

    tags = check.opt_dict_param(tags, "tags", key_type=str)
    executor_def = check.opt_inst_param(
        executor_def, "executor_def", ExecutorDefinition, default=multi_or_in_process_executor
    )

    if resource_defs and "io_manager" in resource_defs:
        resource_defs_with_defaults = resource_defs
    else:
        resource_defs_with_defaults = merge_dicts(
            {"io_manager": default_job_io_manager}, resource_defs or {}
        )

    hooks = check.opt_set_param(hooks, "hooks", of_type=HookDefinition)
    op_retry_policy = check.opt_inst_param(op_retry_policy, "op_retry_policy", RetryPolicy)
    op_selection = check.opt_list_param(op_selection, "op_selection", of_type=str)
    presets = []
    config_mapping = None
    partitioned_config = None

    if partitions_def:
        check.inst_param(partitions_def, "partitions_def", PartitionsDefinition)
        check.invariant(
            config is None, "Can't supply both the 'config' and 'partitions_def' arguments"
        )
        partitioned_config = PartitionedConfig(partitions_def, lambda _: {})

    if isinstance(config, ConfigMapping):
        config_mapping = config
    elif isinstance(config, PartitionedConfig):
        partitioned_config = config
    elif isinstance(config, dict):
        presets = [PresetDefinition(name="default", run_config=config)]
        # Using config mapping here is a trick to make it so that the preset will be used even
        # when no config is supplied for the job.
        config_mapping = _config_mapping_with_default_value(
            self._get_config_schema(resource_defs_with_defaults, executor_def, logger_defs),
            config,
            job_name,
            self.name,
        )
    elif config is not None:
        check.failed(
            f"config param must be a ConfigMapping, a PartitionedConfig, or a dictionary, but "
            f"is an object of type {type(config)}"
        )

    return JobDefinition(
        name=job_name,
        description=description or self.description,
        graph_def=self,
        mode_def=ModeDefinition(
            resource_defs=resource_defs_with_defaults,
            logger_defs=logger_defs,
            executor_defs=[executor_def],
            _config_mapping=config_mapping,
            _partitioned_config=partitioned_config,
        ),
        preset_defs=presets,
        tags=tags,
        hook_defs=hooks,
        version_strategy=version_strategy,
        op_retry_policy=op_retry_policy,
    ).get_job_def_for_op_selection(op_selection)
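# A hedged sketch of calling to_job on a graph, as documented above. The graph, op, and resource
# names ("hello_graph", "say_hello", "greeting") are hypothetical; the keyword arguments mirror
# the signature defined above.
from dagster import graph, op, resource


@op(required_resource_keys={"greeting"})
def say_hello(context):
    context.log.info(context.resources.greeting)


@graph
def hello_graph():
    say_hello()


@resource
def greeting_resource(_):
    return "hello"


# resource_defs supplies only the "greeting" key, so "io_manager" still falls back to
# default_job_io_manager, matching the merge_dicts branch above.
hello_job = hello_graph.to_job(
    name="hello_job",
    resource_defs={"greeting": greeting_resource},
    tags={"team": "data"},
)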
def to_job(
    self,
    name: Optional[str] = None,
    description: Optional[str] = None,
    resource_defs: Optional[Dict[str, ResourceDefinition]] = None,
    config: Union[ConfigMapping, Dict[str, Any], "PartitionedConfig"] = None,
    tags: Optional[Dict[str, str]] = None,
    logger_defs: Optional[Dict[str, LoggerDefinition]] = None,
    executor_def: Optional["ExecutorDefinition"] = None,
    hooks: Optional[AbstractSet[HookDefinition]] = None,
) -> "PipelineDefinition":
    """
    Make this graph into an executable Job by providing the remaining components required for
    execution.

    Args:
        name (Optional[str]): The name for the Job. Defaults to the name of this graph.
        resource_defs (Optional[Dict[str, ResourceDefinition]]):
            Resources that are required by this graph for execution.
            If not defined, `io_manager` will default to filesystem.
        config: Describes how the job is parameterized at runtime.
            If no value is provided, then the schema for the job's run config is a standard
            format based on its solids and resources.
            If a dictionary is provided, then it must conform to the standard config schema, and
            it will be used as the job's run config whenever the job is executed.
            The values provided will be viewable and editable in the Dagit playground, so be
            careful with secrets.
            If a :py:class:`ConfigMapping` object is provided, then the schema for the job's run
            config is determined by the config mapping, which maps the values it receives to
            configuration in the standard format for the job.
            If a :py:class:`PartitionedConfig` object is provided, then it defines a discrete
            set of config values that can parameterize the job, as well as a function for
            mapping those values to the base config. The values provided will be viewable and
            editable in the Dagit playground, so be careful with secrets.
        tags (Optional[Dict[str, Any]]): Arbitrary metadata for any execution of the Job.
            Values that are not strings will be json encoded and must meet the criteria that
            `json.loads(json.dumps(value)) == value`. These tag values may be overwritten by
            tag values provided at invocation time.
        logger_defs (Optional[Dict[str, LoggerDefinition]]):
            A dictionary of string logger identifiers to their implementations.
        executor_def (Optional[ExecutorDefinition]):
            How this Job will be executed. Defaults to :py:class:`multiprocess_executor`.

    Returns:
        PipelineDefinition: The "Job" currently implemented as a single-mode pipeline
    """
    from .pipeline import PipelineDefinition
    from .partition import PartitionedConfig
    from .executor import ExecutorDefinition, multiprocess_executor

    tags = check.opt_dict_param(tags, "tags", key_type=str, value_type=str)
    executor_def = check.opt_inst_param(
        executor_def, "executor_def", ExecutorDefinition, default=multiprocess_executor
    )

    if resource_defs and "io_manager" in resource_defs:
        resource_defs_with_defaults = resource_defs
    else:
        resource_defs_with_defaults = merge_dicts(
            {"io_manager": default_job_io_manager}, resource_defs or {}
        )

    hooks = check.opt_set_param(hooks, "hooks", of_type=HookDefinition)
    presets = []
    config_mapping = None
    partitioned_config = None

    if isinstance(config, ConfigMapping):
        config_mapping = config
    elif isinstance(config, PartitionedConfig):
        partitioned_config = config
    elif isinstance(config, dict):
        presets = [PresetDefinition(name="default", run_config=config)]
        # Using config mapping here is a trick to make it so that the preset will be used even
        # when no config is supplied for the job.
        config_mapping = _config_mapping_with_default_value(
            self._get_config_schema(resource_defs_with_defaults, executor_def), config
        )
    elif config is not None:
        check.failed(
            f"config param must be a ConfigMapping, a PartitionedConfig, or a dictionary, but "
            f"is an object of type {type(config)}"
        )

    job_name = name or self.name

    return PipelineDefinition(
        name=job_name,
        description=description,
        graph_def=self,
        mode_defs=[
            ModeDefinition(
                resource_defs=resource_defs_with_defaults,
                logger_defs=logger_defs,
                executor_defs=[executor_def],
                _config_mapping=config_mapping,
                _partitioned_config=partitioned_config,
            )
        ],
        preset_defs=presets,
        tags=tags,
        hook_defs=hooks,
    )
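# A hedged sketch of the ConfigMapping branch above: a simplified outer schema is mapped to the
# standard run config format. The graph ("configurable_graph"), op ("echo"), and config field
# names are hypothetical and exist only to illustrate the mapping.
from dagster import ConfigMapping, graph, op


@op(config_schema={"message": str})
def echo(context):
    context.log.info(context.op_config["message"])


@graph
def configurable_graph():
    echo()


simplified_job = configurable_graph.to_job(
    name="simplified_job",
    config=ConfigMapping(
        config_schema={"simple_message": str},
        # config_fn maps the simplified values into the standard "ops" config format.
        config_fn=lambda cfg: {
            "ops": {"echo": {"config": {"message": cfg["simple_message"]}}}
        },
    ),
)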