def __init__(self, execution_context_data, log_manager):
    self._execution_context_data = check.inst_param(
        execution_context_data, "execution_context_data", SystemExecutionContextData
    )
    self._log_manager = check.inst_param(log_manager, "log_manager", DagsterLogManager)
def __init__(self, error_info):
    self.error_info = check.inst_param(error_info, 'error_info', SerializableErrorInfo)
def from_dagster_error(config_schema_snapshot, error):
    check.inst_param(config_schema_snapshot, 'config_schema_snapshot', ConfigSchemaSnapshot)
    check.inst_param(error, 'error', EvaluationError)

    if isinstance(error.error_data, RuntimeMismatchErrorData):
        return DauphinRuntimeMismatchConfigError(
            message=error.message,
            path=[],  # TODO: remove
            stack=DauphinEvaluationStack(config_schema_snapshot, error.stack),
            reason=error.reason,
            value_rep=error.error_data.value_rep,
        )
    elif isinstance(error.error_data, MissingFieldErrorData):
        return DauphinMissingFieldConfigError(
            message=error.message,
            path=[],  # TODO: remove
            stack=DauphinEvaluationStack(config_schema_snapshot, error.stack),
            reason=error.reason,
            field=DauphinConfigTypeField(
                config_schema_snapshot=config_schema_snapshot,
                field_meta=snap_from_field(
                    error.error_data.field_name, error.error_data.field_def
                ),
            ),
        )
    elif isinstance(error.error_data, MissingFieldsErrorData):
        return DauphinMissingFieldsConfigError(
            message=error.message,
            path=[],  # TODO: remove
            stack=DauphinEvaluationStack(config_schema_snapshot, error.stack),
            reason=error.reason,
            fields=[
                DauphinConfigTypeField(
                    config_schema_snapshot=config_schema_snapshot,
                    field_meta=snap_from_field(field_name, field_def),
                )
                for field_name, field_def in zip(
                    error.error_data.field_names, error.error_data.field_defs
                )
            ],
        )
    elif isinstance(error.error_data, FieldNotDefinedErrorData):
        return DauphinFieldNotDefinedConfigError(
            message=error.message,
            path=[],  # TODO: remove
            stack=DauphinEvaluationStack(config_schema_snapshot, error.stack),
            reason=error.reason,
            field_name=error.error_data.field_name,
        )
    elif isinstance(error.error_data, FieldsNotDefinedErrorData):
        return DauphinFieldsNotDefinedConfigError(
            message=error.message,
            path=[],  # TODO: remove
            stack=DauphinEvaluationStack(config_schema_snapshot, error.stack),
            reason=error.reason,
            field_names=error.error_data.field_names,
        )
    elif isinstance(error.error_data, SelectorTypeErrorData):
        return DauphinSelectorTypeConfigError(
            message=error.message,
            path=[],  # TODO: remove
            stack=DauphinEvaluationStack(config_schema_snapshot, error.stack),
            reason=error.reason,
            incoming_fields=error.error_data.incoming_fields,
        )
    else:
        check.failed(
            'Error type not supported {error_data}'.format(error_data=repr(error.error_data))
        )
def execute_solid(
    solid_def,
    mode_def=None,
    input_values=None,
    environment_dict=None,
    run_config=None,
    raise_on_error=True,
):
    '''Execute a single solid in an ephemeral pipeline.

    Intended to support unit tests. Input values may be passed directly, and no pipeline need be
    specified -- an ephemeral pipeline will be constructed.

    Args:
        solid_def (SolidDefinition): The solid to execute.
        mode_def (Optional[ModeDefinition]): The mode within which to execute the solid. Use this
            if, e.g., custom resources, loggers, or executors are desired.
        input_values (Optional[Dict[str, Any]]): A dict of input names to input values, used to
            pass inputs to the solid directly. You may also use the ``environment_dict`` to
            configure any inputs that are configurable.
        environment_dict (Optional[dict]): The environment configuration that parameterizes this
            execution, as a dict.
        run_config (Optional[RunConfig]): Optionally specifies additional config options for
            pipeline execution.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``True``, since this is the most useful behavior in tests.

    Returns:
        Union[CompositeSolidExecutionResult, SolidExecutionResult]: The result of executing the
        solid.
    '''
    check.inst_param(solid_def, 'solid_def', ISolidDefinition)
    check.opt_inst_param(mode_def, 'mode_def', ModeDefinition)
    input_values = check.opt_dict_param(input_values, 'input_values', key_type=str)

    solid_defs = [solid_def]

    def create_value_solid(input_name, input_value):
        @lambda_solid(name=input_name)
        def input_solid():
            return input_value

        return input_solid

    dependencies = defaultdict(dict)

    for input_name, input_value in input_values.items():
        dependencies[solid_def.name][input_name] = DependencyDefinition(input_name)
        solid_defs.append(create_value_solid(input_name, input_value))

    result = execute_pipeline(
        PipelineDefinition(
            name='ephemeral_{}_solid_pipeline'.format(solid_def.name),
            solid_defs=solid_defs,
            dependencies=dependencies,
            mode_defs=[mode_def] if mode_def else None,
        ),
        environment_dict=environment_dict,
        run_config=run_config,
        raise_on_error=raise_on_error,
    )
    return result.result_for_handle(solid_def.name)
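# A minimal usage sketch for execute_solid in a unit test. The solid `add_one`
# and its input name `num` are hypothetical, and import paths may vary by
# dagster version; `output_value()` and `success` are standard accessors on
# the returned SolidExecutionResult.
@lambda_solid(input_defs=[InputDefinition('num')])
def add_one(num):
    return num + 1

def test_add_one():
    result = execute_solid(add_one, input_values={'num': 1})
    assert result.success
    assert result.output_value() == 2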
def build_dep_structure_snapshot_from_icontains_solids(icontains_solids):
    check.inst_param(icontains_solids, "icontains_solids", IContainSolids)
    return DependencyStructureSnapshot(
        solid_invocation_snaps=[
            build_solid_invocation_snap(icontains_solids, solid)
            for solid in icontains_solids.solids
        ]
    )
def __init__(self, object_store):
    self._object_store = check.inst_param(object_store, 'object_store', ObjectStore)
    self.storage_mode = self._object_store.storage_mode
def has_intermediate(self, context, step_output_handle):
    check.opt_inst_param(context, 'context', SystemPipelineExecutionContext)
    check.inst_param(step_output_handle, 'step_output_handle', StepOutputHandle)
    return step_output_handle in self.values
def __new__(cls, config_type_snap, value_rep):
    check.inst_param(config_type_snap, "config_type_snap", ConfigTypeSnap)
    return super(RuntimeMismatchErrorData, cls).__new__(
        cls,
        config_type_snap,
        check.str_param(value_rep, "value_rep"),
    )
def __init__(self, job_state, timestamp):
    self._job_state = check.inst_param(job_state, "job_state", JobState)
    self._timestamp = timestamp
    super().__init__(timestamp=check.float_param(timestamp, "timestamp"))
def initialize(self, instance):
    check.inst_param(instance, "instance", DagsterInstance)
    # Store a weakref to avoid a circular reference / enable GC
    self._instance_ref = weakref.ref(instance)
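# A standalone sketch of the weakref pattern used above; the Parent/Child
# class names are hypothetical. The child holds only a weak reference to its
# parent, so the mutual pointers do not keep the parent alive.
import weakref

class Parent:
    def __init__(self):
        self.child = Child(self)

class Child:
    def __init__(self, parent):
        self._parent_ref = weakref.ref(parent)

    @property
    def parent(self):
        # Calling the ref dereferences it; returns None once the parent
        # has been garbage collected.
        return self._parent_ref()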
def __new__(cls, field_name, field_snap):
    return super(MissingFieldErrorData, cls).__new__(
        cls,
        check.str_param(field_name, "field_name"),
        check.inst_param(field_snap, "field_snap", ConfigFieldSnap),
    )
def __new__(cls, solid_handle: SolidHandle):
    # check.inst_param rejects None, so the handle is required here
    return super(UnresolvedStepHandle, cls).__new__(
        cls,
        solid_handle=check.inst_param(solid_handle, "solid_handle", SolidHandle),
    )
def __new__(
    cls,
    pipeline_run,
    scoped_resources_builder,
    environment_config,
    pipeline,
    mode_def,
    system_storage_def,
    intermediate_storage_def,
    instance,
    intermediate_storage,
    file_manager,
    raise_on_error,
    retries,
    execution_plan,
):
    from dagster.core.definitions.system_storage import SystemStorageDefinition
    from dagster.core.definitions.intermediate_storage import IntermediateStorageDefinition
    from dagster.core.storage.intermediate_storage import IntermediateStorage
    from dagster.core.instance import DagsterInstance
    from dagster.core.execution.plan.plan import ExecutionPlan

    return super(SystemExecutionContextData, cls).__new__(
        cls,
        pipeline_run=check.inst_param(pipeline_run, "pipeline_run", PipelineRun),
        scoped_resources_builder=check.inst_param(
            scoped_resources_builder, "scoped_resources_builder", ScopedResourcesBuilder
        ),
        environment_config=check.inst_param(
            environment_config, "environment_config", EnvironmentConfig
        ),
        pipeline=check.inst_param(pipeline, "pipeline", IPipeline),
        mode_def=check.inst_param(mode_def, "mode_def", ModeDefinition),
        system_storage_def=check.inst_param(
            system_storage_def, "system_storage_def", SystemStorageDefinition
        ),
        intermediate_storage_def=check.opt_inst_param(
            intermediate_storage_def, "intermediate_storage_def", IntermediateStorageDefinition
        ),
        instance=check.inst_param(instance, "instance", DagsterInstance),
        intermediate_storage=check.inst_param(
            intermediate_storage, "intermediate_storage", IntermediateStorage
        ),
        file_manager=check.inst_param(file_manager, "file_manager", FileManager),
        raise_on_error=check.bool_param(raise_on_error, "raise_on_error"),
        retries=check.inst_param(retries, "retries", Retries),
        execution_plan=check.inst_param(execution_plan, "execution_plan", ExecutionPlan),
    )
def __init__(self, execution_context_data, log_manager, executor):
    super(SystemPipelineExecutionContext, self).__init__(execution_context_data, log_manager)
    self._executor = check.inst_param(executor, "executor", Executor)
def hourly_schedule(
    pipeline_name,
    start_date,
    name=None,
    execution_time=datetime.time(0, 0),
    tags_fn_for_date=None,
    solid_subset=None,
    mode="default",
    should_execute=None,
    environment_vars=None,
):
    check.opt_str_param(name, 'name')
    check.str_param(pipeline_name, 'pipeline_name')
    check.inst_param(start_date, 'start_date', datetime.datetime)
    check.opt_callable_param(tags_fn_for_date, 'tags_fn_for_date')
    check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)
    check.opt_callable_param(should_execute, 'should_execute')
    check.opt_dict_param(environment_vars, 'environment_vars', key_type=str, value_type=str)
    check.inst_param(execution_time, 'execution_time', datetime.time)

    if execution_time.hour != 0:
        warnings.warn(
            "Hourly schedule {schedule_name} created with:\n"
            "\tschedule_time=datetime.time(hour={hour}, minute={minute}, ...). "
            "Since this is an hourly schedule, the hour parameter will be ignored and the "
            "schedule will run on the {minute} mark for the previous hour interval. Replace "
            "datetime.time(hour={hour}, minute={minute}, ...) with "
            "datetime.time(minute={minute}, ...) to fix this warning.".format(
                schedule_name=name, hour=execution_time.hour, minute=execution_time.minute
            )
        )

    cron_schedule = '{minute} * * * *'.format(minute=execution_time.minute)

    partition_fn = date_partition_range(
        start_date, delta=datetime.timedelta(hours=1), fmt="%Y-%m-%d-%H:%M"
    )

    def inner(fn):
        check.callable_param(fn, 'fn')

        schedule_name = name or fn.__name__

        tags_fn_for_partition_value = lambda partition: {}
        if tags_fn_for_date:
            tags_fn_for_partition_value = lambda partition: tags_fn_for_date(partition.value)

        partition_set = PartitionSetDefinition(
            name='{}_hourly'.format(pipeline_name),
            pipeline_name=pipeline_name,
            partition_fn=partition_fn,
            environment_dict_fn_for_partition=lambda partition: fn(partition.value),
            tags_fn_for_partition=tags_fn_for_partition_value,
            mode=mode,
        )

        return partition_set.create_schedule_definition(
            schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
        )

    return inner
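# A minimal usage sketch for the decorator above. The pipeline name, solid
# name, and config shape are hypothetical; the decorated function receives the
# partition's datetime and returns the environment dict for that run.
@hourly_schedule(
    pipeline_name='my_pipeline',
    start_date=datetime.datetime(2020, 1, 1),
    execution_time=datetime.time(minute=30),
)
def my_hourly_schedule(date):
    return {'solids': {'process_data': {'config': {'date': date.strftime('%Y-%m-%d-%H:%M')}}}}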
def __init__(self, run_request):
    # Validate before use so a bad argument fails the check, not the attribute access
    self._run_request = check.inst_param(run_request, "run_request", RunRequest)
    super().__init__(runKey=run_request.run_key)
def monthly_schedule(
    pipeline_name,
    start_date,
    name=None,
    execution_day_of_month=1,
    execution_time=datetime.time(0, 0),
    tags_fn_for_date=None,
    solid_subset=None,
    mode="default",
    should_execute=None,
    environment_vars=None,
):
    check.opt_str_param(name, 'name')
    check.str_param(pipeline_name, 'pipeline_name')
    check.inst_param(start_date, 'start_date', datetime.datetime)
    check.opt_callable_param(tags_fn_for_date, 'tags_fn_for_date')
    check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)
    check.opt_callable_param(should_execute, 'should_execute')
    check.opt_dict_param(environment_vars, 'environment_vars', key_type=str, value_type=str)
    check.int_param(execution_day_of_month, 'execution_day_of_month')
    check.inst_param(execution_time, 'execution_time', datetime.time)

    if execution_day_of_month <= 0 or execution_day_of_month > 31:
        raise DagsterInvalidDefinitionError(
            "`execution_day_of_month={}` is not valid for monthly schedule. Execution day must "
            "be between 1 and 31".format(execution_day_of_month)
        )

    cron_schedule = '{minute} {hour} {day} * *'.format(
        minute=execution_time.minute, hour=execution_time.hour, day=execution_day_of_month
    )

    partition_fn = date_partition_range(start_date, delta=relativedelta(months=1), fmt="%Y-%m")

    def inner(fn):
        check.callable_param(fn, 'fn')

        schedule_name = name or fn.__name__

        tags_fn_for_partition_value = lambda partition: {}
        if tags_fn_for_date:
            tags_fn_for_partition_value = lambda partition: tags_fn_for_date(partition.value)

        partition_set = PartitionSetDefinition(
            name='{}_monthly'.format(pipeline_name),
            pipeline_name=pipeline_name,
            partition_fn=partition_fn,
            environment_dict_fn_for_partition=lambda partition: fn(partition.value),
            tags_fn_for_partition=tags_fn_for_partition_value,
            mode=mode,
        )

        return partition_set.create_schedule_definition(
            schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
        )

    return inner
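# A minimal usage sketch, parallel to the hourly example (pipeline name, solid
# name, and config shape are hypothetical): run on the 5th of each month at
# 03:00, passing the partition month into solid config.
@monthly_schedule(
    pipeline_name='my_pipeline',
    start_date=datetime.datetime(2020, 1, 1),
    execution_day_of_month=5,
    execution_time=datetime.time(hour=3, minute=0),
)
def my_monthly_schedule(date):
    return {'solids': {'process_data': {'config': {'month': date.strftime('%Y-%m')}}}}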
def __init__(self, job_specific_data):
    check.inst_param(job_specific_data, "job_specific_data", SensorJobData)
    super().__init__(
        lastTickTimestamp=job_specific_data.last_tick_timestamp,
        lastRunKey=job_specific_data.last_run_key,
    )
def from_step(step, output_name='result'):
    from .execution_plan.objects import ExecutionStep

    check.inst_param(step, 'step', ExecutionStep)
    return StepOutputHandle(step.key, output_name)
def engine_error(error):
    check.inst_param(error, "error", SerializableErrorInfo)
    return EngineEventData(metadata_entries=[], error=error)
def __init__(self, instance_ref, last_completion_time, last_run_key):
    self._instance_ref = check.inst_param(instance_ref, "instance_ref", InstanceRef)
    self._last_completion_time = check.opt_float_param(
        last_completion_time, "last_completion_time"
    )
    self._last_run_key = check.opt_str_param(last_run_key, "last_run_key")
def __new__(cls, error):
    return super(PipelineInitFailureData, cls).__new__(
        cls, error=check.inst_param(error, "error", SerializableErrorInfo)
    )
def _send_state_event_to_subscribers(self, event: LocationStateChangeEvent) -> None:
    check.inst_param(event, "event", LocationStateChangeEvent)

    for subscriber in self._state_subscribers:
        subscriber.handle_event(event)
def __new__(cls, error):
    return super(HookErroredData, cls).__new__(
        cls,
        error=check.inst_param(error, "error", SerializableErrorInfo),
    )
def _check_execute_pipeline_args(
    fn_name, pipeline, environment_dict, mode, preset, tags, run_config, instance
):
    # backcompat
    if isinstance(pipeline, PipelineDefinition):
        pipeline = InMemoryExecutablePipeline(pipeline)

    check.inst_param(pipeline, 'pipeline', ExecutablePipeline)
    pipeline_def = pipeline.get_definition()

    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict')
    check.opt_str_param(mode, 'mode')
    check.opt_str_param(preset, 'preset')
    check.invariant(
        not (mode is not None and preset is not None),
        'You may set only one of `mode` (got {mode}) or `preset` (got {preset}).'.format(
            mode=mode, preset=preset
        ),
    )
    tags = check.opt_dict_param(tags, 'tags', key_type=str)
    run_config = check.opt_inst_param(run_config, 'run_config', RunConfig, default=RunConfig())

    if preset is not None:
        pipeline_preset = pipeline_def.get_preset(preset)

        check.invariant(
            run_config.mode is None or pipeline_preset.mode == run_config.mode,
            'The mode set in preset \'{preset}\' (\'{preset_mode}\') does not agree with the mode '
            'set in the `run_config` (\'{run_config_mode}\')'.format(
                preset=preset, preset_mode=pipeline_preset.mode, run_config_mode=run_config.mode
            ),
        )

        if pipeline_preset.environment_dict is not None:
            check.invariant(
                (not environment_dict) or (pipeline_preset.environment_dict == environment_dict),
                'The environment set in preset \'{preset}\' does not agree with the environment '
                'passed in the `environment_dict` argument.'.format(preset=preset),
            )
            environment_dict = pipeline_preset.environment_dict

        if pipeline_preset.solid_subset is not None:
            pipeline = pipeline.build_sub_pipeline(pipeline_preset.solid_subset)

        check.invariant(
            mode is None or mode == pipeline_preset.mode,
            'Mode {mode} does not agree with the mode set in preset \'{preset}\': '
            '(\'{preset_mode}\')'.format(
                preset=preset, preset_mode=pipeline_preset.mode, mode=mode
            ),
        )
        mode = pipeline_preset.mode

    if run_config.mode is not None or run_config.tags:
        warnings.warn(
            (
                'In 0.8.0, the use of `run_config` to set pipeline mode and tags will be '
                'deprecated. Please use the `mode` and `tags` arguments to `{fn_name}` '
                'instead.'
            ).format(fn_name=fn_name)
        )

    if run_config.mode is not None:
        if mode is not None:
            check.invariant(
                run_config.mode == mode,
                'Mode \'{mode}\' does not agree with the mode set in the `run_config`: '
                '\'{run_config_mode}\''.format(mode=mode, run_config_mode=run_config.mode),
            )
        mode = run_config.mode

    if mode is not None:
        if not pipeline_def.has_mode_definition(mode):
            raise DagsterInvariantViolationError(
                (
                    'You have attempted to execute pipeline {name} with mode {mode}. '
                    'Available modes: {modes}'
                ).format(name=pipeline_def.name, mode=mode, modes=pipeline_def.available_modes)
            )
    else:
        if not pipeline_def.is_single_mode:
            raise DagsterInvariantViolationError(
                (
                    'Pipeline {name} has multiple modes (Available modes: {modes}) and you have '
                    'attempted to execute it without specifying a mode. Set '
                    'mode property on the PipelineRun object.'
                ).format(name=pipeline_def.name, modes=pipeline_def.available_modes)
            )
        mode = pipeline_def.get_default_mode_name()

    tags = merge_dicts(merge_dicts(pipeline_def.tags, run_config.tags or {}), tags)

    check.opt_inst_param(instance, 'instance', DagsterInstance)
    instance = instance or DagsterInstance.ephemeral()

    execution_plan = create_execution_plan(
        pipeline,
        environment_dict,
        mode=mode,
        step_keys_to_execute=run_config.step_keys_to_execute,
    )

    return pipeline, environment_dict, instance, mode, tags, run_config, execution_plan
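# A usage sketch from the caller's side; `my_pipeline`, the preset name, and
# the tag values are hypothetical. Two separate, individually valid calls:
result = execute_pipeline(my_pipeline, preset='dev')  # mode and environment come from the preset
result = execute_pipeline(my_pipeline, mode='local', tags={'team': 'data'})

# Passing both raises, via the check.invariant above:
# execute_pipeline(my_pipeline, mode='local', preset='dev')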
def _validate_event_specific_data(event_type, event_specific_data):
    from dagster.core.execution.plan.objects import StepFailureData, StepSuccessData
    from dagster.core.execution.plan.inputs import StepInputData

    if event_type == DagsterEventType.STEP_OUTPUT:
        check.inst_param(event_specific_data, "event_specific_data", StepOutputData)
    elif event_type == DagsterEventType.STEP_FAILURE:
        check.inst_param(event_specific_data, "event_specific_data", StepFailureData)
    elif event_type == DagsterEventType.STEP_SUCCESS:
        check.inst_param(event_specific_data, "event_specific_data", StepSuccessData)
    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        check.inst_param(event_specific_data, "event_specific_data", StepMaterializationData)
    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        check.inst_param(event_specific_data, "event_specific_data", StepExpectationResultData)
    elif event_type == DagsterEventType.STEP_INPUT:
        check.inst_param(event_specific_data, "event_specific_data", StepInputData)
    elif event_type == DagsterEventType.ENGINE_EVENT:
        check.inst_param(event_specific_data, "event_specific_data", EngineEventData)
    elif event_type == DagsterEventType.HOOK_ERRORED:
        check.inst_param(event_specific_data, "event_specific_data", HookErroredData)

    return event_specific_data
def __new__(cls, pipeline_run, process, message_queue):
    return super(RunProcessWrapper, cls).__new__(
        cls, check.inst_param(pipeline_run, 'pipeline_run', PipelineRun), process, message_queue
    )
def object_store_operation(step_context, object_store_operation_result):
    from dagster.core.definitions.events import ObjectStoreOperation

    check.inst_param(
        object_store_operation_result, "object_store_operation_result", ObjectStoreOperation
    )

    object_store_name = (
        "{object_store_name} ".format(
            object_store_name=object_store_operation_result.object_store_name
        )
        if object_store_operation_result.object_store_name
        else ""
    )

    serialization_strategy_modifier = (
        " using {serialization_strategy_name}".format(
            serialization_strategy_name=object_store_operation_result.serialization_strategy_name
        )
        if object_store_operation_result.serialization_strategy_name
        else ""
    )

    value_name = object_store_operation_result.value_name

    if (
        ObjectStoreOperationType(object_store_operation_result.op)
        == ObjectStoreOperationType.SET_OBJECT
    ):
        message = (
            "Stored intermediate object for output {value_name} in "
            "{object_store_name}object store{serialization_strategy_modifier}."
        ).format(
            value_name=value_name,
            object_store_name=object_store_name,
            serialization_strategy_modifier=serialization_strategy_modifier,
        )
    elif (
        ObjectStoreOperationType(object_store_operation_result.op)
        == ObjectStoreOperationType.GET_OBJECT
    ):
        message = (
            "Retrieved intermediate object for input {value_name} in "
            "{object_store_name}object store{serialization_strategy_modifier}."
        ).format(
            value_name=value_name,
            object_store_name=object_store_name,
            serialization_strategy_modifier=serialization_strategy_modifier,
        )
    elif (
        ObjectStoreOperationType(object_store_operation_result.op)
        == ObjectStoreOperationType.CP_OBJECT
    ):
        message = (
            "Copied intermediate object for input {value_name} from {key} to {dest_key}"
        ).format(
            value_name=value_name,
            key=object_store_operation_result.key,
            dest_key=object_store_operation_result.dest_key,
        )
    else:
        message = ""

    return DagsterEvent.from_step(
        DagsterEventType.OBJECT_STORE_OPERATION,
        step_context,
        event_specific_data=ObjectStoreOperationResultData(
            op=object_store_operation_result.op,
            value_name=value_name,
            address=object_store_operation_result.key,
            metadata_entries=[
                EventMetadataEntry.path(object_store_operation_result.key, label="key")
            ],
            version=object_store_operation_result.version,
            mapping_key=object_store_operation_result.mapping_key,
        ),
        message=message,
    )
def __init__(self, config_schema_snapshot, stack):
    self._config_schema_snapshot = check.inst_param(
        config_schema_snapshot, 'config_schema_snapshot', ConfigSchemaSnapshot
    )
    self._stack = stack
    super(DauphinEvaluationStack, self).__init__()
def __init__(self, event):
    super().__init__()
    self._event = check.inst_param(event, "event", EventRecord)
    check.invariant(
        isinstance(event.dagster_event.step_materialization_data, StepMaterializationData)
    )