def whitelist_for_serdes(
    __cls: Optional[Type] = None,
    *,
    serializer: Optional[Type["Serializer"]] = None,
    storage_name: Optional[str] = None,
):
    """Decorator that whitelists a NamedTuple or enum class for serialization.

    Works both bare and parameterized:

    .. code-block:: python

        @whitelist_for_serdes
        class Foo(NamedTuple): ...

        @whitelist_for_serdes(storage_name="OldFoo")
        class Foo(NamedTuple): ...

    When a ``storage_name`` is supplied for a NamedTuple, serialized instances
    are stored under that name rather than under the class name. This exists
    mainly for backwards compatibility: after renaming a serialized class,
    setting ``storage_name`` to the old name (a) lets versions of Dagster from
    before the rename deserialize the object and (b) lets Dagster load objects
    that were stored under the old name.

    Args:
        __cls: The class being decorated when the decorator is applied bare;
            ``None`` when the decorator is called with keyword arguments.
        serializer: Optional ``Serializer`` subclass, forwarded in the
            parameterized form only.
        storage_name: Optional name to store serialized instances under. Only
            permitted when ``serializer`` is ``None`` or a subclass of
            ``DefaultNamedTupleSerializer``.

    Returns:
        In the bare form, the result of applying the registration decorator to
        ``__cls``; in the parameterized form, the registration decorator
        itself.
    """
    storage_name_allowed = (
        not storage_name
        or serializer is None
        or issubclass(serializer, DefaultNamedTupleSerializer)
    )
    check.invariant(
        storage_name_allowed,
        "storage_name can only be used with DefaultNamedTupleSerializer",
    )

    if __cls is None:
        # Parameterized form: hand back a decorator configured with the options.
        check.opt_class_param(serializer, "serializer", superclass=Serializer)
        return _whitelist_for_serdes(
            whitelist_map=_WHITELIST_MAP,
            serializer=cast(Type[Serializer], serializer),
            storage_name=storage_name,
        )

    # Bare form: the class itself was passed in, so register it immediately.
    # Note that serializer/storage_name are not forwarded on this path.
    check.class_param(__cls, "__cls")
    return _whitelist_for_serdes(whitelist_map=_WHITELIST_MAP)(__cls)
def report_engine_event(
    self, message, pipeline_run, engine_event_data=None, cls=None, step_key=None,
):
    '''
    Report a EngineEvent that occurred outside of a pipeline execution context.

    Args:
        message (str): Human-readable description of the event.
        pipeline_run (PipelineRun): The run the event is attributed to; its
            ``pipeline_name`` and ``run_id`` are copied onto the event record.
        engine_event_data (Optional[EngineEventData]): Structured event payload.
            Defaults to ``EngineEventData([])``.
        cls (Optional[type]): Class reporting the event. When provided, the
            message is prefixed with ``[<cls.__name__>]``.
        step_key (Optional[str]): Step key stored on the event record.

    Returns:
        DagsterEvent: The engine event that was constructed. The wrapping
        ``DagsterEventRecord`` is passed to ``self.handle_new_event``.
    '''
    # Imported locally, as in the original (presumably to avoid an import cycle).
    from dagster.core.events import EngineEventData, DagsterEvent, DagsterEventType
    from dagster.core.events.log import DagsterEventRecord

    # `cls` is optional (default None), so only validate it when supplied.
    # Validating unconditionally rejected every call that omitted `cls`.
    if cls is not None:
        check.class_param(cls, 'cls')
    check.str_param(message, 'message')
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    engine_event_data = check.opt_inst_param(
        engine_event_data, 'engine_event_data', EngineEventData, EngineEventData([]),
    )

    if cls is not None:
        message = "[{}] {}".format(cls.__name__, message)

    # Events carrying error information are logged at ERROR, all others at INFO.
    log_level = logging.INFO
    if engine_event_data and engine_event_data.error:
        log_level = logging.ERROR

    dagster_event = DagsterEvent(
        event_type_value=DagsterEventType.ENGINE_EVENT.value,
        pipeline_name=pipeline_run.pipeline_name,
        message=message,
        event_specific_data=engine_event_data,
    )
    event_record = DagsterEventRecord(
        message=message,
        user_message=message,
        level=log_level,
        pipeline_name=pipeline_run.pipeline_name,
        run_id=pipeline_run.run_id,
        error_info=None,
        timestamp=time.time(),
        step_key=step_key,
        dagster_event=dagster_event,
    )

    self.handle_new_event(event_record)
    return dagster_event
def __init__(
    self,
    python_type: t.Union[t.Type, t.Tuple[t.Type, ...]],
    key: t.Optional[str] = None,
    name: t.Optional[str] = None,
    **kwargs,
):
    """Build a Dagster type backed by an ``isinstance`` check.

    Args:
        python_type: A single class, or a tuple of classes treated as a union.
        key: Optional type key; defaults to ``name``.
        name: Optional type name; defaults to the derived type string, which is
            the class ``__name__`` or ``"Union[A, B, ...]"`` for a tuple.
        **kwargs: Forwarded unchanged to the parent ``__init__``.
    """
    if not isinstance(python_type, tuple):
        self.python_type = check.class_param(python_type, "python_type")  # type: ignore
        self.type_str = cast(str, python_type.__name__)
        typing_type = self.python_type  # type: ignore
    else:
        # One `type` entry per member: every element of the tuple must be a class.
        member_shape = tuple(type for _ in python_type)
        self.python_type = check.tuple_param(
            python_type, "python_type", of_shape=member_shape
        )
        member_names = [member.__name__ for member in python_type]
        self.type_str = "Union[{}]".format(", ".join(member_names))
        typing_type = t.Union[python_type]  # type: ignore

    name = check.opt_str_param(name, "name", self.type_str)
    key = check.opt_str_param(key, "key", name)

    # The raw `python_type` argument (class or tuple) goes to the check function.
    type_check_fn = isinstance_type_check_fn(python_type, name, self.type_str)
    super(PythonObjectDagsterType, self).__init__(
        key=key,
        name=name,
        type_check_fn=type_check_fn,
        typing_type=typing_type,
        **kwargs,
    )
def user_code_error_boundary(error_cls, msg_fn, log_manager=None, **kwargs):
    """
    Run user-space code inside a uniform error boundary.

    Any exception raised by the wrapped code that is not a ``DagsterError`` is
    re-raised as ``error_cls``, chained to the original exception and carrying
    the original ``sys.exc_info()``, so the user's stack trace can be reported
    separately from framework frames. ``DagsterError`` instances pass through
    unchanged.

    This function is a single-``yield`` generator intended to be used as a
    context manager (the decorator is outside this view).

    Args:
        error_cls: Subclass of ``DagsterUserCodeExecutionError`` used to wrap
            user exceptions.
        msg_fn: Zero-argument callable producing the wrapper's message. It is
            only called when a user exception is wrapped.
        log_manager: Optional log manager; when truthy, python log capture is
            started before the wrapped code and ended in ``finally``.
        **kwargs: Extra keyword arguments forwarded to ``error_cls``.

    Examples:

    .. code-block:: python

        with user_code_error_boundary(
            # Pass a class that inherits from DagsterUserCodeExecutionError
            DagsterExecutionStepExecutionError,
            # Pass a function that produces a message
            lambda: "Error occurred during step execution",
        ):
            call_user_provided_function()

    """
    check.callable_param(msg_fn, "msg_fn")
    check.class_param(error_cls, "error_cls", superclass=DagsterUserCodeExecutionError)

    with raise_execution_interrupts():
        if log_manager:
            log_manager.begin_python_log_capture()

        try:
            yield
        except DagsterError as framework_error:
            # Already part of the user-framework contract: propagate untouched.
            raise framework_error
        except Exception as user_error:  # pylint: disable=W0703
            # User code failed; stop and report the failure further up the
            # stack, wrapped in the framework's error type.
            wrapped_error = error_cls(
                msg_fn(),
                user_exception=user_error,
                original_exc_info=sys.exc_info(),
                **kwargs,
            )
            raise wrapped_error from user_error
        finally:
            if log_manager:
                log_manager.end_python_log_capture()
def whitelist_for_serdes(
    __cls: Optional[Type] = None, *, serializer: Optional[Type["Serializer"]] = None
):
    """
    Decorator to whitelist a named tuple or enum to be serializable.

    Can be applied bare or called with keyword arguments:

    .. code-block:: python

        @whitelist_for_serdes
        class Foo(NamedTuple): ...

        @whitelist_for_serdes(serializer=MySerializer)
        class Bar(NamedTuple): ...

    Args:
        __cls: The decorated class when applied bare; ``None`` when the
            decorator is called with keyword arguments.
        serializer: Optional ``Serializer`` subclass. ``None`` (the default)
            is forwarded as-is, the same value the bare form passes.

    Returns:
        In the bare form, the result of applying the registration decorator to
        ``__cls``; otherwise the registration decorator itself.
    """
    if __cls is not None:
        # Decorator invoked directly on the class.
        check.class_param(__cls, "__cls")
        return _whitelist_for_serdes(whitelist_map=_WHITELIST_MAP, serializer=None)(__cls)

    # Decorator called with params. `serializer` is declared Optional with a
    # None default, so only validate it when one was actually supplied;
    # validating None made `@whitelist_for_serdes()` unusable.
    if serializer is not None:
        check.subclass_param(serializer, "serializer", Serializer)
    return _whitelist_for_serdes(whitelist_map=_WHITELIST_MAP, serializer=serializer)
def __init__(
    self,
    generator: Generator[Union["DagsterEvent", GeneratedContext], None, None],
    object_cls: Type[GeneratedContext],
    require_object: Optional[bool] = True,
):
    """Wrap a generator and record the class of object it is expected to yield.

    Args:
        generator: The generator to manage; validated with ``check.generator``.
        object_cls: Class of the context object; validated as a class.
        require_object: Validated as a bool and stored on the instance.
    """
    # Nothing has been produced or run yet.
    self.object: Optional[GeneratedContext] = None
    self.did_setup = False
    self.did_teardown = False

    # Validated inputs, checked in the same order as the parameters.
    self.generator = check.generator(generator)
    self.object_cls: Type[GeneratedContext] = check.class_param(object_cls, "object_cls")
    self.require_object = check.bool_param(require_object, "require_object")
def make_airflow_dag_for_operator(
    recon_repo,
    job_name,
    operator,
    run_config=None,
    mode=None,
    dag_id=None,
    dag_description=None,
    dag_kwargs=None,
    op_kwargs=None,
    pipeline_name=None,
):
    """Construct an Airflow DAG corresponding to a given Dagster job/pipeline and custom operator.

    `Custom operator template <https://github.com/dagster-io/dagster/blob/master/python_modules/dagster-test/dagster_test/dagster_airflow/custom_operator.py>`_

    Tasks in the resulting DAG will execute the Dagster logic they encapsulate, run by the given
    Operator :py:class:`BaseOperator <airflow.models.BaseOperator>`. If you are looking for a
    containerized solution to provide better isolation, see instead
    :py:func:`make_airflow_dag_containerized`.

    This function should be invoked in an Airflow DAG definition file, such as that created by an
    invocation of the dagster-airflow scaffold CLI tool.

    Args:
        recon_repo (:class:`dagster.ReconstructableRepository`): reference to a Dagster
            RepositoryDefinition that can be reconstructed in another process
        job_name (str): The name of the job definition.
        operator (type): The operator to use. Must be a class that inherits from
            :py:class:`BaseOperator <airflow.models.BaseOperator>`
        run_config (Optional[dict]): The config, if any, with which to compile the pipeline to an
            execution plan, as a Python dict.
        mode (Optional[str]): The mode in which to execute the pipeline.
        dag_id (Optional[str]): The id to use for the compiled Airflow DAG (passed through to
            :py:class:`DAG <airflow:airflow.models.DAG>`).
        dag_description (Optional[str]): The description to use for the compiled Airflow DAG
            (passed through to :py:class:`DAG <airflow:airflow.models.DAG>`)
        dag_kwargs (Optional[dict]): Any additional kwargs to pass to the Airflow
            :py:class:`DAG <airflow:airflow.models.DAG>` constructor, including ``default_args``.
        op_kwargs (Optional[dict]): Any additional kwargs to pass to the underlying Airflow
            operator.
        pipeline_name (str): (legacy) The name of the pipeline definition; reconciled with
            ``job_name`` via ``canonicalize_backcompat_args``.

    Returns:
        (airflow.models.DAG, List[airflow.models.BaseOperator]): The generated Airflow DAG, and a
        list of its constituent tasks.
    """
    check.class_param(operator, "operator", superclass=BaseOperator)

    # Reconcile the current `job_name` argument with the legacy `pipeline_name`.
    resolved_job_name = canonicalize_backcompat_args(
        new_val=job_name,
        new_arg="job_name",
        old_val=pipeline_name,
        old_arg="pipeline_name",
        breaking_version="future versions",
        coerce_old_to_new=lambda legacy_name: legacy_name,
    )

    dag_options = dict(
        recon_repo=recon_repo,
        job_name=resolved_job_name,
        run_config=run_config,
        mode=mode,
        dag_id=dag_id,
        dag_description=dag_description,
        dag_kwargs=dag_kwargs,
        op_kwargs=op_kwargs,
        operator=operator,
    )
    return _make_airflow_dag(**dag_options)
def _make_airflow_dag(
    recon_repo,
    job_name,
    run_config=None,
    mode=None,
    instance=None,
    dag_id=None,
    dag_description=None,
    dag_kwargs=None,
    op_kwargs=None,
    operator=DagsterPythonOperator,
):
    """Compile a Dagster pipeline into an Airflow DAG with one task per coalesced solid.

    Validates the arguments, resolves a ``DagsterInstance`` when none is given,
    builds the execution plan, creates one ``operator`` task per entry of the
    coalesced plan, and wires each task downstream of the tasks that own the
    steps it depends on.

    Returns:
        A ``(dag, tasks)`` tuple, where ``tasks`` lists the created tasks in the
        iteration order of the coalesced plan.
    """
    check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)
    check.str_param(job_name, "job_name")
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    mode = check.opt_str_param(mode, "mode")

    if instance is None:
        # Fall back to the (persistent) system temp directory rather than a
        # TemporaryDirectory, which would not be consistent between Airflow
        # task invocations.
        instance = (
            DagsterInstance.get()
            if is_dagster_home_set()
            else DagsterInstance.local_temp(tempdir=seven.get_system_temp_directory())
        )
    check.inst_param(instance, "instance", DagsterInstance)

    # The renamed id is only used for Airflow; internally we keep pipeline.name.
    dag_id = check.opt_str_param(dag_id, "dag_id", _rename_for_airflow(job_name))
    dag_description = check.opt_str_param(
        dag_description, "dag_description", _make_dag_description(job_name)
    )
    check.class_param(operator, "operator", superclass=BaseOperator)

    # Caller-supplied kwargs override the default "default_args" entry.
    dag_kwargs = {
        "default_args": DEFAULT_ARGS,
        **check.opt_dict_param(dag_kwargs, "dag_kwargs", key_type=str),
    }
    op_kwargs = check.opt_dict_param(op_kwargs, "op_kwargs", key_type=str)

    dag = DAG(dag_id=dag_id, description=dag_description, **dag_kwargs)
    pipeline = recon_repo.get_definition().get_pipeline(job_name)
    if mode is None:
        mode = pipeline.get_default_mode_name()

    execution_plan = create_execution_plan(pipeline, run_config, mode=mode)
    coalesced_plan = coalesce_execution_steps(execution_plan)

    tasks = {}
    for solid_handle, solid_steps in coalesced_plan.items():
        operator_parameters = DagsterOperatorParameters(
            recon_repo=recon_repo,
            pipeline_name=job_name,
            run_config=run_config,
            mode=mode,
            task_id=solid_handle,
            step_keys=[step.key for step in solid_steps],
            dag=dag,
            instance_ref=instance.get_ref(),
            op_kwargs=op_kwargs,
            pipeline_snapshot=pipeline.get_pipeline_snapshot(),
            execution_plan_snapshot=snapshot_from_execution_plan(
                execution_plan,
                pipeline_snapshot_id=pipeline.get_pipeline_snapshot_id(),
            ),
        )
        task = operator(operator_parameters)
        tasks[solid_handle] = task

        # Every step key this solid's steps depend on, produced lazily in the
        # same order as the nested step -> input -> dependency traversal.
        upstream_step_keys = (
            dependency_key
            for solid_step in solid_steps
            for step_input in solid_step.step_inputs
            for dependency_key in step_input.dependency_keys
        )
        for dependency_key in upstream_step_keys:
            upstream_handle = execution_plan.get_step_by_key(
                dependency_key
            ).solid_handle.to_string()
            # Dependencies between steps of the same solid need no Airflow edge.
            if solid_handle != upstream_handle:
                tasks[upstream_handle].set_downstream(task)

    ordered_tasks = [tasks[handle] for handle in coalesced_plan.keys()]
    return (dag, ordered_tasks)
def test_class_param():
    """Exercise ``check.class_param`` / ``check.opt_class_param`` on good and bad inputs."""

    class Bar:
        pass

    # Class objects are accepted and yield a truthy result.
    for accepted in (int, Bar):
        assert check.class_param(accepted, "foo")

    # None, a module, plain values and instances are all rejected.
    for rejected in (None, check, 234, "bar", Bar()):
        with pytest.raises(CheckError):
            check.class_param(rejected, "foo")

    class Super:
        pass

    class Sub(Super):
        pass

    class Alone:
        pass

    # With `superclass`, only subclasses of it pass.
    assert check.class_param(Sub, "foo", superclass=Super)
    for rejected in (Alone, "value"):
        with pytest.raises(CheckError):
            check.class_param(rejected, "foo", superclass=Super)

    # The optional variant additionally lets None through, returning None.
    assert check.opt_class_param(Sub, "foo", superclass=Super)
    assert check.opt_class_param(None, "foo", superclass=Super) is None
    for rejected in (Alone, "value"):
        with pytest.raises(CheckError):
            check.opt_class_param(rejected, "foo", superclass=Super)
def whitelist_for_persistence(klass):
    """Register ``klass`` in the module whitelist for both persistence and serdes.

    Args:
        klass: The class to register; must be a class object.

    Returns:
        The result of passing ``klass`` through the composed persistence and
        serdes registration decorators.
    """
    check.class_param(klass, "klass")

    # NOTE(review): application order depends on `compose`, which is defined
    # outside this view (presumably right-to-left, toolz-style) — confirm.
    register = compose(
        _whitelist_for_persistence(whitelist_map=_WHITELIST_MAP),
        _whitelist_for_serdes(whitelist_map=_WHITELIST_MAP),
    )
    return register(klass)
def whitelist_for_serdes(klass):
    """Register ``klass`` in the module-level serdes whitelist.

    Args:
        klass: The class to register; must be a class object.

    Returns:
        Whatever the registration decorator built from ``_WHITELIST_MAP``
        returns for ``klass``.
    """
    check.class_param(klass, "klass")
    register = _whitelist_for_serdes(whitelist_map=_WHITELIST_MAP)
    return register(klass)
def solid_execution_error_boundary(error_cls, msg_fn, step_context, **kwargs):
    """
    Error boundary for the steps involved in executing a solid.

    A specialization of ``user_code_error_boundary`` that additionally handles
    the control-flow exceptions ``RetryRequested`` and ``Failure`` and applies
    the step's retry policy when one is set.

    Handling, in order:

    * ``DagsterError`` propagates unchanged.
    * For any other ``Exception``: a directly raised ``RetryRequested``
      propagates; otherwise, if a retry policy is set, a ``RetryRequested``
      built from the policy is raised; otherwise ``Failure`` propagates
      unwrapped; anything else is wrapped in ``error_cls``.
    * ``DagsterExecutionInterruptedError`` / ``KeyboardInterrupt`` become a
      policy-driven ``RetryRequested`` if a retry policy is set, else propagate.

    Python log capture on ``step_context.log`` is started before the wrapped
    code runs and ended in ``finally``.

    Args:
        error_cls: Subclass of ``DagsterUserCodeExecutionError`` used to wrap
            user exceptions.
        msg_fn: Zero-argument callable producing the wrapper's message.
        step_context: The ``StepExecutionContext`` of the executing step.
        **kwargs: Extra keyword arguments forwarded to ``error_cls``.
    """
    from dagster.core.execution.context.system import StepExecutionContext

    check.callable_param(msg_fn, "msg_fn")
    check.class_param(error_cls, "error_cls", superclass=DagsterUserCodeExecutionError)
    check.inst_param(step_context, "step_context", StepExecutionContext)

    with raise_execution_interrupts():
        step_context.log.begin_python_log_capture()
        retry_policy = step_context.solid_retry_policy

        def _retry_per_policy():
            # Build the RetryRequested dictated by the retry policy; the delay
            # is computed for the attempt that follows the previous ones.
            return RetryRequested(
                max_retries=retry_policy.max_retries,
                seconds_to_wait=retry_policy.calculate_delay(
                    step_context.previous_attempt_count + 1
                ),
            )

        try:
            yield
        except DagsterError as framework_error:
            # Already part of the user-framework contract: propagate untouched.
            raise framework_error
        except Exception as user_error:  # pylint: disable=W0703
            # User code failed; computation stops and the error is reported
            # further up the stack.

            # An explicit RetryRequested escalates before the policy is consulted.
            if isinstance(user_error, RetryRequested):
                raise user_error

            if retry_policy:
                raise _retry_per_policy() from user_error

            # Failure exceptions are re-raised without wrapping.
            if isinstance(user_error, Failure):
                raise user_error

            # Everything else is wrapped with context for the caller.
            wrapped_error = error_cls(
                msg_fn(),
                user_exception=user_error,
                original_exc_info=sys.exc_info(),
                **kwargs,
            )
            raise wrapped_error from user_error
        except (DagsterExecutionInterruptedError, KeyboardInterrupt) as interrupt:
            # Interrupts also respect the retry policy when one is set.
            if not retry_policy:
                raise interrupt
            raise _retry_per_policy() from interrupt
        finally:
            step_context.log.end_python_log_capture()