def as_dagster_type(
    existing_type,
    name=None,
    description=None,
    input_schema=None,
    output_schema=None,
    serialization_strategy=None,
):
    check.type_param(existing_type, 'existing_type')
    check.opt_str_param(name, 'name')
    check.opt_str_param(description, 'description')
    check.opt_inst_param(input_schema, 'input_schema', InputSchema)
    check.opt_inst_param(output_schema, 'output_schema', OutputSchema)
    check.opt_inst_param(serialization_strategy, 'serialization_strategy', SerializationStrategy)

    if serialization_strategy is None:
        serialization_strategy = PickleSerializationStrategy()

    name = existing_type.__name__ if name is None else name

    return _decorate_as_dagster_type(
        existing_type,
        key=name,
        name=name,
        description=description,
        input_schema=input_schema,
        output_schema=output_schema,
        serialization_strategy=serialization_strategy,
    )
def as_dagster_type(
    existing_type,
    name=None,
    description=None,
    input_schema=None,
    output_schema=None,
    marshalling_strategy=None,
):
    check.type_param(existing_type, 'existing_type')
    check.opt_str_param(name, 'name')
    check.opt_str_param(description, 'description')
    check.opt_inst_param(input_schema, 'input_schema', InputSchema)
    check.opt_inst_param(output_schema, 'output_schema', OutputSchema)
    check.opt_inst_param(marshalling_strategy, 'marshalling_strategy', MarshallingStrategy)

    if marshalling_strategy is None:
        marshalling_strategy = PickleMarshallingStrategy()

    return _decorate_as_dagster_type(
        existing_type,
        name=existing_type.__name__ if name is None else name,
        description=description,
        input_schema=input_schema,
        output_schema=output_schema,
        marshalling_strategy=marshalling_strategy,
    )
def _with_args(bare_cls):
    check.type_param(bare_cls, 'bare_cls')
    new_name = name if name else bare_cls.__name__
    make_python_type_usable_as_dagster_type(
        bare_cls,
        PythonObjectDagsterType(
            name=new_name,
            description=description,
            python_type=bare_cls,
            loader=canonicalize_backcompat_args(
                loader,
                'loader',
                input_hydration_config,
                'input_hydration_config',
                '0.10.0',
            ),
            materializer=canonicalize_backcompat_args(
                materializer,
                'materializer',
                output_materialization_config,
                'output_materialization_config',
                '0.10.0',
            ),
            serialization_strategy=serialization_strategy,
            auto_plugins=auto_plugins,
        ),
    )
    return bare_cls
def _with_args(bare_cls):
    check.type_param(bare_cls, 'bare_cls')
    new_name = name if name else bare_cls.__name__
    return _decorate_as_dagster_type(
        bare_cls=bare_cls, key=new_name, name=new_name, description=description
    )
def as_dagster_type(
    existing_type,
    name=None,
    description=None,
    input_schema=None,
    output_schema=None,
    serialization_strategy=None,
    storage_plugins=None,
):
    '''Takes a python cls and creates a type for it in the Dagster domain.

    Args:
        existing_type (cls): The python type you want to project into the Dagster type system.
        name (Optional[str]): Name of the new Dagster type; defaults to the class's ``__name__``.
        description (Optional[str]): A user-readable description of the type.
        input_schema (Optional[InputSchema]): An instance of a class that inherits from
            :py:class:`InputSchema` that can map config data to a value of this type.
        output_schema (Optional[OutputSchema]): An instance of a class that inherits from
            :py:class:`OutputSchema` that can map config data to persisting values of this type.
        serialization_strategy (Optional[SerializationStrategy]): The default behavior for how to
            serialize this value for persisting between execution steps.
        storage_plugins (Optional[Dict[RunStorageMode, TypeStoragePlugin]]): Storage-type-specific
            overrides for the serialization strategy. This allows for storage-specific
            optimizations such as efficient distributed storage on S3.
    '''
    check.type_param(existing_type, 'existing_type')
    check.opt_str_param(name, 'name')
    check.opt_str_param(description, 'description')
    check.opt_inst_param(input_schema, 'input_schema', InputSchema)
    check.opt_inst_param(output_schema, 'output_schema', OutputSchema)
    check.opt_inst_param(serialization_strategy, 'serialization_strategy', SerializationStrategy)
    storage_plugins = check.opt_dict_param(storage_plugins, 'storage_plugins')

    if serialization_strategy is None:
        serialization_strategy = PickleSerializationStrategy()

    name = existing_type.__name__ if name is None else name

    return _decorate_as_dagster_type(
        existing_type,
        key=name,
        name=name,
        description=description,
        input_schema=input_schema,
        output_schema=output_schema,
        serialization_strategy=serialization_strategy,
        storage_plugins=storage_plugins,
    )
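A minimal usage sketch of the signature above, leaving every optional argument at its default (pickle-based serialization, no schemas or storage plugins); EvenType is a hypothetical class used only for illustration.

# Hypothetical wrapper class.
class EvenType(object):
    def __init__(self, num):
        assert num % 2 == 0
        self.num = num

# Projects EvenType into the Dagster type system; with name=None, the Dagster
# type's name defaults to EvenType.__name__.
EvenType = as_dagster_type(EvenType, description='A wrapper around an even number.')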
def resolve_config_cls_arg(config_cls):
    if BuiltinEnum.contains(config_cls):
        return ConfigType.from_builtin_enum(config_cls)
    elif isinstance(config_cls, WrappingListType):
        return List(resolve_config_cls_arg(config_cls.inner_type))
    elif isinstance(config_cls, WrappingNullableType):
        return Nullable(resolve_config_cls_arg(config_cls.inner_type))
    else:
        check.type_param(config_cls, 'config_cls')
        check.param_invariant(issubclass(config_cls, ConfigType), 'config_cls')
        return config_cls.inst()
def _with_args(bare_cls):
    check.type_param(bare_cls, 'bare_cls')
    new_name = name if name else bare_cls.__name__
    return _decorate_as_dagster_type(
        bare_cls=bare_cls,
        name=new_name,
        description=description,
        input_hydration_config=input_hydration_config,
        output_materialization_config=output_materialization_config,
        serialization_strategy=serialization_strategy,
        auto_plugins=auto_plugins,
    )
def _with_args(bare_cls):
    check.type_param(bare_cls, 'bare_cls')
    new_name = name if name else bare_cls.__name__
    return _decorate_as_dagster_type(
        bare_cls=bare_cls,
        key=new_name,
        name=new_name,
        description=description,
        input_schema=input_schema,
        output_schema=output_schema,
        serialization_strategy=serialization_strategy,
        storage_plugins=storage_plugins,
    )
def _with_args(bare_cls): check.type_param(bare_cls, "bare_cls") new_name = name if name else bare_cls.__name__ make_python_type_usable_as_dagster_type( bare_cls, PythonObjectDagsterType( name=new_name, description=description, python_type=bare_cls, loader=loader, materializer=materializer, ), ) return bare_cls
def __init__(
    self,
    python_type: t.Union[t.Type, t.Tuple[t.Type, ...]],
    key: t.Optional[str] = None,
    name: t.Optional[str] = None,
    **kwargs,
):
    if isinstance(python_type, tuple):
        self.python_type = check.tuple_param(
            python_type, "python_type", of_shape=tuple(type for item in python_type)
        )
        self.type_str = "Union[{}]".format(
            ", ".join(python_type.__name__ for python_type in python_type)
        )
        typing_type = t.Union[python_type]  # type: ignore
    else:
        self.python_type = check.type_param(python_type, "python_type")  # type: ignore
        self.type_str = cast(str, python_type.__name__)
        typing_type = self.python_type  # type: ignore
    name = check.opt_str_param(name, "name", self.type_str)
    key = check.opt_str_param(key, "key", name)
    super(PythonObjectDagsterType, self).__init__(
        key=key,
        name=name,
        type_check_fn=isinstance_type_check_fn(python_type, name, self.type_str),
        typing_type=typing_type,
        **kwargs,
    )
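A short sketch of the tuple form accepted by the constructor above: passing a tuple of Python types produces an isinstance check against any of them and a generated Union[...] name.

# Accepts either an int or a float at runtime; type_str becomes
# "Union[int, float]" and, since no name is passed, it is also used as the
# type's name and key.
IntOrFloat = PythonObjectDagsterType(python_type=(int, float), description='An int or a float.')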
def __init__(self, generator, object_cls, require_object=True): self.generator = check.generator(generator) self.object_cls = check.type_param(object_cls, "object_cls") self.require_object = check.bool_param(require_object, "require_object") self.object = None self.did_setup = False self.did_teardown = False
def __init__(self, generator, object_cls, require_object=True):
    self.generator = check.generator(generator)
    self.object_cls = check.type_param(object_cls, 'object_cls')
    self.require_object = check.bool_param(require_object, 'require_object')
    self.object = None
    self.has_setup = False
def __init__(self, python_type, key=None, name=None, **kwargs):
    self.python_type = check.type_param(python_type, 'python_type')
    name = check.opt_str_param(name, 'name', python_type.__name__)
    key = check.opt_str_param(key, 'key', name)
    super(PythonObjectDagsterType, self).__init__(
        key=key, name=name, type_check_fn=self.type_check_method, **kwargs
    )
def __init__(
    self,
    name,
    python_type,
    description=None,
):
    super(PythonObjectType, self).__init__(name, description)
    self.python_type = check.type_param(python_type, 'python_type')
def _with_args(bare_cls):
    check.type_param(bare_cls, 'bare_cls')
    new_name = name if name else bare_cls.__name__
    make_python_type_usable_as_dagster_type(
        bare_cls,
        PythonObjectDagsterType(
            name=new_name,
            description=description,
            python_type=bare_cls,
            input_hydration_config=input_hydration_config,
            output_materialization_config=output_materialization_config,
            serialization_strategy=serialization_strategy,
            auto_plugins=auto_plugins,
        ),
    )
    return bare_cls
def __init__(self, python_type, key=None, name=None, metadata_fn=None, **kwargs):
    name = check.opt_str_param(name, 'name', type(self).__name__)
    key = check.opt_str_param(key, 'key', name)
    super(PythonObjectType, self).__init__(key=key, name=name, **kwargs)
    self.python_type = check.type_param(python_type, 'python_type')
    self.metadata_fn = check.opt_callable_param(metadata_fn, 'metadata_fn')
def __init__(self, python_type=None, key=None, name=None, type_check=None, **kwargs):
    name = check.opt_str_param(name, 'name', type(self).__name__)
    key = check.opt_str_param(key, 'key', name)
    super(PythonObjectType, self).__init__(key=key, name=name, **kwargs)
    self.python_type = check.type_param(python_type, 'python_type')
    self._user_type_check = check.opt_callable_param(type_check, 'type_check')
def unary_api_cli_command(name, help_str, input_cls, output_cls):
    '''
    Use this to decorate synchronous api cli commands that take
    one object and return one object.
    '''
    check.str_param(name, 'name')
    check.str_param(help_str, 'help_str')
    check.type_param(input_cls, 'input_cls')
    check.inst_param(output_cls, 'output_cls', (tuple, check.type_types))

    def wrap(fn):
        @click.command(name=name, help=help_str)
        @click.argument('input_file', type=click.Path())
        @click.argument('output_file', type=click.Path())
        def command(input_file, output_file):
            args = check.inst(read_unary_input(input_file), input_cls)
            output = check.inst(fn(args), output_cls)
            ipc_write_unary_response(output_file, output)

        return command

    return wrap
def unary_api_cli_command(name, help_str, input_cls, output_cls): """ Use this to decorate synchronous api cli commands that take one object and return one object. """ check.str_param(name, "name") check.str_param(help_str, "help_str") check.type_param(input_cls, "input_cls") check.inst_param(output_cls, "output_cls", (tuple, check.type_types)) def wrap(fn): @click.command(name=name, help=help_str) @click.argument("input_file", type=click.Path()) @click.argument("output_file", type=click.Path()) def command(input_file, output_file): args = check.inst(read_unary_input(input_file), input_cls) output = check.inst(fn(args), output_cls) ipc_write_unary_response(output_file, output) return command return wrap
def __init__( self, generator: Generator[Union["DagsterEvent", GeneratedContext], None, None], object_cls: Type[GeneratedContext], require_object: Optional[bool] = True, ): self.generator = check.generator(generator) self.object_cls: Type[GeneratedContext] = check.type_param( object_cls, "object_cls") self.require_object = check.bool_param(require_object, "require_object") self.object: Optional[GeneratedContext] = None self.did_setup = False self.did_teardown = False
def __init__(self, python_type, key=None, name=None, **kwargs):
    if isinstance(python_type, tuple):
        self.python_type = check.tuple_param(
            python_type, "python_type", of_type=tuple(check.type_types for item in python_type)
        )
        self.type_str = "Union[{}]".format(
            ", ".join(python_type.__name__ for python_type in python_type)
        )
    else:
        self.python_type = check.type_param(python_type, "python_type")
        self.type_str = python_type.__name__
    name = check.opt_str_param(name, "name", self.type_str)
    key = check.opt_str_param(key, "key", name)
    super(PythonObjectDagsterType, self).__init__(
        key=key, name=name, type_check_fn=self.type_check_method, **kwargs
    )
def as_dagster_type(
    existing_type,
    name=None,
    description=None,
    input_hydration_config=None,
    output_materialization_config=None,
    serialization_strategy=None,
    auto_plugins=None,
    typecheck_metadata_fn=None,
):
    '''
    See documentation for :py:func:`define_python_dagster_type` for parameters.

    Takes a python cls and creates a type for it in the Dagster domain.

    Frequently you want to import a data processing library and use its types
    directly in solid definitions. To support this, dagster provides this facility
    that allows one to annotate *existing* classes as dagster types.

    Note: It does this by setting a magical property (currently "__runtime_type")
    on the class itself pointing to the dagster type associated with the python
    class, e.g.

        from existing_library import FancyDataType as ExistingFancyDataType

        FancyDataType = as_dagster_type(existing_type=ExistingFancyDataType, name='FancyDataType')

    While one *could* use the existing type directly from the original library, we
    would recommend using the object returned by as_dagster_type to avoid
    import-order-based bugs. See dagster_pandas for an example of how to do this.
    '''
    return _decorate_as_dagster_type(
        bare_cls=check.type_param(existing_type, 'existing_type'),
        name=check.opt_str_param(name, 'name', existing_type.__name__),
        description=description,
        input_hydration_config=input_hydration_config,
        output_materialization_config=output_materialization_config,
        serialization_strategy=serialization_strategy,
        auto_plugins=auto_plugins,
        typecheck_metadata_fn=typecheck_metadata_fn,
    )
def as_dagster_type(
    existing_type,
    name=None,
    description=None,
    input_hydration_config=None,
    output_materialization_config=None,
    serialization_strategy=None,
    auto_plugins=None,
    typecheck_metadata_fn=None,
    type_check=None,
):
    '''
    See documentation for :py:func:`define_python_dagster_type` for parameters.

    Takes a python cls and creates a type for it in the Dagster domain.

    Frequently you want to import a data processing library and use its types
    directly in solid definitions. To support this, dagster provides this facility
    that allows one to annotate *existing* classes as dagster types.

        from existing_library import FancyDataType as ExistingFancyDataType

        FancyDataType = as_dagster_type(existing_type=ExistingFancyDataType, name='FancyDataType')

    While one *could* use the existing type directly from the original library, we
    would recommend using the object returned by as_dagster_type to avoid
    import-order-based bugs. See dagster_pandas for an example of how to do this.

    {args_docstring}
    '''
    return _decorate_as_dagster_type(
        bare_cls=check.type_param(existing_type, 'existing_type'),
        name=check.opt_str_param(name, 'name', existing_type.__name__),
        description=description,
        input_hydration_config=input_hydration_config,
        output_materialization_config=output_materialization_config,
        serialization_strategy=serialization_strategy,
        auto_plugins=auto_plugins,
        typecheck_metadata_fn=typecheck_metadata_fn,
        type_check=type_check,
    )


# Calling .format() on the string literal inside the function body would discard
# the result and leave the function without a docstring, so the placeholder is
# substituted on __doc__ after the function is defined.
as_dagster_type.__doc__ = as_dagster_type.__doc__.format(
    args_docstring=PYTHON_DAGSTER_TYPE_ARGS_DOCSTRING
)
def as_dagster_type(
    existing_type,
    name=None,
    description=None,
    input_hydration_config=None,
    output_materialization_config=None,
    serialization_strategy=None,
    auto_plugins=None,
    typecheck_metadata_fn=None,
    type_check=None,
):
    '''Create a Dagster type corresponding to an existing Python type.

    This function allows you to explicitly wrap existing types in a new Dagster type, and is
    especially useful when using library types (e.g., from a data processing library) that might
    require additional functionality, such as input config, to be useful in your pipelines.

    Args:
        existing_type (cls): The python type to wrap as a Dagster type.
        name (Optional[str]): Name of the new Dagster type. If ``None``, the name (``__name__``)
            of the ``existing_type`` will be used.
        description (Optional[str]): A user-readable description of the type.
        input_hydration_config (Optional[InputHydrationConfig]): An instance of a class that
            inherits from :py:class:`InputHydrationConfig` and can map config data to a value of
            this type. Specify this argument if you will need to shim values of this type using
            the config machinery. As a rule, you should use the
            :py:func:`@input_hydration_config <dagster.InputHydrationConfig>` decorator to
            construct these arguments.
        output_materialization_config (Optional[OutputMaterializationConfig]): An instance of a
            class that inherits from :py:class:`OutputMaterializationConfig` and can persist
            values of this type. As a rule, you should use the
            :py:func:`@output_materialization_config <dagster.output_materialization_config>`
            decorator to construct these arguments.
        serialization_strategy (Optional[SerializationStrategy]): An instance of a class that
            inherits from :py:class:`SerializationStrategy`. The default strategy for serializing
            this value when automatically persisting it between execution steps. You should set
            this value if the ordinary serialization machinery (e.g., pickle) will not be adequate
            for this type.
        auto_plugins (Optional[List[TypeStoragePlugin]]): If types must be serialized differently
            depending on the storage being used for intermediates, they should specify this
            argument. In these cases the serialization_strategy argument is not sufficient because
            serialization requires specialized API calls, e.g. to call an S3 API directly instead
            of using a generic file object. See ``dagster_pyspark.DataFrame`` for an example.
        typecheck_metadata_fn (Optional[Callable[[Any], TypeCheck]]): If specified, this function
            will be called to emit metadata when you successfully check a type. The
            typecheck_metadata_fn will be passed the value being type-checked and should return an
            instance of :py:class:`TypeCheck`. See ``dagster_pandas.DataFrame`` for an example.
        type_check (Optional[Callable[[Any], Any]]): If specified, this function will be called in
            place of the default isinstance type check. This function should raise
            :py:class:`Failure` if the type check fails, and otherwise pass. Its return value will
            be ignored.

    **Example**:

    .. code-block:: python

        # Partial example drawn from dagster_pandas.DataFrame
        DataFrame = as_dagster_type(
            pd.DataFrame,
            name='PandasDataFrame',
            description=\'\'\'Two-dimensional size-mutable, potentially heterogeneous
            tabular data structure with labeled axes (rows and columns).
            See http://pandas.pydata.org/\'\'\',
            input_hydration_config=dataframe_input_schema,
            output_materialization_config=dataframe_output_schema,
            typecheck_metadata_fn=lambda value: TypeCheck(
                metadata_entries=[
                    EventMetadataEntry.text(
                        str(len(value)), 'row_count', 'Number of rows in DataFrame'
                    ),
                    # string cast columns since they may be things like datetime
                    EventMetadataEntry.json({'columns': list(map(str, value.columns))}, 'metadata'),
                ]
            ),
        )

    See, e.g., ``dagster_pandas.DataFrame`` and ``dagster_pyspark.SparkRDD`` for fuller worked
    examples.
    '''
    return _decorate_as_dagster_type(
        bare_cls=check.type_param(existing_type, 'existing_type'),
        name=check.opt_str_param(name, 'name', existing_type.__name__),
        description=description,
        input_hydration_config=input_hydration_config,
        output_materialization_config=output_materialization_config,
        serialization_strategy=serialization_strategy,
        auto_plugins=auto_plugins,
        typecheck_metadata_fn=typecheck_metadata_fn,
        type_check=type_check,
    )
def test_type_param():
    class Bar(object):
        pass

    assert check.type_param(int, 'foo')
    assert check.type_param(Bar, 'foo')

    with pytest.raises(CheckError):
        check.type_param(None, 'foo')

    with pytest.raises(CheckError):
        check.type_param(check, 'foo')

    with pytest.raises(CheckError):
        check.type_param(234, 'foo')

    with pytest.raises(CheckError):
        check.type_param('bar', 'foo')

    with pytest.raises(CheckError):
        check.type_param(Bar(), 'foo')
def test_type_param(): class Bar(object): pass assert check.type_param(int, "foo") assert check.type_param(Bar, "foo") with pytest.raises(CheckError): check.type_param(None, "foo") with pytest.raises(CheckError): check.type_param(check, "foo") with pytest.raises(CheckError): check.type_param(234, "foo") with pytest.raises(CheckError): check.type_param("bar", "foo") with pytest.raises(CheckError): check.type_param(Bar(), "foo")
def make_klass_runtime_type_decorated_klass(klass, runtime_type):
    check.type_param(klass, 'klass')
    check.inst_param(runtime_type, 'runtime_type', RuntimeType)
    setattr(klass, MAGIC_RUNTIME_TYPE_NAME, runtime_type)
def get_runtime_type_on_decorated_klass(klass):
    check.type_param(klass, 'klass')
    return getattr(klass, MAGIC_RUNTIME_TYPE_NAME)
def is_runtime_type_decorated_klass(klass):
    check.type_param(klass, 'klass')
    return hasattr(klass, MAGIC_RUNTIME_TYPE_NAME)
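Taken together, the three helpers above implement the "magical property" mentioned in the as_dagster_type docstring. A minimal sketch of the round trip; my_runtime_type is a hypothetical, pre-built RuntimeType instance assumed here rather than constructed.

class Plain(object):
    pass

# An undecorated class carries no magic attribute.
assert not is_runtime_type_decorated_klass(Plain)

# my_runtime_type stands in for a real RuntimeType instance; after stamping,
# the class round-trips through the other two helpers.
make_klass_runtime_type_decorated_klass(Plain, my_runtime_type)
assert is_runtime_type_decorated_klass(Plain)
assert get_runtime_type_on_decorated_klass(Plain) is my_runtime_type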
def define_python_dagster_type(
    python_type,
    name=None,
    description=None,
    input_hydration_config=None,
    output_materialization_config=None,
    serialization_strategy=None,
    auto_plugins=None,
    type_check=None,
):
    '''Core machinery for defining a Dagster type corresponding to an existing python type.

    Users should generally use the :py:func:`@dagster_type` decorator or
    :py:func:`as_dagster_type`, both of which defer to this function.

    Args:
        python_type (cls): The python type to wrap as a Dagster type.
        name (Optional[str]): Name of the new Dagster type. If ``None``, the name (``__name__``)
            of the ``python_type`` will be used.
        description (Optional[str]): A user-readable description of the type.
        input_hydration_config (Optional[InputHydrationConfig]): An instance of a class
            constructed using the
            :py:func:`@input_hydration_config <dagster.InputHydrationConfig>` decorator that can
            map config data to a value of this type.
        output_materialization_config (Optional[OutputMaterializationConfig]): An instance of a
            class constructed using the
            :py:func:`@output_materialization_config <dagster.output_materialization_config>`
            decorator that can persist values of this type.
        serialization_strategy (Optional[SerializationStrategy]): An instance of a class that
            inherits from :py:class:`SerializationStrategy`. The default strategy for serializing
            this value when automatically persisting it between execution steps. You should set
            this value if the ordinary serialization machinery (e.g., pickle) will not be adequate
            for this type.
        auto_plugins (Optional[List[TypeStoragePlugin]]): If types must be serialized differently
            depending on the storage being used for intermediates, they should specify this
            argument. In these cases the serialization_strategy argument is not sufficient because
            serialization requires specialized API calls, e.g. to call an S3 API directly instead
            of using a generic file object. See ``dagster_pyspark.DataFrame`` for an example.
        type_check (Optional[Callable[[Any], Union[bool, TypeCheck]]]): If specified, this
            function will be called in place of the default isinstance type check. This function
            should return ``True`` if the type check succeeds, ``False`` if it fails, or, if
            additional metadata should be emitted along with the type check success or failure,
            an instance of :py:class:`TypeCheck` with the ``success`` field set appropriately.
    '''
    check.type_param(python_type, 'python_type')
    name = check.opt_str_param(name, 'name', python_type.__name__)
    check.opt_str_param(description, 'description')
    check.opt_inst_param(input_hydration_config, 'input_hydration_config', InputHydrationConfig)
    check.opt_inst_param(
        output_materialization_config,
        'output_materialization_config',
        OutputMaterializationConfig,
    )
    serialization_strategy = check.opt_inst_param(
        serialization_strategy,
        'serialization_strategy',
        SerializationStrategy,
        default=PickleSerializationStrategy(),
    )
    auto_plugins = check.opt_list_param(auto_plugins, 'auto_plugins', of_type=type)
    check.param_invariant(
        all(issubclass(auto_plugin_type, TypeStoragePlugin) for auto_plugin_type in auto_plugins),
        'auto_plugins',
    )
    check.opt_callable_param(type_check, 'type_check')

    return PythonObjectType(
        python_type=python_type,
        name=name,
        description=description,
        input_hydration_config=input_hydration_config,
        output_materialization_config=output_materialization_config,
        serialization_strategy=serialization_strategy,
        auto_plugins=auto_plugins,
        type_check=type_check,
    )
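A minimal usage sketch of the function above, relying on its defaults (isinstance type check, pickle serialization, no plugins); StringHolder is a hypothetical class used only for illustration.

# Hypothetical value class.
class StringHolder(object):
    def __init__(self, value):
        self.value = value

StringHolderType = define_python_dagster_type(
    python_type=StringHolder,
    name='StringHolder',
    description='Holds a single string value.',
)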