def define_typed_input_schema_dict(value_config_type):
    check.inst_param(value_config_type, "value_config_type", ConfigType)
    return Selector(
        {
            "value": Field(value_config_type),
            "json": define_path_dict_field(),
            "pickle": define_path_dict_field(),
        },
    )
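# A hypothetical illustration, not from the source: config values the Selector above
# would accept, assuming standard Selector semantics (exactly one branch is chosen).
# The literal values and paths below are made up.
inline_value = {"value": 42}
from_json = {"json": {"path": "/tmp/input.json"}}
from_pickle = {"pickle": {"path": "/tmp/input.pickle"}}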
def convert_user_facing_definition_config_schema(
    potential_schema: Union["IDefinitionConfigSchema", Dict[str, Any], None],
) -> "IDefinitionConfigSchema":
    if potential_schema is None:
        return DefinitionConfigSchema(Field(ConfigAnyInstance, is_required=False))
    elif isinstance(potential_schema, IDefinitionConfigSchema):
        return potential_schema
    else:
        return DefinitionConfigSchema(convert_potential_field(potential_schema))
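# A hypothetical illustration, not from the source, of the three branches above:
# None yields a permissive Any field, an existing schema passes through unchanged,
# and a raw dict is converted field-by-field. The "timeout" key is made up.
schema_from_none = convert_user_facing_definition_config_schema(None)
schema_from_dict = convert_user_facing_definition_config_schema({"timeout": Field(int)})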
def _convert_potential_field(
    original_root: object, potential_field: object, stack: List[str]
) -> "Field":
    from .field import Field

    if potential_field is None:
        raise DagsterInvalidConfigDefinitionError(
            original_root, potential_field, stack, reason="Fields cannot be None"
        )

    if not is_potential_field(potential_field):
        raise DagsterInvalidConfigDefinitionError(original_root, potential_field, stack)

    if isinstance(potential_field, Field):
        return potential_field

    return Field(_convert_potential_type(original_root, potential_field, stack))
trips_schema = pa.DataFrameSchema(
    columns={
        "bike_id": pa.Column(int, checks=pa.Check.ge(0)),  # ge: greater than or equal to
        "start_time": pa.Column(pd.Timestamp, checks=pa.Check.ge(MIN_DATE)),
        "end_time": pa.Column(pd.Timestamp, checks=pa.Check.ge(MIN_DATE)),
    },
)

# This is a Dagster type that wraps the schema
TripsDataFrame = pandera_schema_to_dagster_type(
    trips_schema, "TripsDataFrame", "DataFrame type for e-bike trips."
)

# We've added a Dagster type for this op's output
@op(out=Out(TripsDataFrame), config_schema={"clean": Field(bool, False)})
def load_trips(context):
    df = pd.read_csv(
        "./ebike_trips.csv",
        parse_dates=["start_time", "end_time"],
    )
    if context.op_config["clean"]:
        df = df[pd.notna(df.end_time)]
    return df

# We've added a Dagster type for this op's input
@op(ins={"trips": In(TripsDataFrame)})
def generate_plot(context, trips):
    minute_lengths = [
        x.total_seconds() / 60 for x in trips.end_time - trips.start_time
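# A usage sketch, not from the source: wiring the two typed ops above into a job.
# The job name and the run_config values are illustrative assumptions.
from dagster import job

@job
def ebike_trips():
    generate_plot(load_trips())

# e.g., to run with the optional "clean" flag enabled:
# ebike_trips.execute_in_process(
#     run_config={"ops": {"load_trips": {"config": {"clean": True}}}}
# )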
def define_path_dict_field():
    return {"path": Field(ConfigStringInstance)}
        name=self.name,
        config=self.config,
        executor_creation_fn=fn,
        required_resource_keys=self.required_resource_keys,
    )
    update_wrapper(executor_def, wrapped=fn)
    return executor_def


@executor(
    name='in_process',
    config={
        'retries': get_retries_config(),
        'marker_to_close': Field(str, is_required=False),
    },
)
def in_process_executor(init_context):
    '''The default in-process executor.

    In most Dagster environments, this will be the default executor. It is available by default
    on any :py:class:`ModeDefinition` that does not provide custom executors. To select it
    explicitly, include the following top-level fragment in config:

    .. code-block:: yaml

        execution:
          in_process:

    Execution priority can be configured using the ``dagster/priority`` tag via solid metadata,
)

from dagster.config.field import Field
from dagster.utils.backcompat import experimental

from .types import DbtCliResult, DbtCliStatsResult
from .utils import execute_dbt, get_run_results

DEFAULT_DBT_EXECUTABLE = "dbt"

# The following config items correspond to flags that apply to all CLI commands:
# https://github.com/fishtown-analytics/dbt/blob/dev/marian-anderson/core/dbt/main.py#L260-L329
CLI_COMMON_FLAGS_CONFIG_SCHEMA = {
    "project-dir": Field(
        config=StringSource,
        is_required=False,
        description=(
            "Which directory to look in for the dbt_project.yml file. Default is the current "
            "working directory and its parents."
        ),
    ),
    "profiles-dir": Field(
        config=StringSource,
        is_required=False,
        description=(
            "Which directory to look in for the profiles.yml file. "
            "Default = $DBT_PROFILES_DIR or $HOME/.dbt"
        ),
    ),
    "profile": Field(
        config=StringSource,
        is_required=False,
        description="Which profile to load. Overrides setting in dbt_project.yml.",
def _core_in_process_executor_creation(retries_config, marker_to_close):
    from dagster.core.executor.in_process import InProcessExecutor

    return InProcessExecutor(
        # shouldn't need to .get() here - issue with defaults in config setup
        retries=RetryMode.from_config(retries_config),
        marker_to_close=marker_to_close,
    )


@executor(
    name="in_process",
    config_schema={
        "retries": get_retries_config(),
        "marker_to_close": Field(str, is_required=False),
    },
)
def in_process_executor(init_context):
    """The in-process executor executes all steps in a single process.

    For legacy pipelines, this will be the default executor. To select it explicitly, include the
    following top-level fragment in config:

    .. code-block:: yaml

        execution:
          in_process:

    Execution priority can be configured using the ``dagster/priority`` tag via solid/op metadata,
    where the higher the number the higher the priority. 0 is the default and both positive
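# A run-config sketch, not from the source: selecting the in-process executor and
# enabling retries. The {"enabled": {}} shape is an assumption about what
# get_retries_config() returns (a selector between "enabled" and "disabled").
run_config = {
    "execution": {
        "in_process": {
            "config": {"retries": {"enabled": {}}},
        }
    }
}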
        check.inst_param(context, "context", OutputContext)

        # the output notebook itself is stored at output_file_path
        output_notebook_path = self._get_path(context)
        mkdir_p(os.path.dirname(output_notebook_path))
        with open(output_notebook_path, self.write_mode) as dest_file_obj:
            dest_file_obj.write(obj)
        yield MetadataEntry.fspath(path=output_notebook_path, label="path")

    def load_input(self, context) -> bytes:
        check.inst_param(context, "context", InputContext)
        # pass output notebook to downstream solids as File Object
        with open(self._get_path(context.upstream_output), self.read_mode) as file_obj:
            return file_obj.read()


@io_manager(
    config_schema={
        "asset_key_prefix": Field(str, is_required=False),
        "base_dir": Field(str, is_required=False),
    },
)
def local_output_notebook_io_manager(init_context):
    """Built-in IO Manager that handles output notebooks."""
    return LocalOutputNotebookIOManager(
        base_dir=init_context.resource_config.get(
            "base_dir", init_context.instance.storage_directory()
        ),
        asset_key_prefix=init_context.resource_config.get("asset_key_prefix", []),
    )
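# A usage sketch, not from the source: binding the IO manager above to a job under
# the resource key dagstermill conventionally uses. The job body, base_dir value,
# and asset_key_prefix value are illustrative assumptions.
from dagster import job

@job(
    resource_defs={
        "output_notebook_io_manager": local_output_notebook_io_manager.configured(
            {"base_dir": "/tmp/notebooks", "asset_key_prefix": "notebooks"}
        ),
    }
)
def notebook_job():
    ...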
from dagster.config.field import Field
from dagster.utils.backcompat import experimental

from .types import DbtCliOutput
from .utils import execute_cli, parse_run_results

DEFAULT_DBT_EXECUTABLE = "dbt"

# The following config fields correspond to flags that apply to all dbt CLI commands. For details
# on dbt CLI flags, see
# https://github.com/fishtown-analytics/dbt/blob/1f8e29276e910c697588c43f08bc881379fff178/core/dbt/main.py#L260-L329
CLI_COMMON_FLAGS_CONFIG_SCHEMA = {
    "project-dir": Field(
        config=StringSource,
        is_required=False,
        description=(
            "Which directory to look in for the dbt_project.yml file. Default is the current "
            "working directory and its parents."
        ),
    ),
    "profiles-dir": Field(
        config=StringSource,
        is_required=False,
        description=(
            "Which directory to look in for the profiles.yml file. Default = $DBT_PROFILES_DIR or "
            "$HOME/.dbt"
        ),
    ),
    "profile": Field(
        config=StringSource,
        is_required=False,
        description="Which profile to load. Overrides setting in dbt_project.yml.",
          in_process:

    Execution priority can be configured using the ``dagster/priority`` tag via solid metadata,
    where the higher the number the higher the priority. 0 is the default and both positive
    and negative numbers can be used.
    '''
    from dagster.core.engine.init import InitExecutorContext

    check.inst_param(init_context, 'init_context', InitExecutorContext)
    return InProcessExecutorConfig()


@executor(
    name='multiprocess',
    config={'max_concurrent': Field(Int, is_required=False, default_value=0)},
)
def multiprocess_executor(init_context):
    '''The default multiprocess executor.

    This simple multiprocess executor is available by default on any :py:class:`ModeDefinition`
    that does not provide custom executors. To select the multiprocess executor, include a
    fragment such as the following in your config:

    .. code-block:: yaml

        execution:
          multiprocess:
            max_concurrent: 4

    The ``max_concurrent`` arg is optional and tells the execution engine how many processes may
    run concurrently. By default, or if you set ``max_concurrent`` to be 0, this is the return value of
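# A dict-form sketch, not from the source, of the YAML fragment shown in the docstring
# above. Depending on Dagster version, executor config may instead need to nest under
# an intermediate "config" key.
run_config = {
    "execution": {
        "multiprocess": {
            "max_concurrent": 4,
        }
    }
}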
    .. code-block:: yaml

        execution:
          in_process:
    '''
    from dagster.core.engine.init import InitExecutorContext

    check.inst_param(init_context, 'init_context', InitExecutorContext)
    return InProcessExecutorConfig()


@executor(
    name='multiprocess',
    config={'max_concurrent': Field(Int, is_optional=True, default_value=0)},
)
def multiprocess_executor(init_context):
    '''The default multiprocess executor.

    This simple multiprocess executor is available by default on any :py:class:`ModeDefinition`
    that does not provide custom executors. To select the multiprocess executor, include a
    fragment such as the following in your config:

    .. code-block:: yaml

        execution:
          multiprocess:
            max_concurrent: 4

    The ``max_concurrent`` arg is optional and tells the execution engine how many processes may
    run concurrently. By default, or if you set ``max_concurrent`` to be 0, this is the return value of
    executor_def = ExecutorDefinition(
        name=self.name,
        config=self.config,
        executor_creation_fn=fn,
        required_resource_keys=self.required_resource_keys,
    )
    update_wrapper(executor_def, wrapped=fn)
    return executor_def


@executor(
    name='in_process',
    config={
        'retries': get_retries_config(),
        'marker_to_close': Field(str, is_required=False),
    },
)
def in_process_executor(init_context):
    '''The default in-process executor.

    In most Dagster environments, this will be the default executor. It is available by default
    on any :py:class:`ModeDefinition` that does not provide custom executors. To select it
    explicitly, include the following top-level fragment in config:

    .. code-block:: yaml

        execution:
          in_process:

    Execution priority can be configured using the ``dagster/priority`` tag via solid metadata,
    where the higher the number the higher the priority. 0 is the default and both positive
        for flag in (CLI_COMMON_FLAGS | set(additional_flags))
        if solid_config.get(flag) is not None
    }


@solid(
    description="A solid to invoke dbt run via CLI.",
    input_defs=[InputDefinition(name="start_after", dagster_type=Nothing)],
    output_defs=[OutputDefinition(name="dbt_cli_output", dagster_type=DbtCliOutput)],
    config_schema={
        **CLI_CONFIG_SCHEMA,
        "threads": Field(
            config=Noneable(int),
            default_value=None,
            is_required=False,
            description=(
                "Specify number of threads to use while executing models. Overrides settings "
                "in profiles.yml."
            ),
        ),
        "models": Field(
            config=Noneable([str]),
            default_value=None,
            is_required=False,
            description="The dbt models to run.",
        ),
        "exclude": Field(
            config=Noneable([str]),
            default_value=None,
            is_required=False,
            description="The dbt models to exclude.",
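# A run-config sketch, not from the source: supplying CLI flags to the dbt-run solid
# above. The solid name "dbt_run" and all paths/values are illustrative assumptions;
# the solid's actual name is truncated out of the excerpt.
run_config = {
    "solids": {
        "dbt_run": {
            "config": {
                "project-dir": "/path/to/dbt_project",
                "threads": 4,
                "models": ["my_model+"],
            }
        }
    }
}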
def define_path_dict_field():
    return {'path': Field(ConfigPathInstance)}