def define_typed_input_schema_dict(value_config_type):
    check.inst_param(value_config_type, "value_config_type", ConfigType)
    return Selector(
        {
            "value": Field(value_config_type),
            "json": define_path_dict_field(),
            "pickle": define_path_dict_field(),
        }, )
Example #2
def define_typed_input_schema_dict(value_config_type):
    check.inst_param(value_config_type, 'value_config_type', ConfigType)
    return Selector(
        {
            'value': Field(value_config_type),
            'json': define_path_dict_field(),
            'pickle': define_path_dict_field(),
        }, )
Example #3
def convert_user_facing_definition_config_schema(
    potential_schema: Union["IDefinitionConfigSchema", Dict[str, Any], None],
) -> "IDefinitionConfigSchema":
    if potential_schema is None:
        return DefinitionConfigSchema(
            Field(ConfigAnyInstance, is_required=False))
    elif isinstance(potential_schema, IDefinitionConfigSchema):
        return potential_schema
    else:
        return DefinitionConfigSchema(
            convert_potential_field(potential_schema))
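The converter above is what lets definitions accept either a plain dict or an explicit schema as config_schema. A minimal sketch, assuming only the public dagster API, of the two user-facing spellings that normalize to the same schema (the op names are illustrative):

from dagster import Field, op

@op(config_schema={"limit": int})  # plain dict: converted via convert_potential_field
def op_with_dict_schema(context):
    return context.op_config["limit"]

@op(config_schema={"limit": Field(int)})  # explicit Field entries are preserved
def op_with_field_schema(context):
    return context.op_config["limit"]

# Passing config_schema=None falls back to an optional Any field, per the first branch above.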
Example #4
def _convert_potential_field(
    original_root: object, potential_field: object, stack: List[str]
) -> "Field":
    from .field import Field

    if potential_field is None:
        raise DagsterInvalidConfigDefinitionError(
            original_root, potential_field, stack, reason="Fields cannot be None"
        )

    if not is_potential_field(potential_field):
        raise DagsterInvalidConfigDefinitionError(original_root, potential_field, stack)

    if isinstance(potential_field, Field):
        return potential_field

    return Field(_convert_potential_type(original_root, potential_field, stack))
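A quick sketch of what the helper above does with each input shape; the Field construction below uses the public dagster API and the comments paraphrase the branches:

from dagster import Field

existing = Field(int, is_required=False)
# isinstance(potential_field, Field) -> returned unchanged

bare_type = int
# not a Field but still a potential field -> wrapped as Field(_convert_potential_type(...))

nothing = None
# rejected with DagsterInvalidConfigDefinitionError: "Fields cannot be None"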
Example #5
trips_schema = pa.DataFrameSchema(
    columns={
        "bike_id": pa.Column(int, checks=pa.Check.ge(0)),  # ge: greater than or equal to
        "start_time": pa.Column(pd.Timestamp, checks=pa.Check.ge(MIN_DATE)),
        "end_time": pa.Column(pd.Timestamp, checks=pa.Check.ge(MIN_DATE)),
    },
)

# This is a Dagster type that wraps the schema
TripsDataFrame = pandera_schema_to_dagster_type(
    trips_schema, "TripsDataFrame", "DataFrame type for e-bike trips.")


# We've added a Dagster type for this op's output
@op(out=Out(TripsDataFrame), config_schema={"clean": Field(bool, False)})
def load_trips(context):
    df = pd.read_csv(
        "./ebike_trips.csv",
        parse_dates=["start_time", "end_time"],
    )
    if context.op_config["clean"]:
        df = df[pd.notna(df.end_time)]
    return df


# We've added a Dagster type for this op's input
@op(ins={"trips": In(TripsDataFrame)})
def generate_plot(context, trips):
    minute_lengths = [
        x.total_seconds() / 60 for x in trips.end_time - trips.start_time
    ]
Example #6
def define_path_dict_field():
    return {"path": Field(ConfigStringInstance)}
Example #7
            name=self.name,
            config=self.config,
            executor_creation_fn=fn,
            required_resource_keys=self.required_resource_keys,
        )

        update_wrapper(executor_def, wrapped=fn)

        return executor_def


@executor(
    name='in_process',
    config={
        'retries': get_retries_config(),
        'marker_to_close': Field(str, is_required=False),
    },
)
def in_process_executor(init_context):
    '''The default in-process executor.

    In most Dagster environments, this will be the default executor. It is available by default on
    any :py:class:`ModeDefinition` that does not provide custom executors. To select it explicitly,
    include the following top-level fragment in config:

    .. code-block:: yaml

        execution:
          in_process:

    Execution priority can be configured using the ``dagster/priority`` tag via solid metadata,
Example #8
)
from dagster.config.field import Field
from dagster.utils.backcompat import experimental

from .types import DbtCliResult, DbtCliStatsResult
from .utils import execute_dbt, get_run_results

DEFAULT_DBT_EXECUTABLE = "dbt"

# the following config items correspond to flags that apply to all CLI commands
# https://github.com/fishtown-analytics/dbt/blob/dev/marian-anderson/core/dbt/main.py#L260-L329
CLI_COMMON_FLAGS_CONFIG_SCHEMA = {
    "project-dir": Field(
        config=StringSource,
        is_required=False,
        description=(
            "Which directory to look in for the dbt_project.yml file. Default is the current "
            "working directory and its parents."
        ),
    ),
    "profiles-dir": Field(
        config=StringSource,
        is_required=False,
        description=(
            "Which directory to look in for the profiles.yml file. Default = $DBT_PROFILES_DIR "
            "or $HOME/.dbt"
        ),
    ),
    "profile": Field(
        config=StringSource,
        is_required=False,
        description="Which profile to load. Overrides setting in dbt_project.yml.",
Example #9
def _core_in_process_executor_creation(retries_config, marker_to_close):
    from dagster.core.executor.in_process import InProcessExecutor

    return InProcessExecutor(
        # shouldn't need to .get() here - issue with defaults in config setup
        retries=RetryMode.from_config(retries_config),
        marker_to_close=marker_to_close,
    )


@executor(
    name="in_process",
    config_schema={
        "retries": get_retries_config(),
        "marker_to_close": Field(str, is_required=False),
    },
)
def in_process_executor(init_context):
    """The in-process executor executes all steps in a single process.

    For legacy pipelines, this will be the default executor. To select it explicitly,
    include the following top-level fragment in config:

    .. code-block:: yaml

        execution:
          in_process:

    Execution priority can be configured using the ``dagster/priority`` tag via solid/op metadata,
    where the higher the number the higher the priority. 0 is the default and both positive
Example #10
        check.inst_param(context, "context", OutputContext)

        # the output notebook itself is stored at output_file_path
        output_notebook_path = self._get_path(context)
        mkdir_p(os.path.dirname(output_notebook_path))
        with open(output_notebook_path, self.write_mode) as dest_file_obj:
            dest_file_obj.write(obj)
        yield MetadataEntry.fspath(path=output_notebook_path, label="path")

    def load_input(self, context) -> bytes:
        check.inst_param(context, "context", InputContext)
        # pass output notebook to downstream solids as File Object
        with open(self._get_path(context.upstream_output),
                  self.read_mode) as file_obj:
            return file_obj.read()


@io_manager(
    config_schema={
        "asset_key_prefix": Field(str, is_required=False),
        "base_dir": Field(str, is_required=False),
    }, )
def local_output_notebook_io_manager(init_context):
    """Built-in IO Manager that handles output notebooks."""
    return LocalOutputNotebookIOManager(
        base_dir=init_context.resource_config.get(
            "base_dir", init_context.instance.storage_directory()),
        asset_key_prefix=init_context.resource_config.get(
            "asset_key_prefix", []),
    )
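A sketch of how the io_manager above might be configured at run time; the resource key and the paths below are illustrative:

run_config = {
    "resources": {
        "output_notebook_io_manager": {
            "config": {
                "base_dir": "/tmp/notebooks",  # where executed notebooks are written
                "asset_key_prefix": "nb",      # optional prefix for the notebook asset key
            }
        }
    }
}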
Example #11
from dagster.config.field import Field
from dagster.utils.backcompat import experimental

from .types import DbtCliOutput
from .utils import execute_cli, parse_run_results

DEFAULT_DBT_EXECUTABLE = "dbt"

# The following config fields correspond to flags that apply to all dbt CLI commands. For details
# on dbt CLI flags, see
# https://github.com/fishtown-analytics/dbt/blob/1f8e29276e910c697588c43f08bc881379fff178/core/dbt/main.py#L260-L329
CLI_COMMON_FLAGS_CONFIG_SCHEMA = {
    "project-dir": Field(
        config=StringSource,
        is_required=False,
        description=(
            "Which directory to look in for the dbt_project.yml file. Default is the current "
            "working directory and its parents."
        ),
    ),
    "profiles-dir": Field(
        config=StringSource,
        is_required=False,
        description=(
            "Which directory to look in for the profiles.yml file. Default = $DBT_PROFILES_DIR or "
            "$HOME/.dbt"
        ),
    ),
    "profile": Field(
        config=StringSource,
        is_required=False,
        description="Which profile to load. Overrides setting in dbt_project.yml.",
Example #12
          in_process:

    Execution priority can be configured using the ``dagster/priority`` tag via solid metadata,
    where the higher the number the higher the priority. 0 is the default and both positive
    and negative numbers can be used.
    '''
    from dagster.core.engine.init import InitExecutorContext

    check.inst_param(init_context, 'init_context', InitExecutorContext)

    return InProcessExecutorConfig()


@executor(
    name='multiprocess',
    config={'max_concurrent': Field(Int, is_required=False, default_value=0)})
def multiprocess_executor(init_context):
    '''The default multiprocess executor.

    This simple multiprocess executor is available by default on any :py:class:`ModeDefinition`
    that does not provide custom executors. To select the multiprocess executor, include a fragment
    such as the following in your config:

    .. code-block:: yaml

        execution:
          multiprocess:
            max_concurrent: 4

    The ``max_concurrent`` arg is optional and tells the execution engine how many processes may run
    concurrently. By default, or if you set ``max_concurrent`` to be 0, this is the return value of
Example #13
    .. code-block:: yaml

        execution:
          in_process:

    '''
    from dagster.core.engine.init import InitExecutorContext

    check.inst_param(init_context, 'init_context', InitExecutorContext)

    return InProcessExecutorConfig()


@executor(
    name='multiprocess',
    config={'max_concurrent': Field(Int, is_optional=True, default_value=0)})
def multiprocess_executor(init_context):
    '''The default multiprocess executor.

    This simple multiprocess executor is available by default on any :py:class:`ModeDefinition`
    that does not provide custom executors. To select the multiprocess executor, include a fragment
    such as the following in your config:

    .. code-block:: yaml

        execution:
          multiprocess:
            max_concurrent: 4

    The ``max_concurrent`` arg is optional and tells the execution engine how many processes may run
    concurrently. By default, or if you set ``max_concurrent`` to be 0, this is the return value of
Example #14
        executor_def = ExecutorDefinition(
            name=self.name,
            config=self.config,
            executor_creation_fn=fn,
            required_resource_keys=self.required_resource_keys,
        )

        update_wrapper(executor_def, wrapped=fn)

        return executor_def


@executor(
    name='in_process',
    config={'retries': get_retries_config(), 'marker_to_close': Field(str, is_required=False),},
)
def in_process_executor(init_context):
    '''The default in-process executor.

    In most Dagster environments, this will be the default executor. It is available by default on
    any :py:class:`ModeDefinition` that does not provide custom executors. To select it explicitly,
    include the following top-level fragment in config:

    .. code-block:: yaml

        execution:
          in_process:

    Execution priority can be configured using the ``dagster/priority`` tag via solid metadata,
    where the higher the number the higher the priority. 0 is the default and both positive
Example #15
        for flag in (CLI_COMMON_FLAGS | set(additional_flags))
        if solid_config.get(flag) is not None
    }


@solid(
    description="A solid to invoke dbt run via CLI.",
    input_defs=[InputDefinition(name="start_after", dagster_type=Nothing)],
    output_defs=[OutputDefinition(name="dbt_cli_output", dagster_type=DbtCliOutput)],
    config_schema={
        **CLI_CONFIG_SCHEMA,
        "threads": Field(
            config=Noneable(int),
            default_value=None,
            is_required=False,
            description=(
                "Specify number of threads to use while executing models. Overrides settings "
                "in profiles.yml."
            ),
        ),
        "models": Field(
            config=Noneable([str]),
            default_value=None,
            is_required=False,
            description="The dbt models to run.",
        ),
        "exclude": Field(
            config=Noneable([str]),
            default_value=None,
            is_required=False,
            description="The dbt models to exclude.",
Example #16
def define_path_dict_field():
    return {'path': Field(ConfigPathInstance)}
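For the dbt run solid in Example #15, a sketch of solid config matching its config_schema; the values are illustrative:

dbt_run_config = {
    "threads": 4,                   # Noneable(int): threads used while executing models
    "models": ["my_model+"],        # Noneable([str]): dbt models to run
    "exclude": ["legacy_model"],    # Noneable([str]): dbt models to exclude
}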