Example #1
def test_op_config_recursive():
    @op(config_schema={"solids": Permissive(), "ops": Permissive()})
    def my_op(context):
        return context.op_config

    @graph
    def my_graph():
        return my_op()

    config = {
        "solids": {
            "solids": {
                "foo": {
                    "config": {
                        "foobar": "bar"
                    }
                }
            }
        },
        "ops": {
            "solids": {
                "foo": {
                    "config": {
                        "foobar": "bar"
                    }
                }
            }
        },
    }
    result = my_graph.execute_in_process(run_config={
        "ops": {
            "my_graph": {
                "ops": {
                    "my_op": {
                        "config": config
                    }
                }
            }
        }
    })
    assert result.success
    assert result.output_values["result"] == config

    @graph
    def solids():
        return my_op()

    result = solids.execute_in_process(
        run_config={"ops": {
            "solids": {
                "ops": {
                    "my_op": {
                        "config": config
                    }
                }
            }
        }})
    assert result.success
    assert result.output_values["result"] == config
Example #2
def test_construct_same_perm_dicts():
    int_perm_dict_1 = Permissive(fields={'an_int': Field(int)})
    int_perm_dict_2 = Permissive(fields={'an_int': Field(int)})

    # assert identical object
    assert int_perm_dict_1 is int_perm_dict_2
    # assert equivalent key
    assert int_perm_dict_1.key == int_perm_dict_2.key
Example #3
def test_config_naming_collisions():
    @op(config_schema={"solids": Permissive(), "ops": Permissive()})
    def my_op(context):
        return context.op_config

    @graph
    def my_graph():
        return my_op()

    config = {
        "solids": {
            "solids": {
                "foo": {
                    "config": {
                        "foobar": "bar"
                    }
                }
            }
        },
        "ops": {
            "solids": {
                "foo": {
                    "config": {
                        "foobar": "bar"
                    }
                }
            }
        },
    }
    result = my_graph.execute_in_process(
        run_config={"ops": {
            "my_op": {
                "config": config
            }
        }})
    assert result.success
    assert result.output_value() == config

    @graph
    def ops():
        return my_op()

    result = ops.execute_in_process(
        run_config={"ops": {
            "my_op": {
                "config": config
            }
        }})
    assert result.success
    assert result.output_value() == config
Example #4
def _dataframe_materializer_config():
    to_fields = {
        write_to: Permissive({
            option_name: Field(option_args[0],
                               is_required=option_args[1],
                               description=option_args[2])
            for option_name, option_args in to_opts["options"].items()
        })
        for write_to, to_opts in DataFrameToTypes.items()
    }

    return Shape({
        "to": Field(
            Selector(to_fields),
            is_required=False,
        ),
        **{
            util_name: util_spec["options"]
            for util_name, util_spec in DataFrameUtilities.items()
        },
        # https://github.com/dagster-io/dagster/issues/2872
        **{
            field_name: Field(
                field_config,
                is_required=False,
            )
            for field_name, field_config in to_fields.items()
        },
    })
Example #5
def _construct_permissive_from_snap(config_type_snap, config_snap_map):
    check.opt_list_param(config_type_snap.fields, "config_field_snap", ConfigFieldSnap)

    return Permissive(
        fields=_construct_fields(config_type_snap, config_snap_map),
        description=config_type_snap.description,
    )
Example #6
def shell_op_config():
    return {
        "env": Field(
            Noneable(Permissive()),
            is_required=False,
            description="An optional dict of environment variables to pass to the subprocess.",
        ),
        "output_logging": Field(
            Enum(
                name="OutputType",
                enum_values=[
                    EnumValue("STREAM", description="Stream script stdout/stderr."),
                    EnumValue(
                        "BUFFER",
                        description="Buffer shell script stdout/stderr, then log upon completion.",
                    ),
                    EnumValue("NONE", description="No logging"),
                ],
            ),
            is_required=False,
            default_value="BUFFER",
        ),
        "cwd": Field(
            Noneable(str),
            default_value=None,
            is_required=False,
            description="Working directory in which to execute shell script",
        ),
    }
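For reference, a hedged sketch of op config that the shell_op_config() schema above would accept; the values below are hypothetical, not taken from any example here:

# Hypothetical values matching shell_op_config(); "env" is Permissive, so any
# string keys are allowed, and "output_logging" must be STREAM, BUFFER, or NONE.
shell_op_run_config = {
    "env": {"MY_TOKEN": "abc123"},
    "output_logging": "STREAM",
    "cwd": "/tmp",
}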
Example #7
def test_permissive_dict_with_fields():
    perm_dict_with_field = Permissive({'a_key': Field(str)})

    assert _validate(perm_dict_with_field, {'a_key': 'djfkdjkfd'}).success
    assert _validate(perm_dict_with_field, {'a_key': 'djfkdjkfd', 'extra_key': 'kdjkfd'}).success
    assert not _validate(perm_dict_with_field, {'a_key': 2}).success
    assert not _validate(perm_dict_with_field, {}).success
Example #8
def shell_solid_config():
    return {
        'env': Field(
            Noneable(Permissive()),
            default_value=os.environ.copy(),
            is_required=False,
            description='An optional dict of environment variables to pass to the subprocess. '
            'Defaults to using os.environ.copy().',
        ),
        'output_logging': Field(
            Enum(
                name='OutputType',
                enum_values=[
                    EnumValue('STREAM', description='Stream script stdout/stderr.'),
                    EnumValue(
                        'BUFFER',
                        description='Buffer shell script stdout/stderr, then log upon completion.',
                    ),
                    EnumValue('NONE', description='No logging'),
                ],
            ),
            is_required=False,
            default_value='BUFFER',
        ),
        'cwd': Field(
            Noneable(str),
            default_value=None,
            is_required=False,
            description='Working directory in which to execute shell script',
        ),
    }
Example #9
def _dataframe_loader_config():
    read_fields = {
        read_from: Permissive({
            option_name: Field(option_args[0],
                               is_required=option_args[1],
                               description=option_args[2])
            for option_name, option_args in read_opts["options"].items()
        })
        for read_from, read_opts in DataFrameReadTypes.items()
    }

    return Shape({
        "read": Field(
            Selector(read_fields),
            is_required=False,
        ),
        **{
            util_name: util_spec["options"]
            for util_name, util_spec in DataFrameUtilities.items()
        },
        # https://github.com/dagster-io/dagster/issues/2872
        **{
            field_name: Field(
                field_config,
                is_required=False,
            )
            for field_name, field_config in read_fields.items()
        },
    })
Example #10
def test_permissive_dict_with_fields():
    perm_dict_with_field = Permissive({"a_key": Field(str)})

    assert validate_config(perm_dict_with_field, {"a_key": "djfkdjkfd"}).success
    assert validate_config(
        perm_dict_with_field, {"a_key": "djfkdjkfd", "extra_key": "kdjkfd"}
    ).success
    assert not validate_config(perm_dict_with_field, {"a_key": 2}).success
    assert not validate_config(perm_dict_with_field, {}).success
Example #11
def _base_config():
    return {
        'error_rate': Field(float, is_required=False, default_value=0.0),
        'sleep': Field(float, is_required=False, default_value=0.5),
        'materialization_key': Field(str, is_required=False),
        'materialization_text': Field(str, is_required=False),
        'materialization_url': Field(str, is_required=False),
        'materialization_path': Field(str, is_required=False),
        'materialization_json': Field(Permissive(), is_required=False),
        'materialization_value': Field(float, is_required=False),
    }
Example #12
def _base_config():
    return {
        "error_rate": Field(float, is_required=False, default_value=0.0),
        "sleep": Field(float, is_required=False, default_value=0.5),
        "materialization_key_list": Field(Array(str), is_required=False),
        "materialization_key": Field(str, is_required=False),
        "materialization_text": Field(str, is_required=False),
        "materialization_url": Field(str, is_required=False),
        "materialization_path": Field(str, is_required=False),
        "materialization_json": Field(Permissive(), is_required=False),
        "materialization_value": Field(float, is_required=False),
    }
Example #13
def celery_docker_config():
    additional_config = {
        "docker":
        Field(
            {
                "image":
                Field(
                    StringSource,
                    is_required=False,
                    description=
                    "The docker image to be used for step execution.",
                ),
                "registry":
                Field(
                    {
                        "url": Field(StringSource),
                        "username": Field(StringSource),
                        "password": Field(StringSource),
                    },
                    is_required=False,
                    description=
                    "Information for using a non local/public docker registry",
                ),
                "env_vars":
                Field(
                    [str],
                    is_required=False,
                    description=
                    "The list of environment variables names to forward from the celery worker in to the docker container",
                ),
                "network":
                Field(
                    str,
                    is_required=False,
                    description=
                    "Name of the network this container will be connected to at creation time",
                ),
                "container_kwargs":
                Field(
                    Permissive(),
                    is_required=False,
                    description=
                    "Additional keyword args for the docker container",
                ),
            },
            is_required=True,
            description=
            "The configuration for interacting with docker in the celery worker.",
        ),
    }

    cfg = merge_dicts(CELERY_CONFIG, additional_config)
    return cfg
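A hedged sketch of what the added "docker" section might look like in practice; the image, network, and environment variable names below are placeholders:

# Placeholder values for the "docker" block merged into CELERY_CONFIG above.
celery_docker_section = {
    "docker": {
        "image": "my-company/pipeline-image:latest",
        "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
        "network": "my_docker_network",
        # container_kwargs is Permissive, so arbitrary docker container
        # keyword arguments pass through unvalidated.
        "container_kwargs": {"volumes": {"/host/data": {"bind": "/opt/data", "mode": "rw"}}},
    }
}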
Example #14
def get_retries_config():
    return Field(
        Selector({
            'enabled': {},
            'disabled': {},
            'deferred': {
                'previous_attempts': Permissive()
            }
        }),
        is_required=False,
        default_value={'enabled': {}},
    )
Example #15
def _define_configurations():
    return Field(
        [
            Shape(
                fields={
                    'Classification': Field(
                        String,
                        description='The classification within a configuration.',
                        is_required=False,
                    ),
                    'Configurations': Field(
                        [Permissive()],
                        description='''A list of additional configurations to apply within a
                                configuration object.''',
                        is_required=False,
                    ),
                    'Properties': Field(
                        Permissive(),
                        description='''A set of properties specified within a configuration
                                classification.''',
                        is_required=False,
                    ),
                }
            )
        ],
        description='''For Amazon EMR releases 4.0 and later. The list of configurations supplied
        for the EMR cluster you are creating.

        An optional configuration specification to be used when provisioning cluster instances,
        which can include configurations for applications and software bundled with Amazon EMR. A
        configuration consists of a classification, properties, and optional nested configurations.
        A classification refers to an application-specific configuration file. Properties are the
        settings you want to change in that file. For more information, see the EMR Configuring
        Applications guide.''',
        is_required=False,
    )
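As a point of reference, a hedged sketch of a value this field would accept, following the EMR classification/properties format described above; the classifications and properties are illustrative:

# Illustrative EMR configuration entries (classification + properties).
emr_configurations = [
    {
        "Classification": "spark-defaults",
        "Properties": {"spark.executor.memory": "2g"},
    },
    {
        "Classification": "yarn-site",
        "Properties": {"yarn.nodemanager.vmem-check-enabled": "false"},
        "Configurations": [],  # optional nested configurations
    },
]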
Example #16
def test_deserialize_solid_def_snaps_permissive():
    @solid(config=Field(Permissive({'foo': Field(str)})))
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    solid_def_snap = pipeline_snapshot.get_solid_def_snap('noop_solid')
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(solid_def_snap)
    assert isinstance(received_config_type, Permissive)
    assert isinstance(received_config_type.fields['foo'].config_type, String)
    _map_has_stable_hashes(
        received_config_type, pipeline_snapshot.config_schema_snapshot.all_config_snaps_by_key
    )
Example #17
 def config_type(cls):
     return {
         "image":
         Field(
             StringSource,
             is_required=False,
             description=
             "The docker image to be used if the repository does not specify one.",
         ),
         "registry":
         Field(
             {
                 "url": Field(StringSource),
                 "username": Field(StringSource),
                 "password": Field(StringSource),
             },
             is_required=False,
             description=
             "Information for using a non local/public docker registry",
         ),
         "env_vars":
         Field(
             [str],
             is_required=False,
             description=
             "The list of environment variables names to forward to the docker container",
         ),
         "network":
         Field(
             StringSource,
             is_required=False,
             description=
             "Name of the network this container to which to connect the launched container at creation time",
         ),
         "container_kwargs":
         Field(
             Permissive(),
             is_required=False,
             description=
             "key-value pairs that can be passed into containers.create. See "
             "https://docker-py.readthedocs.io/en/stable/containers.html for the full list "
             "of available options.",
         ),
     }
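A hedged sketch of config satisfying the schema returned by config_type() above; the registry, image, and volume values are placeholders:

# Placeholder values for the config_type() schema above. StringSource fields
# also accept {"env": "VAR_NAME"} to read the value from an environment variable.
docker_launcher_config = {
    "image": "registry.example.com/dagster-image:latest",
    "registry": {
        "url": "registry.example.com",
        "username": {"env": "REGISTRY_USER"},
        "password": {"env": "REGISTRY_PASSWORD"},
    },
    "env_vars": ["DATABASE_URL"],
    # container_kwargs is Permissive: any containers.create keyword argument passes through.
    "container_kwargs": {"volumes": {"/var/run/docker.sock": {"bind": "/var/run/docker.sock", "mode": "rw"}}},
}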
Example #18
def _define_notebook_task():
    notebook_path = Field(
        String,
        description=
        "The absolute path of the notebook to be run in the Databricks Workspace. "
        "This path must begin with a slash. This field is required.",
        is_required=True,
    )
    base_parameters = Field(
        Permissive(),
        description="Base parameters to be used for each run of this job. "
        "If the notebook takes a parameter that is not specified in the job's base_parameters "
        "or the run-now override parameters, the default value from the notebook will be used. "
        "Retrieve these parameters in a notebook by using dbutils.widgets.get().",
        is_required=False,
    )
    return Field(
        Shape(fields={
            "notebook_path": notebook_path,
            "base_parameters": base_parameters
        }))
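A hedged sketch of a config value for this notebook-task shape; the notebook path and parameters below are hypothetical:

# Hypothetical notebook task config accepted by the Shape above.
notebook_task_config = {
    "notebook_path": "/Users/alice@example.com/etl_notebook",
    # base_parameters is Permissive, so arbitrary widget parameters are allowed.
    "base_parameters": {"run_date": "2021-06-01", "dry_run": "false"},
}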
Example #19
def pg_config():
    return {
        "postgres_url":
        Field(StringSource, is_required=False),
        "postgres_db":
        Field(
            {
                "username": StringSource,
                "password": StringSource,
                "hostname": StringSource,
                "db_name": StringSource,
                "port": Field(IntSource, is_required=False,
                              default_value=5432),
                "params": Field(
                    Permissive(), is_required=False, default_value={}),
            },
            is_required=False,
        ),
        "should_autocreate_tables":
        Field(bool, is_required=False, default_value=True),
    }
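A hedged sketch of the two alternative ways to satisfy pg_config(), with placeholder connection details:

# Option 1: a single connection URL (placeholder DSN).
pg_url_config = {"postgres_url": "postgresql://user:password@localhost:5432/dagster"}

# Option 2: individual fields; "params" is Permissive, so extra driver
# options pass through, and StringSource fields also accept {"env": "VAR_NAME"}.
pg_db_config = {
    "postgres_db": {
        "username": "user",
        "password": {"env": "PG_PASSWORD"},
        "hostname": "localhost",
        "db_name": "dagster",
        "port": 5432,
        "params": {"connect_timeout": 10},
    }
}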
Example #20
def test_deserialize_solid_def_snaps_multi_type_config(snapshot):
    @solid(config=Field(
        Permissive({
            'foo':
            Field(Array(float)),
            'bar':
            Selector({
                'baz': Field(Noneable(int)),
                'qux': {
                    'quux':
                    Field(str),
                    'corge':
                    Field(
                        Enum(
                            'RGB',
                            [
                                EnumValue('red'),
                                EnumValue('green'),
                                EnumValue('blue')
                            ],
                        )),
                },
            }),
        })))
    def fancy_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        fancy_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    solid_def_snap = pipeline_snapshot.get_solid_def_snap('fancy_solid')
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(
        solid_def_snap)
    snapshot.assert_match(
        serialize_pp(snap_from_config_type(received_config_type)))
    _map_has_stable_hashes(
        received_config_type,
        pipeline_snapshot.config_schema_snapshot.all_config_snaps_by_key)
Example #21
def test_deserialize_solid_def_snaps_multi_type_config(snapshot):
    @solid(config_schema=Field(
        Permissive({
            "foo":
            Field(Array(float)),
            "bar":
            Selector({
                "baz": Field(Noneable(int)),
                "qux": {
                    "quux":
                    Field(str),
                    "corge":
                    Field(
                        Enum(
                            "RGB",
                            [
                                EnumValue("red"),
                                EnumValue("green"),
                                EnumValue("blue")
                            ],
                        )),
                },
            }),
        })))
    def fancy_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        fancy_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    solid_def_snap = pipeline_snapshot.get_solid_def_snap("fancy_solid")
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(
        solid_def_snap)
    snapshot.assert_match(
        serialize_pp(snap_from_config_type(received_config_type)))
    _map_has_stable_hashes(
        received_config_type,
        pipeline_snapshot.config_schema_snapshot.all_config_snaps_by_key)
Example #22
def _dataframe_loader_config():
    read_fields = {
        read_from: Permissive(
            {
                option_name: Field(
                    option_args[0], is_required=option_args[1], description=option_args[2]
                )
                for option_name, option_args in read_opts["options"].items()
            }
        )
        for read_from, read_opts in DataFrameReadTypes.items()
    }

    return Shape(
        {
            "read": Field(
                Selector(read_fields),
            ),
            **{
                util_name: util_spec["options"]
                for util_name, util_spec in DataFrameUtilities.items()
            },
        }
    )
Example #23
def _dataframe_materializer_config():
    to_fields = {
        write_to: Permissive(
            {
                option_name: Field(
                    option_args[0], is_required=option_args[1], description=option_args[2]
                )
                for option_name, option_args in to_opts["options"].items()
            }
        )
        for write_to, to_opts in DataFrameToTypes.items()
    }

    return Shape(
        {
            "to": Field(
                Selector(to_fields),
            ),
            **{
                util_name: util_spec["options"]
                for util_name, util_spec in DataFrameUtilities.items()
            },
        }
    )
Example #24
 "csv": Permissive(
     {
         "path": Field(
             String,
             is_required=True,
             description="the path in any Hadoop supported file system.",
         ),
         "mode": Field(
             WriteModeOptions,
             is_required=False,
             description="specifies the behavior of the save operation when data already exists.",
         ),
         "compression": Field(
             WriteCompressionTextOptions,
             is_required=False,
             description="compression codec to use when saving to file.",
         ),
         "sep": Field(
             String,
             is_required=False,
             description="sets a single character as a separator for each field and value. If None is set, it uses the default value, ``,``.",
         ),
         "quote": Field(
             String,
             is_required=False,
             description="""sets a single character used for escaping quoted values where the separator can be part of the value. If None is set, it uses the default value, ``"``. If an empty string is set, it uses ``u0000`` (null character).""",
         ),
         "escape": Field(
             String,
             is_required=False,
             description="sets a single character used for escaping quotes inside an already quoted value. If None is set, it uses the default value, ``\\``.",
         ),
         "escapeQuotes": Field(
             Bool,
             is_required=False,
             description="a flag indicating whether values containing quotes should always be enclosed in quotes. If None is set, it uses the default value ``true``, escaping all values containing a quote character.",
         ),
         "quoteAll": Field(
             Bool,
             is_required=False,
             description="a flag indicating whether all values should always be enclosed in quotes. If None is set, it uses the default value ``false``, only escaping values containing a quote character.",
         ),
         "header": Field(
             Bool,
             is_required=False,
             description="writes the names of columns as the first line. If None is set, it uses the default value, ``false``.",
         ),
         "nullValue": Field(
             String,
             is_required=False,
             description="sets the string representation of a null value. If None is set, it uses the default value, empty string.",
         ),
         "dateFormat": Field(
             String,
             is_required=False,
             description="sets the string that indicates a date format. Custom date formats follow the formats at ``java.text.SimpleDateFormat``. This applies to date type. If None is set, it uses the default value, ``yyyy-MM-dd``.",
         ),
         "timestampFormat": Field(
             String,
             is_required=False,
             description="sets the string that indicates a timestamp format. Custom date formats follow the formats at ``java.text.SimpleDateFormat``. This applies to timestamp type. If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``.",
         ),
         "ignoreLeadingWhiteSpace": Field(
             Bool,
             is_required=False,
             description="a flag indicating whether or not leading whitespaces from values being written should be skipped. If None is set, it uses the default value, ``true``.",
         ),
         "ignoreTrailingWhiteSpace": Field(
             Bool,
             is_required=False,
             description="a flag indicating whether or not trailing whitespaces from values being written should be skipped. If None is set, it uses the default value, ``true``.",
         ),
         "charToEscapeQuoteEscaping": Field(
             String,
             is_required=False,
             description="sets a single character used for escaping the escape for the quote character. If None is set, the default value is escape character when escape and quote characters are different, ``\0`` otherwise..",
         ),
         "encoding": Field(
             String,
             is_required=False,
             description="sets the encoding (charset) of saved csv files. If None is set, the default UTF-8 charset will be used.",
         ),
         "emptyValue": Field(
             String,
             is_required=False,
             description="sets the string representation of an empty value. If None is set, it uses the default value, ``"
             "``.",
         ),
     }
 ),
Example #25
                'The URL of the Celery broker. Default: '
                '\'pyamqp://guest@{os.getenv(\'DAGSTER_CELERY_BROKER_HOST\','
                '\'localhost\')}//\'.'
            ),
        ),
        'backend': Field(
            String,
            is_required=False,
            default_value='rpc://',
            description='The URL of the Celery results backend. Default: \'rpc://\'.',
        ),
        'include': Field(
            [str], is_required=False, description='List of modules every worker should import'
        ),
        'config_source': Field(
            Permissive(), is_required=False, description='Settings for the Celery app.'
        ),
    },
)
def celery_executor(init_context):
    '''Celery-based executor.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings" introduced
    in Celery version 4.0 and the object constructed from config will be passed to the
    :py:class:`celery.Celery` constructor as its ``config_source`` argument.
    (See https://docs.celeryproject.org/en/latest/userguide/configuration.html for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments to the
    :py:class:`celery.Celery` constructor.
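To make the config_source pass-through concrete, here is a hedged sketch of a settings dict using standard lowercase Celery settings; the particular settings and values are illustrative:

# Illustrative lowercase Celery settings; under the executor's "config_source"
# key, a dict like this is forwarded to the celery.Celery constructor.
celery_config_source = {
    "worker_concurrency": 4,
    "task_acks_late": True,
    "task_annotations": {"*": {"rate_limit": "10/s"}},
}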
Example #26
 "options":
 Field(
     Permissive({
         "other":
         Field(
             String,
             is_required=True,
             description="Set index to specified column.",
         ),
         "drop":
         Field(
             Bool,
             is_required=False,
             description="Delete columns to be used as the new index.",
         ),
         "sorted":
         Field(
             Bool,
             is_required=False,
             description=
             "If the index column is already sorted in increasing order.",
         ),
         "divisions":
         Field(
             Any,
             is_required=False,
             description=
             "Known values on which to separate index values of the partitions.",
         ),
     }),
     is_required=False,
     description="Set the DataFrame index using an existing column.",
Example #27
    Field(
        StringSource,
        is_required=False,
        description=
        "Name of the network to which to connect the launched container at creation time",
    ),
    "networks":
    Field(
        Array(StringSource),
        is_required=False,
        description=
        "Names of the networks to which to connect the launched container at creation time",
    ),
    "container_kwargs":
    Field(
        Permissive(),
        is_required=False,
        description=
        "key-value pairs that can be passed into containers.create. See "
        "https://docker-py.readthedocs.io/en/stable/containers.html for the full list "
        "of available options.",
    ),
}


def validate_docker_config(network, networks, container_kwargs):
    if network:
        check.invariant(not networks,
                        "cannot set both `network` and `networks`")

    if container_kwargs:
Example #28
def test_construct_different_perm_dicts():
    int_perm_dict = Permissive(fields={'an_int': Field(int)})
    string_perm_dict = Permissive(fields={'a_string': Field(str)})

    assert int_perm_dict is not string_perm_dict
    assert int_perm_dict.key != string_perm_dict.key
Example #29
def test_construct_permissive_dict_same_same():
    assert Permissive() is Permissive()
Example #30
def test_kitchen_sink():
    big_dict_1 = Shape({
        'field_one':
        Field(int, default_value=2, is_optional=True),
        'field_two':
        Field(
            Shape({
                'nested_field_one':
                Field(bool),
                'nested_selector':
                Field(
                    Selector({
                        'int_field_in_selector':
                        Field(int),
                        'permissive_dict_in_selector':
                        Field(Permissive()),
                        'permissive_dict_with_fields_in_selector':
                        Field(Permissive({'string_field': Field(str)})),
                    })),
            })),
    })

    big_dict_2 = Shape({
        'field_one':
        Field(int, default_value=2, is_optional=True),
        'field_two':
        Field(
            Shape(
                fields={
                    'nested_field_one':
                    Field(bool),
                    'nested_selector':
                    Field(
                        Selector(
                            fields={
                                'permissive_dict_in_selector':
                                Field(Permissive()),
                                'int_field_in_selector':
                                Field(int),
                                'permissive_dict_with_fields_in_selector':
                                Field(
                                    Permissive(
                                        fields={'string_field': Field(str)})),
                            })),
                })),
    })

    assert big_dict_1 is big_dict_2
    assert big_dict_1.key == big_dict_2.key

    # differs way down in tree
    big_dict_3 = Shape({
        'field_one':
        Field(int, default_value=2, is_optional=True),
        'field_two':
        Field(
            Shape(
                fields={
                    'nested_field_one':
                    Field(bool),
                    'nested_selector':
                    Field(
                        Selector(
                            fields={
                                'permissive_dict_in_selector':
                                Field(Permissive()),
                                'int_field_in_selector':
                                Field(int),
                                'permissive_dict_with_fields_in_selector':
                                Field(
                                    Permissive(
                                        fields={'int_field': Field(int)})),
                            })),
                })),
    })

    assert big_dict_1 is not big_dict_3
    assert big_dict_1.key != big_dict_3.key