Example #1
def test_nullable_dict():
    dict_with_int = Shape({'int_field': Int})

    assert not eval_config_value_from_dagster_type(dict_with_int, None).success
    assert not eval_config_value_from_dagster_type(dict_with_int, {}).success
    assert not eval_config_value_from_dagster_type(dict_with_int, {'int_field': None}).success
    assert eval_config_value_from_dagster_type(dict_with_int, {'int_field': 1}).success

    nullable_dict_with_int = Noneable(Shape({'int_field': Int}))

    assert eval_config_value_from_dagster_type(nullable_dict_with_int, None).success
    assert not eval_config_value_from_dagster_type(nullable_dict_with_int, {}).success
    assert not eval_config_value_from_dagster_type(
        nullable_dict_with_int, {'int_field': None}
    ).success
    assert eval_config_value_from_dagster_type(nullable_dict_with_int, {'int_field': 1}).success

    dict_with_nullable_int = Shape({'int_field': Field(Noneable(int))})

    assert not eval_config_value_from_dagster_type(dict_with_nullable_int, None).success
    assert not eval_config_value_from_dagster_type(dict_with_nullable_int, {}).success
    assert eval_config_value_from_dagster_type(dict_with_nullable_int, {'int_field': None}).success
    assert eval_config_value_from_dagster_type(dict_with_nullable_int, {'int_field': 1}).success

    nullable_dict_with_nullable_int = Noneable(Shape({'int_field': Field(Noneable(int))}))

    assert eval_config_value_from_dagster_type(nullable_dict_with_nullable_int, None).success
    assert not eval_config_value_from_dagster_type(nullable_dict_with_nullable_int, {}).success
    assert eval_config_value_from_dagster_type(
        nullable_dict_with_nullable_int, {'int_field': None}
    ).success
    assert eval_config_value_from_dagster_type(
        nullable_dict_with_nullable_int, {'int_field': 1}
    ).success
Example #2
def test_nullable_dict():
    dict_with_int = Shape({'int_field': int})

    assert not validate_config(dict_with_int, None).success
    assert not validate_config(dict_with_int, {}).success
    assert not validate_config(dict_with_int, {'int_field': None}).success
    assert validate_config(dict_with_int, {'int_field': 1}).success

    nullable_dict_with_int = Noneable(Shape({'int_field': int}))

    assert validate_config(nullable_dict_with_int, None).success
    assert not validate_config(nullable_dict_with_int, {}).success
    assert not validate_config(nullable_dict_with_int, {
        'int_field': None
    }).success
    assert validate_config(nullable_dict_with_int, {'int_field': 1}).success

    dict_with_nullable_int = Shape({'int_field': Field(Noneable(int))})

    assert not validate_config(dict_with_nullable_int, None).success
    assert not validate_config(dict_with_nullable_int, {}).success
    assert validate_config(dict_with_nullable_int, {'int_field': None}).success
    assert validate_config(dict_with_nullable_int, {'int_field': 1}).success

    nullable_dict_with_nullable_int = Noneable(
        Shape({'int_field': Field(Noneable(int))}))

    assert validate_config(nullable_dict_with_nullable_int, None).success
    assert not validate_config(nullable_dict_with_nullable_int, {}).success
    assert validate_config(nullable_dict_with_nullable_int, {
        'int_field': None
    }).success
    assert validate_config(nullable_dict_with_nullable_int, {
        'int_field': 1
    }).success
Example #3
def _define_bootstrap_actions():
    name = Field(String, description='The name of the bootstrap action.', is_required=True)

    path = Field(
        String,
        description='''Location of the script to run during a bootstrap action. Can be either a
        location in Amazon S3 or on a local file system.''',
        is_required=True,
    )

    args = Field(
        [String],
        description='A list of command line arguments to pass to the bootstrap action script.',
        is_required=False,
    )

    bootstrap_action = Shape(
        fields={
            'Name': name,
            'ScriptBootstrapAction': Field(
                Shape(fields={'Path': path, 'Args': args}),
                description='The script run by the bootstrap action.',
                is_required=True,
            ),
        }
    )

    return Field(
        [bootstrap_action],
        description='''A list of bootstrap actions to run before Hadoop starts on the cluster
        nodes.''',
        is_required=False,
    )
Example #4
def test_construct_same_fields_different_aliases():
    int_dict_1 = Shape(fields={"an_int": Field(int)},
                       field_aliases={"an_int": "foo"})
    int_dict_2 = Shape(fields={"an_int": Field(int)},
                       field_aliases={"an_int": "bar"})

    assert int_dict_1 is not int_dict_2
    assert int_dict_1.key != int_dict_2.key
Example #5
def test_construct_same_dicts():
    int_dict_1 = Shape(fields={'an_int': Field(int)})
    int_dict_2 = Shape(fields={'an_int': Field(int)})

    # assert identical object
    assert int_dict_1 is int_dict_2
    # assert equivalent key
    assert int_dict_1.key == int_dict_2.key
Example #6
def test_field_order_irrelevant():
    int_dict_1 = Shape(fields={'an_int': Field(int), 'another_int': Field(int)})

    int_dict_2 = Shape(fields={'another_int': Field(int), 'an_int': Field(int)})

    # assert identical object
    assert int_dict_1 is int_dict_2
    # assert equivalent key
    assert int_dict_1.key == int_dict_2.key
Example #7
def _define_steps():
    name = Field(String, description='The name of the step.', is_required=True)

    actionOnFailure = Field(
        EmrActionOnFailure,
        description='''The action to take when the cluster step fails. Possible values are
        TERMINATE_CLUSTER, CANCEL_AND_WAIT, and CONTINUE. TERMINATE_JOB_FLOW is provided for
        backward compatibility. We recommend using TERMINATE_CLUSTER instead.''',
        is_required=False,
    )

    hadoopJarStep = Field(
        Shape(
            fields={
                'Properties': Field(
                    [Shape(fields={'Key': Field(String), 'Value': Field(String)})],
                    description='''A list of Java properties that are set when the step runs. You
                    can use these properties to pass key value pairs to your main function.''',
                    is_required=False,
                ),
                'Jar': Field(
                    String,
                    description='A path to a JAR file run during the step.',
                    is_required=True,
                ),
                'MainClass': Field(
                    String,
                    description='''The name of the main class in the specified Java file. If not
                    specified, the JAR file should specify a Main-Class in its manifest file.''',
                    is_required=False,
                ),
                'Args': Field(
                    [String],
                    description='''A list of command line arguments passed to the JAR file's main
                    function when executed.''',
                    is_required=False,
                ),
            }
        ),
        description='The JAR file used for the step.',
    )

    return Field(
        [
            Shape(
                fields={
                    'Name': name,
                    'ActionOnFailure': actionOnFailure,
                    'HadoopJarStep': hadoopJarStep,
                }
            )
        ],
        description='A list of steps to run.',
    )
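Example #8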
def _construct_shape_from_snap(config_type_snap, config_snap_map):
    check.list_param(config_type_snap.fields, "config_field_snap", ConfigFieldSnap)

    return Shape(
        fields=_construct_fields(config_type_snap, config_snap_map),
        description=config_type_snap.description,
    )
Example #9
def _define_node_types():
    node_type_id = Field(
        String,
        description=
        "This field encodes, through a single value, the resources available to each "
        "of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned "
        "and optimized for memory or compute intensive workloads. "
        "A list of available node types can be retrieved by using the List node types API "
        "call. This field is required.",
        is_required=True,
    )

    driver_node_type_id = Field(
        String,
        description="The node type of the Spark driver. "
        "This field is optional; if unset, the driver node type is set as the "
        "same value as node_type_id defined above.",
        is_required=False,
    )

    return Field(
        Shape(
            fields={
                "node_type_id": node_type_id,
                "driver_node_type_id": driver_node_type_id
            }))
Example #10
def _dataframe_loader_config():
    read_fields = {
        read_from: Permissive({
            option_name: Field(option_args[0],
                               is_required=option_args[1],
                               description=option_args[2])
            for option_name, option_args in read_opts["options"].items()
        })
        for read_from, read_opts in DataFrameReadTypes.items()
    }

    return Shape({
        "read": Field(
            Selector(read_fields),
            is_required=False,
        ),
        **{
            util_name: util_spec["options"]
            for util_name, util_spec in DataFrameUtilities.items()
        },
        # https://github.com/dagster-io/dagster/issues/2872
        **{
            field_name: Field(
                field_config,
                is_required=False,
            )
            for field_name, field_config in read_fields.items()
        },
    })
Example #11
def _define_custom_tags():
    key = Field(
        String,
        description=
        "The key of the tag. The key length must be between 1 and 127 UTF-8 "
        "characters, inclusive. For a list of all restrictions, see AWS Tag Restrictions: "
        "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html#tag-restrictions",
        is_required=True,
    )
    value = Field(
        String,
        description=
        "The value of the tag. The value length must be less than or equal to "
        "255 UTF-8 characters. For a list of all restrictions, see AWS Tag Restrictions: "
        "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html#tag-restrictions",
        is_required=True,
    )
    return Field(
        [Shape(fields={
            "key": key,
            "value": value
        })],
        description=
        "Additional tags for cluster resources. Databricks tags all cluster resources "
        "(e.g., AWS instances and EBS volumes) with these tags in addition to default_tags. Note: "
        "- Tags are not supported on legacy node types such as compute-optimized and "
        "memory-optimized "
        "- Databricks allows at most 45 custom tags"
        "More restrictions may apply if using Azure Databricks; refer to the official docs "
        "for further details.",
        is_required=False,
    )
Example #12
def _dataframe_materializer_config():
    to_fields = {
        write_to: Permissive({
            option_name: Field(option_args[0],
                               is_required=option_args[1],
                               description=option_args[2])
            for option_name, option_args in to_opts["options"].items()
        })
        for write_to, to_opts in DataFrameToTypes.items()
    }

    return Shape({
        "to": Field(
            Selector(to_fields),
            is_required=False,
        ),
        **{
            util_name: util_spec["options"]
            for util_name, util_spec in DataFrameUtilities.items()
        },
        # https://github.com/dagster-io/dagster/issues/2872
        **{
            field_name: Field(
                field_config,
                is_required=False,
            )
            for field_name, field_config in to_fields.items()
        },
    })
Example #13
def define_databricks_secrets_config():
    name = Field(
        String,
        description="The environment variable name, e.g. `DATABRICKS_TOKEN`.",
        is_required=True,
    )
    key = Field(String,
                description="The key of the Databricks secret.",
                is_required=True)
    scope = Field(String,
                  description="The scope of the Databricks secret.",
                  is_required=True)
    return Field(
        [Shape(fields={
            "name": name,
            "key": key,
            "scope": scope
        })],
        description=
        "Databricks secrets to be exported as environment variables. Since runs "
        "will execute in the Databricks runtime environment, environment variables (such as those "
        "required for a `StringSource` config variable) will not be accessible to Dagster. These "
        "variables must be stored as Databricks secrets and specified here, which will ensure "
        "they are re-exported as environment variables accessible to Dagster upon execution.",
        is_required=False,
    )
Example #14
def _define_maven_library():
    coordinates = Field(
        String,
        description=
        "Gradle-style Maven coordinates. For example: org.jsoup:jsoup:1.7.2. "
        "This field is required.",
        is_required=True,
    )
    repo = Field(
        String,
        description="Maven repo to install the Maven package from. "
        "If omitted, both Maven Central Repository and Spark Packages are searched.",
        is_required=False,
    )
    exclusions = Field(
        [String],
        description="List of dependences to exclude. For example: "
        '["slf4j:slf4j", "*:hadoop-client"]. '
        "Maven dependency exclusions: "
        "https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html.",
        is_required=False,
    )
    return Field(
        Shape(fields={
            "coordinates": coordinates,
            "repo": repo,
            "exclusions": exclusions
        }),
        description="Specification of a Maven library to be installed.",
    )
Example #15
def test_selector_within_dict_no_subfields():
    result = validate_config(Shape({'selector': Field(ExampleSelector)}),
                             {'selector': {}})
    assert not result.success
    assert len(result.errors) == 1
    assert result.errors[0].message == (
        "Must specify a field at path root:selector if more than one field "
        "is defined. Defined fields: ['option_one', 'option_two']")
Example #16
def test_selector_within_dict_no_subfields():
    result = eval_config_value_from_dagster_type(
        Shape({"selector": Field(ExampleSelector)}), {"selector": {}})
    assert not result.success
    assert len(result.errors) == 1
    assert result.errors[0].message == (
        "Must specify a field at path root:selector if more than one field "
        "is defined. Defined fields: ['option_one', 'option_two']")
Example #17
def test_list_of_dict():
    inner_dict_dagster_type = Shape({"foo": Field(str)})
    list_of_dict_snap = snap_from_dagster_type([inner_dict_dagster_type])

    assert list_of_dict_snap.key.startswith("Array")
    child_type_keys = list_of_dict_snap.get_child_type_keys()
    assert child_type_keys
    assert len(child_type_keys) == 1
    assert child_type_keys[0].startswith("Shape")
def test_list_of_dict():
    inner_dict_dagster_type = Shape({'foo': Field(str)})
    list_of_dict_meta = meta_from_dagster_type([inner_dict_dagster_type])

    assert list_of_dict_meta.key.startswith('Array')
    child_type_keys = list_of_dict_meta.get_child_type_keys()
    assert child_type_keys
    assert len(child_type_keys) == 1
    assert child_type_keys[0].startswith('Shape')
Example #19
def test_post_process_error():
    error_result = eval_config_value_from_dagster_type(
        Shape({"foo": StringSource}), {"foo": {"env": "THIS_ENV_VAR_DOES_NOT_EXIST"}}
    )
    assert not error_result.success
    assert len(error_result.errors) == 1
    error = error_result.errors[0]
    assert error.reason == DagsterEvaluationErrorReason.FAILED_POST_PROCESSING
    assert len(error.stack.entries) == 1
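Example #20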
def test_map_of_dict():
    inner_dict_dagster_type = Shape({"foo": Field(str)})
    map_of_dict_snap = snap_from_dagster_type({str: inner_dict_dagster_type})

    assert map_of_dict_snap.key.startswith("Map")
    child_type_keys = map_of_dict_snap.get_child_type_keys()
    assert child_type_keys
    assert len(child_type_keys) == 2
    assert child_type_keys[0] == "String"
    assert child_type_keys[1].startswith("Shape")
Example #21
def _define_instance_groups():
    return Field(
        [
            Shape(
                fields={
                    'Name': Field(
                        String,
                        description='Friendly name given to the instance group.',
                        is_required=False,
                    ),
                    'Market': Field(
                        EmrMarket,
                        description='''Market type of the EC2 instances used to create a cluster
                            node.''',
                        is_required=False,
                    ),
                    'InstanceRole': Field(
                        EmrInstanceRole,
                        description='The role of the instance group in the cluster.',
                        is_required=True,
                    ),
                    'BidPrice': Field(
                        String,
                        description='''The maximum Spot price you are willing to pay for EC2
                            instances.

                            An optional, nullable field that applies if the MarketType for the
                            instance group is specified as SPOT. Specify the maximum spot price in
                            USD. If the value is NULL and SPOT is specified, the maximum Spot price
                            is set equal to the On-Demand price.''',
                        is_required=False,
                    ),
                    'InstanceType': Field(
                        String,
                        description='''The EC2 instance type for all instances in the instance
                            group.''',
                        is_required=True,
                    ),
                    'InstanceCount': Field(
                        Int,
                        description='Target number of instances for the instance group.',
                        is_required=True,
                    ),
                    'Configurations': _define_configurations(),
                    'EbsConfiguration': _define_ebs_configuration(),
                    'AutoScalingPolicy': _define_auto_scaling_policy(),
                }
            )
        ],
        description='Configuration for the instance groups in a cluster.',
        is_required=False,
    )
Example #22
def test_list_of_scalar_or_dict():
    int_or_dict_list = resolve_to_config_type(
        [ScalarUnion(scalar_type=int, non_scalar_schema=Shape({"a_string": str}))]
    )

    assert validate_config(int_or_dict_list, []).success
    assert validate_config(int_or_dict_list, [2]).success
    assert validate_config(int_or_dict_list, [{"a_string": "kjdfd"}]).success
    assert validate_config(int_or_dict_list, [2, {"a_string": "kjdfd"}]).success

    assert not validate_config(int_or_dict_list, [2, {"wrong_key": "kjdfd"}]).success
    assert not validate_config(int_or_dict_list, [2, {"a_string": 2343}]).success
    assert not validate_config(int_or_dict_list, ["kjdfkd", {"a_string": "kjdfd"}]).success
Example #23
def test_post_process_error():
    from dagster.core.instance.source_types import StringSource

    error_result = eval_config_value_from_dagster_type(
        Shape({'foo': StringSource}),
        {'foo': {
            'env': 'THIS_ENV_VAR_DOES_NOT_EXIST'
        }})
    assert not error_result.success
    assert len(error_result.errors) == 1
    error = error_result.errors[0]
    assert error.reason == DagsterEvaluationErrorReason.FAILED_POST_PROCESSING
    assert len(error.stack.entries) == 1
Example #24
def test_scalar_or_dict():

    int_or_dict = ScalarUnion(scalar_type=int, non_scalar_schema=Shape({"a_string": str}))

    assert validate_config(int_or_dict, 2).success
    assert not validate_config(int_or_dict, "2").success
    assert not validate_config(int_or_dict, False).success

    assert validate_config(int_or_dict, {"a_string": "kjdfk"}).success
    assert not validate_config(int_or_dict, {}).success
    assert not validate_config(int_or_dict, {"wrong_key": "kjdfd"}).success
    assert not validate_config(int_or_dict, {"a_string": 2}).success
    assert not validate_config(int_or_dict, {"a_string": "kjdfk", "extra_field": "kd"}).success
Example #25
def test_scalar_or_dict():

    int_or_dict = ScalarUnion(
        scalar_type=resolve_to_config_type(int), non_scalar_type=Shape({'a_string': str})
    )

    assert validate_config(int_or_dict, 2).success
    assert not validate_config(int_or_dict, '2').success
    assert not validate_config(int_or_dict, False).success

    assert validate_config(int_or_dict, {'a_string': 'kjdfk'}).success
    assert not validate_config(int_or_dict, {}).success
    assert not validate_config(int_or_dict, {'wrong_key': 'kjdfd'}).success
    assert not validate_config(int_or_dict, {'a_string': 2}).success
    assert not validate_config(int_or_dict, {'a_string': 'kjdfk', 'extra_field': 'kd'}).success
Example #26
def test_field_alias_order_irrelevant():
    int_dict_1 = Shape(
        fields={
            "an_int": Field(int),
            "another_int": Field(int)
        },
        field_aliases={
            "an_int": "foo",
            "another_int": "bar"
        },
    )
    int_dict_2 = Shape(
        fields={
            "an_int": Field(int),
            "another_int": Field(int)
        },
        field_aliases={
            "another_int": "bar",
            "an_int": "foo"
        },
    )

    assert int_dict_1 is int_dict_2
    assert int_dict_1.key == int_dict_2.key
Example #27
def test_list_of_dict():
    inner_dict_dagster_type = Shape({'foo': Field(str)})
    list_of_dict_meta = meta_from_dagster_type([inner_dict_dagster_type])

    assert list_of_dict_meta.key.startswith('Array')
    assert list_of_dict_meta.inner_type_refs
    assert len(list_of_dict_meta.inner_type_refs) == 1
    # Both Shape[...] and str are NonGenericTypeRefMetas in this schema
    dict_ref = list_of_dict_meta.type_param_refs[0]
    assert isinstance(dict_ref, NonGenericTypeRefMeta)
    assert dict_ref.key.startswith('Shape')

    assert (len(list_of_dict_meta.type_param_refs) == 1
            and list_of_dict_meta.type_param_refs[0].key
            == resolve_to_config_type(inner_dict_dagster_type).key)
Example #28
def test_list_of_scalar_or_dict():
    int_or_dict_list = resolve_to_config_type(
        [
            ScalarUnion(
                scalar_type=resolve_to_config_type(int), non_scalar_type=Shape({'a_string': str})
            )
        ]
    )

    assert validate_config(int_or_dict_list, []).success
    assert validate_config(int_or_dict_list, [2]).success
    assert validate_config(int_or_dict_list, [{'a_string': 'kjdfd'}]).success
    assert validate_config(int_or_dict_list, [2, {'a_string': 'kjdfd'}]).success

    assert not validate_config(int_or_dict_list, [2, {'wrong_key': 'kjdfd'}]).success
    assert not validate_config(int_or_dict_list, [2, {'a_string': 2343}]).success
    assert not validate_config(int_or_dict_list, ['kjdfkd', {'a_string': 'kjdfd'}]).success
Example #29
def _define_spark_submit_task():
    parameters = Field(
        [String],
        description="Command-line parameters passed to spark submit.",
        is_required=True,
    )
    return Field(
        Shape(fields={"parameters": parameters}),
        description="Important!"
        "You can Spark submit tasks only on new clusters. "
        "In the new_cluster specification, libraries and spark_conf are not supported. Instead, "
        "use --jars and --py-files to add Java and Python libraries and use --conf to set the "
        "Spark configuration. "
        "master, deploy-mode, and executor-cores are automatically configured by Databricks; "
        "you cannot specify them in parameters. "
        "By default, the Spark submit job uses all available memory (excluding reserved memory "
        "for Databricks services). You can set --driver-memory, and --executor-memory to a "
        "smaller value to leave some room for off-heap usage. "
        "The --jars, --py-files, --files arguments support DBFS and S3 paths.",
    )
Example #30
def _define_spark_python_task():
    python_file = Field(
        String,
        description=
        "The URI of the Python file to be executed. DBFS and S3 paths are supported."
        "This field is required.",
        is_required=True,
    )
    parameters = Field(
        [String],
        description=
        "Command line parameters that will be passed to the Python file.",
        is_required=False,
        default_value=[],
    )
    return Field(
        Shape(fields={
            "python_file": python_file,
            "parameters": parameters
        }))