def test_nullable_dict():
    dict_with_int = Shape({'int_field': Int})
    assert not eval_config_value_from_dagster_type(dict_with_int, None).success
    assert not eval_config_value_from_dagster_type(dict_with_int, {}).success
    assert not eval_config_value_from_dagster_type(dict_with_int, {'int_field': None}).success
    assert eval_config_value_from_dagster_type(dict_with_int, {'int_field': 1}).success

    nullable_dict_with_int = Noneable(Shape({'int_field': Int}))
    assert eval_config_value_from_dagster_type(nullable_dict_with_int, None).success
    assert not eval_config_value_from_dagster_type(nullable_dict_with_int, {}).success
    assert not eval_config_value_from_dagster_type(
        nullable_dict_with_int, {'int_field': None}
    ).success
    assert eval_config_value_from_dagster_type(nullable_dict_with_int, {'int_field': 1}).success

    dict_with_nullable_int = Shape({'int_field': Field(Noneable(int))})
    assert not eval_config_value_from_dagster_type(dict_with_nullable_int, None).success
    assert not eval_config_value_from_dagster_type(dict_with_nullable_int, {}).success
    assert eval_config_value_from_dagster_type(dict_with_nullable_int, {'int_field': None}).success
    assert eval_config_value_from_dagster_type(dict_with_nullable_int, {'int_field': 1}).success

    nullable_dict_with_nullable_int = Noneable(Shape({'int_field': Field(Noneable(int))}))
    assert eval_config_value_from_dagster_type(nullable_dict_with_nullable_int, None).success
    assert not eval_config_value_from_dagster_type(nullable_dict_with_nullable_int, {}).success
    assert eval_config_value_from_dagster_type(
        nullable_dict_with_nullable_int, {'int_field': None}
    ).success
    assert eval_config_value_from_dagster_type(
        nullable_dict_with_nullable_int, {'int_field': 1}
    ).success
def test_nullable_dict():
    dict_with_int = Shape({'int_field': int})
    assert not validate_config(dict_with_int, None).success
    assert not validate_config(dict_with_int, {}).success
    assert not validate_config(dict_with_int, {'int_field': None}).success
    assert validate_config(dict_with_int, {'int_field': 1}).success

    nullable_dict_with_int = Noneable(Shape({'int_field': int}))
    assert validate_config(nullable_dict_with_int, None).success
    assert not validate_config(nullable_dict_with_int, {}).success
    assert not validate_config(nullable_dict_with_int, {'int_field': None}).success
    assert validate_config(nullable_dict_with_int, {'int_field': 1}).success

    dict_with_nullable_int = Shape({'int_field': Field(Noneable(int))})
    assert not validate_config(dict_with_nullable_int, None).success
    assert not validate_config(dict_with_nullable_int, {}).success
    assert validate_config(dict_with_nullable_int, {'int_field': None}).success
    assert validate_config(dict_with_nullable_int, {'int_field': 1}).success

    nullable_dict_with_nullable_int = Noneable(Shape({'int_field': Field(Noneable(int))}))
    assert validate_config(nullable_dict_with_nullable_int, None).success
    assert not validate_config(nullable_dict_with_nullable_int, {}).success
    assert validate_config(nullable_dict_with_nullable_int, {'int_field': None}).success
    assert validate_config(nullable_dict_with_nullable_int, {'int_field': 1}).success
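# A minimal sketch of the distinction the two tests above exercise, assuming the
# same Shape/Field/Noneable/validate_config API: Noneable wraps whichever level of
# the schema may legally be None.
outer_nullable = Noneable(Shape({'int_field': int}))  # the whole dict may be None
inner_nullable = Shape({'int_field': Field(Noneable(int))})  # only the field value may be None

assert validate_config(outer_nullable, None).success
assert validate_config(inner_nullable, {'int_field': None}).success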
def _define_bootstrap_actions():
    name = Field(String, description='The name of the bootstrap action.', is_required=True)
    path = Field(
        String,
        description='''Location of the script to run during a bootstrap action. Can be
        either a location in Amazon S3 or on a local file system.''',
        is_required=True,
    )
    args = Field(
        [String],
        description='A list of command line arguments to pass to the bootstrap action script.',
        is_required=False,
    )

    bootstrap_action = Shape(
        fields={
            'Name': name,
            'ScriptBootstrapAction': Field(
                Shape(fields={'Path': path, 'Args': args}),
                description='The script run by the bootstrap action.',
                is_required=True,
            ),
        }
    )

    return Field(
        [bootstrap_action],
        description='''A list of bootstrap actions to run before Hadoop starts on the
        cluster nodes.''',
        is_required=False,
    )
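# A hedged usage sketch: one value shaped to satisfy the Field returned by
# _define_bootstrap_actions(). The bucket path and arguments are illustrative only.
bootstrap_actions_example = [
    {
        'Name': 'install-dependencies',
        'ScriptBootstrapAction': {
            'Path': 's3://my-bucket/bootstrap.sh',
            'Args': ['--verbose'],
        },
    }
]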
def test_construct_same_fields_different_aliases():
    int_dict_1 = Shape(fields={"an_int": Field(int)}, field_aliases={"an_int": "foo"})
    int_dict_2 = Shape(fields={"an_int": Field(int)}, field_aliases={"an_int": "bar"})

    assert int_dict_1 is not int_dict_2
    assert int_dict_1.key != int_dict_2.key
def test_construct_same_dicts():
    int_dict_1 = Shape(fields={'an_int': Field(int)})
    int_dict_2 = Shape(fields={'an_int': Field(int)})

    # assert identical object
    assert int_dict_1 is int_dict_2

    # assert equivalent key
    assert int_dict_1.key == int_dict_2.key
def test_field_order_irrelevant():
    int_dict_1 = Shape(fields={'an_int': Field(int), 'another_int': Field(int)})
    int_dict_2 = Shape(fields={'another_int': Field(int), 'an_int': Field(int)})

    # assert identical object
    assert int_dict_1 is int_dict_2

    # assert equivalent key
    assert int_dict_1.key == int_dict_2.key
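# A minimal sketch of why the memoization asserted above matters, assuming the same
# Shape/Field API as these tests: independently constructed schemas with identical
# fields resolve to one cached instance, so config keys stay stable wherever the
# schema is built. `build_schema` is a hypothetical helper, not part of Dagster.
def build_schema():
    return Shape(fields={'an_int': Field(int), 'another_int': Field(int)})


assert build_schema() is build_schema()  # same cached Shape instance
assert build_schema().key == build_schema().key  # and therefore the same key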
def _define_steps():
    name = Field(String, description='The name of the step.', is_required=True)
    actionOnFailure = Field(
        EmrActionOnFailure,
        description='''The action to take when the cluster step fails. Possible values are
        TERMINATE_CLUSTER, CANCEL_AND_WAIT, and CONTINUE. TERMINATE_JOB_FLOW is provided for
        backward compatibility. We recommend using TERMINATE_CLUSTER instead.''',
        is_required=False,
    )
    hadoopJarStep = Field(
        Shape(
            fields={
                'Properties': Field(
                    [Shape(fields={'Key': Field(String), 'Value': Field(String)})],
                    description='''A list of Java properties that are set when the step runs.
                    You can use these properties to pass key value pairs to your main
                    function.''',
                    is_required=False,
                ),
                'Jar': Field(
                    String,
                    description='A path to a JAR file run during the step.',
                    is_required=True,
                ),
                'MainClass': Field(
                    String,
                    description='''The name of the main class in the specified Java file. If
                    not specified, the JAR file should specify a Main-Class in its manifest
                    file.''',
                    is_required=False,
                ),
                'Args': Field(
                    [String],
                    description='''A list of command line arguments passed to the JAR file's
                    main function when executed.''',
                    is_required=False,
                ),
            }
        ),
        description='The JAR file used for the step.',
    )

    return Field(
        [
            Shape(
                fields={
                    'Name': name,
                    'ActionOnFailure': actionOnFailure,
                    'HadoopJarStep': hadoopJarStep,
                }
            )
        ],
        description='A list of steps to run.',
    )
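# A hedged usage sketch: one step shaped to satisfy the schema built by
# _define_steps(). The jar location, enum value, and arguments are illustrative;
# the exact accepted value for ActionOnFailure depends on the EmrActionOnFailure
# enum's config values.
steps_example = [
    {
        'Name': 'run-wordcount',
        'ActionOnFailure': 'CONTINUE',
        'HadoopJarStep': {
            'Jar': 'command-runner.jar',
            'Args': ['spark-submit', 's3://my-bucket/wordcount.py'],
        },
    }
]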
def _construct_shape_from_snap(config_type_snap, config_snap_map):
    check.list_param(config_type_snap.fields, "config_field_snap", ConfigFieldSnap)
    return Shape(
        fields=_construct_fields(config_type_snap, config_snap_map),
        description=config_type_snap.description,
    )
def _define_node_types():
    node_type_id = Field(
        String,
        description="This field encodes, through a single value, the resources available to each "
        "of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned "
        "and optimized for memory or compute intensive workloads. "
        "A list of available node types can be retrieved by using the List node types API "
        "call. This field is required.",
        is_required=True,
    )
    driver_node_type_id = Field(
        String,
        description="The node type of the Spark driver. "
        "This field is optional; if unset, the driver node type is set to the "
        "same value as node_type_id defined above.",
        is_required=False,
    )
    return Field(
        Shape(
            fields={
                "node_type_id": node_type_id,
                "driver_node_type_id": driver_node_type_id,
            }
        )
    )
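# A hedged usage sketch: a value matching the Shape returned by
# _define_node_types(). The node type IDs are illustrative AWS instance types.
node_types_example = {
    "node_type_id": "r3.xlarge",
    "driver_node_type_id": "r3.2xlarge",
}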
def _dataframe_loader_config():
    read_fields = {
        read_from: Permissive(
            {
                option_name: Field(
                    option_args[0], is_required=option_args[1], description=option_args[2]
                )
                for option_name, option_args in read_opts["options"].items()
            }
        )
        for read_from, read_opts in DataFrameReadTypes.items()
    }

    return Shape(
        {
            "read": Field(Selector(read_fields), is_required=False),
            **{
                util_name: util_spec["options"]
                for util_name, util_spec in DataFrameUtilities.items()
            },
            # https://github.com/dagster-io/dagster/issues/2872
            **{
                field_name: Field(field_config, is_required=False)
                for field_name, field_config in read_fields.items()
            },
        }
    )
def _define_custom_tags():
    key = Field(
        String,
        description="The key of the tag. The key length must be between 1 and 127 UTF-8 "
        "characters, inclusive. For a list of all restrictions, see AWS Tag Restrictions: "
        "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html#tag-restrictions",
        is_required=True,
    )
    value = Field(
        String,
        description="The value of the tag. The value length must be less than or equal to "
        "255 UTF-8 characters. For a list of all restrictions, see AWS Tag Restrictions: "
        "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html#tag-restrictions",
        is_required=True,
    )
    return Field(
        [Shape(fields={"key": key, "value": value})],
        description="Additional tags for cluster resources. Databricks tags all cluster "
        "resources (e.g., AWS instances and EBS volumes) with these tags in addition to "
        "default_tags. Note: "
        "- Tags are not supported on legacy node types such as compute-optimized and "
        "memory-optimized. "
        "- Databricks allows at most 45 custom tags. "
        "More restrictions may apply if using Azure Databricks; refer to the official docs "
        "for further details.",
        is_required=False,
    )
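# A hedged usage sketch: a custom tags value satisfying _define_custom_tags().
# The key and value are illustrative.
custom_tags_example = [{"key": "team", "value": "data-platform"}]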
def _dataframe_materializer_config():
    to_fields = {
        write_to: Permissive(
            {
                option_name: Field(
                    option_args[0], is_required=option_args[1], description=option_args[2]
                )
                for option_name, option_args in to_opts["options"].items()
            }
        )
        for write_to, to_opts in DataFrameToTypes.items()
    }

    return Shape(
        {
            "to": Field(Selector(to_fields), is_required=False),
            **{
                util_name: util_spec["options"]
                for util_name, util_spec in DataFrameUtilities.items()
            },
            # https://github.com/dagster-io/dagster/issues/2872
            **{
                field_name: Field(field_config, is_required=False)
                for field_name, field_config in to_fields.items()
            },
        }
    )
def define_databricks_secrets_config():
    name = Field(
        String,
        description="The environment variable name, e.g. `DATABRICKS_TOKEN`.",
        is_required=True,
    )
    key = Field(String, description="The key of the Databricks secret.", is_required=True)
    scope = Field(String, description="The scope of the Databricks secret.", is_required=True)
    return Field(
        [Shape(fields={"name": name, "key": key, "scope": scope})],
        description="Databricks secrets to be exported as environment variables. Since runs "
        "will execute in the Databricks runtime environment, environment variables (such as those "
        "required for a `StringSource` config variable) will not be accessible to Dagster. These "
        "variables must be stored as Databricks secrets and specified here, which will ensure "
        "they are re-exported as environment variables accessible to Dagster upon execution.",
        is_required=False,
    )
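# A hedged usage sketch: a value the schema returned by
# define_databricks_secrets_config() is meant to accept. The secret scope and
# key names are illustrative only.
databricks_secrets_example = [
    {"name": "DATABRICKS_TOKEN", "key": "token", "scope": "dagster"},
]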
def _define_maven_library():
    coordinates = Field(
        String,
        description="Gradle-style Maven coordinates. For example: org.jsoup:jsoup:1.7.2. "
        "This field is required.",
        is_required=True,
    )
    repo = Field(
        String,
        description="Maven repo to install the Maven package from. "
        "If omitted, both Maven Central Repository and Spark Packages are searched.",
        is_required=False,
    )
    exclusions = Field(
        [String],
        description="List of dependencies to exclude. For example: "
        '["slf4j:slf4j", "*:hadoop-client"]. '
        "Maven dependency exclusions: "
        "https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html.",
        is_required=False,
    )
    return Field(
        Shape(fields={"coordinates": coordinates, "repo": repo, "exclusions": exclusions}),
        description="Specification of a Maven library to be installed.",
    )
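# A hedged usage sketch: a config value shaped to satisfy the Field returned by
# _define_maven_library(). The coordinates and exclusions are illustrative,
# taken from the field descriptions above.
maven_library_example = {
    "coordinates": "org.jsoup:jsoup:1.7.2",
    "exclusions": ["slf4j:slf4j"],
}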
def test_selector_within_dict_no_subfields():
    result = validate_config(Shape({'selector': Field(ExampleSelector)}), {'selector': {}})
    assert not result.success
    assert len(result.errors) == 1
    assert result.errors[0].message == (
        "Must specify a field at path root:selector if more than one field "
        "is defined. Defined fields: ['option_one', 'option_two']"
    )
def test_selector_within_dict_no_subfields():
    result = eval_config_value_from_dagster_type(
        Shape({"selector": Field(ExampleSelector)}), {"selector": {}}
    )
    assert not result.success
    assert len(result.errors) == 1
    assert result.errors[0].message == (
        "Must specify a field at path root:selector if more than one field "
        "is defined. Defined fields: ['option_one', 'option_two']"
    )
def test_list_of_dict(): inner_dict_dagster_type = Shape({"foo": Field(str)}) list_of_dict_snap = snap_from_dagster_type([inner_dict_dagster_type]) assert list_of_dict_snap.key.startswith("Array") child_type_keys = list_of_dict_snap.get_child_type_keys() assert child_type_keys assert len(child_type_keys) == 1 assert child_type_keys[0].startswith("Shape")
def test_list_of_dict():
    inner_dict_dagster_type = Shape({'foo': Field(str)})
    list_of_dict_meta = meta_from_dagster_type([inner_dict_dagster_type])

    assert list_of_dict_meta.key.startswith('Array')
    child_type_keys = list_of_dict_meta.get_child_type_keys()
    assert child_type_keys
    assert len(child_type_keys) == 1
    assert child_type_keys[0].startswith('Shape')
def test_post_process_error():
    error_result = eval_config_value_from_dagster_type(
        Shape({"foo": StringSource}), {"foo": {"env": "THIS_ENV_VAR_DOES_NOT_EXIST"}}
    )
    assert not error_result.success
    assert len(error_result.errors) == 1
    error = error_result.errors[0]
    assert error.reason == DagsterEvaluationErrorReason.FAILED_POST_PROCESSING
    assert len(error.stack.entries) == 1
def test_map_of_dict():
    inner_dict_dagster_type = Shape({"foo": Field(str)})
    map_of_dict_snap = snap_from_dagster_type({str: inner_dict_dagster_type})

    assert map_of_dict_snap.key.startswith("Map")
    child_type_keys = map_of_dict_snap.get_child_type_keys()
    assert child_type_keys
    assert len(child_type_keys) == 2
    assert child_type_keys[0] == "String"
    assert child_type_keys[1].startswith("Shape")
def _define_instance_groups():
    return Field(
        [
            Shape(
                fields={
                    'Name': Field(
                        String,
                        description='Friendly name given to the instance group.',
                        is_required=False,
                    ),
                    'Market': Field(
                        EmrMarket,
                        description='''Market type of the EC2 instances used to create a
                        cluster node.''',
                        is_required=False,
                    ),
                    'InstanceRole': Field(
                        EmrInstanceRole,
                        description='The role of the instance group in the cluster.',
                        is_required=True,
                    ),
                    'BidPrice': Field(
                        String,
                        description='''The maximum Spot price you are willing to pay for
                        EC2 instances. An optional, nullable field that applies if the
                        MarketType for the instance group is specified as SPOT. Specify
                        the maximum spot price in USD. If the value is NULL and SPOT is
                        specified, the maximum Spot price is set equal to the On-Demand
                        price.''',
                        is_required=False,
                    ),
                    'InstanceType': Field(
                        String,
                        description='''The EC2 instance type for all instances in the
                        instance group.''',
                        is_required=True,
                    ),
                    'InstanceCount': Field(
                        Int,
                        description='Target number of instances for the instance group.',
                        is_required=True,
                    ),
                    'Configurations': _define_configurations(),
                    'EbsConfiguration': _define_ebs_configuration(),
                    'AutoScalingPolicy': _define_auto_scaling_policy(),
                }
            )
        ],
        description='Configuration for the instance groups in a cluster.',
        is_required=False,
    )
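# A hedged usage sketch: one instance group supplying only the required fields
# of _define_instance_groups(). The role, instance type, and count are
# illustrative; the accepted InstanceRole string depends on the
# EmrInstanceRole enum's config values.
instance_groups_example = [
    {
        'InstanceRole': 'CORE',
        'InstanceType': 'm5.xlarge',
        'InstanceCount': 2,
    }
]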
def test_list_of_scalar_or_dict():
    int_or_dict_list = resolve_to_config_type(
        [ScalarUnion(scalar_type=int, non_scalar_schema=Shape({"a_string": str}))]
    )

    assert validate_config(int_or_dict_list, []).success
    assert validate_config(int_or_dict_list, [2]).success
    assert validate_config(int_or_dict_list, [{"a_string": "kjdfd"}]).success
    assert validate_config(int_or_dict_list, [2, {"a_string": "kjdfd"}]).success

    assert not validate_config(int_or_dict_list, [2, {"wrong_key": "kjdfd"}]).success
    assert not validate_config(int_or_dict_list, [2, {"a_string": 2343}]).success
    assert not validate_config(int_or_dict_list, ["kjdfkd", {"a_string": "kjdfd"}]).success
def test_post_process_error():
    from dagster.core.instance.source_types import StringSource

    error_result = eval_config_value_from_dagster_type(
        Shape({'foo': StringSource}), {'foo': {'env': 'THIS_ENV_VAR_DOES_NOT_EXIST'}}
    )
    assert not error_result.success
    assert len(error_result.errors) == 1
    error = error_result.errors[0]
    assert error.reason == DagsterEvaluationErrorReason.FAILED_POST_PROCESSING
    assert len(error.stack.entries) == 1
def test_scalar_or_dict():
    int_or_dict = ScalarUnion(scalar_type=int, non_scalar_schema=Shape({"a_string": str}))

    assert validate_config(int_or_dict, 2).success
    assert not validate_config(int_or_dict, "2").success
    assert not validate_config(int_or_dict, False).success

    assert validate_config(int_or_dict, {"a_string": "kjdfk"}).success
    assert not validate_config(int_or_dict, {}).success
    assert not validate_config(int_or_dict, {"wrong_key": "kjdfd"}).success
    assert not validate_config(int_or_dict, {"a_string": 2}).success
    assert not validate_config(int_or_dict, {"a_string": "kjdfk", "extra_field": "kd"}).success
def test_scalar_or_dict():
    int_or_dict = ScalarUnion(
        scalar_type=resolve_to_config_type(int), non_scalar_type=Shape({'a_string': str})
    )

    assert validate_config(int_or_dict, 2).success
    assert not validate_config(int_or_dict, '2').success
    assert not validate_config(int_or_dict, False).success

    assert validate_config(int_or_dict, {'a_string': 'kjdfk'}).success
    assert not validate_config(int_or_dict, {}).success
    assert not validate_config(int_or_dict, {'wrong_key': 'kjdfd'}).success
    assert not validate_config(int_or_dict, {'a_string': 2}).success
    assert not validate_config(int_or_dict, {'a_string': 'kjdfk', 'extra_field': 'kd'}).success
def test_field_alias_order_irrelevant():
    int_dict_1 = Shape(
        fields={"an_int": Field(int), "another_int": Field(int)},
        field_aliases={"an_int": "foo", "another_int": "bar"},
    )
    int_dict_2 = Shape(
        fields={"an_int": Field(int), "another_int": Field(int)},
        field_aliases={"another_int": "bar", "an_int": "foo"},
    )

    assert int_dict_1 is int_dict_2
    assert int_dict_1.key == int_dict_2.key
def test_list_of_dict():
    inner_dict_dagster_type = Shape({'foo': Field(str)})
    list_of_dict_meta = meta_from_dagster_type([inner_dict_dagster_type])

    assert list_of_dict_meta.key.startswith('Array')
    assert list_of_dict_meta.inner_type_refs
    assert len(list_of_dict_meta.inner_type_refs) == 1

    # Both Shape[...] and str are NonGenericTypeRefMetas in this schema
    dict_ref = list_of_dict_meta.type_param_refs[0]
    assert isinstance(dict_ref, NonGenericTypeRefMeta)
    assert dict_ref.key.startswith('Shape')

    assert (
        len(list_of_dict_meta.type_param_refs) == 1
        and list_of_dict_meta.type_param_refs[0].key
        == resolve_to_config_type(inner_dict_dagster_type).key
    )
def test_list_of_scalar_or_dict():
    int_or_dict_list = resolve_to_config_type(
        [
            ScalarUnion(
                scalar_type=resolve_to_config_type(int),
                non_scalar_type=Shape({'a_string': str}),
            )
        ]
    )

    assert validate_config(int_or_dict_list, []).success
    assert validate_config(int_or_dict_list, [2]).success
    assert validate_config(int_or_dict_list, [{'a_string': 'kjdfd'}]).success
    assert validate_config(int_or_dict_list, [2, {'a_string': 'kjdfd'}]).success

    assert not validate_config(int_or_dict_list, [2, {'wrong_key': 'kjdfd'}]).success
    assert not validate_config(int_or_dict_list, [2, {'a_string': 2343}]).success
    assert not validate_config(int_or_dict_list, ['kjdfkd', {'a_string': 'kjdfd'}]).success
def _define_spark_submit_task():
    parameters = Field(
        [String],
        description="Command-line parameters passed to spark submit.",
        is_required=True,
    )
    return Field(
        Shape(fields={"parameters": parameters}),
        description="Important! You can invoke Spark submit tasks only on new clusters. "
        "In the new_cluster specification, libraries and spark_conf are not supported. Instead, "
        "use --jars and --py-files to add Java and Python libraries and use --conf to set the "
        "Spark configuration. "
        "master, deploy-mode, and executor-cores are automatically configured by Databricks; "
        "you cannot specify them in parameters. "
        "By default, the Spark submit job uses all available memory (excluding reserved memory "
        "for Databricks services). You can set --driver-memory and --executor-memory to a "
        "smaller value to leave some room for off-heap usage. "
        "The --jars, --py-files, and --files arguments support DBFS and S3 paths.",
    )
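# A hedged usage sketch: a value matching the Shape returned by
# _define_spark_submit_task(), mirroring spark-submit's CLI form. The class
# name and jar path are illustrative.
spark_submit_task_example = {
    "parameters": ["--class", "org.example.Main", "dbfs:/jars/app.jar"],
}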
def _define_spark_python_task():
    python_file = Field(
        String,
        description="The URI of the Python file to be executed. DBFS and S3 paths are "
        "supported. This field is required.",
        is_required=True,
    )
    parameters = Field(
        [String],
        description="Command line parameters that will be passed to the Python file.",
        is_required=False,
        default_value=[],
    )
    return Field(Shape(fields={"python_file": python_file, "parameters": parameters}))
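# A hedged usage sketch: a value matching the Shape returned by
# _define_spark_python_task(). The DBFS path and arguments are illustrative.
spark_python_task_example = {
    "python_file": "dbfs:/jobs/etl.py",
    "parameters": ["--date", "2021-01-01"],
}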