Example #1
def test_construct_same_perm_dicts():
    int_perm_dict_1 = PermissiveDict(fields={'an_int': Field(int)})
    int_perm_dict_2 = PermissiveDict(fields={'an_int': Field(int)})

    # assert identical object
    assert int_perm_dict_1 is int_perm_dict_2
    # assert equivalent key
    assert int_perm_dict_1.key == int_perm_dict_2.key
Example #2
def test_invalid_permissive_dict_field():
    with pytest.raises(DagsterInvalidDefinitionError) as exc_info:
        PermissiveDict({'val': Int, 'another_val': Field(Int)})

    assert str(exc_info.value) == (
        'You have passed a config type "Int" in the parameter "fields" and it is '
        'in the "val" entry of that dict. It is from a PermissiveDict with fields '
        '[\'another_val\', \'val\']. You have likely '
        'forgot to wrap this type in a Field.')
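For contrast, a minimal sketch of the well-formed definition the test above expects, with every entry in fields wrapped in a Field (assuming the same dagster version as these examples):

# Every value passed to PermissiveDict must be wrapped in Field; a bare
# config type such as Int raises DagsterInvalidDefinitionError, as the
# test above asserts.
valid_perm_dict = PermissiveDict({'val': Field(Int), 'another_val': Field(Int)})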
Example #3
def _define_configurations():
    return Field(
        List[
            Dict(
                fields={
                    'Classification': Field(
                        String,
                        description='The classification within a configuration.',
                        is_optional=True,
                    ),
                    'Configurations': Field(
                        List[PermissiveDict()],
                        description='''A list of additional configurations to apply within a
                                configuration object.''',
                        is_optional=True,
                    ),
                    'Properties': Field(
                        PermissiveDict(),
                        description='''A set of properties specified within a configuration
                                classification.''',
                        is_optional=True,
                    ),
                }
            )
        ],
        description='''For Amazon EMR releases 4.0 and later. The list of configurations supplied
        for the EMR cluster you are creating.

        An optional configuration specification to be used when provisioning cluster instances,
        which can include configurations for applications and software bundled with Amazon EMR. A
        configuration consists of a classification, properties, and optional nested configurations.
        A classification refers to an application-specific configuration file. Properties are the
        settings you want to change in that file. For more information, see the EMR Configuring
        Applications guide.''',
        is_optional=True,
    )
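A hypothetical value this schema would accept. The keys under 'Properties' are unconstrained because that field is a PermissiveDict:

# Hypothetical EMR configuration matching _define_configurations();
# the classification and property names are illustrative only.
example_configurations = [
    {
        'Classification': 'spark-defaults',
        'Properties': {
            'spark.executor.memory': '2g',
            'spark.dynamicAllocation.enabled': 'true',
        },
    }
]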
Example #4
def test_apply_default_values():
    scalar_config_type = resolve_to_config_type(String)
    assert apply_default_values(scalar_config_type, 'foo') == 'foo'
    assert apply_default_values(scalar_config_type, 3) == 3
    assert apply_default_values(scalar_config_type, {}) == {}
    assert apply_default_values(scalar_config_type, None) is None

    enum_config_type = resolve_to_config_type(
        Enum('an_enum', [EnumValue('foo'),
                         EnumValue('bar', python_value=3)]))
    assert apply_default_values(enum_config_type, 'foo') == 'foo'
    assert apply_default_values(enum_config_type, 'bar') == 3
    with pytest.raises(CheckError,
                       match='config_value should be pre-validated'):
        apply_default_values(enum_config_type, 'baz')
    with pytest.raises(CheckError,
                       match='config_value should be pre-validated'):
        apply_default_values(enum_config_type, None)

    list_config_type = resolve_to_config_type(List[String])

    assert apply_default_values(list_config_type, ['foo']) == ['foo']
    assert apply_default_values(list_config_type, None) == []
    with pytest.raises(CheckError, match='Null list member not caught'):
        assert apply_default_values(list_config_type, [None]) == [None]

    nullable_list_config_type = resolve_to_config_type(List[Optional[String]])
    assert apply_default_values(nullable_list_config_type, ['foo']) == ['foo']
    assert apply_default_values(nullable_list_config_type, [None]) == [None]
    assert apply_default_values(nullable_list_config_type, None) == []

    composite_config_type = resolve_to_config_type(
        Dict({
            'foo': Field(String),
            'bar': Field(Dict({'baz': Field(List[String])})),
            'quux': Field(String, is_optional=True, default_value='zip'),
            'quiggle': Field(String, is_optional=True),
        }))

    with pytest.raises(CheckError,
                       match='Missing non-optional composite member'):
        apply_default_values(composite_config_type, {})

    with pytest.raises(CheckError,
                       match='Missing non-optional composite member'):
        apply_default_values(composite_config_type, {
            'bar': {
                'baz': ['giraffe']
            },
            'quux': 'nimble'
        })

    with pytest.raises(CheckError,
                       match='Missing non-optional composite member'):
        apply_default_values(composite_config_type, {
            'foo': 'zowie',
            'quux': 'nimble'
        })

    assert apply_default_values(composite_config_type, {
        'foo': 'zowie',
        'bar': {
            'baz': ['giraffe']
        },
        'quux': 'nimble'
    }) == {
        'foo': 'zowie',
        'bar': {
            'baz': ['giraffe']
        },
        'quux': 'nimble'
    }

    assert apply_default_values(composite_config_type, {
        'foo': 'zowie',
        'bar': {
            'baz': ['giraffe']
        }
    }) == {
        'foo': 'zowie',
        'bar': {
            'baz': ['giraffe']
        },
        'quux': 'zip'
    }

    assert apply_default_values(composite_config_type, {
        'foo': 'zowie',
        'bar': {
            'baz': ['giraffe']
        },
        'quiggle': 'squiggle'
    }) == {
        'foo': 'zowie',
        'bar': {
            'baz': ['giraffe']
        },
        'quux': 'zip',
        'quiggle': 'squiggle'
    }

    nested_composite_config_type = resolve_to_config_type(
        Dict({
            'fruts':
            Field(
                Dict({
                    'apple':
                    Field(String),
                    'banana':
                    Field(String, is_optional=True),
                    'potato':
                    Field(String, is_optional=True, default_value='pie'),
                }))
        }))

    with pytest.raises(CheckError,
                       match='Missing non-optional composite member'):
        apply_default_values(nested_composite_config_type, {'fruts': None})

    with pytest.raises(CheckError,
                       match='Missing non-optional composite member'):
        apply_default_values(nested_composite_config_type,
                             {'fruts': {
                                 'banana': 'good',
                                 'potato': 'bad'
                             }})

    assert apply_default_values(nested_composite_config_type,
                                {'fruts': {
                                    'apple': 'strawberry'
                                }}) == {
                                    'fruts': {
                                        'apple': 'strawberry',
                                        'potato': 'pie'
                                    }
                                }

    assert apply_default_values(
        nested_composite_config_type,
        {'fruts': {
            'apple': 'a',
            'banana': 'b',
            'potato': 'c'
        }}) == {
            'fruts': {
                'apple': 'a',
                'banana': 'b',
                'potato': 'c'
            }
        }

    any_config_type = resolve_to_config_type(Any)

    assert apply_default_values(any_config_type, {'foo': 'bar'}) == {
        'foo': 'bar'
    }

    with pytest.raises(CheckError, match='Unsupported type'):
        assert apply_default_values(
            ConfigType('gargle', 'bargle', ConfigTypeKind.REGULAR), 3)

    selector_config_type = resolve_to_config_type(
        Selector({
            'one':
            Field(String),
            'another':
            Field(
                Dict({
                    'foo':
                    Field(String, default_value='bar', is_optional=True)
                })),
            'yet_another':
            Field(String, default_value='quux', is_optional=True),
        }))

    with pytest.raises(CheckError):
        apply_default_values(selector_config_type, 'one')

    with pytest.raises(ParameterCheckError):
        apply_default_values(selector_config_type, None)

    with pytest.raises(ParameterCheckError,
                       match='Expected dict with single item'):
        apply_default_values(selector_config_type, {})

    with pytest.raises(CheckError):
        apply_default_values(selector_config_type, {
            'one': 'foo',
            'another': 'bar'
        })

    assert apply_default_values(selector_config_type, {'one': 'foo'}) == {
        'one': 'foo'
    }

    assert apply_default_values(selector_config_type, {'one': None}) == {
        'one': None
    }

    assert apply_default_values(selector_config_type, {'one': {}}) == {
        'one': {}
    }

    assert apply_default_values(selector_config_type, {'another': {}}) == {
        'another': {
            'foo': 'bar'
        }
    }

    singleton_selector_config_type = resolve_to_config_type(
        Selector({'foo': Field(String, default_value='bar',
                               is_optional=True)}))

    assert apply_default_values(singleton_selector_config_type, None) == {
        'foo': 'bar'
    }

    permissive_dict_config_type = resolve_to_config_type(
        PermissiveDict({
            'foo':
            Field(String),
            'bar':
            Field(String, default_value='baz', is_optional=True)
        }))

    with pytest.raises(CheckError,
                       match='Missing non-optional composite member'):
        apply_default_values(permissive_dict_config_type, None)

    assert apply_default_values(permissive_dict_config_type, {
        'foo': 'wow',
        'mau': 'mau'
    }) == {
        'foo': 'wow',
        'bar': 'baz',
        'mau': 'mau',
    }
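In short, apply_default_values fills omitted optional fields from their declared defaults and leaves supplied values untouched. A minimal sketch reusing the helpers from the test above:

# Defaults are injected only for fields the caller omitted.
greeting_type = resolve_to_config_type(
    Dict({'whom': Field(String, is_optional=True, default_value='world')}))

assert apply_default_values(greeting_type, {}) == {'whom': 'world'}
assert apply_default_values(greeting_type, {'whom': 'dagster'}) == {'whom': 'dagster'}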
Example #5
def define_dataproc_job_config():
    return Field(
        Dict(
            fields={
                'pysparkJob':
                Field(
                    Dict(
                        fields={
                            'mainPythonFileUri':
                            Field(
                                String,
                                description=
                                '''Required. The HCFS URI of the main Python file to use
                                as the driver. Must be a .py file.''',
                                is_optional=True,
                            ),
                            'archiveUris':
                            Field(
                                List[String],
                                description=
                                '''Optional. HCFS URIs of archives to be extracted in
                                the working directory of each executor. Supported file
                                types: .jar, .tar, .tar.gz, .tgz, and .zip.''',
                                is_optional=True,
                            ),
                            'jarFileUris':
                            Field(
                                List[String],
                                description=
                                '''Optional. HCFS URIs of jar files to add to the
                                CLASSPATHs of the Python driver and tasks.''',
                                is_optional=True,
                            ),
                            'loggingConfig':
                            Field(
                                Dict(
                                    fields={
                                        'driverLogLevels':
                                        Field(
                                            PermissiveDict(),
                                            description=
                                            '''The per-package log levels for the
                                            driver. This may include "root" package name to
                                            configure rootLogger. Examples:  \'com.google = FATAL\',
                                            \'root = INFO\', \'org.apache = DEBUG\'''',
                                            is_optional=True,
                                        )
                                    }),
                                description=
                                '''The runtime logging config of the job.''',
                                is_optional=True,
                            ),
                            'properties':
                            Field(
                                PermissiveDict(),
                                description=
                                '''Optional. A mapping of property names to values, used
                                to configure PySpark. Properties that conflict with values set by
                                the Cloud Dataproc API may be overwritten. Can include properties
                                set in /etc/spark/conf/spark-defaults.conf and classes in user
                                code.''',
                                is_optional=True,
                            ),
                            'args':
                            Field(
                                List[String],
                                description=
                                '''Optional. The arguments to pass to the driver. Do not
                                include arguments, such as --conf, that can be set as job
                                properties, since a collision may occur that causes an incorrect job
                                submission.''',
                                is_optional=True,
                            ),
                            'fileUris':
                            Field(
                                List[String],
                                description=
                                '''Optional. HCFS URIs of files to be copied to the
                                working directory of Python drivers and distributed tasks. Useful
                                for naively parallel tasks.''',
                                is_optional=True,
                            ),
                            'pythonFileUris':
                            Field(
                                List[String],
                                description=
                                '''Optional. HCFS file URIs of Python files to pass to
                                the PySpark framework. Supported file types: .py, .egg, and
                                .zip.''',
                                is_optional=True,
                            ),
                        }),
                    description=
                    '''A Cloud Dataproc job for running Apache PySpark
                    (https://spark.apache.org/docs/0.9.0/python-programming-guide.html) applications
                    on YARN.''',
                    is_optional=True,
                ),
                'reference':
                Field(
                    Dict(
                        fields={
                            'projectId':
                            Field(
                                String,
                                description=
                                '''Required. The ID of the Google Cloud Platform project
                                that the job belongs to.''',
                                is_optional=True,
                            ),
                            'jobId':
                            Field(
                                String,
                                description=
                                '''Optional. The job ID, which must be unique within the
                                project. The ID must contain only letters (a-z, A-Z), numbers (0-9),
                                underscores (_), or hyphens (-). The maximum length is 100
                                characters. If not specified by the caller, the job ID will be
                                provided by the server.''',
                                is_optional=True,
                                default_value='dagster-job-' +
                                str(uuid.uuid4()),
                            ),
                        }),
                    description=
                    '''Encapsulates the full scoping used to reference a job.''',
                    is_optional=True,
                ),
                'hadoopJob':
                Field(
                    Dict(
                        fields={
                            'jarFileUris':
                            Field(
                                List[String],
                                description=
                                '''Optional. Jar file URIs to add to the CLASSPATHs of
                                the Hadoop driver and tasks.''',
                                is_optional=True,
                            ),
                            'loggingConfig':
                            Field(
                                Dict(
                                    fields={
                                        'driverLogLevels':
                                        Field(
                                            PermissiveDict(),
                                            description=
                                            '''The per-package log levels for the
                                            driver. This may include "root" package name to
                                            configure rootLogger. Examples:  \'com.google = FATAL\',
                                            \'root = INFO\', \'org.apache = DEBUG\'''',
                                            is_optional=True,
                                        )
                                    }),
                                description=
                                '''The runtime logging config of the job.''',
                                is_optional=True,
                            ),
                            'properties':
                            Field(
                                PermissiveDict(),
                                description=
                                '''Optional. A mapping of property names to values, used
                                to configure Hadoop. Properties that conflict with values set by the
                                Cloud Dataproc API may be overwritten. Can include properties set in
                                /etc/hadoop/conf/*-site and classes in user code.''',
                                is_optional=True,
                            ),
                            'args':
                            Field(
                                List[String],
                                description=
                                '''Optional. The arguments to pass to the driver. Do not
                                include arguments, such as -libjars or -Dfoo=bar, that can be set as
                                job properties, since a collision may occur that causes an incorrect
                                job submission.''',
                                is_optional=True,
                            ),
                            'fileUris':
                            Field(
                                List[String],
                                description=
                                '''Optional. HCFS (Hadoop Compatible Filesystem) URIs of
                                files to be copied to the working directory of Hadoop drivers and
                                distributed tasks. Useful for naively parallel tasks.''',
                                is_optional=True,
                            ),
                            'mainClass':
                            Field(
                                String,
                                description=
                                '''The name of the driver\'s main class. The jar file
                                containing the class must be in the default CLASSPATH or specified
                                in jar_file_uris.''',
                                is_optional=True,
                            ),
                            'archiveUris':
                            Field(
                                List[String],
                                description=
                                '''Optional. HCFS URIs of archives to be extracted in
                                the working directory of Hadoop drivers and tasks. Supported file
                                types: .jar, .tar, .tar.gz, .tgz, or .zip.''',
                                is_optional=True,
                            ),
                            'mainJarFileUri':
                            Field(
                                String,
                                description=
                                '''The HCFS URI of the jar file containing the main
                                class. Examples:
                                \'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar\'
                                \'hdfs:/tmp/test-samples/custom-wordcount.jar\'
                                \'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar\'''',
                                is_optional=True,
                            ),
                        }),
                    description=
                    '''A Cloud Dataproc job for running Apache Hadoop MapReduce
                    (https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html)
                    jobs on Apache Hadoop YARN
                    (https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html).''',
                    is_optional=True,
                ),
                'status':
                Field(Dict(fields={}),
                      description='''Cloud Dataproc job status.''',
                      is_optional=True),
                'placement':
                Field(
                    Dict(
                        fields={
                            'clusterName':
                            Field(
                                String,
                                description=
                                '''Required. The name of the cluster where the job will
                                be submitted.''',
                                is_optional=True,
                            )
                        }),
                    description='''Cloud Dataproc job config.''',
                    is_optional=True,
                ),
                'scheduling':
                Field(
                    Dict(
                        fields={
                            'maxFailuresPerHour':
                            Field(
                                Int,
                                description=
                                '''Optional. Maximum number of times per hour a driver
                                may be restarted as a result of the driver terminating with a
                                non-zero code before the job is reported failed. A job may be
                                reported as thrashing if the driver exits with a non-zero code
                                4 times within a 10-minute window. Maximum value is 10.''',
                                is_optional=True,
                            )
                        }),
                    description='''Job scheduling options.''',
                    is_optional=True,
                ),
                'pigJob':
                Field(
                    Dict(
                        fields={
                            'queryFileUri':
                            Field(
                                String,
                                description=
                                '''The HCFS URI of the script that contains the Pig
                                queries.''',
                                is_optional=True,
                            ),
                            'queryList':
                            Field(
                                Dict(
                                    fields={
                                        'queries':
                                        Field(
                                            List[String],
                                            description=
                                            '''Required. The queries to execute. You do
                                            not need to terminate a query with a semicolon. Multiple
                                            queries can be specified in one string by separating
                                            each with a semicolon. Here is an example of a Cloud
                                            Dataproc API snippet that uses a QueryList to specify a
                                            HiveJob: "hiveJob": {   "queryList": {     "queries": [
                                            "query1",       "query2",       "query3;query4",     ]
                                            } } ''',
                                            is_optional=True,
                                        )
                                    }),
                                description=
                                '''A list of queries to run on a cluster.''',
                                is_optional=True,
                            ),
                            'jarFileUris':
                            Field(
                                List[String],
                                description=
                                '''Optional. HCFS URIs of jar files to add to the
                                CLASSPATH of the Pig Client and Hadoop MapReduce (MR) tasks. Can
                                contain Pig UDFs.''',
                                is_optional=True,
                            ),
                            'scriptVariables':
                            Field(
                                PermissiveDict(),
                                description=
                                '''Optional. Mapping of query variable names to values
                                (equivalent to the Pig command: name=[value]).''',
                                is_optional=True,
                            ),
                            'loggingConfig':
                            Field(
                                Dict(
                                    fields={
                                        'driverLogLevels':
                                        Field(
                                            PermissiveDict(),
                                            description=
                                            '''The per-package log levels for the
                                            driver. This may include "root" package name to
                                            configure rootLogger. Examples:  \'com.google = FATAL\',
                                            \'root = INFO\', \'org.apache = DEBUG\'''',
                                            is_optional=True,
                                        )
                                    }),
                                description=
                                '''The runtime logging config of the job.''',
                                is_optional=True,
                            ),
                            'properties':
                            Field(
                                PermissiveDict(),
                                description=
                                '''Optional. A mapping of property names to values, used
                                to configure Pig. Properties that conflict with values set by the
                                Cloud Dataproc API may be overwritten. Can include properties set in
                                /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties, and
                                classes in user code.''',
                                is_optional=True,
                            ),
                            'continueOnFailure':
                            Field(
                                Bool,
                                description=
                                '''Optional. Whether to continue executing queries if a
                                query fails. The default value is false. Setting to true can be
                                useful when executing independent parallel queries.''',
                                is_optional=True,
                            ),
                        }),
                    description='''A Cloud Dataproc job for running Apache Pig
                    (https://pig.apache.org/) queries on YARN.''',
                    is_optional=True,
                ),
                'hiveJob':
                Field(
                    Dict(
                        fields={
                            'queryFileUri':
                            Field(
                                String,
                                description=
                                '''The HCFS URI of the script that contains Hive
                                queries.''',
                                is_optional=True,
                            ),
                            'queryList':
                            Field(
                                Dict(
                                    fields={
                                        'queries':
                                        Field(
                                            List[String],
                                            description=
                                            '''Required. The queries to execute. You do
                                            not need to terminate a query with a semicolon. Multiple
                                            queries can be specified in one string by separating
                                            each with a semicolon. Here is an example of a Cloud
                                            Dataproc API snippet that uses a QueryList to specify a
                                            HiveJob: "hiveJob": {   "queryList": {     "queries": [
                                            "query1",       "query2",       "query3;query4",     ]
                                            } } ''',
                                            is_optional=True,
                                        )
                                    }),
                                description=
                                '''A list of queries to run on a cluster.''',
                                is_optional=True,
                            ),
                            'jarFileUris':
                            Field(
                                List[String],
                                description=
                                '''Optional. HCFS URIs of jar files to add to the
                                CLASSPATH of the Hive server and Hadoop MapReduce (MR) tasks. Can
                                contain Hive SerializationStrategys and UDFs.''',
                                is_optional=True,
                            ),
                            'scriptVariables':
                            Field(
                                PermissiveDict(),
                                description=
                                '''Optional. Mapping of query variable names to values
                                (equivalent to the Hive command: SET name="value";).''',
                                is_optional=True,
                            ),
                            'properties':
                            Field(
                                PermissiveDict(),
                                description=
                                '''Optional. A mapping of property names and values,
                                used to configure Hive. Properties that conflict with values set by
                                the Cloud Dataproc API may be overwritten. Can include properties
                                set in /etc/hadoop/conf/*-site.xml, /etc/hive/conf/hive-site.xml,
                                and classes in user code.''',
                                is_optional=True,
                            ),
                            'continueOnFailure':
                            Field(
                                Bool,
                                description=
                                '''Optional. Whether to continue executing queries if a
                                query fails. The default value is false. Setting to true can be
                                useful when executing independent parallel queries.''',
                                is_optional=True,
                            ),
                        }),
                    description='''A Cloud Dataproc job for running Apache Hive
                    (https://hive.apache.org/) queries on YARN.''',
                    is_optional=True,
                ),
                'labels':
                Field(
                    PermissiveDict(),
                    description=
                    '''Optional. The labels to associate with this job. Label keys must
                    contain 1 to 63 characters, and must conform to RFC 1035
                    (https://www.ietf.org/rfc/rfc1035.txt). Label values may be empty, but, if
                    present, must contain 1 to 63 characters, and must conform to RFC 1035
                    (https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be associated
                    with a job.''',
                    is_optional=True,
                ),
                'sparkSqlJob':
                Field(
                    Dict(
                        fields={
                            'queryFileUri':
                            Field(
                                String,
                                description=
                                '''The HCFS URI of the script that contains SQL
                                queries.''',
                                is_optional=True,
                            ),
                            'queryList':
                            Field(
                                Dict(
                                    fields={
                                        'queries':
                                        Field(
                                            List[String],
                                            description=
                                            '''Required. The queries to execute. You do
                                            not need to terminate a query with a semicolon. Multiple
                                            queries can be specified in one string by separating
                                            each with a semicolon. Here is an example of a Cloud
                                            Dataproc API snippet that uses a QueryList to specify a
                                            HiveJob: "hiveJob": {   "queryList": {     "queries": [
                                            "query1",       "query2",       "query3;query4",     ]
                                            } } ''',
                                            is_optional=True,
                                        )
                                    }),
                                description=
                                '''A list of queries to run on a cluster.''',
                                is_optional=True,
                            ),
                            'scriptVariables':
                            Field(
                                PermissiveDict(),
                                description=
                                '''Optional. Mapping of query variable names to values
                                (equivalent to the Spark SQL command: SET name="value";).''',
                                is_optional=True,
                            ),
                            'jarFileUris':
                            Field(
                                List[String],
                                description=
                                '''Optional. HCFS URIs of jar files to be added to the
                                Spark CLASSPATH.''',
                                is_optional=True,
                            ),
                            'loggingConfig':
                            Field(
                                Dict(
                                    fields={
                                        'driverLogLevels':
                                        Field(
                                            PermissiveDict(),
                                            description=
                                            '''The per-package log levels for the
                                            driver. This may include "root" package name to
                                            configure rootLogger. Examples:  \'com.google = FATAL\',
                                            \'root = INFO\', \'org.apache = DEBUG\'''',
                                            is_optional=True,
                                        )
                                    }),
                                description=
                                '''The runtime logging config of the job.''',
                                is_optional=True,
                            ),
                            'properties':
                            Field(
                                PermissiveDict(),
                                description=
                                '''Optional. A mapping of property names to values, used
                                to configure Spark SQL\'s SparkConf. Properties that conflict with
                                values set by the Cloud Dataproc API may be overwritten.''',
                                is_optional=True,
                            ),
                        }),
                    description=
                    '''A Cloud Dataproc job for running Apache Spark SQL
                    (http://spark.apache.org/sql/) queries.''',
                    is_optional=True,
                ),
                'sparkJob':
                Field(
                    Dict(
                        fields={
                            'mainJarFileUri':
                            Field(
                                String,
                                description=
                                '''The HCFS URI of the jar file that contains the main
                                class.''',
                                is_optional=True,
                            ),
                            'jarFileUris':
                            Field(
                                List[String],
                                description=
                                '''Optional. HCFS URIs of jar files to add to the
                                CLASSPATHs of the Spark driver and tasks.''',
                                is_optional=True,
                            ),
                            'loggingConfig':
                            Field(
                                Dict(
                                    fields={
                                        'driverLogLevels':
                                        Field(
                                            PermissiveDict(),
                                            description=
                                            '''The per-package log levels for the
                                            driver. This may include "root" package name to
                                            configure rootLogger. Examples:  \'com.google = FATAL\',
                                            \'root = INFO\', \'org.apache = DEBUG\'''',
                                            is_optional=True,
                                        )
                                    }),
                                description=
                                '''The runtime logging config of the job.''',
                                is_optional=True,
                            ),
                            'properties':
                            Field(
                                PermissiveDict(),
                                description=
                                '''Optional. A mapping of property names to values, used
                                to configure Spark. Properties that conflict with values set by the
                                Cloud Dataproc API may be overwritten. Can include properties set in
                                /etc/spark/conf/spark-defaults.conf and classes in user code.''',
                                is_optional=True,
                            ),
                            'args':
                            Field(
                                List[String],
                                description=
                                '''Optional. The arguments to pass to the driver. Do not
                                include arguments, such as --conf, that can be set as job
                                properties, since a collision may occur that causes an incorrect job
                                submission.''',
                                is_optional=True,
                            ),
                            'fileUris':
                            Field(
                                List[String],
                                description=
                                '''Optional. HCFS URIs of files to be copied to the
                                working directory of Spark drivers and distributed tasks. Useful for
                                naively parallel tasks.''',
                                is_optional=True,
                            ),
                            'mainClass':
                            Field(
                                String,
                                description=
                                '''The name of the driver\'s main class. The jar file
                                that contains the class must be in the default CLASSPATH or
                                specified in jar_file_uris.''',
                                is_optional=True,
                            ),
                            'archiveUris':
                            Field(
                                List[String],
                                description=
                                '''Optional. HCFS URIs of archives to be extracted in
                                the working directory of Spark drivers and tasks. Supported file
                                types: .jar, .tar, .tar.gz, .tgz, and .zip.''',
                                is_optional=True,
                            ),
                        }),
                    description='''A Cloud Dataproc job for running Apache Spark
                    (http://spark.apache.org/) applications on YARN.''',
                    is_optional=True,
                ),
            }),
        description='''A Cloud Dataproc job resource.''',
        is_optional=True,
    )
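A hypothetical value accepted by this field. The nested 'properties' (and 'labels') sections take arbitrary keys because they are declared as PermissiveDict:

# Hypothetical Dataproc job config matching define_dataproc_job_config();
# project, cluster, and bucket names are illustrative only.
example_job_config = {
    'reference': {'projectId': 'my-project'},
    'placement': {'clusterName': 'my-cluster'},
    'pysparkJob': {
        'mainPythonFileUri': 'gs://my-bucket/main.py',
        'properties': {'spark.executor.memory': '2g'},
    },
}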
Example #6
# The opening of this snippet was lost in extraction; the decorator below is
# a plausible reconstruction inferred from the keys read in the function body
# ('haw', 'cn', 'en', each holding a 'whom' entry) and from the surviving
# tail (is_optional=True, default_value={'en': {'whom': 'world'}}).
@solid(config=Field(
    Dict({
        'haw': Field(Dict({'whom': Field(String)}), is_optional=True),
        'cn': Field(Dict({'whom': Field(String)}), is_optional=True),
        'en': Field(Dict({'whom': Field(String)}), is_optional=True),
    }),
    is_optional=True,
    default_value={'en': {
        'whom': 'world'
    }},
))
def hello_world_default(context) -> str:
    if 'haw' in context.solid_config:
        return 'Aloha {whom}!'.format(whom=context.solid_config['haw']['whom'])
    if 'cn' in context.solid_config:
        return '你好,{whom}!'.format(whom=context.solid_config['cn']['whom'])
    if 'en' in context.solid_config:
        return 'Hello, {whom}!'.format(whom=context.solid_config['en']['whom'])


@solid(config=Field(PermissiveDict({'required': Field(String)})))
def partially_specified_config(context) -> List:
    return sorted(list(context.solid_config.items()))
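A sketch of what makes this solid "partially specified": keys not declared on the PermissiveDict still pass validation and appear in solid_config. Hypothetical values, using execute_solid as in the test below:

res = execute_solid(
    partially_specified_config,
    environment_dict={'solids': {
        'partially_specified_config': {
            # 'required' is declared; 'extra' is not, but is kept.
            'config': {'required': 'yes', 'extra': 'also kept'}
        }
    }})
assert res.output_value() == [('extra', 'also kept'), ('required', 'yes')]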


def test_any_config():
    res = execute_solid(
        any_config,
        environment_dict={'solids': {
            'any_config': {
                'config': 'foo'
            }
        }})
    assert res.output_value() == 'foo'

    # The rest of this example was truncated in the source; a plausible
    # completion showing that Any-typed config also accepts non-scalar values:
    res = execute_solid(any_config,
                        environment_dict={'solids': {
                            'any_config': {
                                'config': {'zip': 'zowie'}
                            }
                        }})
    assert res.output_value() == {'zip': 'zowie'}
Example #7
def _multiple_required_fields_config_permissive_dict():
    return Field(
        PermissiveDict({
            'field_one': Field(String),
            'field_two': Field(String)
        }))
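A hypothetical value this field accepts: both declared keys are required, and any additional keys pass through unchanged:

# Both declared fields must be present; extra keys are permitted.
example_value = {
    'field_one': 'value_one',
    'field_two': 'value_two',
    'an_undeclared_field': 'still allowed',
}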
def define_dataproc_cluster_config():
    return Field(
        Dict(
            fields={
                'masterConfig': Field(
                    Dict(
                        fields={
                            'accelerators': Field(
                                List[
                                    Dict(
                                        fields={
                                            'acceleratorCount': Field(
                                                Int,
                                                description='''The number of the accelerator cards of
                                            this type exposed to this instance.''',
                                                is_optional=True,
                                            ),
                                            'acceleratorTypeUri': Field(
                                                String,
                                                description='''Full URL, partial URI, or short name of
                                            the accelerator type resource to expose to this
                                            instance. See Compute Engine AcceleratorTypes. Examples:
                                            https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80
                                            projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80
                                            nvidia-tesla-k80. Auto Zone Exception: If you are using
                                            the Cloud Dataproc Auto Zone Placement feature, you must
                                            use the short name of the accelerator type resource, for
                                            example, nvidia-tesla-k80.''',
                                                is_optional=True,
                                            ),
                                        }
                                    )
                                ],
                                description='''Optional. The Compute Engine accelerator
                                configuration for these instances. Beta Feature: This feature is
                                still under development. It may be changed before final release.''',
                                is_optional=True,
                            ),
                            'numInstances': Field(
                                Int,
                                description='''Optional. The number of VM instances in the instance
                                group. For master instance groups, must be set to 1.''',
                                is_optional=True,
                            ),
                            'diskConfig': Field(
                                Dict(
                                    fields={
                                        'numLocalSsds': Field(
                                            Int,
                                            description='''Optional. Number of attached SSDs, from 0
                                            to 4 (default is 0). If SSDs are not attached, the boot
                                            disk is used to store runtime logs and HDFS
                                            (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html)
                                            data. If one or more SSDs are attached, this runtime
                                            bulk data is spread across them, and the boot disk
                                            contains only basic config and installed binaries.''',
                                            is_optional=True,
                                        ),
                                        'bootDiskSizeGb': Field(
                                            Int,
                                            description='''Optional. Size in GB of the boot disk
                                            (default is 500GB).''',
                                            is_optional=True,
                                        ),
                                        'bootDiskType': Field(
                                            String,
                                            description='''Optional. Type of the boot disk (default
                                            is "pd-standard"). Valid values: "pd-ssd" (Persistent
                                            Disk Solid State Drive) or "pd-standard" (Persistent
                                            Disk Hard Disk Drive).''',
                                            is_optional=True,
                                        ),
                                    }
                                ),
                                description='''Specifies the config of disk options for a group of
                                VM instances.''',
                                is_optional=True,
                            ),
                            'managedGroupConfig': Field(
                                Dict(fields={}),
                                description='''Specifies the resources used to actively manage an
                                instance group.''',
                                is_optional=True,
                            ),
                            'isPreemptible': Field(
                                Bool,
                                description='''Optional. Specifies that this instance group contains
                                preemptible instances.''',
                                is_optional=True,
                            ),
                            'imageUri': Field(
                                String,
                                description='''Optional. The Compute Engine image resource used for
                                cluster instances. It can be specified or may be inferred from
                                SoftwareConfig.image_version.''',
                                is_optional=True,
                            ),
                            'machineTypeUri': Field(
                                String,
                                description='''Optional. The Compute Engine machine type used for
                                cluster instances. A full URL, partial URI, or short name is valid.
                                Examples:
                                https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2
                                projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2
                                n1-standard-2. Auto Zone Exception: If you are using the Cloud
                                Dataproc Auto Zone Placement feature, you must use the short name of
                                the machine type resource, for example, n1-standard-2.''',
                                is_optional=True,
                            ),
                        }
                    ),
                    description='''Optional. The config settings for Compute Engine resources in an
                    instance group, such as a master or worker group.''',
                    is_optional=True,
                ),
                'secondaryWorkerConfig': Field(
                    Dict(
                        fields={
                            'accelerators': Field(
                                List[
                                    Dict(
                                        fields={
                                            'acceleratorCount': Field(
                                                Int,
                                                description='''The number of the accelerator cards of
                                            this type exposed to this instance.''',
                                                is_optional=True,
                                            ),
                                            'acceleratorTypeUri': Field(
                                                String,
                                                description='''Full URL, partial URI, or short name of
                                            the accelerator type resource to expose to this
                                            instance. See Compute Engine AcceleratorTypes. Examples:
                                            https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80
                                            projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80
                                            nvidia-tesla-k80. Auto Zone Exception: If you are using
                                            the Cloud Dataproc Auto Zone Placement feature, you must
                                            use the short name of the accelerator type resource, for
                                            example, nvidia-tesla-k80.''',
                                                is_optional=True,
                                            ),
                                        }
                                    )
                                ],
                                description='''Optional. The Compute Engine accelerator
                                configuration for these instances. Beta Feature: This feature is
                                still under development. It may be changed before final release.''',
                                is_optional=True,
                            ),
                            'numInstances': Field(
                                Int,
                                description='''Optional. The number of VM instances in the instance
                                group. For master instance groups, must be set to 1.''',
                                is_optional=True,
                            ),
                            'diskConfig': Field(
                                Dict(
                                    fields={
                                        'numLocalSsds': Field(
                                            Int,
                                            description='''Optional. Number of attached SSDs, from 0
                                            to 4 (default is 0). If SSDs are not attached, the boot
                                            disk is used to store runtime logs and HDFS
                                            (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html)
                                            data. If one or more SSDs are attached, this runtime
                                            bulk data is spread across them, and the boot disk
                                            contains only basic config and installed binaries.''',
                                            is_optional=True,
                                        ),
                                        'bootDiskSizeGb': Field(
                                            Int,
                                            description='''Optional. Size in GB of the boot disk
                                            (default is 500GB).''',
                                            is_optional=True,
                                        ),
                                        'bootDiskType': Field(
                                            String,
                                            description='''Optional. Type of the boot disk (default
                                            is "pd-standard"). Valid values: "pd-ssd" (Persistent
                                            Disk Solid State Drive) or "pd-standard" (Persistent
                                            Disk Hard Disk Drive).''',
                                            is_optional=True,
                                        ),
                                    }
                                ),
                                description='''Specifies the config of disk options for a group of
                                VM instances.''',
                                is_optional=True,
                            ),
                            'managedGroupConfig': Field(
                                Dict(fields={}),
                                description='''Specifies the resources used to actively manage an
                                instance group.''',
                                is_optional=True,
                            ),
                            'isPreemptible': Field(
                                Bool,
                                description='''Optional. Specifies that this instance group contains
                                preemptible instances.''',
                                is_optional=True,
                            ),
                            'imageUri': Field(
                                String,
                                description='''Optional. The Compute Engine image resource used for
                                cluster instances. It can be specified or may be inferred from
                                SoftwareConfig.image_version.''',
                                is_optional=True,
                            ),
                            'machineTypeUri': Field(
                                String,
                                 description='''Optional. The Compute Engine machine type used for
                                 cluster instances. A full URL, partial URI, or short name is valid.
                                 Examples:
                                 https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2
                                 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2
                                 n1-standard-2. Auto Zone Exception: If you are using the Cloud
                                 Dataproc Auto Zone Placement feature, you must use the short name of
                                 the machine type resource, for example, n1-standard-2.''',
                                is_optional=True,
                            ),
                        }
                    ),
                    description='''Optional. The config settings for Compute Engine resources in an
                    instance group, such as a master or worker group.''',
                    is_optional=True,
                ),
                'encryptionConfig': Field(
                    Dict(
                        fields={
                            'gcePdKmsKeyName': Field(
                                String,
                                description='''Optional. The Cloud KMS key name to use for PD disk
                                encryption for all instances in the cluster.''',
                                is_optional=True,
                            )
                        }
                    ),
                    description='''Encryption settings for the cluster.''',
                    is_optional=True,
                ),
                'securityConfig': Field(
                    Dict(
                        fields={
                            'kerberosConfig': Field(
                                Dict(
                                    fields={
                                        'truststorePasswordUri': Field(
                                            String,
                                            description='''Optional. The Cloud Storage URI of a KMS
                                            encrypted file containing the password to the user
                                            provided truststore. For the self-signed certificate,
                                            this password is generated by Dataproc.''',
                                            is_optional=True,
                                        ),
                                        'enableKerberos': Field(
                                            Bool,
                                            description='''Optional. Flag to indicate whether to
                                            Kerberize the cluster.''',
                                            is_optional=True,
                                        ),
                                        'truststoreUri': Field(
                                            String,
                                            description='''Optional. The Cloud Storage URI of the
                                            truststore file used for SSL encryption. If not
                                            provided, Dataproc will provide a self-signed
                                            certificate.''',
                                            is_optional=True,
                                        ),
                                        'crossRealmTrustRealm': Field(
                                            String,
                                            description='''Optional. The remote realm the Dataproc
                                            on-cluster KDC will trust, should the user enable cross
                                            realm trust.''',
                                            is_optional=True,
                                        ),
                                        'rootPrincipalPasswordUri': Field(
                                            String,
                                            description='''Required. The Cloud Storage URI of a KMS
                                            encrypted file containing the root principal
                                            password.''',
                                            is_optional=True,
                                        ),
                                        'kmsKeyUri': Field(
                                            String,
                                            description='''Required. The uri of the KMS key used to
                                            encrypt various sensitive files.''',
                                            is_optional=True,
                                        ),
                                        'crossRealmTrustKdc': Field(
                                            String,
                                            description='''Optional. The KDC (IP or hostname) for
                                            the remote trusted realm in a cross realm trust
                                            relationship.''',
                                            is_optional=True,
                                        ),
                                        'crossRealmTrustSharedPasswordUri': Field(
                                            String,
                                            description='''Optional. The Cloud Storage URI of a KMS
                                            encrypted file containing the shared password between
                                            the on-cluster Kerberos realm and the remote trusted
                                            realm, in a cross realm trust relationship.''',
                                            is_optional=True,
                                        ),
                                        'tgtLifetimeHours': Field(
                                            Int,
                                             description='''Optional. The lifetime of the ticket
                                             granting ticket, in hours. If not specified, or if the
                                             user specifies 0, the default value of 10 is used.''',
                                            is_optional=True,
                                        ),
                                        'keystoreUri': Field(
                                            String,
                                            description='''Optional. The Cloud Storage URI of the
                                            keystore file used for SSL encryption. If not provided,
                                            Dataproc will provide a self-signed certificate.''',
                                            is_optional=True,
                                        ),
                                        'keyPasswordUri': Field(
                                            String,
                                            description='''Optional. The Cloud Storage URI of a KMS
                                            encrypted file containing the password to the user
                                            provided key. For the self-signed certificate, this
                                            password is generated by Dataproc.''',
                                            is_optional=True,
                                        ),
                                        'keystorePasswordUri': Field(
                                            String,
                                            description='''Optional. The Cloud Storage URI of a KMS
                                            encrypted file containing the password to the user
                                            provided keystore. For the self-signed certificate, this
                                            password is generated by Dataproc.''',
                                            is_optional=True,
                                        ),
                                        'crossRealmTrustAdminServer': Field(
                                            String,
                                            description='''Optional. The admin server (IP or
                                            hostname) for the remote trusted realm in a cross realm
                                            trust relationship.''',
                                            is_optional=True,
                                        ),
                                        'kdcDbKeyUri': Field(
                                            String,
                                            description='''Optional. The Cloud Storage URI of a KMS
                                            encrypted file containing the master key of the KDC
                                            database.''',
                                            is_optional=True,
                                        ),
                                    }
                                ),
                                description='''Specifies Kerberos related configuration.''',
                                is_optional=True,
                            )
                        }
                    ),
                    description='''Security related configuration, including Kerberos.''',
                    is_optional=True,
                ),
                'initializationActions': Field(
                    List[
                        Dict(
                            fields={
                                'executionTimeout': Field(
                                    String,
                                     description='''Optional. Amount of time the executable has to
                                 complete. The default is 10 minutes. Cluster creation fails with an
                                 explanatory error message (the name of the executable that caused
                                 the error and the exceeded timeout period) if the executable is not
                                 completed by the end of the timeout period.''',
                                    is_optional=True,
                                ),
                                'executableFile': Field(
                                    String,
                                    description='''Required. Cloud Storage URI of executable file.''',
                                    is_optional=True,
                                ),
                            }
                        )
                    ],
                    description='''Optional. Commands to execute on each node after config is
                    completed. By default, executables are run on master and all worker nodes. You
                    can test a node\'s role metadata to run an executable on a master or worker
                    node, as shown below using curl (you can also use wget): ROLE=$(curl -H
                    Metadata-Flavor:Google
                    http://metadata/computeMetadata/v1/instance/attributes/dataproc-role) if [[
                    "${ROLE}" == \'Master\' ]]; then   ... master specific actions ... else   ...
                    worker specific actions ... fi ''',
                    is_optional=True,
                ),
                'configBucket': Field(
                    String,
                    description='''Optional. A Google Cloud Storage bucket used to stage job
                    dependencies, config files, and job driver console output. If you do not specify
                    a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US,
                    ASIA, or EU) for your cluster\'s staging bucket according to the Google Compute
                    Engine zone where your cluster is deployed, and then create and manage this
                    project-level, per-location bucket (see Cloud Dataproc staging bucket).''',
                    is_optional=True,
                ),
                'workerConfig': Field(
                    Dict(
                        fields={
                            'accelerators': Field(
                                List[
                                    Dict(
                                        fields={
                                            'acceleratorCount': Field(
                                                Int,
                                                description='''The number of the accelerator cards of
                                            this type exposed to this instance.''',
                                                is_optional=True,
                                            ),
                                            'acceleratorTypeUri': Field(
                                                String,
                                                 description='''Full URL, partial URI, or short name of
                                             the accelerator type resource to expose to this
                                             instance. See Compute Engine AcceleratorTypes. Examples:
                                             https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80
                                             projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80
                                             nvidia-tesla-k80. Auto Zone Exception: If you are using
                                             the Cloud Dataproc Auto Zone Placement feature, you must
                                             use the short name of the accelerator type resource, for
                                             example, nvidia-tesla-k80.''',
                                                is_optional=True,
                                            ),
                                        }
                                    )
                                ],
                                 description='''Optional. The Compute Engine accelerator
                                 configuration for these instances. Beta Feature: This feature is
                                 still under development. It may be changed before final release.''',
                                is_optional=True,
                            ),
                            'numInstances': Field(
                                Int,
                                description='''Optional. The number of VM instances in the instance
                                group. For master instance groups, must be set to 1.''',
                                is_optional=True,
                            ),
                            'diskConfig': Field(
                                Dict(
                                    fields={
                                        'numLocalSsds': Field(
                                            Int,
                                            description='''Optional. Number of attached SSDs, from 0
                                            to 4 (default is 0). If SSDs are not attached, the boot
                                            disk is used to store runtime logs and HDFS
                                            (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html)
                                            data. If one or more SSDs are attached, this runtime
                                            bulk data is spread across them, and the boot disk
                                            contains only basic config and installed binaries.''',
                                            is_optional=True,
                                        ),
                                        'bootDiskSizeGb': Field(
                                            Int,
                                            description='''Optional. Size in GB of the boot disk
                                            (default is 500GB).''',
                                            is_optional=True,
                                        ),
                                        'bootDiskType': Field(
                                            String,
                                            description='''Optional. Type of the boot disk (default
                                            is "pd-standard"). Valid values: "pd-ssd" (Persistent
                                            Disk Solid State Drive) or "pd-standard" (Persistent
                                            Disk Hard Disk Drive).''',
                                            is_optional=True,
                                        ),
                                    }
                                ),
                                description='''Specifies the config of disk options for a group of
                                VM instances.''',
                                is_optional=True,
                            ),
                            'managedGroupConfig': Field(
                                Dict(fields={}),
                                description='''Specifies the resources used to actively manage an
                                instance group.''',
                                is_optional=True,
                            ),
                            'isPreemptible': Field(
                                Bool,
                                description='''Optional. Specifies that this instance group contains
                                preemptible instances.''',
                                is_optional=True,
                            ),
                            'imageUri': Field(
                                String,
                                description='''Optional. The Compute Engine image resource used for
                                cluster instances. It can be specified or may be inferred from
                                SoftwareConfig.image_version.''',
                                is_optional=True,
                            ),
                            'machineTypeUri': Field(
                                String,
                                 description='''Optional. The Compute Engine machine type used for
                                 cluster instances. A full URL, partial URI, or short name is valid.
                                 Examples:
                                 https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2
                                 projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2
                                 n1-standard-2. Auto Zone Exception: If you are using the Cloud
                                 Dataproc Auto Zone Placement feature, you must use the short name of
                                 the machine type resource, for example, n1-standard-2.''',
                                is_optional=True,
                            ),
                        }
                    ),
                    description='''Optional. The config settings for Compute Engine resources in an
                    instance group, such as a master or worker group.''',
                    is_optional=True,
                ),
                'gceClusterConfig': Field(
                    Dict(
                        fields={
                            'networkUri': Field(
                                String,
                                description='''Optional. The Compute Engine network to be used for
                                machine communications. Cannot be specified with subnetwork_uri. If
                                neither network_uri nor subnetwork_uri is specified, the "default"
                                network of the project is used, if it exists. Cannot be a "Custom
                                Subnet Network" (see Using Subnetworks for more information).A full
                                URL, partial URI, or short name are valid. Examples:
                                https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default
                                projects/[project_id]/regions/global/default default''',
                                is_optional=True,
                            ),
                            'zoneUri': Field(
                                String,
                                description='''Optional. The zone where the Compute Engine cluster
                                will be located. On a create request, it is required in the "global"
                                region. If omitted in a non-global Cloud Dataproc region, the
                                service will pick a zone in the corresponding Compute Engine region.
                                 On a get request, zone will always be present. A full URL, partial
                                 URI, or short name is valid. Examples:
                                https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]
                                projects/[project_id]/zones/[zone] us-central1-f''',
                                is_optional=True,
                            ),
                            'metadata': Field(
                                PermissiveDict(),
                                description='''The Compute Engine metadata entries to add to all
                                instances (see Project and instance metadata
                                (https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)).''',
                                is_optional=True,
                            ),
                            'internalIpOnly': Field(
                                Bool,
                                description='''Optional. If true, all instances in the cluster will
                                only have internal IP addresses. By default, clusters are not
                                restricted to internal IP addresses, and will have ephemeral
                                external IP addresses assigned to each instance. This
                                internal_ip_only restriction can only be enabled for subnetwork
                                enabled networks, and all off-cluster dependencies must be
                                configured to be accessible without external IP addresses.''',
                                is_optional=True,
                            ),
                            'serviceAccountScopes': Field(
                                List[String],
                                description='''Optional. The URIs of service account scopes to be
                                included in Compute Engine instances. The following base set of
                                scopes is always included:
                                https://www.googleapis.com/auth/cloud.useraccounts.readonly
                                https://www.googleapis.com/auth/devstorage.read_write
                                 https://www.googleapis.com/auth/logging.write
                                 If no scopes are specified, the following defaults are also provided:
                                https://www.googleapis.com/auth/bigquery
                                https://www.googleapis.com/auth/bigtable.admin.table
                                https://www.googleapis.com/auth/bigtable.data
                                https://www.googleapis.com/auth/devstorage.full_control''',
                                is_optional=True,
                            ),
                            'tags': Field(
                                List[String],
                                description='''The Compute Engine tags to add to all instances (see
                                Tagging instances).''',
                                is_optional=True,
                            ),
                            'serviceAccount': Field(
                                String,
                                description='''Optional. The service account of the instances.
                                Defaults to the default Compute Engine service account. Custom
                                service accounts need permissions equivalent to the following IAM
                                 roles: roles/logging.logWriter and roles/storage.objectAdmin (see
                                https://cloud.google.com/compute/docs/access/service-accounts#custom_service_accounts
                                for more information). Example:
                                [account_id]@[project_id].iam.gserviceaccount.com''',
                                is_optional=True,
                            ),
                            'subnetworkUri': Field(
                                String,
                                description='''Optional. The Compute Engine subnetwork to be used
                                 for machine communications. Cannot be specified with network_uri. A
                                 full URL, partial URI, or short name is valid. Examples:
                                https://www.googleapis.com/compute/v1/projects/[project_id]/regions/us-east1/subnetworks/sub0
                                projects/[project_id]/regions/us-east1/subnetworks/sub0 sub0''',
                                is_optional=True,
                            ),
                        }
                    ),
                    description='''Common config settings for resources of Compute Engine cluster
                    instances, applicable to all instances in the cluster.''',
                    is_optional=True,
                ),
                'softwareConfig': Field(
                    Dict(
                        fields={
                            'properties': Field(
                                PermissiveDict(),
                                 description='''Optional. The properties to set on daemon config
                                 files. Property keys are specified in prefix:property format, for
                                 example core:hadoop.tmp.dir. The following prefixes and their
                                 mappings are supported: capacity-scheduler: capacity-scheduler.xml,
                                 core: core-site.xml, distcp: distcp-default.xml, hdfs:
                                 hdfs-site.xml, hive: hive-site.xml, mapred: mapred-site.xml, pig:
                                 pig.properties, spark: spark-defaults.conf, yarn: yarn-site.xml.
                                 For more information, see Cluster properties.''',
                                is_optional=True,
                            ),
                            'optionalComponents': Field(
                                List[Component],
                                description='''The set of optional components to activate on the
                                cluster.''',
                                is_optional=True,
                            ),
                            'imageVersion': Field(
                                String,
                                description='''Optional. The version of software inside the cluster.
                                It must be one of the supported Cloud Dataproc Versions, such as
                                "1.2" (including a subminor version, such as "1.2.29"), or the
                                "preview" version. If unspecified, it defaults to the latest Debian
                                version.''',
                                is_optional=True,
                            ),
                        }
                    ),
                    description='''Specifies the selection and config of software inside the
                    cluster.''',
                    is_optional=True,
                ),
            }
        ),
        description='''The cluster config.''',
        is_optional=True,
    )
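
The schema above only declares the shape of the config; at run time it validates plain
dictionaries. A minimal sketch of values that would satisfy the cluster config fields defined
above (the bucket name, paths, zone, and timeout value are hypothetical placeholders):

example_cluster_config = {
    'configBucket': 'my-staging-bucket',  # hypothetical bucket name
    'gceClusterConfig': {'zoneUri': 'us-east1-a'},
    'softwareConfig': {
        'imageVersion': '1.2.29',
        # prefix:property format, as described in the 'properties' field above
        'properties': {'core:hadoop.tmp.dir': '/tmp/hadoop'},
    },
    'workerConfig': {
        'numInstances': 2,
        'machineTypeUri': 'n1-standard-2',  # short name, required with Auto Zone Placement
        'diskConfig': {'bootDiskSizeGb': 500, 'numLocalSsds': 1},
    },
    'initializationActions': [
        {'executableFile': 'gs://my-staging-bucket/init.sh', 'executionTimeout': '600s'}
    ],
}
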
Exemple #9
0
def define_emr_run_job_flow_config():
    name = Field(String, description='The name of the job flow.', is_optional=False)

    log_uri = Field(
        String,
        description='''The location in Amazon S3 to write the log files of the job flow. If a value
        is not provided, logs are not created.''',
        is_optional=True,
    )

    additional_info = Field(
        String, description='A JSON string for selecting additional features.', is_optional=True
    )

    ami_version = Field(
        String,
        description='''Applies only to Amazon EMR AMI versions 3.x and 2.x. For Amazon EMR releases
        4.0 and later, ReleaseLabel is used. To specify a custom AMI, use CustomAmiID.''',
        is_optional=True,
    )

    release_label = Field(
        String,
        description='''The Amazon EMR release label, which determines the version of open-source
        application packages installed on the cluster. Release labels are in the form emr-x.x.x,
        where x.x.x is an Amazon EMR release version, for example, emr-5.14.0. For more information
        about Amazon EMR release versions and included application versions and features, see
        https://docs.aws.amazon.com/emr/latest/ReleaseGuide/. The release label applies only to
        Amazon EMR release versions 4.x and later. Earlier versions use AmiVersion.''',
        is_optional=True,
    )

    instances = Field(
        Dict(
            fields={
                'MasterInstanceType': Field(
                    String,
                    description='The EC2 instance type of the master node.',
                    is_optional=True,
                ),
                'SlaveInstanceType': Field(
                    String,
                    description='The EC2 instance type of the core and task nodes.',
                    is_optional=True,
                ),
                'InstanceCount': Field(
                    Int, description='The number of EC2 instances in the cluster.', is_optional=True
                ),
                'InstanceGroups': _define_instance_groups(),
                'InstanceFleets': _define_instance_fleets(),
                'Ec2KeyName': Field(
                    String,
                    description='''The name of the EC2 key pair that can be used to ssh to the
                    master node as the user called "hadoop."''',
                    is_optional=True,
                ),
                'Placement': Field(
                    Dict(
                        fields={
                            'AvailabilityZone': Field(
                                String,
                                description='''The Amazon EC2 Availability Zone for the cluster.
                                AvailabilityZone is used for uniform instance groups, while
                                AvailabilityZones (plural) is used for instance fleets.''',
                                is_optional=True,
                            ),
                            'AvailabilityZones': Field(
                                List[String],
                                description='''When multiple Availability Zones are specified,
                                Amazon EMR evaluates them and launches instances in the optimal
                                Availability Zone. AvailabilityZones is used for instance fleets,
                                while AvailabilityZone (singular) is used for uniform instance
                                groups.''',
                                is_optional=True,
                            ),
                        }
                    ),
                    description='The Availability Zone in which the cluster runs.',
                    is_optional=True,
                ),
                'KeepJobFlowAliveWhenNoSteps': Field(
                    Bool,
                    description='''Specifies whether the cluster should remain available after
                    completing all steps.''',
                    is_optional=True,
                ),
                'TerminationProtected': Field(
                    Bool,
                    description='''Specifies whether to lock the cluster to prevent the Amazon EC2
                    instances from being terminated by API call, user intervention, or in the event
                    of a job-flow error.''',
                    is_optional=True,
                ),
                'HadoopVersion': Field(
                    String,
                    description='''Applies only to Amazon EMR release versions earlier than 4.0. The
                    Hadoop version for the cluster. Valid inputs are "0.18" (deprecated), "0.20"
                    (deprecated), "0.20.205" (deprecated), "1.0.3", "2.2.0", or "2.4.0". If you do
                    not set this value, the default of 0.18 is used, unless the AmiVersion parameter
                    is set in the RunJobFlow call, in which case the default version of Hadoop for
                    that AMI version is used.''',
                    is_optional=True,
                ),
                'Ec2SubnetId': Field(
                    String,
                    description='''Applies to clusters that use the uniform instance group
                    configuration. To launch the cluster in Amazon Virtual Private Cloud (Amazon
                    VPC), set this parameter to the identifier of the Amazon VPC subnet where you
                    want the cluster to launch. If you do not specify this value, the cluster
                    launches in the normal Amazon Web Services cloud, outside of an Amazon VPC, if
                    the account launching the cluster supports EC2 Classic networks in the region
                    where the cluster launches.
                    Amazon VPC currently does not support cluster compute quadruple extra large
                    (cc1.4xlarge) instances. Thus you cannot specify the cc1.4xlarge instance type
                    for clusters launched in an Amazon VPC.''',
                    is_optional=True,
                ),
                'Ec2SubnetIds': Field(
                    List[String],
                    description='''Applies to clusters that use the instance fleet configuration.
                    When multiple EC2 subnet IDs are specified, Amazon EMR evaluates them and
                    launches instances in the optimal subnet.''',
                    is_optional=True,
                ),
                'EmrManagedMasterSecurityGroup': Field(
                    String,
                    description='''The identifier of the Amazon EC2 security group for the master
                    node.''',
                    is_optional=True,
                ),
                'EmrManagedSlaveSecurityGroup': Field(
                    String,
                    description='''The identifier of the Amazon EC2 security group for the core and
                    task nodes.''',
                    is_optional=True,
                ),
                'ServiceAccessSecurityGroup': Field(
                    String,
                    description='''The identifier of the Amazon EC2 security group for the Amazon
                    EMR service to access clusters in VPC private subnets.''',
                    is_optional=True,
                ),
                'AdditionalMasterSecurityGroups': Field(
                    List[String],
                    description='''A list of additional Amazon EC2 security group IDs for the master
                    node.''',
                    is_optional=True,
                ),
                'AdditionalSlaveSecurityGroups': Field(
                    List[String],
                    description='''A list of additional Amazon EC2 security group IDs for the core
                    and task nodes.''',
                    is_optional=True,
                ),
            }
        ),
        description='A specification of the number and type of Amazon EC2 instances.',
        is_optional=False,
    )

    supported_products = Field(
        List[EmrSupportedProducts],
        description='''A list of strings that indicates third-party software to use. For
                    more information, see the Amazon EMR Developer Guide. Currently supported
                    values are:
                        - "mapr-m3" - launch the job flow using MapR M3 Edition.
                        - "mapr-m5" - launch the job flow using MapR M5 Edition.
                    ''',
        is_optional=True,
    )

    new_supported_products = Field(
        List[
            Dict(
                fields={
                    'Name': Field(String, is_optional=False),
                    'Args': Field(List[String], description='The list of user-supplied arguments.'),
                }
            )
        ],
        description='''
        The list of supported product configurations which allow user-supplied arguments. EMR
        accepts these arguments and forwards them to the corresponding installation script as
        bootstrap action arguments.

        A list of strings that indicates third-party software to use with the job flow that accepts
        a user argument list. EMR accepts and forwards the argument list to the corresponding
        installation script as bootstrap action arguments. For more information, see "Launch a Job
        Flow on the MapR Distribution for Hadoop" in the Amazon EMR Developer Guide.

        Supported values are:
        - "mapr-m3" - launch the cluster using MapR M3 Edition.
        - "mapr-m5" - launch the cluster using MapR M5 Edition.
        - "mapr" with the user arguments specifying "--edition,m3" or "--edition,m5" - launch the
            job flow using MapR M3 or M5 Edition respectively.
        - "mapr-m7" - launch the cluster using MapR M7 Edition.
        - "hunk" - launch the cluster with the Hunk Big Data Analtics Platform.
        - "hue"- launch the cluster with Hue installed.
        - "spark" - launch the cluster with Apache Spark installed.
        - "ganglia" - launch the cluster with the Ganglia Monitoring System installed.''',
        is_optional=True,
    )

    applications = Field(
        List[
            Dict(
                fields={
                    'Name': Field(
                        String, description='The name of the application.', is_optional=False
                    ),
                    'Version': Field(
                        String, description='The version of the application.', is_optional=True
                    ),
                    'Args': Field(
                        List[String],
                        description='Arguments for Amazon EMR to pass to the application.',
                        is_optional=True,
                    ),
                    'AdditionalInfo': Field(
                        PermissiveDict(),
                        description='''This option is for advanced users only. This is meta
                            information about third-party applications that third-party vendors use
                            for testing purposes.''',
                        is_optional=True,
                    ),
                }
            )
        ],
        description='''Applies to Amazon EMR releases 4.0 and later. A case-insensitive list of
        applications for Amazon EMR to install and configure when launching the cluster. For a list
        of applications available for each Amazon EMR release version, see the Amazon EMR Release
        Guide.

        With Amazon EMR release version 4.0 and later, the only accepted parameter is the
        application name. To pass arguments to applications, you use configuration classifications
        specified using configuration JSON objects. For more information, see the EMR Configuring
        Applications guide.

        With earlier Amazon EMR releases, the application is any Amazon or third-party software that
        you can add to the cluster. This structure contains a list of strings that indicates the
        software to use with the cluster and accepts a user argument list. Amazon EMR accepts and
        forwards the argument list to the corresponding installation script as bootstrap action
        argument.''',
        is_optional=True,
    )

    visible_to_all_users = Field(
        Bool,
        description='''Whether the cluster is visible to all IAM users of the AWS account associated
        with the cluster. If this value is set to True, all IAM users of that AWS account can view
        and (if they have the proper policy permissions set) manage the cluster. If it is set to
        False, only the IAM user that created the cluster can view and manage it.''',
        is_optional=True,
        default_value=True,
    )

    job_flow_role = Field(
        String,
        description='''Also called instance profile and EC2 role. An IAM role for an EMR cluster.
        The EC2 instances of the cluster assume this role. The default role is EMR_EC2_DefaultRole.
        In order to use the default role, you must have already created it using the CLI or console.
        ''',
        is_optional=True,
    )

    service_role = Field(
        String,
        description='''The IAM role that will be assumed by the Amazon EMR service to access AWS
        resources on your behalf.''',
        is_optional=True,
    )

    tags = Field(
        List[
            Dict(
                fields={
                    'Key': Field(
                        String,
                        description='''A user-defined key, which is the minimum required information
                        for a valid tag. For more information, see the EMR Tag guide.''',
                        is_optional=False,
                    ),
                    'Value': Field(
                        String,
                        description='''A user-defined value, which is optional in a tag. For more
                        information, see the EMR Tag Clusters guide.''',
                        is_optional=True,
                    ),
                }
            )
        ],
        description='''A list of tags to associate with a cluster and propagate to Amazon EC2
        instances.

        A key/value pair containing user-defined metadata that you can associate with an Amazon EMR
        resource. Tags make it easier to associate clusters in various ways, such as grouping
        clusters to track your Amazon EMR resource allocation costs. For more information, see the
        EMR Tag Clusters guide.''',
        is_optional=True,
    )

    security_configuration = Field(
        String,
        description='The name of a security configuration to apply to the cluster.',
        is_optional=True,
    )

    auto_scaling_role = Field(
        String,
        description='''An IAM role for automatic scaling policies. The default role is
        EMR_AutoScaling_DefaultRole. The IAM role provides permissions that the automatic scaling
        feature requires to launch and terminate EC2 instances in an instance group.''',
        is_optional=True,
    )

    scale_down_behavior = Field(
        EmrScaleDownBehavior,
        description='''Specifies the way that individual Amazon EC2 instances terminate when an
        automatic scale-in activity occurs or an instance group is resized.
        TERMINATE_AT_INSTANCE_HOUR indicates that Amazon EMR terminates nodes at the instance-hour
        boundary, regardless of when the request to terminate the instance was submitted. This
        option is only available with Amazon EMR 5.1.0 and later and is the default for clusters
        created using that version. TERMINATE_AT_TASK_COMPLETION indicates that Amazon EMR
        blacklists and drains tasks from nodes before terminating the Amazon EC2 instances,
        regardless of the instance-hour boundary. With either behavior, Amazon EMR removes the least
        active nodes first and blocks instance termination if it could lead to HDFS corruption.
        TERMINATE_AT_TASK_COMPLETION is available only in Amazon EMR version 4.1.0 and later, and is
        the default for versions of Amazon EMR earlier than 5.1.0.''',
        is_optional=True,
    )

    custom_ami_id = Field(
        String,
        description='''Available only in Amazon EMR version 5.7.0 and later. The ID of a custom
        Amazon EBS-backed Linux AMI. If specified, Amazon EMR uses this AMI when it launches cluster
        EC2 instances. For more information about custom AMIs in Amazon EMR, see Using a Custom AMI
        in the Amazon EMR Management Guide. If omitted, the cluster uses the base Linux AMI for the
        ReleaseLabel specified. For Amazon EMR versions 2.x and 3.x, use AmiVersion instead.

        For information about creating a custom AMI, see Creating an Amazon EBS-Backed Linux AMI in
        the Amazon Elastic Compute Cloud User Guide for Linux Instances. For information about
        finding an AMI ID, see Finding a Linux AMI.''',
        is_optional=True,
    )

    repo_upgrade_on_boot = Field(
        EmrRepoUpgradeOnBoot,
        description='''Applies only when CustomAmiID is used. Specifies which updates from the
        Amazon Linux AMI package repositories to apply automatically when the instance boots using
        the AMI. If omitted, the default is SECURITY, which indicates that only security updates
        are applied. If NONE is specified, no updates are applied, and all updates must be applied
        manually.''',
        is_optional=True,
    )

    kerberos_attributes = Field(
        Dict(
            fields={
                'Realm': Field(
                    String,
                    description='''The name of the Kerberos realm to which all nodes in a cluster
                    belong. For example, EC2.INTERNAL.''',
                    is_optional=False,
                ),
                'KdcAdminPassword': Field(
                    String,
                    description='''The password used within the cluster for the kadmin service on
                    the cluster-dedicated KDC, which maintains Kerberos principals, password
                    policies, and keytabs for the cluster.''',
                    is_optional=False,
                ),
                'CrossRealmTrustPrincipalPassword': Field(
                    String,
                    description='''Required only when establishing a cross-realm trust with a KDC in
                    a different realm. The cross-realm principal password, which must be identical
                    across realms.''',
                    is_optional=True,
                ),
                'ADDomainJoinUser': Field(
                    String,
                    description='''Required only when establishing a cross-realm trust with an
                    Active Directory domain. A user with sufficient privileges to join resources to
                    the domain.''',
                    is_optional=True,
                ),
                'ADDomainJoinPassword': Field(
                    String,
                    description='''The Active Directory password for ADDomainJoinUser.''',
                    is_optional=True,
                ),
            }
        ),
        description='''Attributes for Kerberos configuration when Kerberos authentication is enabled
        using a security configuration. For more information, see Use Kerberos Authentication in the
        EMR Management Guide.''',
        is_optional=True,
    )

    return Field(
        Dict(
            fields={
                'Name': name,
                'LogUri': log_uri,
                'AdditionalInfo': additional_info,
                'AmiVersion': ami_version,
                'ReleaseLabel': release_label,
                'Instances': instances,
                'Steps': _define_steps(),
                'BootstrapActions': _define_bootstrap_actions(),
                'SupportedProducts': supported_products,
                'NewSupportedProducts': new_supported_products,
                'Applications': applications,
                'Configurations': _define_configurations(),
                'VisibleToAllUsers': visible_to_all_users,
                'JobFlowRole': job_flow_role,
                'ServiceRole': service_role,
                'Tags': tags,
                'SecurityConfiguration': security_configuration,
                'AutoScalingRole': auto_scaling_role,
                'ScaleDownBehavior': scale_down_behavior,
                'CustomAmiId': custom_ami_id,
                'EbsRootVolumeSize': Field(
                    Int,
                    description='''The size, in GiB, of the EBS root device volume of the Linux AMI
                    that is used for each EC2 instance. Available in Amazon EMR version 4.x and
                    later.''',
                    is_optional=True,
                ),
                'RepoUpgradeOnBoot': repo_upgrade_on_boot,
                'KerberosAttributes': kerberos_attributes,
            }
        ),
        description='AWS EMR run job flow configuration',
    )
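
As a usage sketch, a config document validated against the Field returned above might look like
the following (instance types, counts, and names are illustrative values, not defaults):

example_run_job_flow_config = {
    'Name': 'my-emr-cluster',          # required
    'ReleaseLabel': 'emr-5.14.0',
    'Instances': {                     # required
        'MasterInstanceType': 'm4.large',
        'SlaveInstanceType': 'm4.large',
        'InstanceCount': 3,
        'KeepJobFlowAliveWhenNoSteps': False,
    },
    'Applications': [{'Name': 'Spark'}],
    'JobFlowRole': 'EMR_EC2_DefaultRole',
    'ServiceRole': 'EMR_DefaultRole',
}
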
Exemple #10
0
def test_construct_different_perm_dicts():
    int_perm_dict = PermissiveDict(fields={'an_int': Field(int)})
    string_perm_dict = PermissiveDict(fields={'a_string': Field(str)})

    assert int_perm_dict is not string_perm_dict
    assert int_perm_dict.inst().key != string_perm_dict.inst().key
Exemple #11
0
def test_construct_permissive_dict_same_same():
    assert PermissiveDict() is PermissiveDict()
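
These identity assertions hold because config types are memoized on a key derived from their
structure. A toy sketch of the idea (a hypothetical cache and key scheme, not the library's
actual implementation):

_type_cache = {}

def memoized_type(kind, field_names):
    # Derive a structural key from the kind and the sorted field names,
    # then return the cached instance if one already exists.
    key = kind + '.' + '-'.join(sorted(field_names))
    if key not in _type_cache:
        _type_cache[key] = object()  # stand-in for constructing the real type
    return _type_cache[key]

assert memoized_type('PermissiveDict', ['an_int']) is memoized_type('PermissiveDict', ['an_int'])
assert memoized_type('PermissiveDict', ['an_int']) is not memoized_type('PermissiveDict', ['a_string'])
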
Exemple #12
0
def test_kitchen_sink():
    big_dict_1 = Dict({
        'field_one': Field(int, default_value=2, is_optional=True),
        'field_two': Field(
            Dict({
                'nested_field_one': Field(bool),
                'nested_selector': Field(
                    Selector({
                        'int_field_in_selector': Field(int),
                        'permissive_dict_in_selector': Field(PermissiveDict()),
                        'permissive_dict_with_fields_in_selector': Field(
                            PermissiveDict({'string_field': Field(str)})
                        ),
                    })
                ),
            })
        ),
    })

    big_dict_2 = Dict({
        'field_one': Field(int, default_value=2, is_optional=True),
        'field_two': Field(
            Dict(
                fields={
                    'nested_field_one': Field(bool),
                    'nested_selector': Field(
                        Selector(
                            fields={
                                'permissive_dict_in_selector': Field(PermissiveDict()),
                                'int_field_in_selector': Field(int),
                                'permissive_dict_with_fields_in_selector': Field(
                                    PermissiveDict(fields={'string_field': Field(str)})
                                ),
                            }
                        )
                    ),
                }
            )
        ),
    })

    assert big_dict_1 is big_dict_2
    assert big_dict_1.inst().key == big_dict_2.inst().key

    # differs way down in tree
    big_dict_3 = Dict({
        'field_one': Field(int, default_value=2, is_optional=True),
        'field_two': Field(
            Dict(
                fields={
                    'nested_field_one': Field(bool),
                    'nested_selector': Field(
                        Selector(
                            fields={
                                'permissive_dict_in_selector': Field(PermissiveDict()),
                                'int_field_in_selector': Field(int),
                                'permissive_dict_with_fields_in_selector': Field(
                                    PermissiveDict(fields={'int_field': Field(int)})
                                ),
                            }
                        )
                    ),
                }
            )
        ),
    })

    assert big_dict_1 is not big_dict_3
    assert big_dict_1.inst().key != big_dict_3.inst().key
Exemple #13
0
def bash_command_solid(bash_command, name=None, output_encoding=None):
    '''Execute a Bash command.
    '''
    check.str_param(bash_command, 'bash_command')
    name = check.opt_str_param(name, 'name', default='bash_solid')
    output_encoding = check.opt_str_param(output_encoding,
                                          'output_encoding',
                                          default='utf-8')

    @solid(
        name=name,
        config={
            'output_logging': Field(
                Enum(
                    'OutputType',
                    [
                        EnumValue('STREAM', description='Stream script stdout/stderr.'),
                        EnumValue(
                            'BUFFER',
                            description='Buffer bash script stdout/stderr, then log upon completion.',
                        ),
                        EnumValue('NONE', description='No logging'),
                    ],
                ),
                is_optional=True,
                default_value='STREAM',
            ),
            'env': Field(
                PermissiveDict(),
                description='Environment variables to pass to the child process; if not provided, '
                'the current process environment will be passed.',
                is_optional=True,
                default_value=None,
            ),
        },
    )
    def _bash_solid(context):
        '''This logic is ported from the Airflow BashOperator implementation.

        https://github.com/apache/airflow/blob/master/airflow/operators/bash_operator.py
        '''
        def log_info_msg(log_str):
            context.log.info('[bash][{name}] '.format(name=name) + log_str)

        tmp_path = seven.get_system_temp_directory()
        log_info_msg('using temporary directory: %s' % tmp_path)

        env = (
            context.solid_config['env']
            if context.solid_config['env'] is not None
            else os.environ.copy()
        )

        with NamedTemporaryFile(dir=tmp_path, prefix=name) as tmp_file:
            tmp_file.write(bytes(bash_command.encode('utf-8')))
            tmp_file.flush()
            script_location = os.path.abspath(tmp_file.name)
            log_info_msg('Temporary script location: {location}'.format(
                location=script_location))

            def pre_exec():
                # Restore default signal disposition and invoke setsid
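                # Note: 'SIGXFZ' is not a real signal name (an upstream
                # Airflow typo); the hasattr() check below keeps the loop
                # tolerant of it.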
                for sig in ('SIGPIPE', 'SIGXFZ', 'SIGXFSZ'):
                    if hasattr(signal, sig):
                        signal.signal(getattr(signal, sig), signal.SIG_DFL)
                os.setsid()

            log_info_msg(
                'Running command: {command}'.format(command=bash_command))

            # pylint: disable=subprocess-popen-preexec-fn
            sub_process = Popen(
                ['bash', tmp_file.name],
                stdout=PIPE,
                stderr=STDOUT,
                cwd=tmp_path,
                env=env,
                preexec_fn=pre_exec,
            )

            # Stream back logs as they are emitted
            if context.solid_config['output_logging'] == 'STREAM':
                line = ''
                for raw_line in iter(sub_process.stdout.readline, b''):
                    line = raw_line.decode(output_encoding).rstrip()
                    log_info_msg(line)

            sub_process.wait()

            # Collect and buffer all logs, then emit
            if context.solid_config['output_logging'] == 'BUFFER':
                line = ''
                for raw_line in iter(sub_process.stdout.readline, b''):
                    line += raw_line.decode(output_encoding)
                log_info_msg(line)

            # no logging in this case
            elif context.solid_config['output_logging'] == 'NONE':
                pass

            log_info_msg('Command exited with return code {retcode}'.format(
                retcode=sub_process.returncode))

            if sub_process.returncode:
                raise Failure(
                    description='[bash][{name}] Bash command failed'.format(
                        name=name))

        return line

    return _bash_solid
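A hedged usage sketch for the factory above (the pipeline and solid names are hypothetical, and execution assumes the environment_dict-era API):

from dagster import execute_pipeline, pipeline

hello_solid = bash_command_solid('echo "hello world"', name='hello')


@pipeline
def bash_pipeline():
    hello_solid()


# 'env' is omitted from config, so the solid falls back to os.environ.copy().
result = execute_pipeline(
    bash_pipeline,
    environment_dict={
        'solids': {'hello': {'config': {'output_logging': 'BUFFER'}}}
    },
)
assert result.success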
Example #14
0
from dagster import Dict, Field, Int, PermissiveDict, String


# S3ACL (a config Enum of canned ACL values) is defined elsewhere in the
# source module.
def put_object_configs():
    return Field(
        Dict(
            fields={
                'ACL':
                Field(S3ACL,
                      description='The canned ACL to apply to the object.',
                      is_optional=True),
                # Body will be set by the solid, not supplied in config
                'Bucket':
                Field(
                    String,
                    description=
                    'Name of the bucket to which the PUT operation was initiated.',
                    is_optional=False,
                ),
                'CacheControl':
                Field(
                    String,
                    description=
                    'Specifies caching behavior along the request/reply chain.',
                    is_optional=True,
                ),
                'ContentDisposition':
                Field(
                    String,
                    description=
                    'Specifies presentational information for the object.',
                    is_optional=True,
                ),
                'ContentEncoding':
                Field(
                    String,
                    description=
                    '''Specifies what content encodings have been applied to the object
                    and thus what decoding mechanisms must be applied to obtain the media-type
                    referenced by the Content-Type header field.''',
                    is_optional=True,
                ),
                'ContentLanguage':
                Field(String,
                      description='The language the content is in.',
                      is_optional=True),
                'ContentLength':
                Field(
                    Int,
                    description=
                    '''Size of the body in bytes. This parameter is useful when the size
                    of the body cannot be determined automatically.''',
                    is_optional=True,
                ),
                'ContentMD5':
                Field(
                    String,
                    description=
                    '''The base64-encoded 128-bit MD5 digest of the part data. This
                    parameter is auto-populated when using the command from the CLI.''',
                    is_optional=True,
                ),
                'ContentType':
                Field(
                    String,
                    description=
                    'A standard MIME type describing the format of the object data.',
                    is_optional=True,
                ),
                # TODO: datetime object
                # # 'Expires': Field(datetime, description='The date and time at which the object is
                # no longer cacheable.', is_optional=True),
                'GrantFullControl':
                Field(
                    String,
                    description=
                    '''Gives the grantee READ, READ_ACP, and WRITE_ACP permissions on
                    the object.''',
                    is_optional=True,
                ),
                'GrantRead':
                Field(
                    String,
                    description=
                    'Allows grantee to read the object data and its metadata.',
                    is_optional=True,
                ),
                'GrantReadACP':
                Field(String,
                      description='Allows grantee to read the object ACL.',
                      is_optional=True),
                'GrantWriteACP':
                Field(
                    String,
                    description=
                    'Allows grantee to write the ACL for the applicable object.',
                    is_optional=True,
                ),
                'Key':
                Field(
                    String,
                    description=
                    'Object key for which the PUT operation was initiated.',
                    is_optional=False,
                ),
                'Metadata':
                Field(
                    PermissiveDict(),
                    description=
                    'A map of metadata to store with the object in S3.',
                    is_optional=True,
                ),
                'ServerSideEncryption':
                Field(
                    String,
                    description=
                    '''The Server-side encryption algorithm used when storing this
                    object in S3 (e.g., AES256, aws:kms).''',
                    is_optional=True,
                ),
                'StorageClass':
                Field(
                    String,
                    description=
                    '''The type of storage to use for the object. Defaults to
                    'STANDARD'.''',
                    is_optional=True,
                ),
                'WebsiteRedirectLocation':
                Field(
                    String,
                    description=
                    '''If the bucket is configured as a website, redirects requests for
                    this object to another object in the same bucket or to an external URL. Amazon
                    S3 stores the value of this header in the object metadata.''',
                    is_optional=True,
                ),
                'SSECustomerAlgorithm':
                Field(
                    String,
                    description=
                    '''Specifies the algorithm to use when encrypting the object
                    (e.g., AES256).''',
                    is_optional=True,
                ),
                'SSECustomerKey':
                Field(
                    String,
                    description=
                    '''Specifies the customer-provided encryption key for Amazon S3 to
                    use in encrypting data. This value is used to store the object and then it is
                    discarded; Amazon does not store the encryption key. The key must be appropriate
                    for use with the algorithm specified in the
                    x-amz-server-side-encryption-customer-algorithm header.''',
                    is_optional=True,
                ),
                'SSECustomerKeyMD5':
                Field(
                    String,
                    description=
                    '''Specifies the 128-bit MD5 digest of the encryption key according
                    to RFC 1321. Amazon S3 uses this header for a message integrity check to ensure
                    the encryption key was transmitted without error.

                    Please note that this parameter is automatically populated if it is not
                    provided. Including this parameter is not required.''',
                    is_optional=True,
                ),
                'SSEKMSKeyId':
                Field(
                    String,
                    description=
                    '''Specifies the AWS KMS key ID to use for object encryption. All
                    GET and PUT requests for an object protected by AWS KMS will fail if not made
                    via SSL or using SigV4. Documentation on configuring any of the officially
                    supported AWS SDKs and CLI can be found at
                    http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingAWSSDK.html#specify-signature-version''',
                    is_optional=True,
                ),
                'RequestPayer':
                Field(
                    String,
                    description=
                    '''Confirms that the requester knows that she or he will be charged
                    for the request. Bucket owners need not specify this parameter in their
                    requests. Documentation on downloading objects from requester pays buckets can
                    be found at
                    http://docs.aws.amazon.com/AmazonS3/latest/dev/ObjectsinRequesterPaysBuckets.html''',
                    is_optional=True,
                ),
                'Tagging':
                Field(
                    String,
                    description=
                    '''The tag-set for the object. The tag-set must be encoded as URL
                    Query parameters. (For example, "Key1=Value1")''',
                    is_optional=True,
                ),
                'ObjectLockMode':
                Field(
                    String,
                    description=
                    'The Object Lock mode that you want to apply to this object.',
                    is_optional=True,
                ),
                # TODO: datetime object 'ObjectLockRetainUntilDate': Field(datetime,
                # description='The date and time when you want this object\'s Object Lock to
                # expire.', is_optional=True),
                'ObjectLockLegalHoldStatus':
                Field(
                    String,
                    description=
                    '''The Legal Hold status that you want to apply to the specified
                    object.''',
                    is_optional=True,
                ),
            }))
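A hedged sketch of a solid consuming this config. The solid itself is hypothetical, and it assumes that @solid's config parameter accepts the Field returned by put_object_configs() directly; only boto3's standard put_object call and ambient AWS credentials are otherwise assumed:

import boto3

from dagster import solid


@solid(config=put_object_configs())
def put_object_solid(context):
    # Body is supplied here rather than via config, per the note above.
    # Optional fields left out of run config are simply absent from
    # solid_config, so the kwargs expansion passes only what was provided.
    client = boto3.client('s3')
    return client.put_object(Body=b'example payload', **context.solid_config)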
Example #15
0
from dagster import Field, List, PermissiveDict, String, executor


@executor(
    name='celery',
    config={
        'broker':
        Field(
            String,
            is_optional=True,
            description=(
                'The URL of the Celery broker. Default: '
                '\'pyamqp://guest@{os.getenv(\'DAGSTER_CELERY_BROKER_HOST\','
                '\'localhost\')}//\'.'),
        ),
        'backend':
        Field(
            String,
            is_optional=True,
            default_value='rpc://',
            description=
            'The URL of the Celery results backend. Default: \'rpc://\'.',
        ),
        'include':
        Field(List[String],
              is_optional=True,
              description='List of modules every worker should import'),
        'config_source':
        Field(PermissiveDict(),
              is_optional=True,
              description='Settings for the Celery app.'),
    },
)
def celery_executor(init_context):
    '''Celery-based executor.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings" introduced
    in Celery version 4.0 and the object constructed from config will be passed to the
    :py:class:`celery.Celery` constructor as its ``config_source`` argument.
    (See https://docs.celeryproject.org/en/latest/userguide/configuration.html for details.)
    
    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments to the
    :py:class:`celery.Celery` constructor.