Exemple #1
0
class S3ActionTypes(object):
    """Action types offered for S3.

    Each attribute is an ``ActionType`` whose ``params_json_schema`` is the
    JSON schema describing the parameters the action accepts.
    """

    copy = ActionType(
        name='copy',
        description='Accomplishes s3 source to s3 destination copy',
        params_json_schema={
            'type': 'object',
            'properties': {
                'from_path': {
                    'type': 'string',
                    'pattern': '^s3://.+$',
                    'description': 'The source s3 file path',
                },
                'to_path': {
                    'type': 'string',
                    'pattern': '^s3://.+$',
                    'description': 'The destination s3 file path',
                },
                'recursive': {
                    'type': ['boolean', 'null'],
                    'default': True,
                    'description': 'Performs recursive copy of source to destination',
                },
            },
            # 'additionalProperties' and 'required' are keywords of the object
            # schema itself. Nested inside 'properties' they would be read as
            # parameters named "additionalProperties" and "required", so the
            # constraints below would never be enforced.
            'additionalProperties': False,
            'required': ['from_path', 'to_path'],
        })
Exemple #2
0
class DynamoDBActionTypes(object):
    """Action types offered for DynamoDB.

    Each attribute is an ``ActionType``; where present, ``params_json_schema``
    is the JSON schema describing the parameters the action accepts.
    """

    # Table creation: both capacity settings are listed as required.
    create_table = ActionType(
        name='create_table',
        description=(
            'creates a new DynamoDB table, where the hash key, sort key, attributes, and their types are '
            'inferred from datastore\'s dataset'
        ),
        params_json_schema={
            'type': 'object',
            'properties': {
                'read_capacity_units': {
                    'type': ['integer', 'null'],
                    'minimum': 1,
                    'default': 25,
                    'description': 'the initial read throughput',
                },
                'write_capacity_units': {
                    'type': ['integer', 'null'],
                    'minimum': 1,
                    'default': 25,
                    'description': 'the initial write throughput',
                },
            },
            'additionalProperties': False,
            'required': ['read_capacity_units', 'write_capacity_units'],
        },
    )

    # Table deletion takes no parameters schema.
    delete_table = ActionType(name='delete_table', description='deletes a DynamoDB table')

    # Dataset load: every parameter is optional ('required' is empty).
    load_dataset = ActionType(
        name='load_dataset',
        description='loads the data from this datastore\'s dataset',
        params_json_schema={
            'type': 'object',
            'properties': {
                's3_path_start_prefix_inclusive': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^s3://.+$',
                    'description': 'The inclusive s3 path start prefix',
                },
                's3_path_end_prefix_exclusive': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^s3://.+$',
                    'description': 'The exclusive s3 path end prefix',
                },
                's3_path_regex_filter': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description': 'A regex pattern the s3 path must match',
                },
                'initial_write_capacity_units': {
                    'type': ['integer', 'null'],
                    'description': 'leave blank to avoid changing this value',
                },
                'final_write_capacity_units': {
                    'type': ['integer', 'null'],
                    'description': 'leave blank to avoid changing this value',
                },
                'write_capacity_utilization_percent': {
                    'type': ['number', 'null'],
                    'default': 0.5,
                    'minimum': 0.1,
                    'maximum': 1.5,
                    'description': 'the percentage of write capacity units to utilize',
                },
            },
            'additionalProperties': False,
            'required': [],
        },
    )
Exemple #3
0
class EmrActionTypes(object):
    """Action types offered for EMR.

    Each attribute is an ``ActionType``; where present, ``params_json_schema``
    is the JSON schema describing the parameters the action accepts.
    """

    # ---- datastore lifecycle ------------------------------------------------
    start_datastore = ActionType(
        name='start_datastore',
        description='Start this datastore for the first time',
        params_json_schema={
            'type': 'object',
            'properties': {
                'bootstrap_script': {
                    'type': ['string', 'null'],
                    'description': 'The contents of this script will be executed as a bootstrap step',
                    'x-schema-form': {'type': 'textarea'},
                },
                'configuration_overrides': {
                    'type': ['string', 'null'],
                    # The text below is a runtime string: its line breaks and
                    # leading whitespace are part of the value.
                    'description': """
                        Optional configuration overrides as a JSON array of Amazon EMR Configuration objects.
                        See the following for a detailed description of the format:
                        http://docs.aws.amazon.com/ElasticMapReduce/latest/ReleaseGuide/emr-configure-apps.html
                        """,
                    'x-schema-form': {'type': 'textarea'},
                },
            },
            'additionalProperties': False,
            'required': [],
        },
    )
    terminate_datastore = ActionType(name='terminate_datastore', description='Permanently destroy this datastore')

    # ---- script execution (each takes a single required 'script_contents') --
    run_hive_script_action = ActionType(
        name='run_hive_script',
        description='Run the provided hive script on the EMR cluster',
        params_json_schema={
            'type': 'object',
            'properties': {
                'script_contents': {
                    'type': 'string',
                    'description': 'The contents of the hive script to execute',
                    'x-schema-form': {'type': 'textarea'},
                },
            },
            'additionalProperties': False,
            'required': ['script_contents'],
        },
    )
    run_impala_script_action = ActionType(
        name='run_impala_script',
        description='Run the provided impala script on the EMR cluster',
        params_json_schema={
            'type': 'object',
            'properties': {
                'script_contents': {
                    'type': 'string',
                    'description': 'The contents of the impala script to execute',
                    'x-schema-form': {'type': 'textarea'},
                },
            },
            'additionalProperties': False,
            'required': ['script_contents'],
        },
    )
    run_pyspark_script_action = ActionType(
        name='run_pyspark_script',
        description='Run the provided pyspark script on the EMR cluster',
        params_json_schema={
            'type': 'object',
            'properties': {
                'script_contents': {
                    'type': 'string',
                    'description': 'The contents of the pyspark script to execute',
                    'x-schema-form': {'type': 'textarea'},
                },
            },
            'additionalProperties': False,
            'required': ['script_contents'],
        },
    )

    # ---- data movement ------------------------------------------------------
    copy_hdfs_to_s3_action = ActionType(
        name='copy_hdfs_to_s3',
        description='Copies data at the specified hdfs path to s3',
        params_json_schema={
            'type': 'object',
            'properties': {
                'source_hdfs_path': {
                    'type': 'string',
                    'pattern': '^hdfs://.+$',
                    'description': 'The source hdfs path, e.g. hdfs:///user/hive/warehouse/table',
                },
                'destination_s3_path': {
                    'type': 'string',
                    'pattern': '^s3://.+$',
                    'description': 'The destination s3 path, e.g. s3://bucket/prefix',
                },
            },
            'additionalProperties': False,
            'required': ['source_hdfs_path', 'destination_s3_path'],
        },
    )
    load_dataset = ActionType(
        name='load_dataset',
        description='Copies the dataset from s3 to the datastore',
        params_json_schema={
            'type': 'object',
            'properties': {
                'dataset_id': {'type': 'string', 'description': 'The id of the dataset to load'},
                # Each s3 path setting is preceded by its own date offset.
                's3_path_start_prefix_inclusive_date_offset_in_seconds': {
                    'type': ['integer', 'null'],
                    'default': 0,
                    'description': 'If specified, the date used in s3 path substitutions will be adjusted by this amount',
                },
                's3_path_start_prefix_inclusive': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^s3://.+$',
                    'description': (
                        'The inclusive s3 path start prefix. The following values (with braces) will be '
                        'substituted with the appropriate zero-padded values at runtime: {YEAR}, {MONTH}, '
                        '{DAY}, {HOUR}, {MINUTE}, {SECOND}'
                    ),
                },
                's3_path_end_prefix_exclusive_date_offset_in_seconds': {
                    'type': ['integer', 'null'],
                    'default': 0,
                    'description': 'If specified, the date used in s3 path substitutions will be adjusted by this amount',
                },
                's3_path_end_prefix_exclusive': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^s3://.+$',
                    'description': (
                        'The exclusive s3 path end prefix. The following values (with braces) will be '
                        'substituted with the appropriate zero-padded values at runtime: {YEAR}, {MONTH}, '
                        '{DAY}, {HOUR}, {MINUTE}, {SECOND}'
                    ),
                },
                's3_path_regex_filter_date_offset_in_seconds': {
                    'type': ['integer', 'null'],
                    'default': 0,
                    'description': 'If specified, the date used in s3 path substitutions will be adjusted by this amount',
                },
                's3_path_regex_filter': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description': (
                        'A regex pattern the s3 path must match. The following values (with braces) will be '
                        'substituted with the appropriate zero-padded values at runtime: {YEAR}, {MONTH}, '
                        '{DAY}, {HOUR}, {MINUTE}, {SECOND}'
                    ),
                },
                # target_* settings are described as overriding the dataset's own settings.
                'target_table_name': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^[a-zA-Z0-9_]+$',
                    'description': 'overrides dataset setting',
                },
                'target_file_format': {
                    'type': ['string', 'null'],
                    'enum': FileFormat.all(),
                    'default': FileFormat.PARQUET,
                    'description': 'overrides dataset setting',
                },
                'target_row_format': {
                    'type': ['string', 'null'],
                    'enum': RowFormat.all(),
                    'default': RowFormat.NONE,
                    'description': 'overrides dataset setting',
                },
                'target_compression': {
                    'type': ['string', 'null'],
                    'enum': Compression.all(),
                    'default': Compression.SNAPPY,
                    'description': 'overrides dataset setting',
                },
                'target_delimited_by': {'type': ['string', 'null'], 'default': None, 'description': 'overrides dataset setting'},
                'target_quoted_by': {'type': ['string', 'null'], 'default': None, 'description': 'overrides dataset setting'},
                'target_escaped_by': {'type': ['string', 'null'], 'default': None, 'description': 'overrides dataset setting'},
                'target_null_string': {'type': ['string', 'null'], 'default': None, 'description': 'overrides dataset setting'},
            },
            'additionalProperties': False,
            'required': ['dataset_id'],
        },
    )
    consume_subscription = ActionType(
        name='consume_subscription',
        description='Consumes the next available dataset subscription elements',
        params_json_schema={
            'type': 'object',
            'properties': {
                'subscription_id': {'type': 'string', 'description': 'The id of the subscription to consume'},
                'target_table_name': {
                    'type': ['string', 'null'],
                    'pattern': '^[a-zA-Z0-9_]+$',
                    'default': None,
                    'description': 'overrides dataset setting',
                },
                'target_file_format': {
                    'type': ['string', 'null'],
                    'enum': FileFormat.all(),
                    'default': FileFormat.TEXTFILE,
                    'description': 'overrides dataset setting',
                },
                'target_row_format': {
                    'type': ['string', 'null'],
                    'enum': RowFormat.all(),
                    'default': RowFormat.DELIMITED,
                    'description': 'overrides dataset setting',
                },
                'target_compression': {
                    'type': ['string', 'null'],
                    'enum': Compression.all(),
                    'default': Compression.GZIP,
                    'description': 'overrides dataset setting',
                },
                'target_delimited_by': {'type': ['string', 'null'], 'default': None, 'description': 'overrides dataset setting'},
                'target_quoted_by': {'type': ['string', 'null'], 'default': None, 'description': 'overrides dataset setting'},
                'target_escaped_by': {'type': ['string', 'null'], 'default': None, 'description': 'overrides dataset setting'},
                'target_null_string': {'type': ['string', 'null'], 'default': None, 'description': 'overrides dataset setting'},
            },
            'additionalProperties': False,
            'required': ['subscription_id'],
        },
    )
Exemple #4
0
class S3ActionTypes(object):
    """Action types offered for S3.

    Each attribute is an ``ActionType`` whose ``params_json_schema`` is the
    JSON schema describing the parameters the action accepts.
    """

    copy = ActionType(
        name='copy',
        description=(
            'Accomplishes s3 source to s3 destination copy, giving the destination bucket owner full control'
        ),
        params_json_schema={
            'type': 'object',
            'properties': {
                'from_path': {
                    'type': 'string',
                    'pattern': '^s3://.+$',
                    'description': 'The source s3 file path',
                },
                'to_path': {
                    'type': 'string',
                    'pattern': '^s3://.+$',
                    'description': 'The destination s3 file path',
                },
                'recursive': {
                    'type': ['boolean', 'null'],
                    'default': True,
                    'description': 'Performs recursive copy of source to destination',
                },
            },
            # 'additionalProperties' and 'required' are keywords of the object
            # schema itself. Nested inside 'properties' they would be read as
            # parameters named "additionalProperties" and "required", so the
            # constraints below would never be enforced.
            'additionalProperties': False,
            'required': ['from_path', 'to_path'],
        })

    # Data check: no parameter is listed as required; unknown keys are rejected.
    data_check = ActionType(
        name='data_check',
        description=(
            'A data check that passes if an s3 key/file exists that matches the specified requirements'
        ),
        params_json_schema={
            'type': 'object',
            'properties': {
                's3_path_prefix': {
                    'type': ['string', 'null'],
                    'pattern': '^s3://.+$',
                    'description': (
                        'The s3 path prefix where at least one s3 key/file should exist, e.g. '
                        's3://bucket/prefix. The following values (with braces) will be substituted with '
                        'the appropriate zero-padded values at runtime: {YEAR}, {MONTH}, {DAY}, {HOUR}, '
                        '{MINUTE}, {SECOND}'
                    ),
                },
                's3_path_regex': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description': (
                        'A regex pattern the s3 path must match. The following values (with braces) will '
                        'be substituted with the appropriate zero-padded values at runtime: {YEAR}, {MONTH},'
                        ' {DAY}, {HOUR}, {MINUTE}, {SECOND}'
                    ),
                },
                'min_file_size_in_bytes': {
                    'type': ['integer', 'null'],
                    'default': 0,
                    'minimum': 0,
                    'description': (
                        'If specified, at least one file matching the provided regex must be at least this size'
                    ),
                },
                'date_offset_in_seconds': {
                    'type': ['integer', 'null'],
                    'default': 0,
                    'description': (
                        'If specified, the date used in s3 path substitutions will be adjusted by this amount'
                    ),
                },
                's3_file_last_modified': {
                    'type': ['boolean', 'null'],
                    'default': False,
                    'description': (
                        'If specified, the data check factors last modified date into the data check'
                    ),
                },
            },
            'additionalProperties': False,
        },
    )
Exemple #5
0
class NoOpActionTypes(object):
    """Action types for the no-op engine.

    The parameterless actions describe themselves as helping engineers develop
    and test; the remaining ones declare a JSON schema for their parameters.
    """

    # ---- parameterless actions ----------------------------------------------
    action_that_succeeds = ActionType(
        name='fake_action_that_succeeds',
        description='helps engineers develop and test',
    )
    action_that_fails = ActionType(
        name='fake_action_that_fails',
        description='helps engineers develop and test',
    )

    # ---- actions with a parameter schema ------------------------------------
    copy_hdfs_to_s3_action = ActionType(
        name='fake_copy_hdfs_to_s3',
        description='Copies data at the specified hdfs path to s3',
        params_json_schema={
            'type': 'object',
            'properties': {
                'source_hdfs_path': {
                    'type': 'string',
                    'pattern': '^hdfs://.+$',
                    'description': 'The source hdfs path, e.g. hdfs:///user/hive/warehouse/table',
                },
                'destination_s3_path': {
                    'type': 'string',
                    'pattern': '^s3://.+$',
                    'description': 'The destination s3 path, e.g. s3://bucket/prefix',
                },
            },
            'additionalProperties': False,
            'required': ['source_hdfs_path', 'destination_s3_path'],
        },
    )
    load_dataset = ActionType(
        name='fake_load_dataset',
        description='Copies the dataset from s3 to the datastore',
        params_json_schema={
            'type': 'object',
            'properties': {
                'dataset_id': {'type': 'string', 'description': 'The id of the dataset to load'},
            },
            'additionalProperties': False,
            'required': ['dataset_id'],
        },
    )
    # NOTE(review): unlike the other actions in this class, this name carries no
    # 'fake_' prefix -- confirm whether that is intentional before changing it.
    consume_subscription = ActionType(
        name='consume_subscription',
        description='Consumes the next available dataset subscription elements',
        params_json_schema={
            'type': 'object',
            'properties': {
                'subscription_id': {'type': 'string', 'description': 'The id of the subscription to consume'},
            },
            'additionalProperties': False,
            'required': ['subscription_id'],
        },
    )
Exemple #6
0
class DynamoDBActionTypes(object):
    """Action types offered for DynamoDB.

    Each attribute is an ``ActionType``; where present, ``params_json_schema``
    is the JSON schema describing the parameters the action accepts.
    """

    # Table creation: both capacity settings are listed as required.
    create_table = ActionType(
        name='create_table',
        description=(
            'creates a new DynamoDB table, where the hash key, sort key, attributes, and their types are '
            'inferred from datastore\'s dataset'
        ),
        params_json_schema={
            'type': 'object',
            'properties': {
                'read_capacity_units': {'type': ['integer', 'null'], 'minimum': 1, 'default': 25, 'description': 'the initial read throughput'},
                'write_capacity_units': {'type': ['integer', 'null'], 'minimum': 1, 'default': 25, 'description': 'the initial write throughput'},
            },
            'additionalProperties': False,
            'required': ['read_capacity_units', 'write_capacity_units'],
        },
    )

    # Table deletion takes no parameters schema.
    delete_table = ActionType(name='delete_table', description='deletes a DynamoDB table')

    # Dataset load: every parameter is optional ('required' is empty).
    load_dataset = ActionType(
        name='load_dataset',
        description='loads the data from this datastore\'s dataset',
        params_json_schema={
            'type': 'object',
            'properties': {
                # Each s3 path setting is preceded by its own date offset.
                's3_path_start_prefix_inclusive_date_offset_in_seconds': {
                    'type': ['integer', 'null'],
                    'default': 0,
                    'description': 'If specified, the date used in s3 path substitutions will be adjusted by this amount',
                },
                's3_path_start_prefix_inclusive': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^s3://.+$',
                    'description': (
                        'The inclusive s3 path start prefix. The following values (with braces) will be '
                        'substituted with the appropriate zero-padded values at runtime: {YEAR}, {MONTH}, '
                        '{DAY}, {HOUR}, {MINUTE}, {SECOND}'
                    ),
                },
                's3_path_end_prefix_exclusive_date_offset_in_seconds': {
                    'type': ['integer', 'null'],
                    'default': 0,
                    'description': 'If specified, the date used in s3 path substitutions will be adjusted by this amount',
                },
                's3_path_end_prefix_exclusive': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^s3://.+$',
                    'description': (
                        'The exclusive s3 path end prefix. The following values (with braces) will be '
                        'substituted with the appropriate zero-padded values at runtime: {YEAR}, {MONTH}, '
                        '{DAY}, {HOUR}, {MINUTE}, {SECOND}'
                    ),
                },
                's3_path_regex_filter_date_offset_in_seconds': {
                    'type': ['integer', 'null'],
                    'default': 0,
                    'description': 'If specified, the date used in s3 path substitutions will be adjusted by this amount',
                },
                's3_path_regex_filter': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description': (
                        'A regex pattern the s3 path must match. The following values (with braces) will be '
                        'substituted with the appropriate zero-padded values at runtime: {YEAR}, {MONTH}, '
                        '{DAY}, {HOUR}, {MINUTE}, {SECOND}'
                    ),
                },
                'initial_write_capacity_units': {'type': ['integer', 'null'], 'description': 'leave blank to avoid changing this value'},
                'final_write_capacity_units': {'type': ['integer', 'null'], 'description': 'leave blank to avoid changing this value'},
                'write_capacity_utilization_percent': {
                    'type': ['number', 'null'],
                    'default': 0.5,
                    'minimum': 0.1,
                    'maximum': 1.5,
                    'description': 'the percentage of write capacity units to utilize',
                },
            },
            'additionalProperties': False,
            'required': [],
        },
    )
Exemple #7
0
class EmrActionTypes(object):
    """Action types offered for EMR.

    Each attribute is an ``ActionType``; where present, ``params_json_schema``
    is the JSON schema describing the parameters the action accepts.
    """

    # ---- datastore lifecycle (no parameters schema) -------------------------
    start_datastore = ActionType(name='start_datastore', description='Start this datastore for the first time')
    terminate_datastore = ActionType(name='terminate_datastore', description='Permanently destroy this datastore')

    # ---- script execution (each takes a single required 'script_contents') --
    run_hive_script_action = ActionType(
        name='run_hive_script',
        description='Run the provided hive script on the EMR cluster',
        params_json_schema={
            'type': 'object',
            'properties': {
                'script_contents': {
                    'type': 'string',
                    'description': 'The contents of the hive script to execute',
                    'x-schema-form': {'type': 'textarea'},
                },
            },
            'additionalProperties': False,
            'required': ['script_contents'],
        },
    )
    run_impala_script_action = ActionType(
        name='run_impala_script',
        description='Run the provided impala script on the EMR cluster',
        params_json_schema={
            'type': 'object',
            'properties': {
                'script_contents': {
                    'type': 'string',
                    'description': 'The contents of the impala script to execute',
                    'x-schema-form': {'type': 'textarea'},
                },
            },
            'additionalProperties': False,
            'required': ['script_contents'],
        },
    )
    run_pyspark_script_action = ActionType(
        name='run_pyspark_script',
        description='Run the provided pyspark script on the EMR cluster',
        params_json_schema={
            'type': 'object',
            'properties': {
                'script_contents': {
                    'type': 'string',
                    'description': 'The contents of the pyspark script to execute',
                    'x-schema-form': {'type': 'textarea'},
                },
            },
            'additionalProperties': False,
            'required': ['script_contents'],
        },
    )

    # ---- data movement ------------------------------------------------------
    copy_hdfs_to_s3_action = ActionType(
        name='copy_hdfs_to_s3',
        description='Copies data at the specified hdfs path to s3',
        params_json_schema={
            'type': 'object',
            'properties': {
                'source_hdfs_path': {
                    'type': 'string',
                    'pattern': '^hdfs://.+$',
                    'description': 'The source hdfs path, e.g. hdfs:///user/hive/warehouse/table',
                },
                'destination_s3_path': {
                    'type': 'string',
                    'pattern': '^s3://.+$',
                    'description': 'The destination s3 path, e.g. s3://bucket/prefix',
                },
            },
            'additionalProperties': False,
            'required': ['source_hdfs_path', 'destination_s3_path'],
        },
    )
    load_dataset = ActionType(
        name='load_dataset',
        description='Copies the dataset from s3 to the datastore',
        params_json_schema={
            'type': 'object',
            'properties': {
                'dataset_id': {'type': 'string', 'description': 'The id of the dataset to load'},
                's3_path_start_prefix_inclusive': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^s3://.+$',
                    'description': 'The inclusive s3 path start prefix',
                },
                's3_path_end_prefix_exclusive': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^s3://.+$',
                    'description': 'The exclusive s3 path end prefix',
                },
                's3_path_regex_filter': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description': 'A regex pattern the s3 path must match',
                },
                # target_* settings are described as overriding the dataset's own settings.
                'target_table_name': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^[a-zA-Z0-9_]+$',
                    'description': 'overrides dataset setting',
                },
                'target_file_format': {
                    'type': ['string', 'null'],
                    'enum': FileFormat.all(),
                    'default': FileFormat.PARQUET,
                    'description': 'overrides dataset setting',
                },
                'target_row_format': {
                    'type': ['string', 'null'],
                    'enum': RowFormat.all(),
                    'default': RowFormat.NONE,
                    'description': 'overrides dataset setting',
                },
                'target_compression': {
                    'type': ['string', 'null'],
                    'enum': Compression.all(),
                    'default': Compression.SNAPPY,
                    'description': 'overrides dataset setting',
                },
                'target_delimited_by': {'type': ['string', 'null'], 'default': None, 'description': 'overrides dataset setting'},
                'target_quoted_by': {'type': ['string', 'null'], 'default': None, 'description': 'overrides dataset setting'},
                'target_escaped_by': {'type': ['string', 'null'], 'default': None, 'description': 'overrides dataset setting'},
                'target_null_string': {'type': ['string', 'null'], 'default': None, 'description': 'overrides dataset setting'},
            },
            'additionalProperties': False,
            'required': ['dataset_id'],
        },
    )
    consume_subscription = ActionType(
        name='consume_subscription',
        description='Consumes the next available dataset subscription elements',
        params_json_schema={
            'type': 'object',
            'properties': {
                'subscription_id': {'type': 'string', 'description': 'The id of the subscription to consume'},
                'target_table_name': {
                    'type': ['string', 'null'],
                    'pattern': '^[a-zA-Z0-9_]+$',
                    'default': None,
                    'description': 'overrides dataset setting',
                },
                'target_file_format': {
                    'type': ['string', 'null'],
                    'enum': FileFormat.all(),
                    'default': FileFormat.TEXTFILE,
                    'description': 'overrides dataset setting',
                },
                'target_row_format': {
                    'type': ['string', 'null'],
                    'enum': RowFormat.all(),
                    'default': RowFormat.DELIMITED,
                    'description': 'overrides dataset setting',
                },
                'target_compression': {
                    'type': ['string', 'null'],
                    'enum': Compression.all(),
                    'default': Compression.GZIP,
                    'description': 'overrides dataset setting',
                },
                'target_delimited_by': {'type': ['string', 'null'], 'default': None, 'description': 'overrides dataset setting'},
                'target_quoted_by': {'type': ['string', 'null'], 'default': None, 'description': 'overrides dataset setting'},
                'target_escaped_by': {'type': ['string', 'null'], 'default': None, 'description': 'overrides dataset setting'},
                'target_null_string': {'type': ['string', 'null'], 'default': None, 'description': 'overrides dataset setting'},
            },
            'additionalProperties': False,
            'required': ['subscription_id'],
        },
    )
Exemple #8
0
def _redshift_copy_params_schema(id_property, id_description):
    """Build the params schema shared by the Redshift s3-to-table actions.

    ``load_dataset`` and ``consume_subscription`` accept exactly the same
    options and differ only in the id that selects what to copy, so both
    schemas are generated here to keep them from drifting apart.

    :param id_property: name of the single required string property
        (``'dataset_id'`` or ``'subscription_id'``).
    :param id_description: description attached to that property.
    :returns: a newly built schema dict; no nested object is shared between
        calls.
    """
    s3_path_pattern = '^s3://.+$'
    identifier_pattern = '^[a-zA-Z0-9_]+$'
    return {
        'type': 'object',
        'properties': {
            # The id property is listed first, matching the hand-written
            # schemas this helper replaces.
            id_property: {
                'type': 'string',
                'description': id_description
            },
            's3_path_start_prefix_inclusive': {
                'type': ['string', 'null'],
                'default': None,
                'pattern': s3_path_pattern,
                'description': 'The inclusive s3 path start prefix'
            },
            's3_path_end_prefix_exclusive': {
                'type': ['string', 'null'],
                'default': None,
                'pattern': s3_path_pattern,
                'description': 'The exclusive s3 path end prefix'
            },
            's3_path_regex_filter': {
                'type': ['string', 'null'],
                'default': None,
                'description': 'A regex pattern the s3 path must match'
            },
            'target_schema_name': {
                'type': ['string', 'null'],
                'default': 'public',
                'pattern': identifier_pattern,
                'description': 'created if absent'
            },
            'target_table_name': {
                'type': ['string', 'null'],
                'default': None,
                'pattern': identifier_pattern,
                'description': 'overrides dataset setting'
            },
            'target_sort_keys': {
                'type': 'array',
                'default': [],
                'maxItems': 400,
                'description': 'overrides dataset setting',
                # 'x-schema-form' is not a JSON schema keyword; presumably it
                # is a rendering hint for the form UI -- TODO confirm.
                'x-schema-form': {
                    'type': 'tabarray',
                    'title': "{{ value || 'sort_key ' + $index }}"
                },
                'items': {
                    'type': 'string',
                    'pattern': identifier_pattern,
                    'maxLength': 127
                }
            },
            'target_distribution_key': {
                'type': ['string', 'null'],
                'default': None,
                'pattern': identifier_pattern,
                'description': 'overrides dataset setting'
            },
            'distribution_style': {
                'type': 'string',
                'default': 'EVEN',
                'enum': ['EVEN', 'ALL'],
                'description': 'ignored if dist_key is chosen'
            },
            'sort_keys_interleaved': {
                'type': ['boolean', 'null'],
                'default': False,
                'description': 'see AWS Redshift docs'
            },
            'truncate_columns': {
                'type': ['boolean', 'null'],
                'default': True
            },
            'max_errors': {
                'type': ['integer', 'null'],
                'default': 0,
                'minimum': 0
            },
            'batch_size': {
                'type': ['integer', 'null'],
                'default': 0,
                'minimum': 0
            },
        },
        'additionalProperties': False,
        'required': [id_property],
    }


class RedshiftActionTypes(object):
    """Action types declared for a Redshift datastore.

    Each attribute is an ``ActionType`` built from a name, a description and,
    where the action takes parameters, a JSON schema describing them.
    """
    start_datastore = ActionType(
        name='start_datastore',
        description='create or restore this Redshift cluster',
        params_json_schema={
            'type': 'object',
            'properties': {
                'snapshot_name': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description':
                        'the cluster will be restored from this snapshot, or else the latest if one exists'
                        ' (otherwise, a new cluster will be created)'
                },
            },
            'additionalProperties': False,
        },
    )
    stop_datastore = ActionType(
        name='stop_datastore',
        description='Stops this Redshift cluster and creates a final snapshot',
    )
    create_snapshot = ActionType(
        name='create_snapshot',
        description='create a snapshot of this cluster in the form "dart-datastore-<id>-<YYYYmmddHHMMSS>"',
    )
    execute_sql = ActionType(
        name='execute_sql',
        description='Executes a user defined SQL script',
        params_json_schema={
            'type': 'object',
            'properties': {
                'sql_script': {
                    'type': 'string',
                    'x-schema-form': {'type': 'textarea'},
                    'description': 'The SQL script to be executed'
                },
            },
            'additionalProperties': False,
            'required': ['sql_script'],
        },
    )
    # load_dataset and consume_subscription share every option except the id
    # that selects the data, hence the common schema builder.
    load_dataset = ActionType(
        name='load_dataset',
        description='Copies the dataset from s3 to the datastore',
        params_json_schema=_redshift_copy_params_schema(
            'dataset_id', 'The id of the dataset to load'),
    )
    consume_subscription = ActionType(
        name='consume_subscription',
        description='Consumes the next available dataset subscription elements',
        params_json_schema=_redshift_copy_params_schema(
            'subscription_id', 'The id of the subscription to consume'),
    )
    copy_to_s3 = ActionType(
        name='copy_to_s3',
        description='exports the results of a sql statement to s3',
        params_json_schema={
            'type': 'object',
            'properties': {
                'delimiter': {
                    'type': ['string', 'null'],
                    'default': '\t',
                    'description': 'field delimiter'
                },
                'source_sql_statement': {
                    'type': 'string',
                    'x-schema-form': {'type': 'textarea'},
                    'description': 'the SQL SELECT statement to be executed'
                },
                'destination_s3_path': {
                    'type': 'string',
                    'pattern': '^s3://.+$',
                    # Every placeholder is written with matching braces so the
                    # text can be copied verbatim into a path.
                    'description':
                        'The destination s3 path, e.g. s3://bucket/prefix.  The following values (with braces)'
                        ' will be substituted with the appropriate zero-padded values at runtime: '
                        '{YEAR}, {MONTH}, {DAY}, {HOUR}, {MINUTE}, {SECOND}'
                },
                'parallel': {
                    'type': 'boolean',
                    'default': True,
                    'description': 'if false, unload sequentially as one file'
                },
            },
            'additionalProperties': False,
            'required': ['source_sql_statement', 'destination_s3_path'],
        },
    )
    data_check = ActionType(
        name='data_check',
        description='Executes a user defined, SQL data check',
        params_json_schema={
            'type': 'object',
            'properties': {
                'sql_script': {
                    'type': 'string',
                    'x-schema-form': {'type': 'textarea'},
                    'description':
                        'this SQL should return one row that is true (for "passed") or false (for "failed")'
                },
            },
            'additionalProperties': False,
            'required': ['sql_script'],
        },
    )
Exemple #9
0
class ElasticsearchActionTypes(object):
    """Action types declared for an Elasticsearch datastore.

    Each attribute is an ``ActionType`` built from a name, a description and a
    JSON schema describing the parameters the action accepts.
    """
    data_check = ActionType(
        name='data_check',
        description='Executes a user defined, Elasticsearch data check',
        params_json_schema={
            'type': 'object',
            'properties': {
                'index': {
                    'type': 'string',
                    'default': '_all',
                    'description':
                        'The Elasticsearch index to perform the query on. '
                        'Leave blank or explicitly set to "_all" to perform the query on all indices.'
                },
                'document_type': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description':
                        'The Elasticsearch document type to perform the query on. '
                        'Leave blank to perform the query on all document types.'
                },
                'query_body': {
                    'type': 'string',
                    'x-schema-form': {'type': 'textarea'},
                    'description':
                        'The Elasticsearch query should return a response that contains at least one result '
                        '("hits" in Elasticsearch terminology) for the data check to pass. '
                        'https://www.elastic.co/guide/en/elasticsearch/reference/5.1/query-dsl.html'
                },
                'expected_count': {
                    'type': 'integer',
                    'default': 0,
                    'description':
                        'The expected count of documents to be returned by the query. '
                        ' Use this and the operator to return a truthy value for the data check to pass.'
                },
                'operator': {
                    'type': 'string',
                    'default': '>',
                    'description':
                        'The operator to apply to the query and expected count. '
                        'i.e. result count > expected count ',
                    'enum': ['>', '>=', '<', '<=', '==', '!=']
                }
            },
            'additionalProperties': False,
            'required': ['query_body'],
        },
    )

    create_index = ActionType(
        name='create_index',
        description='Creates an Elasticsearch index',
        params_json_schema={
            'type': 'object',
            'properties': {
                'index': {
                    'type': 'string',
                    'default': '_all',
                    'description': 'The Elasticsearch index to create.'
                },
                'mapping': {
                    'type': 'string',
                    'x-schema-form': {'type': 'textarea'},
                    'description': 'The Elasticsearch index mapping.'
                },
            },
            'additionalProperties': False,
            'required': ['index'],
        },
    )

    create_mapping = ActionType(
        name='create_mapping',
        description='Creates an Elasticsearch mapping',
        params_json_schema={
            'type': 'object',
            'properties': {
                'index': {
                    'type': 'string',
                    'default': '_all',
                    'description':
                        'The Elasticsearch index to create the mapping for. '
                        'Explicitly set to "_all" or leave blank to create the mapping for all indices.'
                },
                'document_type': {
                    'type': 'string',
                    'description': 'The Elasticsearch document type to create the mapping for.'
                },
                'mapping': {
                    'type': 'string',
                    'x-schema-form': {'type': 'textarea'},
                    'description': 'The Elasticsearch mapping.'
                },
            },
            'additionalProperties': False,
            'required': ['mapping', 'document_type'],
        },
    )

    create_template = ActionType(
        name='create_template',
        description='Creates an Elasticsearch template',
        params_json_schema={
            'type': 'object',
            'properties': {
                'template_name': {
                    'type': 'string',
                    'description': 'The Elasticsearch template name to create.'
                },
                'template': {
                    'type': 'string',
                    'x-schema-form': {'type': 'textarea'},
                    'description': 'The Elasticsearch template.'
                },
            },
            'additionalProperties': False,
            'required': ['template', 'template_name'],
        },
    )

    delete_index = ActionType(
        name='delete_index',
        description='Deletes an Elasticsearch index',
        params_json_schema={
            'type': 'object',
            'properties': {
                'index': {
                    'type': 'string',
                    # NOTE(review): the default is '_all', which other
                    # descriptions in this class use to mean "all indices";
                    # presumably submitting the default deletes every index --
                    # confirm this is intended.
                    'default': '_all',
                    'description': 'The Elasticsearch index to delete.'
                }
            },
            'additionalProperties': False,
            'required': ['index'],
        },
    )

    delete_template = ActionType(
        name='delete_template',
        description='Deletes an Elasticsearch template',
        params_json_schema={
            'type': 'object',
            'properties': {
                'template_name': {
                    'type': 'string',
                    'description': 'The Elasticsearch template name to delete.'
                },
            },
            'additionalProperties': False,
            'required': ['template_name'],
        },
    )

    force_merge_index = ActionType(
        name='force_merge_index',
        description='Force merges an Elasticsearch index',
        params_json_schema={
            'type': 'object',
            'properties': {
                'index': {
                    'type': 'string',
                    'default': '_all',
                    'description':
                        'A comma-separated list of index names; use "_all" or empty string to perform the '
                        'operation on all indices.'
                },
                'flush': {
                    'type': 'boolean',
                    'default': True,
                    'description': 'Specify whether the index should be flushed after performing the operation'
                },
                'allow_no_indices': {
                    'type': 'boolean',
                    'default': False,
                    'description':
                        'Whether to ignore if a wildcard indices expression resolves into no concrete indices. '
                        '(This includes "_all" string or when no indices have been specified)'
                },
                'expand_wildcards': {
                    'type': 'string',
                    'default': 'open',
                    'pattern': '^(open|closed|none|all)$',
                    'description':
                        'Whether to expand wildcard expression to concrete indices that are open, closed or '
                        'both. default is "open". valid choices are: "open", "closed", "none", "all"'
                },
                'max_num_segments': {
                    'type': ['integer', 'null'],
                    'default': None,
                    'description': 'The number of segments the index should be merged into (default: dynamic)'
                },
                'only_expunge_deletes': {
                    'type': 'boolean',
                    'default': False,
                    'description': 'Specify whether the operation should only expunge deleted documents'
                },
                'wait_for_merge': {
                    'type': 'boolean',
                    'default': True,
                    'description': 'Specify whether the request should block until the merge process is finished'
                }
            },
            'additionalProperties': False,
            'required': ['index'],
        },
    )