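# ActionType is used throughout this section but defined elsewhere. As a rough
# guide to reading the classes below, here is a minimal, hypothetical sketch of
# what such a value object could look like -- the actual class may carry
# additional fields; only the keyword arguments used in this section are shown.
class ActionType(object):
    def __init__(self, name, description, params_json_schema=None):
        # machine-readable action identifier, e.g. 'load_dataset'
        self.name = name
        # human-readable summary shown to users
        self.description = description
        # JSON Schema describing the action's parameters; None for actions
        # that take no parameters (e.g. terminate_datastore below)
        self.params_json_schema = params_json_schema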
class EmrActionTypes(object):
    start_datastore = ActionType(
        name='start_datastore',
        description='Start this datastore for the first time',
        params_json_schema={
            'type': 'object',
            'properties': {
                'bootstrap_script': {
                    'type': ['string', 'null'],
                    'description': 'The contents of this script will be executed as a bootstrap step',
                    'x-schema-form': {'type': 'textarea'}
                },
                'configuration_overrides': {
                    'type': ['string', 'null'],
                    'description': 'Optional configuration overrides as a JSON array of Amazon EMR Configuration '
                                   'objects. See the following for a detailed description of the format: '
                                   'http://docs.aws.amazon.com/ElasticMapReduce/latest/ReleaseGuide/emr-configure-apps.html',
                    'x-schema-form': {'type': 'textarea'}
                }
            },
            'additionalProperties': False,
            'required': [],
        })

    terminate_datastore = ActionType(
        name='terminate_datastore',
        description='Permanently destroy this datastore')

    run_hive_script_action = ActionType(
        name='run_hive_script',
        description='Run the provided hive script on the EMR cluster',
        params_json_schema={
            'type': 'object',
            'properties': {
                'script_contents': {
                    'type': 'string',
                    'description': 'The contents of the hive script to execute',
                    'x-schema-form': {'type': 'textarea'}
                },
            },
            'additionalProperties': False,
            'required': ['script_contents'],
        })

    run_impala_script_action = ActionType(
        name='run_impala_script',
        description='Run the provided impala script on the EMR cluster',
        params_json_schema={
            'type': 'object',
            'properties': {
                'script_contents': {
                    'type': 'string',
                    'description': 'The contents of the impala script to execute',
                    'x-schema-form': {'type': 'textarea'}
                },
            },
            'additionalProperties': False,
            'required': ['script_contents'],
        })

    run_pyspark_script_action = ActionType(
        name='run_pyspark_script',
        description='Run the provided pyspark script on the EMR cluster',
        params_json_schema={
            'type': 'object',
            'properties': {
                'script_contents': {
                    'type': 'string',
                    'description': 'The contents of the pyspark script to execute',
                    'x-schema-form': {'type': 'textarea'}
                },
            },
            'additionalProperties': False,
            'required': ['script_contents'],
        })

    copy_hdfs_to_s3_action = ActionType(
        name='copy_hdfs_to_s3',
        description='Copies data at the specified hdfs path to s3',
        params_json_schema={
            'type': 'object',
            'properties': {
                'source_hdfs_path': {
                    'type': 'string',
                    'pattern': '^hdfs://.+$',
                    'description': 'The source hdfs path, e.g. hdfs:///user/hive/warehouse/table'
                },
                'destination_s3_path': {
                    'type': 'string',
                    'pattern': '^s3://.+$',
                    'description': 'The destination s3 path, e.g. s3://bucket/prefix'
                },
            },
            'additionalProperties': False,
            'required': ['source_hdfs_path', 'destination_s3_path'],
        })

    load_dataset = ActionType(
        name='load_dataset',
        description='Copies the dataset from s3 to the datastore',
        params_json_schema={
            'type': 'object',
            'properties': {
                'dataset_id': {
                    'type': 'string',
                    'description': 'The id of the dataset to load'
                },
                's3_path_start_prefix_inclusive_date_offset_in_seconds': {
                    'type': ['integer', 'null'],
                    'default': 0,
                    'description': 'If specified, the date used in s3 path substitutions will be adjusted by this amount'
                },
                's3_path_start_prefix_inclusive': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^s3://.+$',
                    'description': 'The inclusive s3 path start prefix. The following values (with braces) will be '
                                   'substituted with the appropriate zero-padded values at runtime: {YEAR}, {MONTH}, '
                                   '{DAY}, {HOUR}, {MINUTE}, {SECOND}',
                },
                's3_path_end_prefix_exclusive_date_offset_in_seconds': {
                    'type': ['integer', 'null'],
                    'default': 0,
                    'description': 'If specified, the date used in s3 path substitutions will be adjusted by this amount'
                },
                's3_path_end_prefix_exclusive': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^s3://.+$',
                    'description': 'The exclusive s3 path end prefix. The following values (with braces) will be '
                                   'substituted with the appropriate zero-padded values at runtime: {YEAR}, {MONTH}, '
                                   '{DAY}, {HOUR}, {MINUTE}, {SECOND}',
                },
                's3_path_regex_filter_date_offset_in_seconds': {
                    'type': ['integer', 'null'],
                    'default': 0,
                    'description': 'If specified, the date used in s3 path substitutions will be adjusted by this amount'
                },
                's3_path_regex_filter': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description': 'A regex pattern the s3 path must match. The following values (with braces) will be '
                                   'substituted with the appropriate zero-padded values at runtime: {YEAR}, {MONTH}, '
                                   '{DAY}, {HOUR}, {MINUTE}, {SECOND}',
                },
                'target_table_name': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^[a-zA-Z0-9_]+$',
                    'description': 'overrides dataset setting'
                },
                'target_file_format': {
                    'type': ['string', 'null'],
                    'enum': FileFormat.all(),
                    'default': FileFormat.PARQUET,
                    'description': 'overrides dataset setting'
                },
                'target_row_format': {
                    'type': ['string', 'null'],
                    'enum': RowFormat.all(),
                    'default': RowFormat.NONE,
                    'description': 'overrides dataset setting'
                },
                'target_compression': {
                    'type': ['string', 'null'],
                    'enum': Compression.all(),
                    'default': Compression.SNAPPY,
                    'description': 'overrides dataset setting'
                },
                'target_delimited_by': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description': 'overrides dataset setting'
                },
                'target_quoted_by': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description': 'overrides dataset setting'
                },
                'target_escaped_by': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description': 'overrides dataset setting'
                },
                'target_null_string': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description': 'overrides dataset setting'
                },
            },
            'additionalProperties': False,
            'required': ['dataset_id'],
        })

    consume_subscription = ActionType(
        name='consume_subscription',
        description='Consumes the next available dataset subscription elements',
        params_json_schema={
            'type': 'object',
            'properties': {
                'subscription_id': {
                    'type': 'string',
                    'description': 'The id of the subscription to consume'
                },
                'target_table_name': {
                    'type': ['string', 'null'],
                    'pattern': '^[a-zA-Z0-9_]+$',
                    'default': None,
                    'description': 'overrides dataset setting'
                },
                'target_file_format': {
                    'type': ['string', 'null'],
                    'enum': FileFormat.all(),
                    'default': FileFormat.TEXTFILE,
                    'description': 'overrides dataset setting'
                },
                'target_row_format': {
                    'type': ['string', 'null'],
                    'enum': RowFormat.all(),
                    'default': RowFormat.DELIMITED,
                    'description': 'overrides dataset setting'
                },
                'target_compression': {
                    'type': ['string', 'null'],
                    'enum': Compression.all(),
                    'default': Compression.GZIP,
                    'description': 'overrides dataset setting'
                },
                'target_delimited_by': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description': 'overrides dataset setting'
                },
                'target_quoted_by': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description': 'overrides dataset setting'
                },
                'target_escaped_by': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description': 'overrides dataset setting'
                },
                'target_null_string': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description': 'overrides dataset setting'
                },
            },
            'additionalProperties': False,
            'required': ['subscription_id'],
        })
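# The params_json_schema dicts above are standard JSON Schema, so user-supplied
# parameters can be checked with an off-the-shelf validator before an action
# runs. A minimal sketch using the jsonschema package (an assumed dependency,
# not shown in this section):
import jsonschema

params = {'script_contents': 'SELECT COUNT(*) FROM my_table;'}
# raises jsonschema.ValidationError on failure, e.g. for a missing required
# field or an unexpected extra property; note that the 'default' values in
# these schemas are informational -- jsonschema.validate does not inject them
jsonschema.validate(params, EmrActionTypes.run_hive_script_action.params_json_schema)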
class S3ActionTypes(object):
    copy = ActionType(
        name='copy',
        description='Accomplishes s3 source to s3 destination copy, giving the destination bucket owner full control',
        params_json_schema={
            'type': 'object',
            'properties': {
                'from_path': {
                    'type': 'string',
                    'pattern': '^s3://.+$',
                    'description': 'The source s3 file path'
                },
                'to_path': {
                    'type': 'string',
                    'pattern': '^s3://.+$',
                    'description': 'The destination s3 file path'
                },
                'recursive': {
                    'type': ['boolean', 'null'],
                    'default': True,
                    'description': 'Performs recursive copy of source to destination'
                },
            },
            # these two keywords were previously nested inside 'properties',
            # where JSON Schema would treat them as property names rather than
            # constraints; they belong at the object level
            'additionalProperties': False,
            'required': ['from_path', 'to_path']
        })

    data_check = ActionType(
        name='data_check',
        description='A data check that passes if an s3 key/file exists that matches the specified requirements',
        params_json_schema={
            'type': 'object',
            'properties': {
                's3_path_prefix': {
                    'type': ['string', 'null'],
                    'pattern': '^s3://.+$',
                    'description': 'The s3 path prefix where at least one s3 key/file should exist, e.g. '
                                   's3://bucket/prefix. The following values (with braces) will be substituted with '
                                   'the appropriate zero-padded values at runtime: {YEAR}, {MONTH}, {DAY}, {HOUR}, '
                                   '{MINUTE}, {SECOND}'
                },
                's3_path_regex': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description': 'A regex pattern the s3 path must match. The following values (with braces) will '
                                   'be substituted with the appropriate zero-padded values at runtime: {YEAR}, '
                                   '{MONTH}, {DAY}, {HOUR}, {MINUTE}, {SECOND}'
                },
                'min_file_size_in_bytes': {
                    'type': ['integer', 'null'],
                    'default': 0,
                    'minimum': 0,
                    'description': 'If specified, at least one file matching the provided regex must be at least this size',
                },
                'date_offset_in_seconds': {
                    'type': ['integer', 'null'],
                    'default': 0,
                    'description': 'If specified, the date used in s3 path substitutions will be adjusted by this amount',
                },
                's3_file_last_modified': {
                    'type': ['boolean', 'null'],
                    'default': False,
                    'description': 'If specified, the data check factors the last modified date into the data check',
                },
            },
            'additionalProperties': False,
        },
    )
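# Several schemas above promise that {YEAR}, {MONTH}, {DAY}, {HOUR}, {MINUTE},
# and {SECOND} are substituted with zero-padded values at runtime, optionally
# shifted by a date_offset_in_seconds parameter. A sketch of that substitution
# under those assumptions (the real implementation lives elsewhere and may
# differ):
from datetime import datetime, timedelta


def substitute_date_tokens(path, date_offset_in_seconds=0, now=None):
    """Replace {YEAR}..{SECOND} tokens in an s3 path with zero-padded values."""
    dt = (now or datetime.utcnow()) + timedelta(seconds=date_offset_in_seconds)
    return path \
        .replace('{YEAR}', '%04d' % dt.year) \
        .replace('{MONTH}', '%02d' % dt.month) \
        .replace('{DAY}', '%02d' % dt.day) \
        .replace('{HOUR}', '%02d' % dt.hour) \
        .replace('{MINUTE}', '%02d' % dt.minute) \
        .replace('{SECOND}', '%02d' % dt.second)

# e.g. substitute_date_tokens('s3://bucket/dt={YEAR}-{MONTH}-{DAY}/', -86400)
# yields yesterday's partition prefix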
class NoOpActionTypes(object):
    action_that_succeeds = ActionType(
        name='fake_action_that_succeeds',
        description='helps engineers develop and test')

    action_that_fails = ActionType(
        name='fake_action_that_fails',
        description='helps engineers develop and test')

    copy_hdfs_to_s3_action = ActionType(
        name='fake_copy_hdfs_to_s3',
        description='Copies data at the specified hdfs path to s3',
        params_json_schema={
            'type': 'object',
            'properties': {
                'source_hdfs_path': {
                    'type': 'string',
                    'pattern': '^hdfs://.+$',
                    'description': 'The source hdfs path, e.g. hdfs:///user/hive/warehouse/table'
                },
                'destination_s3_path': {
                    'type': 'string',
                    'pattern': '^s3://.+$',
                    'description': 'The destination s3 path, e.g. s3://bucket/prefix'
                },
            },
            'additionalProperties': False,
            'required': ['source_hdfs_path', 'destination_s3_path'],
        })

    load_dataset = ActionType(
        name='fake_load_dataset',
        description='Copies the dataset from s3 to the datastore',
        params_json_schema={
            'type': 'object',
            'properties': {
                'dataset_id': {
                    'type': 'string',
                    'description': 'The id of the dataset to load'
                },
            },
            'additionalProperties': False,
            'required': ['dataset_id'],
        })

    consume_subscription = ActionType(
        name='consume_subscription',
        description='Consumes the next available dataset subscription elements',
        params_json_schema={
            'type': 'object',
            'properties': {
                'subscription_id': {
                    'type': 'string',
                    'description': 'The id of the subscription to consume'
                },
            },
            'additionalProperties': False,
            'required': ['subscription_id'],
        })
class DynamoDBActionTypes(object):
    create_table = ActionType(
        name='create_table',
        description='creates a new DynamoDB table, where the hash key, sort key, attributes, and their types are '
                    'inferred from the datastore\'s dataset',
        params_json_schema={
            'type': 'object',
            'properties': {
                'read_capacity_units': {
                    'type': ['integer', 'null'],
                    'minimum': 1,
                    'default': 25,
                    'description': 'the initial read throughput'
                },
                'write_capacity_units': {
                    'type': ['integer', 'null'],
                    'minimum': 1,
                    'default': 25,
                    'description': 'the initial write throughput'
                },
            },
            'additionalProperties': False,
            'required': ['read_capacity_units', 'write_capacity_units'],
        })

    delete_table = ActionType(
        name='delete_table',
        description='deletes a DynamoDB table',
    )

    load_dataset = ActionType(
        name='load_dataset',
        description='loads the data from this datastore\'s dataset',
        params_json_schema={
            'type': 'object',
            'properties': {
                's3_path_start_prefix_inclusive_date_offset_in_seconds': {
                    'type': ['integer', 'null'],
                    'default': 0,
                    'description': 'If specified, the date used in s3 path substitutions will be adjusted by this amount'
                },
                's3_path_start_prefix_inclusive': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^s3://.+$',
                    'description': 'The inclusive s3 path start prefix. The following values (with braces) will be '
                                   'substituted with the appropriate zero-padded values at runtime: {YEAR}, {MONTH}, '
                                   '{DAY}, {HOUR}, {MINUTE}, {SECOND}',
                },
                's3_path_end_prefix_exclusive_date_offset_in_seconds': {
                    'type': ['integer', 'null'],
                    'default': 0,
                    'description': 'If specified, the date used in s3 path substitutions will be adjusted by this amount'
                },
                's3_path_end_prefix_exclusive': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^s3://.+$',
                    'description': 'The exclusive s3 path end prefix. The following values (with braces) will be '
                                   'substituted with the appropriate zero-padded values at runtime: {YEAR}, {MONTH}, '
                                   '{DAY}, {HOUR}, {MINUTE}, {SECOND}',
                },
                's3_path_regex_filter_date_offset_in_seconds': {
                    'type': ['integer', 'null'],
                    'default': 0,
                    'description': 'If specified, the date used in s3 path substitutions will be adjusted by this amount'
                },
                's3_path_regex_filter': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description': 'A regex pattern the s3 path must match. The following values (with braces) will be '
                                   'substituted with the appropriate zero-padded values at runtime: {YEAR}, {MONTH}, '
                                   '{DAY}, {HOUR}, {MINUTE}, {SECOND}',
                },
                'initial_write_capacity_units': {
                    'type': ['integer', 'null'],
                    'description': 'leave blank to avoid changing this value'
                },
                'final_write_capacity_units': {
                    'type': ['integer', 'null'],
                    'description': 'leave blank to avoid changing this value'
                },
                'write_capacity_utilization_percent': {
                    'type': ['number', 'null'],
                    'default': 0.5,
                    'minimum': 0.1,
                    'maximum': 1.5,
                    'description': 'the fraction of write capacity units to utilize, e.g. 0.5 for 50%'
                },
            },
            'additionalProperties': False,
            'required': [],
        })
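# The s3_path_start_prefix_inclusive / s3_path_end_prefix_exclusive /
# s3_path_regex_filter trio appears in several load_dataset schemas above. A
# plausible reading is a lexicographic half-open range plus an optional regex,
# sketched here under that assumption (the real filter may differ):
import re


def s3_path_matches(path, start_inclusive=None, end_exclusive=None, regex_filter=None):
    """Return True if an s3 path falls in [start, end) and matches the regex."""
    if start_inclusive is not None and path < start_inclusive:
        return False
    if end_exclusive is not None and path >= end_exclusive:
        return False
    # re.search is assumed here; the schemas only say the path 'must match'
    if regex_filter is not None and not re.search(regex_filter, path):
        return False
    return True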
class RedshiftActionTypes(object):
    start_datastore = ActionType(
        name='start_datastore',
        description='create or restore this Redshift cluster',
        params_json_schema={
            'type': 'object',
            'properties': {
                'snapshot_name': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description': 'the cluster will be restored from this snapshot, or else the latest if one exists'
                                   ' (otherwise, a new cluster will be created)'
                },
            },
            'additionalProperties': False,
        },
    )

    stop_datastore = ActionType(
        name='stop_datastore',
        description='Stops this Redshift cluster and creates a final snapshot',
    )

    create_snapshot = ActionType(
        name='create_snapshot',
        description='create a snapshot of this cluster in the form "dart-datastore-<id>-<YYYYmmddHHMMSS>"',
    )

    execute_sql = ActionType(
        name='execute_sql',
        description='Executes a user defined SQL script',
        params_json_schema={
            'type': 'object',
            'properties': {
                'sql_script': {
                    'type': 'string',
                    'x-schema-form': {'type': 'textarea'},
                    'description': 'The SQL script to be executed'
                },
            },
            'additionalProperties': False,
            'required': ['sql_script'],
        },
    )

    load_dataset = ActionType(
        name='load_dataset',
        description='Copies the dataset from s3 to the datastore',
        params_json_schema={
            'type': 'object',
            'properties': {
                'dataset_id': {
                    'type': 'string',
                    'description': 'The id of the dataset to load'
                },
                's3_path_start_prefix_inclusive': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^s3://.+$',
                    'description': 'The inclusive s3 path start prefix'
                },
                's3_path_end_prefix_exclusive': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^s3://.+$',
                    'description': 'The exclusive s3 path end prefix'
                },
                's3_path_regex_filter': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description': 'A regex pattern the s3 path must match'
                },
                'target_schema_name': {
                    'type': ['string', 'null'],
                    'default': 'public',
                    'pattern': '^[a-zA-Z0-9_]+$',
                    'description': 'created if absent'
                },
                'target_table_name': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^[a-zA-Z0-9_]+$',
                    'description': 'overrides dataset setting'
                },
                'target_sort_keys': {
                    'type': 'array',
                    'default': [],
                    'maxItems': 400,
                    'description': 'overrides dataset setting',
                    'x-schema-form': {
                        'type': 'tabarray',
                        'title': "{{ value || 'sort_key ' + $index }}"
                    },
                    'items': {
                        'type': 'string',
                        'pattern': '^[a-zA-Z0-9_]+$',
                        'maxLength': 127
                    }
                },
                'target_distribution_key': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^[a-zA-Z0-9_]+$',
                    'description': 'overrides dataset setting'
                },
                'distribution_style': {
                    'type': 'string',
                    'default': 'EVEN',
                    'enum': ['EVEN', 'ALL'],
                    'description': 'ignored if dist_key is chosen'
                },
                'sort_keys_interleaved': {
                    'type': ['boolean', 'null'],
                    'default': False,
                    'description': 'see AWS Redshift docs'
                },
                'truncate_columns': {
                    'type': ['boolean', 'null'],
                    'default': True
                },
                'max_errors': {
                    'type': ['integer', 'null'],
                    'default': 0,
                    'minimum': 0
                },
                'batch_size': {
                    'type': ['integer', 'null'],
                    'default': 0,
                    'minimum': 0
                },
            },
            'additionalProperties': False,
            'required': ['dataset_id'],
        })

    consume_subscription = ActionType(
        name='consume_subscription',
        description='Consumes the next available dataset subscription elements',
        params_json_schema={
            'type': 'object',
            'properties': {
                'subscription_id': {
                    'type': 'string',
                    'description': 'The id of the subscription to consume'
                },
                's3_path_start_prefix_inclusive': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^s3://.+$',
                    'description': 'The inclusive s3 path start prefix'
                },
                's3_path_end_prefix_exclusive': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^s3://.+$',
                    'description': 'The exclusive s3 path end prefix'
                },
                's3_path_regex_filter': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description': 'A regex pattern the s3 path must match'
                },
                'target_schema_name': {
                    'type': ['string', 'null'],
                    'default': 'public',
                    'pattern': '^[a-zA-Z0-9_]+$',
                    'description': 'created if absent'
                },
                'target_table_name': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^[a-zA-Z0-9_]+$',
                    'description': 'overrides dataset setting'
                },
                'target_sort_keys': {
                    'type': 'array',
                    'default': [],
                    'maxItems': 400,
                    'description': 'overrides dataset setting',
                    'x-schema-form': {
                        'type': 'tabarray',
                        'title': "{{ value || 'sort_key ' + $index }}"
                    },
                    'items': {
                        'type': 'string',
                        'pattern': '^[a-zA-Z0-9_]+$',
                        'maxLength': 127
                    }
                },
                'target_distribution_key': {
                    'type': ['string', 'null'],
                    'default': None,
                    'pattern': '^[a-zA-Z0-9_]+$',
                    'description': 'overrides dataset setting'
                },
                'distribution_style': {
                    'type': 'string',
                    'default': 'EVEN',
                    'enum': ['EVEN', 'ALL'],
                    'description': 'ignored if dist_key is chosen'
                },
                'sort_keys_interleaved': {
                    'type': ['boolean', 'null'],
                    'default': False,
                    'description': 'see AWS Redshift docs'
                },
                'truncate_columns': {
                    'type': ['boolean', 'null'],
                    'default': True
                },
                'max_errors': {
                    'type': ['integer', 'null'],
                    'default': 0,
                    'minimum': 0
                },
                'batch_size': {
                    'type': ['integer', 'null'],
                    'default': 0,
                    'minimum': 0
                },
            },
            'additionalProperties': False,
            'required': ['subscription_id'],
        })

    copy_to_s3 = ActionType(
        name='copy_to_s3',
        description='exports the results of a sql statement to s3',
        params_json_schema={
            'type': 'object',
            'properties': {
                'delimiter': {
                    'type': ['string', 'null'],
                    'default': '\t',
                    'description': 'field delimiter'
                },
                'source_sql_statement': {
                    'type': 'string',
                    'x-schema-form': {'type': 'textarea'},
                    'description': 'the SQL SELECT statement to be executed'
                },
                'destination_s3_path': {
                    'type': 'string',
                    'pattern': '^s3://.+$',
                    'description': 'The destination s3 path, e.g. s3://bucket/prefix. The following values (with braces)'
                                   ' will be substituted with the appropriate zero-padded values at runtime: '
                                   '{YEAR}, {MONTH}, {DAY}, {HOUR}, {MINUTE}, {SECOND}'
                },
                'parallel': {
                    'type': 'boolean',
                    'default': True,
                    'description': 'if false, unload sequentially as one file'
                },
            },
            'additionalProperties': False,
            'required': ['source_sql_statement', 'destination_s3_path'],
        },
    )

    data_check = ActionType(
        name='data_check',
        description='Executes a user defined, SQL data check',
        params_json_schema={
            'type': 'object',
            'properties': {
                'sql_script': {
                    'type': 'string',
                    'x-schema-form': {'type': 'textarea'},
                    'description': 'this SQL should return one row that is true (for "passed") or false (for "failed")'
                },
            },
            'additionalProperties': False,
            'required': ['sql_script'],
        },
    )
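# The Redshift data_check above expects the supplied SQL to return a single
# row whose first column is true (pass) or false (fail). A sketch of how a
# runner might evaluate that, assuming a DB-API connection (e.g. psycopg2);
# the function name is illustrative, not dart's actual implementation:
def evaluate_data_check(conn, sql_script):
    """Run the check SQL and interpret the first cell of the first row."""
    cursor = conn.cursor()
    try:
        cursor.execute(sql_script)
        row = cursor.fetchone()
        # no row, or a falsy first column, means the check failed
        return bool(row and row[0])
    finally:
        cursor.close()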
class ElasticsearchActionTypes(object):
    data_check = ActionType(
        name='data_check',
        description='Executes a user defined, Elasticsearch data check',
        params_json_schema={
            'type': 'object',
            'properties': {
                'index': {
                    'type': 'string',
                    'default': '_all',
                    'description': 'The Elasticsearch index to perform the query on. Leave blank or explicitly set '
                                   'to "_all" to perform the query on all indices.'
                },
                'document_type': {
                    'type': ['string', 'null'],
                    'default': None,
                    'description': 'The Elasticsearch document type to perform the query on. Leave blank to perform '
                                   'the query on all document types.'
                },
                'query_body': {
                    'type': 'string',
                    'x-schema-form': {'type': 'textarea'},
                    'description': 'The Elasticsearch query should return a response that contains at least one '
                                   'result ("hits" in Elasticsearch terminology) for the data check to pass. '
                                   'https://www.elastic.co/guide/en/elasticsearch/reference/5.1/query-dsl.html'
                },
                'expected_count': {
                    'type': 'integer',
                    'default': 0,
                    'description': 'The expected count of documents to be returned by the query. Use this and the '
                                   'operator to return a truthy value for the data check to pass.'
                },
                'operator': {
                    'type': 'string',
                    'default': '>',
                    'description': 'The operator to apply to the result count and expected count, '
                                   'e.g. result count > expected count',
                    'enum': ['>', '>=', '<', '<=', '==', '!=']
                }
            },
            'additionalProperties': False,
            'required': ['query_body'],
        },
    )

    create_index = ActionType(
        name='create_index',
        description='Creates an Elasticsearch index',
        params_json_schema={
            'type': 'object',
            'properties': {
                'index': {
                    'type': 'string',
                    'default': '_all',
                    'description': 'The Elasticsearch index to create.'
                },
                'mapping': {
                    'type': 'string',
                    'x-schema-form': {'type': 'textarea'},
                    'description': 'The Elasticsearch index mapping.'
                },
            },
            'additionalProperties': False,
            'required': ['index'],
        },
    )

    create_mapping = ActionType(
        name='create_mapping',
        description='Creates an Elasticsearch mapping',
        params_json_schema={
            'type': 'object',
            'properties': {
                'index': {
                    'type': 'string',
                    'default': '_all',
                    'description': 'The Elasticsearch index to create the mapping for. Explicitly set to "_all" or '
                                   'leave blank to create the mapping for all indices.'
                },
                'document_type': {
                    'type': 'string',
                    'description': 'The Elasticsearch document type to create the mapping for.'
                },
                'mapping': {
                    'type': 'string',
                    'x-schema-form': {'type': 'textarea'},
                    'description': 'The Elasticsearch mapping.'
                },
            },
            'additionalProperties': False,
            'required': ['mapping', 'document_type'],
        },
    )

    create_template = ActionType(
        name='create_template',
        description='Creates an Elasticsearch template',
        params_json_schema={
            'type': 'object',
            'properties': {
                'template_name': {
                    'type': 'string',
                    'description': 'The Elasticsearch template name to create.'
                },
                'template': {
                    'type': 'string',
                    'x-schema-form': {'type': 'textarea'},
                    'description': 'The Elasticsearch template.'
                },
            },
            'additionalProperties': False,
            'required': ['template', 'template_name'],
        },
    )

    delete_index = ActionType(
        name='delete_index',
        description='Deletes an Elasticsearch index',
        params_json_schema={
            'type': 'object',
            'properties': {
                'index': {
                    'type': 'string',
                    'default': '_all',
                    'description': 'The Elasticsearch index to delete.'
                }
            },
            'additionalProperties': False,
            'required': ['index'],
        },
    )

    delete_template = ActionType(
        name='delete_template',
        description='Deletes an Elasticsearch template',
        params_json_schema={
            'type': 'object',
            'properties': {
                'template_name': {
                    'type': 'string',
                    'description': 'The Elasticsearch template name to delete.'
                },
            },
            'additionalProperties': False,
            'required': ['template_name'],
        },
    )

    force_merge_index = ActionType(
        name='force_merge_index',
        description='Force merges an Elasticsearch index',
        params_json_schema={
            'type': 'object',
            'properties': {
                'index': {
                    'type': 'string',
                    'default': '_all',
                    'description': 'A comma-separated list of index names; use "_all" or an empty string to perform '
                                   'the operation on all indices.'
                },
                'flush': {
                    'type': 'boolean',
                    'default': True,
                    'description': 'Specify whether the index should be flushed after performing the operation'
                },
                'allow_no_indices': {
                    'type': 'boolean',
                    'default': False,
                    'description': 'Whether to ignore if a wildcard indices expression resolves into no concrete '
                                   'indices. (This includes the "_all" string or when no indices have been specified)'
                },
                'expand_wildcards': {
                    'type': 'string',
                    'default': 'open',
                    'pattern': '^(open|closed|none|all)$',
                    'description': 'Whether to expand wildcard expressions to concrete indices that are open, closed, '
                                   'or both. The default is "open". Valid choices are: "open", "closed", "none", "all"'
                },
                'max_num_segments': {
                    'type': ['integer', 'null'],
                    'default': None,
                    'description': 'The number of segments the index should be merged into (default: dynamic)'
                },
                'only_expunge_deletes': {
                    'type': 'boolean',
                    'default': False,
                    'description': 'Specify whether the operation should only expunge deleted documents'
                },
                'wait_for_merge': {
                    'type': 'boolean',
                    'default': True,
                    'description': 'Specify whether the request should block until the merge process is finished'
                }
            },
            'additionalProperties': False,
            'required': ['index'],
        },
    )
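# The Elasticsearch data_check above compares the query's hit count to
# expected_count using one of six operators. The comparison itself maps
# cleanly onto Python's operator module; a sketch (hit_count would come from
# the query response, which is not shown here):
import operator

_OPERATORS = {
    '>': operator.gt,
    '>=': operator.ge,
    '<': operator.lt,
    '<=': operator.le,
    '==': operator.eq,
    '!=': operator.ne,
}


def data_check_passes(hit_count, op='>', expected_count=0):
    """Apply the configured operator, e.g. hit_count > expected_count."""
    return _OPERATORS[op](hit_count, expected_count)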