Ejemplo n.º 1
0
def add_s3_engine(config):
    """Save the ``s3_engine`` definition through the dart client.

    Connection settings, the docker image and the engine config are read from
    ``config['engines']['s3_engine']``.  When the client already lists an
    engine named ``s3_engine`` its id is passed along with the saved
    ``Engine`` (the last match wins); otherwise the id is ``None``.

    :param config: nested mapping providing ``engines.s3_engine``
        (``options`` with ``dart_host``, ``dart_port``, ``dart_api_version``
        and ``region``; ``docker_image``; ``config``) and ``dart``
        (``use_local_engines``, ``env_name``).
    :return: None; the id of the saved engine is logged.
    """
    settings = config['engines']['s3_engine']
    opts = settings['options']
    dart = Dart(opts['dart_host'], opts['dart_port'], opts['dart_api_version'])
    # presumably a type hint for IDEs/tooling rather than real validation
    assert isinstance(dart, Dart)

    _logger.info('saving s3 engine')

    # Gather the ids of every listed engine called 's3_engine'; the last one
    # is used, and None is passed on when there is no such engine.
    known_ids = [eng.id for eng in dart.get_engines() if eng.data.name == 's3_engine']
    engine_id = known_ids[-1] if known_ids else None

    if config['dart']['use_local_engines']:
        # No ECS task definition is sent when 'use_local_engines' is truthy.
        ecs_task_definition = None
    else:
        family = 'dart-%s-s3_engine' % config['dart']['env_name']
        container = {
            'name': 'dart-s3_engine',
            'cpu': 64,
            'memory': 256,
            'image': settings['docker_image'],
            'logConfiguration': {'logDriver': 'syslog'},
            'environment': [
                {'name': 'DART_ROLE', 'value': 'worker:engine_s3'},
                {'name': 'DART_CONFIG', 'value': settings['config']},
                {'name': 'AWS_DEFAULT_REGION', 'value': opts['region']},
            ],
            'mountPoints': [{
                'containerPath': '/mnt/ecs_agent_data',
                'sourceVolume': 'ecs-agent-data',
                'readOnly': True,
            }],
        }
        volume = {
            'host': {'sourcePath': '/var/lib/ecs/data'},
            'name': 'ecs-agent-data',
        }
        ecs_task_definition = {
            'family': family,
            'containerDefinitions': [container],
            'volumes': [volume],
        }

    engine_data = EngineData(
        name='s3_engine',
        description='For S3 FileCopy',
        options_json_schema={},
        supported_action_types=[S3ActionTypes.copy],
        ecs_task_definition=ecs_task_definition,
    )
    saved = dart.save_engine(engine=Engine(id=engine_id, data=engine_data))
    _logger.info('Saved s3_engine: %s' % saved.id)
Ejemplo n.º 2
0
def put_engine(engine):
    """Update ``engine`` with the engine described by the request's JSON body.

    :param engine: passed unchanged as the first argument of
        ``engine_service().update_engine``.
    :return: dict holding the updated engine's ``to_dict()`` under ``'results'``.
    """
    # The body is parsed before the service is looked up.
    replacement = Engine.from_dict(request.get_json())
    updated = engine_service().update_engine(engine, replacement)
    return {'results': updated.to_dict()}
Ejemplo n.º 3
0
def post_engine():
    """Save the engine described by the request's JSON body.

    :return: dict holding the saved engine's ``to_dict()`` under ``'results'``.
    """
    save = engine_service().save_engine
    payload = request.get_json()
    created = save(Engine.from_dict(payload))
    return {'results': created.to_dict()}
Ejemplo n.º 4
0
def add_redshift_engine(config):
    """Save the ``redshift_engine`` definition through the dart client.

    Connection settings, the docker image and the engine config are read from
    ``config['engines']['redshift_engine']``.  When the client already lists
    an engine named ``redshift_engine`` its id is passed along with the saved
    ``Engine`` (the last match wins); otherwise the id is ``None``.

    :param config: nested mapping providing ``engines.redshift_engine``
        (``options`` with ``dart_host``, ``dart_port``, ``dart_api_version``
        and ``region``; ``docker_image``; ``config``) and ``dart``
        (``use_local_engines``, ``env_name``).
    :return: None; the id of the saved engine is logged.
    """
    engine_config = config['engines']['redshift_engine']
    opts = engine_config['options']
    dart = Dart(opts['dart_host'], opts['dart_port'], opts['dart_api_version'])
    # presumably a type hint for IDEs/tooling rather than real validation
    assert isinstance(dart, Dart)

    _logger.info('saving redshift_engine')

    # Reuse the id of an already listed engine with this name (last match
    # wins); engine_id stays None when there is none.
    engine_id = None
    for e in dart.get_engines():
        if e.data.name == 'redshift_engine':
            engine_id = e.id

    # No ECS task definition is sent when 'use_local_engines' is truthy.
    ecs_task_definition = None if config['dart']['use_local_engines'] else {
        'family': 'dart-%s-redshift_engine' % config['dart']['env_name'],
        'containerDefinitions': [{
            'name': 'dart-redshift_engine',
            'cpu': 64,
            'memory': 256,
            'image': engine_config['docker_image'],
            'logConfiguration': {'logDriver': 'syslog'},
            'environment': [
                {'name': 'DART_ROLE', 'value': 'worker:engine_redshift'},
                {'name': 'DART_CONFIG', 'value': engine_config['config']},
                {'name': 'AWS_DEFAULT_REGION', 'value': opts['region']},
            ],
            'mountPoints': [{
                'containerPath': '/mnt/ecs_agent_data',
                'sourceVolume': 'ecs-agent-data',
                'readOnly': True,
            }],
        }],
        'volumes': [{
            'host': {'sourcePath': '/var/lib/ecs/data'},
            'name': 'ecs-agent-data',
        }],
    }

    # JSON schema describing the options accepted for this engine.
    options_json_schema = {
        'type': 'object',
        'properties': {
            'node_type': {
                'type': 'string',
                'default': 'ds2.xlarge',
                'enum': [
                    'ds1.xlarge', 'ds1.8xlarge', 'ds2.xlarge',
                    'ds2.8xlarge', 'dc1.large', 'dc1.8xlarge',
                ],
                'description': 'the type of each node',
            },
            'nodes': {
                'type': 'integer',
                'default': 2,
                'minimum': 2,
                'maximum': 10,
                'description': 'the number of nodes in this cluster',
            },
            'master_user_name': {
                'type': ['string', 'null'],
                'default': 'admin',
                'minLength': 1,
                'maxLength': 128,
                'pattern': '^[a-zA-Z]+[a-zA-Z0-9]*$',
                'description': 'the master user name for this redshift cluster',
            },
            'master_user_password': {
                'type': 'string',
                # NOTE(review): hard-coded placeholder default password; the
                # schema marks the field as required and secret -- confirm
                # that callers are forced to override it.
                'default': 'passw0rD--CHANGE-ME!',
                'minLength': 8,
                'maxLength': 64,
                # Raw string so \d, \/ and \s reach the regex verbatim instead
                # of relying on Python passing unknown string escapes through
                # (deprecated, and warned about by newer interpreters).  The
                # resulting value is unchanged.
                'pattern': r'''(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*['"\/@\s])''',
                'x-dart-secret': True,
                'description':
                    'the master user password for this redshift cluster (hidden and ignored after'
                    + ' initial save), see AWS docs for password requirements',
            },
            'master_db_name': {
                'type': ['string', 'null'],
                'default': 'dart',
                'minLength': 1,
                'maxLength': 64,
                'pattern': '^[a-z]+$',
                'description': 'the master database name for this redshift cluster',
            },
            'cluster_identifier': {
                'type': ['string', 'null'],
                'default': None,
                'minLength': 1,
                'maxLength': 63,
                'pattern': '^[a-zA-Z0-9-]*$',
                'description': 'this overrides the auto-generated dart cluster_identifier',
            },
            'preferred_maintenance_window': {
                'type': 'string',
                'default': 'sat:03:30-sat:04:00',
                'description': 'UTC time when automated cluster maintenance can occur',
            },
            'snapshot_retention': {
                'type': 'integer',
                'default': 2,
                'minimum': 1,
                'maximum': 10,
                'description': 'the maximum number of snapshots to keep, older ones will be deleted',
            },
        },
        'additionalProperties': False,
        'required': ['master_user_password'],
    }

    e1 = dart.save_engine(
        Engine(
            id=engine_id,
            data=EngineData(
                name='redshift_engine',
                description='For Redshift clusters',
                options_json_schema=options_json_schema,
                supported_action_types=[
                    RedshiftActionTypes.start_datastore,
                    RedshiftActionTypes.stop_datastore,
                    RedshiftActionTypes.execute_sql,
                    RedshiftActionTypes.load_dataset,
                    RedshiftActionTypes.consume_subscription,
                    RedshiftActionTypes.copy_to_s3,
                    RedshiftActionTypes.create_snapshot,
                    RedshiftActionTypes.data_check,
                ],
                ecs_task_definition=ecs_task_definition)))
    _logger.info('saved redshift_engine: %s' % e1.id)
Ejemplo n.º 5
0
def add_elasticsearch_engine(config):
    """Save the ``elasticsearch_engine`` definition through the dart client.

    Connection settings, the docker image and the engine config are read from
    ``config['engines']['elasticsearch_engine']``.  When the client already
    lists an engine named ``elasticsearch_engine`` its id is passed along
    with the saved ``Engine`` (the last match wins); otherwise the id is
    ``None``.

    :param config: nested mapping providing ``engines.elasticsearch_engine``
        (``options`` with ``dart_host``, ``dart_port``, ``dart_api_version``
        and ``region``; ``docker_image``; ``config``) and ``dart``
        (``use_local_engines``, ``env_name``).
    :return: None; the id of the saved engine is logged.
    """
    engine_config = config['engines']['elasticsearch_engine']
    opts = engine_config['options']
    dart = Dart(opts['dart_host'], opts['dart_port'], opts['dart_api_version'])
    # presumably a type hint for IDEs/tooling rather than real validation
    assert isinstance(dart, Dart)

    _logger.info('saving elasticsearch_engine')

    # Reuse the id of an already listed engine with this name (last match
    # wins); engine_id stays None when there is none.
    engine_id = None
    for e in dart.get_engines():
        if e.data.name == 'elasticsearch_engine':
            engine_id = e.id

    # No ECS task definition is sent when 'use_local_engines' is truthy.
    ecs_task_definition = None if config['dart']['use_local_engines'] else {
        'family': 'dart-%s-elasticsearch_engine' % config['dart']['env_name'],
        'containerDefinitions': [{
            'name': 'dart-elasticsearch_engine',
            'cpu': 64,
            'memory': 256,
            'image': engine_config['docker_image'],
            'logConfiguration': {'logDriver': 'syslog'},
            'environment': [
                {'name': 'DART_ROLE', 'value': 'worker:engine_elasticsearch'},
                {'name': 'DART_CONFIG', 'value': engine_config['config']},
                {'name': 'AWS_DEFAULT_REGION', 'value': opts['region']},
            ],
            'mountPoints': [{
                'containerPath': '/mnt/ecs_agent_data',
                'sourceVolume': 'ecs-agent-data',
                'readOnly': True,
            }],
        }],
        'volumes': [{
            'host': {'sourcePath': '/var/lib/ecs/data'},
            'name': 'ecs-agent-data',
        }],
    }

    # JSON schema describing the options accepted for this engine.
    options_json_schema = {
        'type': 'object',
        'properties': {
            'access_key_id': {
                'type': 'string',
                'default': '',
                # Either a 20 character key or the empty string.
                'oneOf': [
                    {
                        'minLength': 20,
                        'maxLength': 20,
                        # 20 character alpha numerical, all uppercase
                        'pattern': '^[A-Z0-9]{20}$',
                    },
                    {
                        'minLength': 0,
                        'maxLength': 0,
                    },
                ],
                'description':
                    'the access_key_id for accessing this elasticsearch cluster. '
                    + "Leave blank to use Dart's instance profile credentials",
            },
            'secret_access_key': {
                'type': 'string',
                'default': '',
                # Either the empty string or a 40 character secret.
                'oneOf': [
                    {
                        'minLength': 0,
                        'maxLength': 0,
                    },
                    {
                        'minLength': 40,
                        'maxLength': 40,
                        # from AWS Security Blog on how to match secret access keys. 40 characters
                        'pattern': '(?<![A-Za-z0-9/+=])[A-Za-z0-9/+=]{40}(?![A-Za-z0-9/+=])',
                    },
                ],
                'x-dart-secret': True,
                'description':
                    'the secret_access_key for accessing this elasticsearch cluster. '
                    + "Leave blank to use Dart's instance profile credentials",
            },
            'endpoint': {
                'type': 'string',
                'minLength': 1,
                'maxLength': 256,
                # Raw string so \- and \. reach the regex verbatim instead of
                # relying on Python passing unknown string escapes through
                # (deprecated, and warned about by newer interpreters).  The
                # resulting value is unchanged.
                'pattern': r'^[a-zA-Z0-9]+[a-zA-Z0-9\-\.]*\.es\.amazonaws\.com$',
                'description':
                    'The AWS Elasticsearch domain endpoint that you use to submit index and search requests.',
            },
        },
        'additionalProperties': False,
        'required': ['endpoint'],
    }

    e1 = dart.save_engine(
        Engine(
            id=engine_id,
            data=EngineData(
                name='elasticsearch_engine',
                description='For Elasticsearch clusters',
                options_json_schema=options_json_schema,
                supported_action_types=[
                    ElasticsearchActionTypes.data_check,
                    ElasticsearchActionTypes.create_index,
                    ElasticsearchActionTypes.create_mapping,
                    ElasticsearchActionTypes.create_template,
                    ElasticsearchActionTypes.delete_index,
                    ElasticsearchActionTypes.delete_template,
                    ElasticsearchActionTypes.force_merge_index,
                ],
                ecs_task_definition=ecs_task_definition)))
    _logger.info('saved elasticsearch_engine: %s' % e1.id)
Ejemplo n.º 6
0
def put_engine(engine):
    """Update ``engine`` with the engine described by the request's JSON body.

    :param engine: passed unchanged as the first argument of
        ``engine_service().update_engine``.
    :return: dict holding the updated engine's ``to_dict()`` under ``'results'``.
    """
    # Parse the request body first, then hand it to the engine service.
    incoming = Engine.from_dict(request.get_json())
    result = engine_service().update_engine(engine, incoming)
    return {'results': result.to_dict()}
Ejemplo n.º 7
0
def post_engine():
    """Save the engine described by the request's JSON body.

    :return: dict holding the saved engine's ``to_dict()`` under ``'results'``.
    """
    save_engine = engine_service().save_engine
    new_engine = Engine.from_dict(request.get_json())
    stored = save_engine(new_engine)
    return {'results': stored.to_dict()}
Ejemplo n.º 8
0
def put_engine(engine):
    """Update ``engine`` with the engine described by the request's JSON body.

    :param engine: passed unchanged as the first argument of
        ``engine_service().update_engine``.
    :return: dict holding the updated engine's ``to_dict()`` under ``'results'``.
    """
    # The service method is resolved before the request body is parsed.
    update = engine_service().update_engine
    replacement = Engine.from_dict(request.get_json())
    updated = update(engine, replacement)
    return {'results': updated.to_dict()}
Ejemplo n.º 9
0
def add_no_op_engine(config):
    """Save the ``no_op_engine`` definition through the dart client.

    Connection settings, the docker image and the engine config are read from
    ``config['engines']['no_op_engine']``.  When the client already lists an
    engine named ``no_op_engine`` its id is passed along with the saved
    ``Engine`` (the last match wins); otherwise the id is ``None``.

    :param config: nested mapping providing ``engines.no_op_engine``
        (``options`` with ``dart_host``, ``dart_port``, ``dart_api_version``
        and ``region``; ``docker_image``; ``config``) and ``dart``
        (``use_local_engines``, ``env_name``).
    :return: None; the id of the saved engine is logged.
    """
    settings = config['engines']['no_op_engine']
    opts = settings['options']
    dart = Dart(opts['dart_host'], opts['dart_port'], opts['dart_api_version'])
    # presumably a type hint for IDEs/tooling rather than real validation
    assert isinstance(dart, Dart)

    _logger.info('saving no_op_engine')

    # Ids of every listed engine with this name; the last one is used and
    # None is passed on when there is no such engine.
    known_ids = [eng.id for eng in dart.get_engines() if eng.data.name == 'no_op_engine']
    engine_id = known_ids[-1] if known_ids else None

    if config['dart']['use_local_engines']:
        # No ECS task definition is sent when 'use_local_engines' is truthy.
        ecs_task_definition = None
    else:
        family = 'dart-%s-no_op_engine' % config['dart']['env_name']
        environment = [
            {'name': 'DART_ROLE', 'value': 'worker:engine_no_op'},
            {'name': 'DART_CONFIG', 'value': settings['config']},
            {'name': 'AWS_DEFAULT_REGION', 'value': opts['region']},
        ]
        mount_point = {
            'containerPath': '/mnt/ecs_agent_data',
            'sourceVolume': 'ecs-agent-data',
            'readOnly': True,
        }
        container = {
            'name': 'dart-no_op_engine',
            'cpu': 64,
            'memory': 256,
            'image': settings['docker_image'],
            'logConfiguration': {'logDriver': 'syslog'},
            'environment': environment,
            'mountPoints': [mount_point],
        }
        ecs_task_definition = {
            'family': family,
            'containerDefinitions': [container],
            'volumes': [{
                'host': {'sourcePath': '/var/lib/ecs/data'},
                'name': 'ecs-agent-data',
            }],
        }

    # JSON schema describing the options accepted for this engine.
    schema = {
        'type': 'object',
        'properties': {
            'action_sleep_time_in_seconds': {
                'type': 'integer',
                'minimum': 0,
                'default': 5,
                'description': 'The time to sleep for each action before completing',
            },
        },
        'additionalProperties': False,
        'required': [],
    }
    action_types = [
        NoOpActionTypes.action_that_succeeds,
        NoOpActionTypes.action_that_fails,
        NoOpActionTypes.copy_hdfs_to_s3_action,
        NoOpActionTypes.load_dataset,
        NoOpActionTypes.consume_subscription,
    ]

    engine_data = EngineData(
        name='no_op_engine',
        description='Helps engineering test dart',
        options_json_schema=schema,
        supported_action_types=action_types,
        ecs_task_definition=ecs_task_definition,
    )
    saved = dart.save_engine(Engine(id=engine_id, data=engine_data))
    _logger.info('saved no_op_engine: %s' % saved.id)
Ejemplo n.º 10
0
def add_dynamodb_engine(config):
    """Save the ``dynamodb_engine`` definition through the dart client.

    Connection settings, the docker image and the engine config are read from
    ``config['engines']['dynamodb_engine']``.  When the client already lists
    an engine named ``dynamodb_engine`` its id is passed along with the saved
    ``Engine`` (the last match wins); otherwise the id is ``None``.

    :param config: nested mapping providing ``engines.dynamodb_engine``
        (``options`` with ``dart_host``, ``dart_port``, ``dart_api_version``
        and ``emr_region``; ``docker_image``; ``config``) and ``dart``
        (``use_local_engines``, ``env_name``).
    :return: None; the id of the saved engine is logged.
    """
    settings = config['engines']['dynamodb_engine']
    opts = settings['options']
    dart = Dart(opts['dart_host'], opts['dart_port'], opts['dart_api_version'])
    # presumably a type hint for IDEs/tooling rather than real validation
    assert isinstance(dart, Dart)

    _logger.info('saving dynamodb_engine')

    # Ids of every listed engine with this name; the last one is used and
    # None is passed on when there is no such engine.
    known_ids = [eng.id for eng in dart.get_engines() if eng.data.name == 'dynamodb_engine']
    engine_id = known_ids[-1] if known_ids else None

    if config['dart']['use_local_engines']:
        # No ECS task definition is sent when 'use_local_engines' is truthy.
        ecs_task_definition = None
    else:
        family = 'dart-%s-dynamodb_engine' % config['dart']['env_name']
        container = {
            'name': 'dart-dynamodb_engine',
            'cpu': 64,
            'memory': 256,
            'image': settings['docker_image'],
            'logConfiguration': {'logDriver': 'syslog'},
            'environment': [
                {'name': 'DART_ROLE', 'value': 'worker:engine_dynamodb'},
                {'name': 'DART_CONFIG', 'value': settings['config']},
                # NOTE(review): this engine reads 'emr_region' where the other
                # engines read 'region' -- confirm this is intentional.
                {'name': 'AWS_DEFAULT_REGION', 'value': opts['emr_region']},
            ],
            'mountPoints': [{
                'containerPath': '/mnt/ecs_agent_data',
                'sourceVolume': 'ecs-agent-data',
                'readOnly': True,
            }],
        }
        ecs_task_definition = {
            'family': family,
            'containerDefinitions': [container],
            'volumes': [{
                'host': {'sourcePath': '/var/lib/ecs/data'},
                'name': 'ecs-agent-data',
            }],
        }

    properties = {
        'dataset_id': {
            'type': 'string',
            'description': 'The id of the dataset on which the table is based',
        },
    }
    # The three optional overrides share one shape; each gets its own dict.
    for override in ('target_table_name', 'target_distribution_key', 'target_sort_key'):
        properties[override] = {
            'type': ['string', 'null'],
            'default': None,
            'pattern': '^[a-zA-Z0-9_]+$',
            'description': 'overrides dataset setting',
        }
    schema = {
        'type': 'object',
        'properties': properties,
        'additionalProperties': False,
        'required': ['dataset_id'],
    }

    engine_data = EngineData(
        name='dynamodb_engine',
        description='For DynamoDB tables',
        options_json_schema=schema,
        supported_action_types=[
            DynamoDBActionTypes.create_table,
            DynamoDBActionTypes.delete_table,
            DynamoDBActionTypes.load_dataset,
        ],
        ecs_task_definition=ecs_task_definition,
    )
    saved = dart.save_engine(Engine(id=engine_id, data=engine_data))
    _logger.info('saved dynamodb_engine: %s' % saved.id)
Ejemplo n.º 11
0
def add_emr_engine(config):
    """Save the ``emr_engine`` definition through the dart client.

    Connection settings, the docker image and the engine config are read from
    ``config['engines']['emr_engine']``.  When the client already lists an
    engine named ``emr_engine`` its id is passed along with the saved
    ``Engine`` (the last match wins); otherwise the id is ``None``.

    :param config: nested mapping providing ``engines.emr_engine``
        (``options`` with ``dart_host``, ``dart_port``, ``dart_api_version``
        and ``region``; ``docker_image``; ``config``) and ``dart``
        (``use_local_engines``, ``env_name``).
    :return: None; the id of the saved engine is logged.
    """
    engine_config = config['engines']['emr_engine']
    opts = engine_config['options']
    dart = Dart(opts['dart_host'], opts['dart_port'], opts['dart_api_version'])
    # presumably a type hint for IDEs/tooling rather than real validation
    assert isinstance(dart, Dart)

    _logger.info('saving emr_engine')

    # Reuse the id of an already listed engine with this name (last match
    # wins); engine_id stays None when there is none.
    engine_id = None
    for e in dart.get_engines():
        if e.data.name == 'emr_engine':
            engine_id = e.id

    # No ECS task definition is sent when 'use_local_engines' is truthy.
    ecs_task_definition = None if config['dart']['use_local_engines'] else {
        'family': 'dart-%s-emr_engine' % config['dart']['env_name'],
        'containerDefinitions': [{
            'name': 'dart-emr_engine',
            'cpu': 64,
            'memory': 256,
            'image': engine_config['docker_image'],
            'logConfiguration': {'logDriver': 'syslog'},
            'environment': [
                {'name': 'DART_ROLE', 'value': 'worker:engine_emr'},
                {'name': 'DART_CONFIG', 'value': engine_config['config']},
                {'name': 'AWS_DEFAULT_REGION', 'value': opts['region']},
            ],
            'mountPoints': [{
                'containerPath': '/mnt/ecs_agent_data',
                'sourceVolume': 'ecs-agent-data',
                'readOnly': True,
            }],
        }],
        'volumes': [{
            'host': {'sourcePath': '/var/lib/ecs/data'},
            'name': 'ecs-agent-data',
        }],
    }

    # JSON schema describing the options accepted for this engine.
    options_json_schema = {
        'type': 'object',
        'properties': {
            'release_label': {
                'type': 'string',
                # The dots are escaped so that only a literal '.' separates
                # the version digits; unescaped they matched any character
                # (e.g. 'emr-4x2y0').  The default below still matches.
                'pattern': r'^emr-[0-9]\.[0-9]\.[0-9]+$',
                'default': 'emr-4.2.0',
                'description': 'desired EMR release label',
            },
            'instance_type': {
                'readonly': True,
                'type': ['string', 'null'],
                'default': 'm3.2xlarge',
                'description': 'The ec2 instance type of master/core nodes',
            },
            'instance_count': {
                'type': ['integer', 'null'],
                'default': None,
                'minimum': 1,
                'maximum': 30,
                'description':
                    'The total number of nodes in this cluster (overrides data_to_freespace_ratio)',
            },
            'data_to_freespace_ratio': {
                'type': ['number', 'null'],
                'default': 0.5,
                'minimum': 0.0,
                'maximum': 1.0,
                'description': 'Desired ratio of HDFS data/free-space',
            },
            'dry_run': {
                'type': ['boolean', 'null'],
                'default': False,
                'description': 'write extra_data to actions, but do not actually run',
            },
        },
        'additionalProperties': False,
        'required': ['release_label'],
    }

    e1 = dart.save_engine(
        Engine(
            id=engine_id,
            data=EngineData(
                name='emr_engine',
                description='For EMR clusters that use Hive, Impala, Spark, etc.',
                options_json_schema=options_json_schema,
                supported_action_types=[
                    EmrActionTypes.start_datastore,
                    EmrActionTypes.terminate_datastore,
                    EmrActionTypes.load_dataset,
                    EmrActionTypes.consume_subscription,
                    EmrActionTypes.run_hive_script_action,
                    EmrActionTypes.run_impala_script_action,
                    EmrActionTypes.run_pyspark_script_action,
                    EmrActionTypes.copy_hdfs_to_s3_action,
                ],
                ecs_task_definition=ecs_task_definition)))
    _logger.info('saved emr_engine: %s' % e1.id)
Ejemplo n.º 12
0
def put_engine(engine):
    """Update ``engine`` with the engine described by the request's JSON body.

    :param engine: passed unchanged as the first argument of
        ``engine_service().update_engine``.
    :return: dict holding the updated engine's ``to_dict()`` under ``'results'``.
    """
    # The service method is resolved before the request body is parsed.
    apply_update = engine_service().update_engine
    incoming = Engine.from_dict(request.get_json())
    result = apply_update(engine, incoming)
    return {'results': result.to_dict()}