def run(self):
    """Create a partial dart environment from the input configuration.

    Loads the configuration at ``self.input_config_path`` with decryption
    suppressed, works on a deep copy of it, resolves the dart host via
    ``self._get_dart_host`` and hands the copy to ``self.create_partial``.
    """
    _logger.info('reading configuration...')
    loaded_config = configuration(self.input_config_path, suppress_decryption=True)
    # Work on a copy so the object returned by configuration() is not mutated.
    output_config = copy.deepcopy(loaded_config)
    dart_host = self._get_dart_host(output_config)
    _logger.info('setting up new dart partial environment: %s' % dart_host)
    self.create_partial(output_config)
    summary = (self.output_config_s3_path, dart_host)
    _logger.info('partial environment created with config: %s, url: %s' % summary)
Esempio n. 2
0
 def __init__(self, logger, configure_app_context=True):
     """Load the dart configuration named by the DART_CONFIG env variable.

     Args:
         logger: logger used to report which config path was loaded.
         configure_app_context: when true, an ``AppContext`` is built and
             stored on ``self.app_context``; otherwise that attribute is
             not set by this method.

     Raises:
         KeyError: if the DART_CONFIG environment variable is not set.
     """
     config_path = os.environ['DART_CONFIG']
     dart_config = configuration(config_path)
     self.dart_config = dart_config
     logging.config.dictConfig(dart_config['logging'])
     ecs_agent_data_path = dart_config['dart'].get('ecs_agent_data_path')
     set_dart_environment_variables(ecs_agent_data_path)
     logger.info('loaded config from path: %s' % config_path)
     if not configure_app_context:
         return
     # NOTE(review): the second positional argument is presumably the list of
     # injectable module paths to exclude - confirm against AppContext.
     self.app_context = AppContext(dart_config, ['dart.web'])
Esempio n. 3
0
 def run(self):
     """Create a partial dart environment from the input configuration.

     Loads the configuration at ``self.input_config_path`` with decryption
     suppressed, deep-copies it, resolves the dart host with the
     module-level ``_get_dart_host`` and passes the copy to
     ``self.create_partial``.
     """
     _logger.info('reading configuration...')
     source_config = configuration(self.input_config_path,
                                   suppress_decryption=True)
     # Work on a copy so the object returned by configuration() is not mutated.
     output_config = copy.deepcopy(source_config)
     dart_host = _get_dart_host(output_config)
     _logger.info('setting up new dart partial environment: %s' % dart_host)
     self.create_partial(output_config)
     created_details = (self.output_config_s3_path, dart_host)
     _logger.info(
         'partial environment created with config: %s, url: %s' % created_details)
Esempio n. 4
0
 def __init__(self, logger, configure_app_context=True):
     """Load the dart configuration named by the DART_CONFIG env variable.

     Args:
         logger: logger used for the "loaded" message and for the error
             emitted when DART_CONFIG is missing.
         configure_app_context: when true, an ``AppContext`` is built and
             stored on ``self.app_context``.

     Raises:
         ValueError: if DART_CONFIG is unset or empty.
     """
     config_path = os.getenv('DART_CONFIG')
     # Guard clause: nothing can be loaded without a config path.
     if not config_path:
         logger.error("missing DART_CONFIG env variable")
         raise ValueError("missing DART_CONFIG env variable")

     self.dart_config = configuration(config_path)
     logging.config.dictConfig(self.dart_config['logging'])
     dart_section = self.dart_config['dart']
     set_dart_environment_variables(dart_section.get('ecs_agent_data_path'))
     logger.info('loaded config from path: %s' % config_path)
     if configure_app_context:
         self.app_context = AppContext(self.dart_config, ['dart.web'])
Esempio n. 5
0
 def __init__(self, logger, configure_app_context=True):
     """Initialise from the configuration file referenced by DART_CONFIG.

     Args:
         logger: logger that receives the load confirmation, or the error
             when the environment variable is missing.
         configure_app_context: if true, ``self.app_context`` is created
             from the loaded configuration.

     Raises:
         ValueError: if DART_CONFIG is unset or empty.
     """
     config_path = os.getenv('DART_CONFIG')
     if not config_path:
         missing_message = "missing DART_CONFIG env variable"
         logger.error(missing_message)
         raise ValueError(missing_message)

     loaded = configuration(config_path)
     self.dart_config = loaded
     logging.config.dictConfig(loaded['logging'])
     agent_data_path = loaded['dart'].get('ecs_agent_data_path')
     set_dart_environment_variables(agent_data_path)
     logger.info('loaded config from path: %s' % config_path)
     if configure_app_context:
         self.app_context = AppContext(loaded, ['dart.web'])
Esempio n. 6
0
        onelogin_server=config.get('auth').get('onelogin_server'),
        private_key=config.get('auth').get('private_key'),
        x509cert=config.get('auth').get('x509cert'),
        dart_server=config.get('auth').get('dart_server')
    )

    f = open('./ui/onelogin' + '/settings.json', 'w')
    f.write(file_str)
    f.close()




# URL prefix for version 1 of the API (presumably prepended to routes
# registered elsewhere - confirm).
api_version_prefix = '/api/1'
# DART_CONFIG must be set: a missing variable raises KeyError as soon as this
# module-level code runs.
config_path = os.environ['DART_CONFIG']
config = configuration(config_path)
# Apply the 'logging' section of the config before the first log line below.
logging.config.dictConfig(config['logging'])
_logger.info('loaded config from path: %s' % config_path)


# Flask application; templates and static assets are served from ui/.
app = Flask(__name__, template_folder='ui/templates', static_folder='ui/static')

# Attach the dart AppContext to the Flask app, excluding the three
# dart.message listener modules listed below from injection.
app.dart_context = AppContext(
    config=config,
    exclude_injectable_module_paths=[
        'dart.message.engine_listener',
        'dart.message.trigger_listener',
        'dart.message.subscription_listener'
    ]
)
Esempio n. 7
0
                            'type':
                            'string',
                            'minLength':
                            1,
                            'maxLength':
                            256,
                            'pattern':
                            '^[a-zA-Z0-9]+[a-zA-Z0-9\-\.]*\.es\.amazonaws\.com$',
                            'description':
                            'The AWS Elasticsearch domain endpoint that you use to submit index and search requests.'
                        },
                    },
                    'additionalProperties': False,
                    'required': ['endpoint']
                },
                supported_action_types=[
                    ElasticsearchActionTypes.data_check,
                    ElasticsearchActionTypes.create_index,
                    ElasticsearchActionTypes.create_mapping,
                    ElasticsearchActionTypes.create_template,
                    ElasticsearchActionTypes.delete_index,
                    ElasticsearchActionTypes.delete_template,
                    ElasticsearchActionTypes.force_merge_index,
                ],
                ecs_task_definition=ecs_task_definition)))
    _logger.info('saved elasticsearch_engine: %s' % e1.id)


if __name__ == '__main__':
    # Script entry point: load the config named by DART_CONFIG (KeyError if
    # unset) and pass it to add_elasticsearch_engine.
    dart_config_path = os.environ['DART_CONFIG']
    add_elasticsearch_engine(configuration(dart_config_path))
Esempio n. 8
0
    def run(self):
        """Provision a full dart environment and publish its configuration.

        Loads the input configuration (decryption suppressed), works on a
        deep copy of it, and then, in order:

        1. verifies that the config/data s3 buckets do not already exist
           (unless 's3' is listed in ``self.stacks_to_skip``),
        2. creates the iam and sns stacks and reads their outputs,
        3. ensures the subscription queue exists, fills in the s3 stack
           parameters and creates the s3 and logs stacks,
        4. creates a kms key, or updates the policy of the configured one,
        5. writes the key arn, secrets path, iam profiles/roles, docker
           image references and DartConfig path into the configuration,
        6. creates missing ECR repositories and refreshes the dart policy
           statement on every repository,
        7. waits for the logs and s3 stacks, calls
           ``self._handle_docker_concerns`` and finally
           ``self.create_partial``.

        Raises:
            Exception: if an existing bucket name matches the config or data
                bucket name (compared case-insensitively).
            ClientError: for any ECR ``describe_repositories`` failure other
                than ``RepositoryNotFoundException``.
        """
        _logger.info('reading configuration...')
        output_config = copy.deepcopy(
            configuration(self.input_config_path, suppress_decryption=True))
        dart_host = _get_dart_host(output_config)
        _logger.info('setting up new dart full environment: %s' % dart_host)

        _logger.info('verifying s3 buckets do not exist')
        config_bucket_name = output_config['s3']['config_bucket']
        data_bucket_name = output_config['s3']['data_bucket']
        if 's3' not in self.stacks_to_skip:
            # Bucket names are compared case-insensitively.
            reserved_names = {config_bucket_name.lower(),
                              data_bucket_name.lower()}
            buckets = boto3.client('s3').list_buckets()['Buckets']
            if any(b['Name'].lower() in reserved_names for b in buckets):
                raise Exception('s3 config and/or data bucket already exists!')

        _logger.info('creating initial stacks')
        aws_account_id = output_config['dart']['aws_account']
        replacements = {
            '{DART_REGION}': output_config['dart']['region'],
            '{DART_AWS_ACCOUNT}': aws_account_id,
            '{DART_QUEUE_PREFIX}': output_config['dart']['queue_prefix'],
            '{DART_CONFIG_BUCKET}': config_bucket_name,
            '{DART_DATA_BUCKET}': data_bucket_name,
        }
        iam_stack_name = self._create_stack('iam', output_config, replacements)
        sns_stack_name = self._create_stack('sns', output_config)

        _logger.info('waiting for stack completion')
        # NOTE(review): the numeric argument is presumably the expected number
        # of stack outputs - confirm against the helper.
        iam_outputs = self._wait_for_stack_completion_and_get_outputs(
            iam_stack_name, 7)
        sns_outputs = self._wait_for_stack_completion_and_get_outputs(
            sns_stack_name, 1)

        def iam_output(output_key):
            # Look up a single iam stack output value by its OutputKey.
            return _get_element(iam_outputs, 'OutputKey',
                                output_key)['OutputValue']

        uds_inpf_role = iam_output('UdsInstanceProfileRole')
        uds_ec2_inpf = iam_output('UdsEc2InstanceProfile')
        uds_ec2_inpf_role = iam_output('UdsEc2InstanceProfileRole')
        ecs_container_inpf = iam_output('EcsContainerInstanceProfile')
        ecs_container_inpf_role = iam_output('EcsContainerInstanceProfileRole')
        ecs_service_role = iam_output('EcsServiceRole')
        sns_arn = sns_outputs[0]['OutputValue']

        _logger.info('updating configuration with sns arn')
        self._set_cfn_boto_param_value(output_config, 'logs', 'AlarmActions',
                                       sns_arn)

        _logger.info(
            'updating configuration with subscription queue urls/arns')
        subscription_queue_arn, subscription_queue_url = \
            self._ensure_queue_exists(output_config, 'subscription_queue')
        s3_params = output_config['cloudformation_stacks']['s3']['boto_args'][
            'Parameters']
        for param_key, param_value in (
                ('DartConfigBucket', config_bucket_name),
                ('DartDataBucket', data_bucket_name),
                ('SubscriptionQueueUrl', subscription_queue_url),
                ('SubscriptionQueueArn', subscription_queue_arn),
        ):
            _get_element(s3_params, 'ParameterKey',
                         param_key)['ParameterValue'] = param_value

        _logger.info('creating s3 and logs stacks')
        s3_stack_name = self._create_stack('s3', output_config)
        logs_stack_name = self._create_stack('logs', output_config)

        _logger.info('creating/updating kms key')
        key_policy_path = dart_root_relative_path(
            output_config['kms']['key_policy_template'])
        with open(key_policy_path) as f:
            key_policy = json.load(f)
        kms_authorized_users = [
            self._role_arn(ecs_container_inpf_role, aws_account_id)
        ]
        kms_authorized_users.extend(output_config['dart']['kms_key_user_arns'])
        # The template is addressed by statement position (0..3).
        key_statements = key_policy['Statement']
        key_statements[0]['Principal']['AWS'] = (
            'arn:aws:iam::%s:root' % aws_account_id)
        key_statements[1]['Principal']['AWS'] = output_config['dart'][
            'kms_key_admin_arns']
        key_statements[2]['Principal']['AWS'] = kms_authorized_users
        key_statements[3]['Principal']['AWS'] = kms_authorized_users
        key_policy_text = json.dumps(key_policy)

        kms_client = boto3.client('kms')
        key_arn = output_config['kms']['key_arn']
        if key_arn and key_arn != '...TBD...':
            # A real key arn is configured: only refresh its policy.
            kms_client.put_key_policy(KeyId=key_arn,
                                      PolicyName='default',
                                      Policy=key_policy_text)
        else:
            key_arn = kms_client.create_key(
                Policy=key_policy_text)['KeyMetadata']['Arn']
            alias = 'alias/dart-%s-secrets' % self.environment_name
            kms_client.create_alias(AliasName=alias, TargetKeyId=key_arn)

        _logger.info(
            'updating configuration with kms key arn and secrets path, etc')
        eng_cfg = output_config['engines']
        eng_cfg['redshift_engine']['options']['kms_key_arn'] = key_arn
        secrets_config = get_secrets_config(output_config)
        values = (config_bucket_name, self.environment_name)
        secrets_s3_path = 's3://%s/secrets/%s' % values
        secrets_config['secrets_s3_path'] = secrets_s3_path
        secrets_config['kms_key_arn'] = key_arn
        eng_cfg['redshift_engine']['options'][
            'secrets_s3_path'] = secrets_s3_path
        output_config['dart'][
            's3_datastores_root'] = 's3://%s/datastores/%s' % values

        _logger.info('updating configuration with iam profiles/roles')
        eng_cfg['emr_engine']['options']['instance_profile'] = uds_ec2_inpf
        eng_cfg['emr_engine']['options']['service_role'] = uds_inpf_role
        eng_cfg['dynamodb_engine']['options'][
            'emr_instance_profile'] = uds_ec2_inpf
        eng_cfg['dynamodb_engine']['options'][
            'emr_service_role'] = uds_inpf_role
        for stack, param_key, param_value in (
                ('engine-taskrunner', 'IamInstanceProfile', ecs_container_inpf),
                ('engine-worker', 'IamInstanceProfile', ecs_container_inpf),
                ('subscription-worker', 'IamInstanceProfile', ecs_container_inpf),
                ('trigger-worker', 'IamInstanceProfile', ecs_container_inpf),
                ('web-internal', 'IamInstanceProfile', ecs_container_inpf),
                ('web-internal', 'WebEcsServiceRoleName', ecs_service_role),
                ('web', 'IamInstanceProfile', ecs_container_inpf),
                ('web', 'WebEcsServiceRoleName', ecs_service_role),
        ):
            self._set_cfn_boto_param_value(output_config, stack, param_key,
                                           param_value)

        _logger.info('creating ECR repos')
        ecr_client = boto3.client('ecr')
        all_repo_names = [
            self._full_repo_name(r, output_config)
            for r in output_config['ecr']['repo_names']
        ]
        existing_repo_names = []
        for repo_name in all_repo_names:
            try:
                ecr_client.describe_repositories(repositoryNames=[repo_name])
            except ClientError as e:
                if e.response['Error'][
                        'Code'] == 'RepositoryNotFoundException':
                    continue
                # Bare raise keeps the original traceback intact.
                raise
            existing_repo_names.append(repo_name)
        missing_repo_names = set(all_repo_names) - set(existing_repo_names)
        ecr_policy_path = dart_root_relative_path(
            output_config['ecr']['policy_template'])
        with open(ecr_policy_path) as f:
            initial_policy = json.load(f)
        initial_policy['Statement'][0]['Principal']['AWS'] = output_config[
            'dart']['ecr_authorized_user_arns']
        initial_policy_text = json.dumps(initial_policy)
        for repo_name in missing_repo_names:
            ecr_client.create_repository(repositoryName=repo_name)
            ecr_client.set_repository_policy(repositoryName=repo_name,
                                             policyText=initial_policy_text)

        _logger.info('updating ECR repo policies')
        ecr_policy_statement_sid = (
            'dart-%s-ecs-and-uds-permissions' % self.environment_name)
        ecs_container_inpf_role_arn = self._role_arn(ecs_container_inpf_role,
                                                     aws_account_id)
        uds_ec2_inpf_role_arn = self._role_arn(uds_ec2_inpf_role,
                                               aws_account_id)
        dart_statement = {
            'Sid': ecr_policy_statement_sid,
            'Effect': 'Allow',
            'Principal': {
                'AWS': [ecs_container_inpf_role_arn, uds_ec2_inpf_role_arn]
            },
            'Action': [
                'ecr:GetDownloadUrlForLayer', 'ecr:BatchGetImage',
                'ecr:BatchCheckLayerAvailability',
                'ecr:GetAuthorizationToken'
            ]
        }
        for repo_name in all_repo_names:
            repo_policy = json.loads(
                ecr_client.get_repository_policy(
                    repositoryName=repo_name)['policyText'])
            # Drop any previous copy of the dart statement before appending
            # the fresh one. Filtering by Sid also removes a statement sitting
            # at index 0 (a truthiness test on the index would skip it) and
            # tolerates statements that carry no Sid at all.
            kept_statements = [
                statement for statement in repo_policy['Statement']
                if statement.get('Sid') != ecr_policy_statement_sid
            ]
            kept_statements.append(dart_statement)
            repo_policy['Statement'] = kept_statements
            ecr_client.set_repository_policy(
                repositoryName=repo_name,
                policyText=json.dumps(repo_policy))

        _logger.info('updating configuration with docker image references')
        output_config['local_setup'][
            'elasticmq_docker_image'] = self._docker_image(
                'elasticmq', output_config)
        eng_cfg['no_op_engine']['docker_image'] = self._docker_image(
            'engine-no_op', output_config)
        eng_cfg['emr_engine']['docker_image'] = self._docker_image(
            'engine-emr', output_config)
        eng_cfg['emr_engine']['options'][
            'impala_docker_repo_base_url'] = self._ecr_base_url(output_config)
        eng_cfg['dynamodb_engine']['docker_image'] = self._docker_image(
            'engine-dynamodb', output_config)
        eng_cfg['dynamodb_engine']['options'][
            'emr_impala_docker_repo_base_url'] = self._ecr_base_url(
                output_config)
        eng_cfg['redshift_engine']['docker_image'] = self._docker_image(
            'engine-redshift', output_config)
        ew_image = self._docker_image('engine-worker', output_config)
        sw_image = self._docker_image('subscription-worker', output_config)
        tw_image = self._docker_image('trigger-worker', output_config)
        fl_image = self._docker_image('flask', output_config)
        nx_image = self._docker_image('nginx', output_config)
        cwl_image = self._docker_image('cloudwatchlogs', output_config)
        for stack, param_key, param_value in (
                ('engine-taskrunner', 'CloudWatchLogsDockerImage', cwl_image),
                ('engine-worker', 'EngineWorkerDockerImage', ew_image),
                ('engine-worker', 'CloudWatchLogsDockerImage', cwl_image),
                ('subscription-worker', 'SubscriptionWorkerDockerImage', sw_image),
                ('subscription-worker', 'CloudWatchLogsDockerImage', cwl_image),
                ('trigger-worker', 'TriggerWorkerDockerImage', tw_image),
                ('trigger-worker', 'CloudWatchLogsDockerImage', cwl_image),
                ('web-internal', 'FlaskWorkerDockerImage', fl_image),
                ('web-internal', 'NginxWorkerDockerImage', nx_image),
                ('web-internal', 'CloudWatchLogsDockerImage', cwl_image),
                ('web', 'FlaskWorkerDockerImage', fl_image),
                ('web', 'NginxWorkerDockerImage', nx_image),
                ('web', 'CloudWatchLogsDockerImage', cwl_image),
        ):
            self._set_cfn_boto_param_value(output_config, stack, param_key,
                                           param_value)

        _logger.info('updating configuration with DartConfig references')
        for stack in ('engine-worker', 'subscription-worker',
                      'trigger-worker', 'web-internal', 'web'):
            self._set_cfn_boto_param_value(output_config, stack, 'DartConfig',
                                           self.output_config_s3_path)
        for engine_name in ('no_op_engine', 'emr_engine', 'dynamodb_engine',
                            'redshift_engine'):
            eng_cfg[engine_name]['config'] = self.output_config_s3_path

        _logger.info('waiting for logs stack')
        logs_outputs = self._wait_for_stack_completion_and_get_outputs(
            logs_stack_name, 2)
        syslog_log_group_name = _get_element(logs_outputs, 'OutputKey',
                                             'DartSyslog')['OutputValue']
        misc_log_group_name = _get_element(logs_outputs, 'OutputKey',
                                           'DartMisc')['OutputValue']

        self._handle_docker_concerns(cwl_image, eng_cfg, misc_log_group_name,
                                     output_config, syslog_log_group_name)

        _logger.info('waiting for s3 stack')
        self._wait_for_stack_completion_and_get_outputs(s3_stack_name)

        self.create_partial(output_config)

        _logger.info('full environment created with config: %s, url: %s' %
                     (self.output_config_s3_path, dart_host))
Esempio n. 9
0
                            'pattern': '^[a-zA-Z0-9_]+$',
                            'description': 'overrides dataset setting'
                        },
                        'target_distribution_key': {
                            'type': ['string', 'null'],
                            'default': None,
                            'pattern': '^[a-zA-Z0-9_]+$',
                            'description': 'overrides dataset setting'
                        },
                        'target_sort_key': {
                            'type': ['string', 'null'],
                            'default': None,
                            'pattern': '^[a-zA-Z0-9_]+$',
                            'description': 'overrides dataset setting'
                        },
                    },
                    'additionalProperties': False,
                    'required': ['dataset_id'],
                },
                supported_action_types=[
                    DynamoDBActionTypes.create_table,
                    DynamoDBActionTypes.delete_table,
                    DynamoDBActionTypes.load_dataset,
                ],
                ecs_task_definition=ecs_task_definition)))
    _logger.info('saved dynamodb_engine: %s' % e1.id)


if __name__ == '__main__':
    # Script entry point: load the config named by DART_CONFIG (KeyError if
    # unset) and pass it to add_dynamodb_engine.
    dart_config_path = os.environ['DART_CONFIG']
    add_dynamodb_engine(configuration(dart_config_path))
Esempio n. 10
0
                    'description': 'the secret_access_key for accessing this elasticsearch cluster. '
                                   + 'Leave blank to use Dart\'s instance profile credentials'
                },
                'endpoint': {
                    'type': 'string',
                    'minLength': 1,
                    'maxLength': 256,
                    'pattern': '^[a-zA-Z0-9]+[a-zA-Z0-9\-\.]*\.es\.amazonaws\.com$',
                    'description': 'The AWS Elasticsearch domain endpoint that you use to submit index and search requests.'
                },
            },
            'additionalProperties': False,
            'required': ['endpoint']
        },
        supported_action_types=[
            ElasticsearchActionTypes.data_check,
            ElasticsearchActionTypes.create_index,
            ElasticsearchActionTypes.create_mapping,
            ElasticsearchActionTypes.create_template,
            ElasticsearchActionTypes.delete_index,
            ElasticsearchActionTypes.delete_template,
            ElasticsearchActionTypes.force_merge_index,
        ],
        ecs_task_definition=ecs_task_definition
    )))
    _logger.info('saved elasticsearch_engine: %s' % e1.id)


if __name__ == '__main__':
    # Script entry point: DART_CONFIG must name the config file to load
    # (KeyError if unset); the loaded config goes to add_elasticsearch_engine.
    loaded_config = configuration(os.environ['DART_CONFIG'])
    add_elasticsearch_engine(loaded_config)
Esempio n. 11
0
                Action(id=Ref.action(7),
                       data=ActionData(
                           name=NoOpActionTypes.action_that_fails.name,
                           action_type_name=NoOpActionTypes.action_that_fails.
                           name,
                           engine_name='no_op_engine',
                           workflow_id=Ref.workflow(2),
                           order_idx=3,
                           state=ActionState.TEMPLATE,
                       )),
            ],
            triggers=[
                Trigger(id=Ref.trigger(1),
                        data=TriggerData(
                            name='no-op-trigger-workflow-completion',
                            trigger_type_name=workflow_completion_trigger.name,
                            workflow_ids=[Ref.workflow(2)],
                            state=TriggerState.ACTIVE,
                            args={'completed_workflow_id': Ref.workflow(1)})),
            ],
        ))
    ]

    for e in subgraph_definitions:
        s = dart.save_subgraph_definition(e, engine_id)
        _logger.info('created subgraph_definition: %s' % s.id)


if __name__ == '__main__':
    # Script entry point: load the config named by DART_CONFIG (KeyError if
    # unset) and pass it to add_no_op_engine_sub_graphs.
    dart_config_path = os.environ['DART_CONFIG']
    add_no_op_engine_sub_graphs(configuration(dart_config_path))
    def run(self):
        _logger.info('reading configuration...')
        output_config = copy.deepcopy(configuration(self.input_config_path, suppress_decryption=True))
        dart_host = _get_dart_host(output_config)
        _logger.info('setting up new dart full environment: %s' % dart_host)

        _logger.info('verifying s3 buckets do not exist')
        config_bucket_name = output_config['s3']['config_bucket']
        data_bucket_name = output_config['s3']['data_bucket']
        if 's3' not in self.stacks_to_skip:
            iequals = lambda s1, s2: s1.lower() == s2.lower()
            buckets = [b['Name'] for b in boto3.client('s3').list_buckets()['Buckets']]
            matches = [b for b in buckets if (iequals(b, config_bucket_name) or iequals(b, data_bucket_name))]
            if len(matches) > 0:
                raise Exception('s3 config and/or data bucket already exists!')

        _logger.info('creating initial stacks')
        aws_account_id = output_config['dart']['aws_account']
        replacements = {
            '{DART_REGION}': output_config['dart']['region'],
            '{DART_AWS_ACCOUNT}': aws_account_id,
            '{DART_QUEUE_PREFIX}': output_config['dart']['queue_prefix'],
            '{DART_CONFIG_BUCKET}': output_config['s3']['config_bucket'],
            '{DART_DATA_BUCKET}': output_config['s3']['data_bucket'],
        }
        iam_stack_name = self._create_stack('iam', output_config, replacements)
        sns_stack_name = self._create_stack('sns', output_config)

        _logger.info('waiting for stack completion')
        iam_outputs = self._wait_for_stack_completion_and_get_outputs(iam_stack_name, 7)
        sns_outputs = self._wait_for_stack_completion_and_get_outputs(sns_stack_name, 1)

        uds_inpf_role = _get_element(iam_outputs, 'OutputKey', 'UdsInstanceProfileRole')['OutputValue']
        uds_ec2_inpf = _get_element(iam_outputs, 'OutputKey', 'UdsEc2InstanceProfile')['OutputValue']
        uds_ec2_inpf_role = _get_element(iam_outputs, 'OutputKey', 'UdsEc2InstanceProfileRole')['OutputValue']
        ecs_container_inpf = _get_element(iam_outputs, 'OutputKey', 'EcsContainerInstanceProfile')['OutputValue']
        ecs_container_inpf_role = _get_element(iam_outputs, 'OutputKey', 'EcsContainerInstanceProfileRole')['OutputValue']
        ecs_service_role = _get_element(iam_outputs, 'OutputKey', 'EcsServiceRole')['OutputValue']
        sns_arn = sns_outputs[0]['OutputValue']

        _logger.info('updating configuration with sns arn')
        self._set_cfn_boto_param_value(output_config, 'logs', 'AlarmActions', sns_arn)

        _logger.info('updating configuration with subscription queue urls/arns')
        subscription_queue_arn, subscription_queue_url = self._ensure_queue_exists(output_config, 'subscription_queue')
        s3_params = output_config['cloudformation_stacks']['s3']['boto_args']['Parameters']
        _get_element(s3_params, 'ParameterKey', 'DartConfigBucket')['ParameterValue'] = config_bucket_name
        _get_element(s3_params, 'ParameterKey', 'DartDataBucket')['ParameterValue'] = data_bucket_name
        _get_element(s3_params, 'ParameterKey', 'SubscriptionQueueUrl')['ParameterValue'] = subscription_queue_url
        _get_element(s3_params, 'ParameterKey', 'SubscriptionQueueArn')['ParameterValue'] = subscription_queue_arn

        _logger.info('creating s3 and logs stacks')
        s3_stack_name = self._create_stack('s3', output_config)
        logs_stack_name = self._create_stack('logs', output_config)

        _logger.info('creating/updating kms key')
        with open(dart_root_relative_path(output_config['kms']['key_policy_template'])) as f:
            policy = json.load(f)
            kms_authorized_users = [self._role_arn(ecs_container_inpf_role, aws_account_id)]
            kms_authorized_users.extend(output_config['dart']['kms_key_user_arns'])
            policy['Statement'][0]['Principal']['AWS'] = 'arn:aws:iam::%s:root' % aws_account_id
            policy['Statement'][1]['Principal']['AWS'] = output_config['dart']['kms_key_admin_arns']
            policy['Statement'][2]['Principal']['AWS'] = kms_authorized_users
            policy['Statement'][3]['Principal']['AWS'] = kms_authorized_users
            policy_text = json.dumps(policy)
        kms_client = boto3.client('kms')
        key_arn = output_config['kms']['key_arn']
        if key_arn and key_arn != '...TBD...':
            kms_client.put_key_policy(KeyId=key_arn, PolicyName='default', Policy=policy_text)
        else:
            key_arn = kms_client.create_key(Policy=policy_text)['KeyMetadata']['Arn']
            alias = 'alias/dart-%s-secrets' % self.environment_name
            kms_client.create_alias(AliasName=alias, TargetKeyId=key_arn)

        _logger.info('updating configuration with kms key arn and secrets path, etc')
        output_config['engines']['redshift_engine']['options']['kms_key_arn'] = key_arn
        secrets_config = get_secrets_config(output_config)
        values = (config_bucket_name, self.environment_name)
        secrets_s3_path = 's3://%s/secrets/%s' % values
        secrets_config['secrets_s3_path'] = secrets_s3_path
        secrets_config['kms_key_arn'] = key_arn
        eng_cfg = output_config['engines']
        eng_cfg['redshift_engine']['options']['secrets_s3_path'] = secrets_s3_path
        output_config['dart']['s3_datastores_root'] = 's3://%s/datastores/%s' % values

        _logger.info('updating configuration with iam profiles/roles')
        output_config['engines']['emr_engine']['options']['instance_profile'] = uds_ec2_inpf
        output_config['engines']['emr_engine']['options']['service_role'] = uds_inpf_role
        output_config['engines']['dynamodb_engine']['options']['emr_instance_profile'] = uds_ec2_inpf
        output_config['engines']['dynamodb_engine']['options']['emr_service_role'] = uds_inpf_role
        self._set_cfn_boto_param_value(output_config, 'engine-taskrunner', 'IamInstanceProfile', ecs_container_inpf)
        self._set_cfn_boto_param_value(output_config, 'engine-worker', 'IamInstanceProfile', ecs_container_inpf)
        self._set_cfn_boto_param_value(output_config, 'subscription-worker', 'IamInstanceProfile', ecs_container_inpf)
        self._set_cfn_boto_param_value(output_config, 'trigger-worker', 'IamInstanceProfile', ecs_container_inpf)
        self._set_cfn_boto_param_value(output_config, 'web-internal', 'IamInstanceProfile', ecs_container_inpf)
        self._set_cfn_boto_param_value(output_config, 'web-internal', 'WebEcsServiceRoleName', ecs_service_role)
        self._set_cfn_boto_param_value(output_config, 'web', 'IamInstanceProfile', ecs_container_inpf)
        self._set_cfn_boto_param_value(output_config, 'web', 'WebEcsServiceRoleName', ecs_service_role)

        _logger.info('creating ECR repos')
        ecr_client = boto3.client('ecr')
        all_repo_names = [self._full_repo_name(r, output_config) for r in output_config['ecr']['repo_names']]
        existing_repo_names = []
        for repo_name in all_repo_names:
            try:
                ecr_client.describe_repositories(repositoryNames=[repo_name])
                existing_repo_names.append(repo_name)
            except ClientError as e:
                if e.response['Error']['Code'] == 'RepositoryNotFoundException':
                    continue
                raise e
        missing_repo_names = set(all_repo_names) - set(existing_repo_names)
        with open(dart_root_relative_path(output_config['ecr']['policy_template'])) as f:
            initial_policy = json.load(f)
            initial_policy['Statement'][0]['Principal']['AWS'] = output_config['dart']['ecr_authorized_user_arns']
            initial_policy_text = json.dumps(initial_policy)
        for repo_name in missing_repo_names:
            ecr_client.create_repository(repositoryName=repo_name)
            ecr_client.set_repository_policy(repositoryName=repo_name, policyText=initial_policy_text)

        _logger.info('updating ECR repo policies')
        ecr_policy_statement_sid = 'dart-%s-ecs-and-uds-permissions' % self.environment_name
        ecs_container_inpf_role_arn = self._role_arn(ecs_container_inpf_role, aws_account_id)
        uds_ec2_inpf_role_arn = self._role_arn(uds_ec2_inpf_role, aws_account_id)
        for repo_name in all_repo_names:
            policy = json.loads(ecr_client.get_repository_policy(repositoryName=repo_name)['policyText'])
            exists_index = None
            for i, statement in enumerate(policy['Statement']):
                if statement['Sid'] == ecr_policy_statement_sid:
                    exists_index = i
            if exists_index:
                policy['Statement'].pop(exists_index)
            policy['Statement'].append({
                'Sid': ecr_policy_statement_sid,
                'Effect': 'Allow',
                'Principal': {'AWS': [ecs_container_inpf_role_arn, uds_ec2_inpf_role_arn]},
                'Action': [
                    'ecr:GetDownloadUrlForLayer',
                    'ecr:BatchGetImage',
                    'ecr:BatchCheckLayerAvailability',
                    'ecr:GetAuthorizationToken'
                ]
            })
            policy_text = json.dumps(policy)
            ecr_client.set_repository_policy(repositoryName=repo_name, policyText=policy_text)

        _logger.info('updating configuration with docker image references')
        output_config['local_setup']['elasticmq_docker_image'] = self._docker_image('elasticmq', output_config)
        eng_cfg['no_op_engine']['docker_image'] = self._docker_image('engine-no_op', output_config)
        eng_cfg['emr_engine']['docker_image'] = self._docker_image('engine-emr', output_config)
        eng_cfg['emr_engine']['options']['impala_docker_repo_base_url'] = self._ecr_base_url(output_config)
        eng_cfg['dynamodb_engine']['docker_image'] = self._docker_image('engine-dynamodb', output_config)
        eng_cfg['dynamodb_engine']['options']['emr_impala_docker_repo_base_url'] = self._ecr_base_url(output_config)
        eng_cfg['redshift_engine']['docker_image'] = self._docker_image('engine-redshift', output_config)
        ew_image = self._docker_image('engine-worker', output_config)
        sw_image = self._docker_image('subscription-worker', output_config)
        tw_image = self._docker_image('trigger-worker', output_config)
        fl_image = self._docker_image('flask', output_config)
        nx_image = self._docker_image('nginx', output_config)
        cwl_image = self._docker_image('cloudwatchlogs', output_config)
        self._set_cfn_boto_param_value(output_config, 'engine-taskrunner', 'CloudWatchLogsDockerImage', cwl_image)
        self._set_cfn_boto_param_value(output_config, 'engine-worker', 'EngineWorkerDockerImage', ew_image)
        self._set_cfn_boto_param_value(output_config, 'engine-worker', 'CloudWatchLogsDockerImage', cwl_image)
        self._set_cfn_boto_param_value(output_config, 'subscription-worker', 'SubscriptionWorkerDockerImage', sw_image)
        self._set_cfn_boto_param_value(output_config, 'subscription-worker', 'CloudWatchLogsDockerImage', cwl_image)
        self._set_cfn_boto_param_value(output_config, 'trigger-worker', 'TriggerWorkerDockerImage', tw_image)
        self._set_cfn_boto_param_value(output_config, 'trigger-worker', 'CloudWatchLogsDockerImage', cwl_image)
        self._set_cfn_boto_param_value(output_config, 'web-internal', 'FlaskWorkerDockerImage', fl_image)
        self._set_cfn_boto_param_value(output_config, 'web-internal', 'NginxWorkerDockerImage', nx_image)
        self._set_cfn_boto_param_value(output_config, 'web-internal', 'CloudWatchLogsDockerImage', cwl_image)
        self._set_cfn_boto_param_value(output_config, 'web', 'FlaskWorkerDockerImage', fl_image)
        self._set_cfn_boto_param_value(output_config, 'web', 'NginxWorkerDockerImage', nx_image)
        self._set_cfn_boto_param_value(output_config, 'web', 'CloudWatchLogsDockerImage', cwl_image)

        _logger.info('updating configuration with DartConfig references')
        self._set_cfn_boto_param_value(output_config, 'engine-worker', 'DartConfig', self.output_config_s3_path)
        self._set_cfn_boto_param_value(output_config, 'subscription-worker', 'DartConfig', self.output_config_s3_path)
        self._set_cfn_boto_param_value(output_config, 'trigger-worker', 'DartConfig', self.output_config_s3_path)
        self._set_cfn_boto_param_value(output_config, 'web-internal', 'DartConfig', self.output_config_s3_path)
        self._set_cfn_boto_param_value(output_config, 'web', 'DartConfig', self.output_config_s3_path)
        eng_cfg['no_op_engine']['config'] = self.output_config_s3_path
        eng_cfg['emr_engine']['config'] = self.output_config_s3_path
        eng_cfg['dynamodb_engine']['config'] = self.output_config_s3_path
        eng_cfg['redshift_engine']['config'] = self.output_config_s3_path

        _logger.info('waiting for logs stack')
        logs_outputs = self._wait_for_stack_completion_and_get_outputs(logs_stack_name, 2)
        syslog_log_group_name = _get_element(logs_outputs, 'OutputKey', 'DartSyslog')['OutputValue']
        misc_log_group_name = _get_element(logs_outputs, 'OutputKey', 'DartMisc')['OutputValue']

        self._handle_docker_concerns(cwl_image, eng_cfg, misc_log_group_name, output_config, syslog_log_group_name)

        _logger.info('waiting for s3 stack')
        self._wait_for_stack_completion_and_get_outputs(s3_stack_name)

        self.create_partial(output_config)

        _logger.info('full environment created with config: %s, url: %s' % (self.output_config_s3_path, dart_host))
Esempio n. 13
0
from flask.ext.login import login_required
from dart.auth.required_roles import required_roles
from sqlalchemy import text

from dart.context.database import db
from dart.model.mutex import Mutexes, MutexState
from dart.util.rand import random_id
from dart.config.config import configuration

from dart.web.ui.admin.admin_query import populate_dart_client_user, populate_dart_client_apikeys, clear_roles_table,\
    populate_roles_table, populate_user_roles_table, getPermissionServiceRolesAndIds

# Blueprint object registered under the name 'admin'.
admin_bp = Blueprint('admin', __name__)

# Configuration is resolved once, at import time.  os.environ[...] raises
# KeyError if DART_CONFIG is not set, so importing this module requires it.
CONFIG_PATH = os.environ['DART_CONFIG']
CONFIG = configuration(CONFIG_PATH)
# Sub-sections of the configuration that the rest of this module reads.
AUTH_CONFIG = CONFIG['auth']
DART_CLIENT_NAME = CONFIG['authorization']['dart_client_name']
PERMISSION_CONFIG = CONFIG['permission_service']

# Module-level logger named after this module.
_logger = logging.getLogger(__name__)


def populate_user_api_secret_keys():
    ''' Under auth.predefined_auth_services we keep a triplet <user, api_key, secretKey>
        that an external service (e.g. portico, decode, savor) uses.  By being placed in a
        config file we can have different keys for different envs that are always loaded when DART starts.

        Does nothing when auth.predefined_auth_services is missing or empty.

        NOTE(review): the visible body stops right after splitting each entry;
        the code that consumes `items` is not shown here -- confirm against the full file.
    '''
    # Guard: .get() returns None when the key is absent; an empty value is skipped too.
    if AUTH_CONFIG.get('predefined_auth_services'):
        for usr_api_secret in AUTH_CONFIG.get('predefined_auth_services'):
            # Each entry is one string; split it on single spaces into its parts
            # (presumably user, api key, secret key, per the docstring).
            items = usr_api_secret.split(" ")
       -- anonymous/none user datastore is free for all to edit/run/delete
       -- anonymous current user cannot do anything but view.

    * No limit on creating datastores.
"""
import logging
import os
from dart.config.config import configuration

from flask_login import current_user
from flask import make_response, current_app
from dart.service.user_roles import UserRolesService
from dart.web.api.entity_lookup import get_known_entity
from functools import wraps

# Module-level logger named after this module.
_logger = logging.getLogger(__name__)
# Configuration is resolved once, at import time.  os.environ[...] raises
# KeyError if DART_CONFIG is not set.
config_path = os.environ['DART_CONFIG']
config = configuration(config_path)
# Client name read from the 'authorization' section of the configuration.
DART_CLIENT_NAME = config['authorization']['dart_client_name']


def dart_required_roles(action_roles):
    """Return a decorator that forwards every call to the decorated function.

    As written, `action_roles` is accepted but never consulted: the decorated
    function is invoked with the caller's arguments and its result is returned
    unchanged.  `functools.wraps` copies the decorated function's metadata
    (name, docstring, ...) onto the forwarding function.

    :param action_roles: roles associated with the action (currently unused).
    :return: a decorator taking a callable and returning the forwarding callable.
    """
    def decorator(target):
        def passthrough(*call_args, **call_kwargs):
            outcome = target(*call_args, **call_kwargs)
            return outcome

        # Equivalent to decorating `passthrough` with @wraps(target).
        return wraps(target)(passthrough)

    return decorator
Esempio n. 15
0
                    {
                        'containerPath': '/mnt/ecs_agent_data',
                        'sourceVolume': 'ecs-agent-data',
                        'readOnly': True
                    }
                ],
            }
        ],
        'volumes': [
            {
                'host': {'sourcePath': '/var/lib/ecs/data'},
                'name': 'ecs-agent-data'
            }
        ],
    }

    e1 = dart.save_engine(engine=Engine(
            id=engine_id, data=EngineData(
                    name='s3_engine',
                    description='For S3 FileCopy',
                    options_json_schema={},
                    supported_action_types=[
                        S3ActionTypes.copy
                    ],
                    ecs_task_definition=ecs_task_definition
            )))
    _logger.info('Saved s3_engine: %s' % e1.id)

# Script entry point: build the configuration from the path held in the
# DART_CONFIG environment variable (KeyError if it is unset) and pass it to
# add_s3_engine.
if __name__ == '__main__':
    add_s3_engine(configuration(os.environ['DART_CONFIG']))
Esempio n. 16
0
                            'integer',
                            'default':
                            2,
                            'minimum':
                            1,
                            'maximum':
                            10,
                            'description':
                            'the maximum number of snapshots to keep, older ones will be deleted'
                        },
                    },
                    'additionalProperties': False,
                    'required': ['master_user_password']
                },
                supported_action_types=[
                    RedshiftActionTypes.start_datastore,
                    RedshiftActionTypes.stop_datastore,
                    RedshiftActionTypes.execute_sql,
                    RedshiftActionTypes.load_dataset,
                    RedshiftActionTypes.consume_subscription,
                    RedshiftActionTypes.copy_to_s3,
                    RedshiftActionTypes.create_snapshot,
                    RedshiftActionTypes.data_check,
                ],
                ecs_task_definition=ecs_task_definition)))
    _logger.info('saved redshift_engine: %s' % e1.id)


# Script entry point: build the configuration from the path held in the
# DART_CONFIG environment variable (KeyError if it is unset) and pass it to
# add_redshift_engine.
if __name__ == '__main__':
    add_redshift_engine(configuration(os.environ['DART_CONFIG']))
Esempio n. 17
0
                    'description': 'UTC time when automated cluster maintenance can occur'
                },
                'snapshot_retention': {
                    'type': 'integer',
                    'default': 2,
                    'minimum': 1,
                    'maximum': 10,
                    'description': 'the maximum number of snapshots to keep, older ones will be deleted'
                },
            },
            'additionalProperties': False,
            'required': ['master_user_password']
        },
        supported_action_types=[
            RedshiftActionTypes.start_datastore,
            RedshiftActionTypes.stop_datastore,
            RedshiftActionTypes.execute_sql,
            RedshiftActionTypes.load_dataset,
            RedshiftActionTypes.consume_subscription,
            RedshiftActionTypes.copy_to_s3,
            RedshiftActionTypes.create_snapshot,
            RedshiftActionTypes.data_check,
        ],
        ecs_task_definition=ecs_task_definition
    )))
    _logger.info('saved redshift_engine: %s' % e1.id)


# Script entry point: build the configuration from the path held in the
# DART_CONFIG environment variable (KeyError if it is unset) and pass it to
# add_redshift_engine.
if __name__ == '__main__':
    add_redshift_engine(configuration(os.environ['DART_CONFIG']))
Esempio n. 18
0
import os

from flask.ext.sqlalchemy import SQLAlchemy, Model
from sqlalchemy import create_engine
import sqlalchemy.sql.expression
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import scoped_session, sessionmaker

from dart.config.config import configuration


class DartDbSession(object):
    """Plain holder bundling a model base, a SQL function namespace and a session.

    The three constructor arguments are stored unchanged on the instance as
    ``Model``, ``func`` and ``session``.
    """

    def __init__(self, model, func, session):
        # Store the collaborators as-is; no validation or copying is performed.
        self.Model, self.func, self.session = model, func, session


def init_dart_db(database_uri_alias='SQLALCHEMY_DATABASE_URI'):
    """Create an engine-bound scoped session and wrap it in a DartDbSession.

    :param database_uri_alias: key under ``config['flask']`` whose value is the
        database URI to connect to.
    :return: a DartDbSession holding the declarative base (with a ``query``
        property attached), ``sqlalchemy.sql.expression.func`` and the scoped
        session.
    """
    # The URI is looked up in the module-level `config` at call time.
    database_uri = config['flask'][database_uri_alias]
    db_engine = create_engine(database_uri, convert_unicode=True)

    session_factory = sessionmaker(autocommit=False, autoflush=False, bind=db_engine)
    scoped = scoped_session(session_factory)

    # Declarative base built on Model, with a query property tied to the scoped session.
    model_base = declarative_base(cls=Model, name='Model')
    model_base.query = scoped.query_property()

    return DartDbSession(model_base, sqlalchemy.sql.expression.func, scoped)


# Import-time setup.  `config` must be assigned before the init_dart_db calls
# below, because init_dart_db reads this module-level name.  os.environ[...]
# raises KeyError if DART_CONFIG is not set.
config = configuration(os.environ['DART_CONFIG'])
# When DART_ROLE is 'web' an unconfigured SQLAlchemy() object is used;
# otherwise a session is built directly from the URI stored under the given
# key of config['flask'].
db = SQLAlchemy() if os.environ.get('DART_ROLE') == 'web' else init_dart_db('SQLALCHEMY_DATABASE_URI')
# Same selection for the replica, using the replica URI key.  Note that in the
# 'web' case this is a second, separate SQLAlchemy() instance.
db_replica = SQLAlchemy() if os.environ.get('DART_ROLE') == 'web' else init_dart_db('SQLALCHEMY_DATABASE_REPLICA_URI')
Esempio n. 19
0
                Trigger(id=Ref.trigger(1), data=TriggerData(
                    name='emr-trigger-subscription-1G-batch',
                    trigger_type_name=subscription_batch_trigger.name,
                    workflow_ids=[Ref.workflow(1)],
                    args={
                        'subscription_id': Ref.subscription(1),
                        'unconsumed_data_size_in_bytes': 1000*1000*1000
                    }
                )),
            ],
            actions=[
                Action(id=Ref.action(1), data=ActionData(
                    name='emr-action-consume_subscription',
                    action_type_name=EmrActionTypes.consume_subscription.name,
                    engine_name='emr_engine',
                    workflow_id=Ref.workflow(1),
                    state=ActionState.TEMPLATE,
                    args={'subscription_id': Ref.subscription(1)}
                )),
            ]
        ))
    ]

    for e in subgraph_definitions:
        s = dart.save_subgraph_definition(e, engine_id)
        _logger.info('created subgraph_definition: %s' % s.id)


# Script entry point: build the configuration from the path held in the
# DART_CONFIG environment variable (KeyError if it is unset) and pass it to
# add_emr_engine_sub_graphs.
if __name__ == '__main__':
    add_emr_engine_sub_graphs(configuration(os.environ['DART_CONFIG']))
Esempio n. 20
0
    e1 = dart.save_engine(Engine(id=engine_id, data=EngineData(
        name='no_op_engine',
        description='Helps engineering test dart',
        options_json_schema={
            'type': 'object',
            'properties': {
                'action_sleep_time_in_seconds': {
                    'type': 'integer',
                    'minimum': 0,
                    'default': 5,
                    'description': 'The time to sleep for each action before completing'
                },
            },
            'additionalProperties': False,
            'required': [],
        },
        supported_action_types=[
            NoOpActionTypes.action_that_succeeds,
            NoOpActionTypes.action_that_fails,
            NoOpActionTypes.copy_hdfs_to_s3_action,
            NoOpActionTypes.load_dataset,
            NoOpActionTypes.consume_subscription
        ],
        ecs_task_definition=ecs_task_definition
    )))
    _logger.info('saved no_op_engine: %s' % e1.id)


# Script entry point: build the configuration from the path held in the
# DART_CONFIG environment variable (KeyError if it is unset) and pass it to
# add_no_op_engine.
if __name__ == '__main__':
    add_no_op_engine(configuration(os.environ['DART_CONFIG']))
Esempio n. 21
0
            }
        ],
    }

    e1 = dart.save_engine(Engine(id=engine_id, data=EngineData(
        name='dynamodb_engine',
        description='For DynamoDB tables',
        options_json_schema={
            'type': 'object',
            'properties': {
                'dataset_id': {'type': 'string', 'description': 'The id of the dataset on which the table is based'},
                'target_table_name': {'type': ['string', 'null'], 'default': None, 'pattern': '^[a-zA-Z0-9_]+$', 'description': 'overrides dataset setting'},
                'target_distribution_key': {'type': ['string', 'null'], 'default': None, 'pattern': '^[a-zA-Z0-9_]+$', 'description': 'overrides dataset setting'},
                'target_sort_key': {'type': ['string', 'null'], 'default': None, 'pattern': '^[a-zA-Z0-9_]+$', 'description': 'overrides dataset setting'},
            },
            'additionalProperties': False,
            'required': ['dataset_id'],
        },
        supported_action_types=[
            DynamoDBActionTypes.create_table,
            DynamoDBActionTypes.delete_table,
            DynamoDBActionTypes.load_dataset,
        ],
        ecs_task_definition=ecs_task_definition
    )))
    _logger.info('saved dynamodb_engine: %s' % e1.id)


# Script entry point: build the configuration from the path held in the
# DART_CONFIG environment variable (KeyError if it is unset) and pass it to
# add_dynamodb_engine.
if __name__ == '__main__':
    add_dynamodb_engine(configuration(os.environ['DART_CONFIG']))