Exemple #1
0
class S3Engine(object):
    """Runs one dart action by dispatching it to a registered S3 handler.

    ``run`` checks the action out from dart, looks its action type name up in
    ``_action_handlers``, calls the handler, and always checks a result back in.
    """

    def __init__(self, dart_host, dart_port, dart_api_version):
        """Build the dart client and the handler table.

        All three arguments are forwarded unchanged to ``Dart(...)``.
        """
        self.dart = Dart(dart_host, dart_port, dart_api_version)
        # Action type name -> callable that performs the action.
        self._action_handlers = {
            S3ActionTypes.copy.name: s3_copy
        }

    def run(self):
        """Check out the action named by ``DART_ACTION_ID``, run it, check it in.

        Any ``Exception`` raised while dispatching is caught and reported as a
        FAILURE result whose message is the exception text followed by the
        formatted traceback. The check-in happens in ``finally``.
        """
        action_context = self.dart.engine_action_checkout(os.environ.get('DART_ACTION_ID'))
        action = action_context.action

        state = ActionResultState.SUCCESS
        # Must stay None on the success path: it is only assigned once
        # something has actually failed.
        error_message = None
        try:
            action_type_name = action.data.action_type_name
            _logger.info("*** S3Engine.run_action: %s", action_type_name)
            handler = self._action_handlers.get(action_type_name)
            if handler is None:
                # Explicit raise instead of assert: asserts are removed under -O.
                raise ValueError('unsupported action: %s' % action_type_name)
            # This variant passes the action's args as keyword arguments.
            handler(**action.data.args)
        except Exception as e:
            state = ActionResultState.FAILURE
            # str(e) instead of e.message: .message may be a non-string (which
            # would make the concatenation itself raise) and does not exist on
            # Python 3.
            error_message = str(e) + '\n\n\n' + traceback.format_exc()

        finally:
            self.dart.engine_action_checkin(action.id, ActionResult(state, error_message))
Exemple #2
0
class S3Engine(ActionRunner):
    """Runs one dart action by dispatching it to a registered S3 handler.

    ``run`` checks the action out from dart, looks its action type name up in
    ``_action_handlers``, calls the handler, checks a result back in and then
    publishes an SNS message.
    """

    def __init__(self, region, dart_host, dart_port, dart_api_version):
        """Store the region and build the dart client and handler table.

        ``dart_host``, ``dart_port`` and ``dart_api_version`` are forwarded
        unchanged to ``Dart(...)``; ``region`` is kept on ``self.region``.
        """
        super(S3Engine, self).__init__()
        self.region = region
        self.dart = Dart(dart_host, dart_port, dart_api_version)
        # Action type name -> callable invoked as handler(engine, datastore, action).
        self._action_handlers = {
            S3ActionTypes.copy.name: copy,
            S3ActionTypes.data_check.name: data_check,
        }

    def run(self):
        """Check out the action named by ``DART_ACTION_ID``, run it, report it.

        Any ``Exception`` raised while dispatching is caught and reported as a
        FAILURE result whose message is the exception text followed by the
        formatted traceback. Check-in and SNS publishing happen in ``finally``.
        """
        action_context = self.dart.engine_action_checkout(os.environ.get('DART_ACTION_ID'))
        action = action_context.action
        datastore = action_context.datastore

        state = ActionResultState.SUCCESS
        error_message = None
        try:
            action_type_name = action.data.action_type_name
            _logger.info("*** S3Engine.run_action: %s", action_type_name)
            handler = self._action_handlers.get(action_type_name)
            if handler is None:
                # Explicit raise instead of assert: asserts are removed under -O.
                raise ValueError('unsupported action: %s' % action_type_name)
            handler(self, datastore, action)

        except Exception as e:
            state = ActionResultState.FAILURE
            # str(e) instead of str(e.message): .message is empty for
            # multi-argument exceptions and does not exist on Python 3.
            error_message = '{m}\r\r\r{t}'.format(
                m=str(e),
                t=traceback.format_exc(),
            )

        finally:
            self.dart.engine_action_checkin(action.id, ActionResult(state, error_message))
            # publish_sns_message is not defined in this class; presumably it
            # comes from ActionRunner - confirm.
            self.publish_sns_message(action, error_message, state)
Exemple #3
0
class S3Engine(object):
    """Runs one dart action by dispatching it to a registered S3 handler.

    ``run`` checks the action out from dart, looks its action type name up in
    ``_action_handlers``, calls the handler, and always checks a result back in.
    """

    def __init__(self, region, dart_host, dart_port, dart_api_version):
        """Store the region and build the dart client and handler table.

        ``dart_host``, ``dart_port`` and ``dart_api_version`` are forwarded
        unchanged to ``Dart(...)``; ``region`` is kept on ``self.region``.
        """
        self.region = region
        self.dart = Dart(dart_host, dart_port, dart_api_version)
        # Action type name -> callable invoked as handler(engine, datastore, action).
        self._action_handlers = {
            S3ActionTypes.copy.name: copy,
            S3ActionTypes.data_check.name: data_check,
        }

    def run(self):
        """Check out the action named by ``DART_ACTION_ID``, run it, check it in.

        Any ``Exception`` raised while dispatching is caught and reported as a
        FAILURE result whose message is the exception text followed by the
        formatted traceback. The check-in happens in ``finally``.
        """
        action_context = self.dart.engine_action_checkout(os.environ.get('DART_ACTION_ID'))
        action = action_context.action
        datastore = action_context.datastore

        state = ActionResultState.SUCCESS
        error_message = None
        try:
            action_type_name = action.data.action_type_name
            _logger.info("*** S3Engine.run_action: %s", action_type_name)
            handler = self._action_handlers.get(action_type_name)
            if handler is None:
                # Explicit raise instead of assert: asserts are removed under -O.
                raise ValueError('unsupported action: %s' % action_type_name)
            handler(self, datastore, action)

        except Exception as e:
            state = ActionResultState.FAILURE
            # str(e) instead of e.message: .message may be a non-string (which
            # would make the concatenation itself raise) and does not exist on
            # Python 3.
            error_message = str(e) + '\n\n\n' + traceback.format_exc()

        finally:
            self.dart.engine_action_checkin(action.id, ActionResult(state, error_message))
Exemple #4
0
class NoOpEngine(object):
    """Engine whose actions only sleep, optionally fail, or read a subscription.

    ``run`` checks an action out from dart, sleeps for the duration configured
    on the datastore, applies the per-action-type behaviour below, and always
    checks a result back in.
    """

    def __init__(self, region, dart_host='localhost', dart_port=5000, dart_api_version=1):
        """Store the region and build the dart client.

        ``dart_host``, ``dart_port`` and ``dart_api_version`` are forwarded
        unchanged to ``Dart(...)``; ``region`` is kept on ``self.region``.
        """
        self.region = region
        self.dart = Dart(dart_host, dart_port, dart_api_version)

    def run(self):
        """Check out the action named by ``DART_ACTION_ID``, run it, check it in.

        Any ``Exception`` raised in the body is caught and reported as a
        FAILURE result whose message is the exception text followed by the
        formatted traceback. The check-in happens in ``finally``.
        """
        action_context = self.dart.engine_action_checkout(os.environ.get('DART_ACTION_ID'))
        action = action_context.action
        datastore = action_context.datastore

        state = ActionResultState.SUCCESS
        error_message = None
        try:
            sleep_seconds = datastore.data.args['action_sleep_time_in_seconds']
            # Logger-side formatting: the message is only built if it is emitted.
            _logger.info('sleeping for %s seconds...', sleep_seconds)
            time.sleep(sleep_seconds)

            action_type_name = action.data.action_type_name

            # This action type reports FAILURE on purpose, without raising.
            if action_type_name == NoOpActionTypes.action_that_fails.name:
                state = ActionResultState.FAILURE
                error_message = '%s failed as expected' % NoOpActionTypes.action_that_fails.name

            if action_type_name == NoOpActionTypes.consume_subscription.name:
                subscription_elements = self.dart.get_subscription_elements(action.id)
                # The elements are fully iterated just to count them.
                _logger.info('consuming subscription, size = %s', len(list(subscription_elements)))

        except Exception as e:
            state = ActionResultState.FAILURE
            # str(e) instead of e.message: .message may be a non-string (which
            # would make the concatenation itself raise) and does not exist on
            # Python 3.
            error_message = str(e) + '\n\n\n' + traceback.format_exc()

        finally:
            self.dart.engine_action_checkin(action.id, ActionResult(state, error_message))
Exemple #5
0
class ElasticsearchEngine(object):
    """Runs one dart action by dispatching it to an Elasticsearch handler.

    ``run`` checks the action out from dart, looks its action type name up in
    ``_action_handlers``, calls the handler, and always checks a result back in.
    """

    def __init__(self, kms_key_arn, secrets_s3_path, dart_host, dart_port, dart_api_version=1):
        """Build the dart client, the handler table and the secrets accessor.

        ``dart_host``, ``dart_port`` and ``dart_api_version`` are forwarded to
        ``Dart(...)``; ``kms_key_arn`` and ``secrets_s3_path`` are forwarded to
        ``Secrets(...)``.
        """
        self.dart = Dart(dart_host, dart_port, dart_api_version)
        # Action type name -> callable invoked as handler(engine, datastore, action).
        self._action_handlers = {
            ElasticsearchActionTypes.data_check.name: data_check,
            ElasticsearchActionTypes.create_index.name: create_index,
            ElasticsearchActionTypes.create_template.name: create_template,
            ElasticsearchActionTypes.create_mapping.name: create_mapping,
            ElasticsearchActionTypes.delete_index.name: delete_index,
            ElasticsearchActionTypes.delete_template.name: delete_template,
            ElasticsearchActionTypes.force_merge_index.name: force_merge_index,
        }
        self.secrets = Secrets(kms_key_arn, secrets_s3_path)

    def run(self):
        """Check out the action named by ``DART_ACTION_ID``, run it, check it in.

        Any ``Exception`` raised while dispatching is caught and reported as a
        FAILURE result whose message is the exception text followed by the
        formatted traceback. The check-in happens in ``finally``.
        """
        action_context = self.dart.engine_action_checkout(os.environ.get('DART_ACTION_ID'))
        action = action_context.action
        datastore = action_context.datastore

        state = ActionResultState.SUCCESS
        error_message = None
        try:
            action_type_name = action.data.action_type_name
            _logger.info('**** ElasticsearchEngine.run_action: %s', action_type_name)
            handler = self._action_handlers.get(action_type_name)
            if handler is None:
                # Explicit raise instead of assert: asserts are removed under -O.
                raise ValueError('unsupported action: %s' % action_type_name)
            handler(self, datastore, action)

        except Exception as e:
            state = ActionResultState.FAILURE
            # str(e) instead of e.message: .message may be a non-string (which
            # would make the concatenation itself raise) and does not exist on
            # Python 3.
            error_message = str(e) + '\n\n\n' + traceback.format_exc()

        finally:
            self.dart.engine_action_checkin(action.id, ActionResult(state, error_message))
Exemple #6
0
class EmrEngine(object):
    """Runs one dart action by dispatching it to a registered EMR handler.

    ``run`` checks the action out from dart, looks its action type name up in
    ``_action_handlers``, calls the handler, and always checks a result back in.
    The constructor arguments other than the dart ones are simply stored on the
    instance; ``run`` itself does not read them.
    """

    def __init__(self, ec2_keyname, instance_profile, service_role, region, core_node_limit,
                 impala_docker_repo_base_url, impala_version, cluster_tags, cluster_availability_zone,
                 dart_host, dart_port, dart_api_version=1):
        """Store the EMR settings and build the handler table and dart client.

        ``dart_host``, ``dart_port`` and ``dart_api_version`` are forwarded
        unchanged to ``Dart(...)``.
        """
        # Action type name -> callable invoked as handler(engine, datastore, action).
        self._action_handlers = {
            EmrActionTypes.start_datastore.name: start_datastore,
            EmrActionTypes.terminate_datastore.name: terminate_datastore,
            EmrActionTypes.load_dataset.name: load_dataset,
            EmrActionTypes.consume_subscription.name: consume_subscription,
            EmrActionTypes.run_hive_script_action.name: run_hive_script,
            EmrActionTypes.run_impala_script_action.name: run_impala_script,
            EmrActionTypes.run_pyspark_script_action.name: run_pyspark_script,
            EmrActionTypes.copy_hdfs_to_s3_action.name: copy_hdfs_to_s3,
        }
        # A falsy region leaves _region as None.
        self._region = RegionInfo(self, region, 'elasticmapreduce.%s.amazonaws.com' % region) if region else None
        # Created on first access of the ``conn`` property.
        self._conn = None
        self.ec2_keyname = ec2_keyname
        self.core_node_limit = core_node_limit
        self.instance_profile = instance_profile
        self.service_role = service_role
        self.cluster_tags = cluster_tags
        self.cluster_availability_zone = cluster_availability_zone
        self.impala_docker_repo_base_url = impala_docker_repo_base_url
        self.impala_version = impala_version
        self.dart = Dart(dart_host, dart_port, dart_api_version)

    @property
    def conn(self):
        """Return the cached ``EmrConnection``, creating it on first use."""
        if not self._conn:
            self._conn = EmrConnection(region=self._region)
        return self._conn

    def run(self):
        """Check out the action named by ``DART_ACTION_ID``, run it, check it in.

        Any ``Exception`` raised while dispatching is caught and reported as a
        FAILURE result whose message is the exception text followed by the
        formatted traceback. An ``ActionFailedButConsumeSuccessfulException``
        additionally reports the consume-subscription state as SUCCESS. The
        check-in happens in ``finally``.
        """
        action_context = self.dart.engine_action_checkout(os.environ.get('DART_ACTION_ID'))
        action = action_context.action
        datastore = action_context.datastore

        state = ActionResultState.SUCCESS
        consume_subscription_state = None
        error_message = None
        try:
            action_type_name = action.data.action_type_name
            handler = self._action_handlers.get(action_type_name)
            if handler is None:
                # Explicit raise instead of assert: asserts are removed under -O.
                raise ValueError('unsupported action: %s' % action_type_name)
            handler(self, datastore, action)

        except ActionFailedButConsumeSuccessfulException as e:
            state = ActionResultState.FAILURE
            consume_subscription_state = ConsumeSubscriptionResultState.SUCCESS
            # str(e) instead of e.message: see the generic handler below.
            error_message = str(e) + '\n\n\n' + traceback.format_exc()

        except Exception as e:
            state = ActionResultState.FAILURE
            # str(e) instead of e.message: .message may be a non-string (which
            # would make the concatenation itself raise) and does not exist on
            # Python 3.
            error_message = str(e) + '\n\n\n' + traceback.format_exc()

        finally:
            self.dart.engine_action_checkin(action.id, ActionResult(state, error_message, consume_subscription_state))
Exemple #7
0
class EmrEngine(object):
    """Runs one dart action by dispatching it to a registered EMR handler.

    ``run`` checks the action out from dart, looks its action type name up in
    ``_action_handlers``, calls the handler, and always checks a result back in.
    The constructor arguments other than the dart ones are simply stored on the
    instance; ``run`` itself does not read them.
    """

    def __init__(self, ec2_keyname, instance_profile, service_role, subnet_id, region, core_node_limit,
                 impala_docker_repo_base_url, impala_version, cluster_tags, dart_host, dart_port, dart_api_version=1):
        """Store the EMR settings and build the handler table and dart client.

        ``dart_host``, ``dart_port`` and ``dart_api_version`` are forwarded
        unchanged to ``Dart(...)``.
        """
        # Action type name -> callable invoked as handler(engine, datastore, action).
        self._action_handlers = {
            EmrActionTypes.start_datastore.name: start_datastore,
            EmrActionTypes.terminate_datastore.name: terminate_datastore,
            EmrActionTypes.load_dataset.name: load_dataset,
            EmrActionTypes.consume_subscription.name: consume_subscription,
            EmrActionTypes.run_hive_script_action.name: run_hive_script,
            EmrActionTypes.run_impala_script_action.name: run_impala_script,
            EmrActionTypes.run_pyspark_script_action.name: run_pyspark_script,
            EmrActionTypes.copy_hdfs_to_s3_action.name: copy_hdfs_to_s3,
        }
        # A falsy region leaves _region as None.
        self._region = RegionInfo(self, region, 'elasticmapreduce.%s.amazonaws.com' % region) if region else None
        # Created on first access of the ``conn`` property.
        self._conn = None
        self.ec2_keyname = ec2_keyname
        self.core_node_limit = core_node_limit
        self.instance_profile = instance_profile
        self.service_role = service_role
        self.subnet_id = subnet_id
        self.cluster_tags = cluster_tags
        self.impala_docker_repo_base_url = impala_docker_repo_base_url
        self.impala_version = impala_version
        self.dart = Dart(dart_host, dart_port, dart_api_version)

    @property
    def conn(self):
        """Return the cached ``EmrConnection``, creating it on first use."""
        if not self._conn:
            self._conn = EmrConnection(region=self._region)
        return self._conn

    def run(self):
        """Check out the action named by ``DART_ACTION_ID``, run it, check it in.

        Any ``Exception`` raised while dispatching is caught and reported as a
        FAILURE result whose message is the exception text followed by the
        formatted traceback. An ``ActionFailedButConsumeSuccessfulException``
        additionally reports the consume-subscription state as SUCCESS. The
        check-in happens in ``finally``.
        """
        action_context = self.dart.engine_action_checkout(os.environ.get('DART_ACTION_ID'))
        action = action_context.action
        datastore = action_context.datastore

        state = ActionResultState.SUCCESS
        consume_subscription_state = None
        error_message = None
        try:
            action_type_name = action.data.action_type_name
            handler = self._action_handlers.get(action_type_name)
            if handler is None:
                # Explicit raise instead of assert: asserts are removed under -O.
                raise ValueError('unsupported action: %s' % action_type_name)
            handler(self, datastore, action)

        except ActionFailedButConsumeSuccessfulException as e:
            state = ActionResultState.FAILURE
            consume_subscription_state = ConsumeSubscriptionResultState.SUCCESS
            # str(e) instead of e.message: see the generic handler below.
            error_message = str(e) + '\n\n\n' + traceback.format_exc()

        except Exception as e:
            state = ActionResultState.FAILURE
            # str(e) instead of e.message: .message may be a non-string (which
            # would make the concatenation itself raise) and does not exist on
            # Python 3.
            error_message = str(e) + '\n\n\n' + traceback.format_exc()

        finally:
            self.dart.engine_action_checkin(action.id, ActionResult(state, error_message, consume_subscription_state))
Exemple #8
0
class DynamoDBEngine(object):
    """Runs one dart action by dispatching it to a registered DynamoDB handler.

    ``run`` checks the action out from dart, looks its action type name up in
    ``_action_handlers``, calls the handler, and always checks a result back in.
    An ``EmrEngine`` is also constructed and kept on ``self.emr_engine``.
    """

    def __init__(self,
                 emr_ec2_keyname,
                 emr_instance_profile,
                 emr_service_role,
                 emr_region,
                 emr_core_node_limit,
                 emr_impala_docker_repo_base_url,
                 emr_impala_version,
                 emr_cluster_tags,
                 emr_cluster_availability_zone,
                 dart_host,
                 dart_port,
                 dart_api_version=1,
                 emr_release_label='emr-4.2.0',
                 emr_instance_type='m3.2xlarge'):
        """Build the handler table, the nested EmrEngine and the dart client.

        The first nine ``emr_*`` arguments and the three dart arguments are
        passed positionally, in order, to ``EmrEngine(...)``. The dart
        arguments are also forwarded to ``Dart(...)``. ``emr_release_label``
        and ``emr_instance_type`` are stored on the instance.
        """
        self.emr_release_label = emr_release_label
        self.emr_instance_type = emr_instance_type
        # Action type name -> callable invoked as handler(engine, datastore, action).
        self._action_handlers = {
            DynamoDBActionTypes.create_table.name: create_table,
            DynamoDBActionTypes.delete_table.name: delete_table,
            DynamoDBActionTypes.load_dataset.name: load_dataset,
        }
        self.emr_engine = EmrEngine(emr_ec2_keyname, emr_instance_profile,
                                    emr_service_role, emr_region,
                                    emr_core_node_limit,
                                    emr_impala_docker_repo_base_url,
                                    emr_impala_version, emr_cluster_tags,
                                    emr_cluster_availability_zone, dart_host,
                                    dart_port, dart_api_version)
        self.dart = Dart(dart_host, dart_port, dart_api_version)

    def run(self):
        """Check out the action named by ``DART_ACTION_ID``, run it, check it in.

        Any ``Exception`` raised while dispatching is caught and reported as a
        FAILURE result whose message is the exception text followed by the
        formatted traceback. The check-in happens in ``finally``.
        """
        action_context = self.dart.engine_action_checkout(
            os.environ.get('DART_ACTION_ID'))
        action = action_context.action
        datastore = action_context.datastore

        state = ActionResultState.SUCCESS
        # Never reassigned in this method; always checked in as None.
        consume_subscription_state = None
        error_message = None
        try:
            action_type_name = action.data.action_type_name
            handler = self._action_handlers.get(action_type_name)
            if handler is None:
                # Explicit raise instead of assert: asserts are removed under -O.
                raise ValueError('unsupported action: %s' % action_type_name)
            handler(self, datastore, action)

        except Exception as e:
            state = ActionResultState.FAILURE
            # str(e) instead of e.message: .message may be a non-string (which
            # would make the concatenation itself raise) and does not exist on
            # Python 3.
            error_message = str(e) + '\n\n\n' + traceback.format_exc()

        finally:
            self.dart.engine_action_checkin(
                action.id,
                ActionResult(state, error_message, consume_subscription_state))
Exemple #9
0
class RedshiftEngine(ActionRunner):
    """Runs one dart action by dispatching it to a registered Redshift handler.

    ``run`` checks the action out from dart, looks its action type name up in
    ``_action_handlers``, calls the handler, checks a result back in and then
    publishes an SNS message. The cluster-related constructor arguments are
    simply stored on the instance; ``run`` itself does not read them.
    """

    def __init__(self, kms_key_arn, secrets_s3_path, vpc_subnet, security_group_ids,
                 region, availability_zones, publicly_accessible, cluster_tags,
                 dart_host, dart_port, dart_api_version=1):
        """Build the dart client, handler table and secrets accessor.

        ``dart_host``, ``dart_port`` and ``dart_api_version`` are forwarded to
        ``Dart(...)``; ``kms_key_arn`` and ``secrets_s3_path`` are forwarded to
        ``Secrets(...)``. The remaining arguments are stored as attributes of
        the same name.
        """
        super(RedshiftEngine, self).__init__()

        self.dart = Dart(dart_host, dart_port, dart_api_version)
        # Action type name -> callable invoked as handler(engine, datastore, action).
        self._action_handlers = {
            RedshiftActionTypes.start_datastore.name: start_datastore,
            RedshiftActionTypes.stop_datastore.name: stop_datastore,
            RedshiftActionTypes.execute_sql.name: execute_sql,
            RedshiftActionTypes.load_dataset.name: load_dataset,
            RedshiftActionTypes.consume_subscription.name: consume_subscription,
            RedshiftActionTypes.copy_to_s3.name: copy_to_s3,
            RedshiftActionTypes.create_snapshot.name: create_snapshot,
            RedshiftActionTypes.data_check.name: data_check,
            RedshiftActionTypes.cluster_maintenance.name: cluster_maintenance,
        }
        self.vpc_subnet = vpc_subnet
        self.availability_zones = availability_zones
        self.publicly_accessible = publicly_accessible
        self.security_group_ids = security_group_ids
        self.cluster_tags = cluster_tags
        self.region = region
        self.secrets = Secrets(kms_key_arn, secrets_s3_path)

    def random_availability_zone(self):
        """Return one element of ``self.availability_zones`` picked at random."""
        return random.choice(self.availability_zones)

    def run(self):
        """Check out the action named by ``DART_ACTION_ID``, run it, report it.

        Any ``Exception`` raised while dispatching is caught and reported as a
        FAILURE result whose message is the exception text followed by the
        formatted traceback. Check-in and SNS publishing happen in ``finally``.
        """
        action_context = self.dart.engine_action_checkout(os.environ.get('DART_ACTION_ID'))
        action = action_context.action
        datastore = action_context.datastore

        state = ActionResultState.SUCCESS
        error_message = None
        try:
            action_type_name = action.data.action_type_name
            _logger.info("**** RedshiftEngine.run_action: %s", action_type_name)
            handler = self._action_handlers.get(action_type_name)
            if handler is None:
                # Explicit raise instead of assert: asserts are removed under -O.
                raise ValueError('unsupported action: %s' % action_type_name)
            handler(self, datastore, action)

        except Exception as e:
            state = ActionResultState.FAILURE
            # str(e) instead of str(e.message): .message is empty for
            # multi-argument exceptions and does not exist on Python 3.
            error_message = '{m}\r\r\r{t}'.format(
                m=str(e),
                t=traceback.format_exc(),
            )

        finally:
            self.dart.engine_action_checkin(action.id, ActionResult(state, error_message))
            # publish_sns_message is not defined in this class; presumably it
            # comes from ActionRunner - confirm.
            self.publish_sns_message(action, error_message, state)
Exemple #10
0
class ElasticsearchEngine(ActionRunner):
    """Runs one dart action by dispatching it to an Elasticsearch handler.

    ``run`` checks the action out from dart, looks its action type name up in
    ``_action_handlers``, calls the handler, checks a result back in and then
    publishes an SNS message.
    """

    def __init__(self,
                 kms_key_arn,
                 secrets_s3_path,
                 dart_host,
                 dart_port,
                 dart_api_version=1,
                 **kwargs):
        """Build the dart client, the handler table and the secrets accessor.

        ``dart_host``, ``dart_port`` and ``dart_api_version`` are forwarded to
        ``Dart(...)``; ``kms_key_arn`` and ``secrets_s3_path`` are forwarded to
        ``Secrets(...)``. Extra keyword arguments are accepted and ignored.
        """
        super(ElasticsearchEngine, self).__init__()

        self.dart = Dart(dart_host, dart_port, dart_api_version)
        # Action type name -> callable invoked as handler(engine, datastore, action).
        self._action_handlers = {
            ElasticsearchActionTypes.data_check.name: data_check,
            ElasticsearchActionTypes.create_index.name: create_index,
            ElasticsearchActionTypes.create_template.name: create_template,
            ElasticsearchActionTypes.create_mapping.name: create_mapping,
            ElasticsearchActionTypes.delete_index.name: delete_index,
            ElasticsearchActionTypes.delete_template.name: delete_template,
            ElasticsearchActionTypes.force_merge_index.name: force_merge_index,
        }
        self.secrets = Secrets(kms_key_arn, secrets_s3_path)

    def run(self):
        """Check out the action named by ``DART_ACTION_ID``, run it, report it.

        Any ``Exception`` raised while dispatching is caught and reported as a
        FAILURE result whose message is the exception text followed by the
        formatted traceback. Check-in and SNS publishing happen in ``finally``.
        """
        action_context = self.dart.engine_action_checkout(
            os.environ.get('DART_ACTION_ID'))
        action = action_context.action
        datastore = action_context.datastore

        state = ActionResultState.SUCCESS
        error_message = None
        try:
            action_type_name = action.data.action_type_name
            _logger.info('**** ElasticsearchEngine.run_action: %s',
                         action_type_name)
            handler = self._action_handlers.get(action_type_name)
            if handler is None:
                # Explicit raise instead of assert: asserts are removed under -O.
                raise ValueError('unsupported action: %s' % action_type_name)
            handler(self, datastore, action)

        except Exception as e:
            state = ActionResultState.FAILURE
            # str(e) instead of str(e.message): .message is empty for
            # multi-argument exceptions and does not exist on Python 3.
            error_message = '{m}\r\r\r{t}'.format(
                m=str(e),
                t=traceback.format_exc(),
            )

        finally:
            self.dart.engine_action_checkin(action.id,
                                            ActionResult(state, error_message))
            # publish_sns_message is not defined in this class; presumably it
            # comes from ActionRunner - confirm.
            self.publish_sns_message(action, error_message, state)
Exemple #11
0
class DynamoDBEngine(ActionRunner):
    """Runs one dart action by dispatching it to a registered DynamoDB handler.

    ``run`` checks the action out from dart, looks its action type name up in
    ``_action_handlers``, calls the handler, checks a result back in and then
    publishes an SNS message. An ``EmrEngine`` is also constructed and kept on
    ``self.emr_engine``.
    """

    def __init__(self, emr_ec2_keyname, emr_instance_profile, emr_service_role, emr_region, emr_core_node_limit,
                 emr_impala_docker_repo_base_url, emr_impala_version, emr_cluster_tags, emr_cluster_availability_zone,
                 dart_host, dart_port, dart_api_version=1, emr_release_label='emr-4.2.0',
                 emr_instance_type='m3.2xlarge'):
        """Build the handler table, the nested EmrEngine and the dart client.

        The first nine ``emr_*`` arguments and the three dart arguments are
        passed positionally, in order, to ``EmrEngine(...)``. The dart
        arguments are also forwarded to ``Dart(...)``. ``emr_release_label``
        and ``emr_instance_type`` are stored on the instance.
        """
        super(DynamoDBEngine, self).__init__()

        self.emr_release_label = emr_release_label
        self.emr_instance_type = emr_instance_type
        # Action type name -> callable invoked as handler(engine, datastore, action).
        self._action_handlers = {
            DynamoDBActionTypes.create_table.name: create_table,
            DynamoDBActionTypes.delete_table.name: delete_table,
            DynamoDBActionTypes.load_dataset.name: load_dataset,
        }
        self.emr_engine = EmrEngine(
            emr_ec2_keyname, emr_instance_profile, emr_service_role, emr_region, emr_core_node_limit,
            emr_impala_docker_repo_base_url, emr_impala_version, emr_cluster_tags, emr_cluster_availability_zone,
            dart_host, dart_port, dart_api_version
        )
        self.dart = Dart(dart_host, dart_port, dart_api_version)

    def run(self):
        """Check out the action named by ``DART_ACTION_ID``, run it, report it.

        Any ``Exception`` raised while dispatching is caught and reported as a
        FAILURE result whose message is the exception text followed by the
        formatted traceback. Check-in and SNS publishing happen in ``finally``.
        """
        action_context = self.dart.engine_action_checkout(os.environ.get('DART_ACTION_ID'))
        action = action_context.action
        datastore = action_context.datastore

        state = ActionResultState.SUCCESS
        # Never reassigned in this method; always checked in as None.
        consume_subscription_state = None
        error_message = None
        try:
            action_type_name = action.data.action_type_name
            handler = self._action_handlers.get(action_type_name)
            if handler is None:
                # Explicit raise instead of assert: asserts are removed under -O.
                raise ValueError('unsupported action: %s' % action_type_name)
            handler(self, datastore, action)

        except Exception as e:
            state = ActionResultState.FAILURE
            # str(e) instead of str(e.message): .message is empty for
            # multi-argument exceptions and does not exist on Python 3.
            error_message = '{m}\r\r\r{t}'.format(
                m=str(e),
                t=traceback.format_exc(),
            )

        finally:
            self.dart.engine_action_checkin(action.id, ActionResult(state, error_message, consume_subscription_state))
            # publish_sns_message is not defined in this class; presumably it
            # comes from ActionRunner - confirm.
            self.publish_sns_message(action, error_message, state)
Exemple #12
0
class NoOpEngine(ActionRunner):
    """Engine whose actions only sleep, optionally fail, or read a subscription.

    ``run`` checks an action out from dart, sleeps for the duration configured
    on the datastore, applies the per-action-type behaviour below, checks a
    result back in and then publishes an SNS message.
    """

    def __init__(self,
                 region,
                 dart_host='localhost',
                 dart_port=5000,
                 dart_api_version=1):
        """Store the region and build the dart client.

        ``dart_host``, ``dart_port`` and ``dart_api_version`` are forwarded
        unchanged to ``Dart(...)``; ``region`` is kept on ``self.region``.
        """
        super(NoOpEngine, self).__init__()
        self.region = region
        self.dart = Dart(dart_host, dart_port, dart_api_version)

    def run(self):
        """Check out the action named by ``DART_ACTION_ID``, run it, report it.

        Any ``Exception`` raised in the body is caught and reported as a
        FAILURE result whose message is the exception text followed by the
        formatted traceback. Check-in and SNS publishing happen in ``finally``.
        """
        action_context = self.dart.engine_action_checkout(
            os.environ.get('DART_ACTION_ID'))
        action = action_context.action
        datastore = action_context.datastore

        state = ActionResultState.SUCCESS
        error_message = None
        try:
            sleep_seconds = datastore.data.args['action_sleep_time_in_seconds']
            # Logger-side formatting: the message is only built if it is emitted.
            _logger.info('sleeping for %s seconds...', sleep_seconds)
            time.sleep(sleep_seconds)

            action_type_name = action.data.action_type_name

            # This action type reports FAILURE on purpose, without raising.
            if action_type_name == NoOpActionTypes.action_that_fails.name:
                state = ActionResultState.FAILURE
                error_message = '%s failed as expected' % NoOpActionTypes.action_that_fails.name

            if action_type_name == NoOpActionTypes.consume_subscription.name:
                subscription_elements = self.dart.get_subscription_elements(
                    action.id)
                # The elements are fully iterated just to count them.
                _logger.info('consuming subscription, size = %s',
                             len(list(subscription_elements)))

        except Exception as e:
            state = ActionResultState.FAILURE
            # str(e) instead of str(e.message): .message is empty for
            # multi-argument exceptions and does not exist on Python 3.
            error_message = '{m}\r\r\r{t}'.format(
                m=str(e),
                t=traceback.format_exc(),
            )

        finally:
            self.dart.engine_action_checkin(action.id,
                                            ActionResult(state, error_message))
            # publish_sns_message is not defined in this class; presumably it
            # comes from ActionRunner - confirm.
            self.publish_sns_message(action, error_message, state)