def build_bootstrap_action_configs(region, emrfs_args):
    """Return the bootstrap action configs that set up EMRFS.

    ``emrfs_args`` is verified first. If client-side encryption with a
    ``'CUSTOM'`` provider needs configuring, an action that fetches the
    provider from the ``CustomProviderLocation`` entry is added before the
    EMRFS setup action itself.

    :param region: Region forwarded to ``emrutils.build_s3_link``.
    :param emrfs_args: EMRFS options; must support ``.get``.
    :return: List of bootstrap action configs in the order they were added.
    """
    _verify_emrfs_args(emrfs_args)

    configs = []

    if _need_to_configure_cse(emrfs_args, 'CUSTOM'):
        # Download custom encryption provider from Amazon S3 to EMR Cluster
        s3_get_ba = emrutils.build_bootstrap_action(
            path=constants.EMRFS_CSE_CUSTOM_S3_GET_BA_PATH,
            name=constants.S3_GET_BA_NAME,
            args=[
                constants.S3_GET_BA_SRC,
                emrfs_args.get('CustomProviderLocation'),
                constants.S3_GET_BA_DEST,
                constants.EMRFS_CUSTOM_DEST_PATH,
                constants.S3_GET_BA_FORCE,
            ])
        configs.append(s3_get_ba)

    # The setup arguments are computed before the script link, matching the
    # order in which the two helpers have always been invoked.
    setup_args = _build_ba_args_to_setup_emrfs(emrfs_args)
    setup_script = emrutils.build_s3_link(
        relative_path=constants.CONFIG_HADOOP_PATH, region=region)
    configs.append(
        emrutils.build_bootstrap_action(
            path=setup_script,
            name=constants.EMRFS_BA_NAME,
            args=setup_args))

    return configs
Beispiel #2
0
def build_bootstrap_action_configs(region, emrfs_args):
    """Build the list of bootstrap actions required for EMRFS.

    After verifying ``emrfs_args``, an optional S3 download action is
    produced when client-side encryption with a ``'CUSTOM'`` provider must
    be configured, followed by the EMRFS setup action.

    :param region: Region used to build the S3 link of the setup script.
    :param emrfs_args: EMRFS options; ``CustomProviderLocation`` is read
        with ``.get`` for the custom provider download.
    :return: List of bootstrap action configs.
    """
    _verify_emrfs_args(emrfs_args)

    actions = []

    if _need_to_configure_cse(emrfs_args, 'CUSTOM'):
        # Download custom encryption provider from Amazon S3 to EMR Cluster
        download_action = emrutils.build_bootstrap_action(
            path=constants.EMRFS_CSE_CUSTOM_S3_GET_BA_PATH,
            name=constants.S3_GET_BA_NAME,
            args=[
                constants.S3_GET_BA_SRC,
                emrfs_args.get('CustomProviderLocation'),
                constants.S3_GET_BA_DEST,
                constants.EMRFS_CUSTOM_DEST_PATH,
                constants.S3_GET_BA_FORCE,
            ],
        )
        actions.append(download_action)

    # Arguments first, then the script location: same helper call order as
    # before the restyle.
    emrfs_setup_args = _build_ba_args_to_setup_emrfs(emrfs_args)
    config_hadoop_link = emrutils.build_s3_link(
        relative_path=constants.CONFIG_HADOOP_PATH,
        region=region,
    )
    setup_action = emrutils.build_bootstrap_action(
        path=config_hadoop_link,
        name=constants.EMRFS_BA_NAME,
        args=emrfs_setup_args,
    )
    actions.append(setup_action)

    return actions
def _build_impala_install_bootstrap_action(region, args=None):
    """Build the bootstrap action config that installs Impala.

    :param region: Region used for both the base S3 link and the install
        script link.
    :param args: Optional iterable of strings; when given, they are joined
        with commas and passed after ``constants.IMPALA_CONF``.
    :return: Whatever ``emrutils.build_bootstrap_action`` returns.
    """
    install_args = [
        constants.BASE_PATH_ARG,
        emrutils.build_s3_link(region=region),
        constants.IMPALA_VERSION,
        constants.LATEST,
    ]
    if args is not None:
        install_args.extend([constants.IMPALA_CONF, ','.join(args)])

    install_script = emrutils.build_s3_link(
        relative_path=constants.IMPALA_INSTALL_PATH, region=region)
    return emrutils.build_bootstrap_action(
        name=constants.INSTALL_IMPALA_NAME,
        path=install_script,
        args=install_args)
Beispiel #4
0
def _build_impala_install_bootstrap_action(region, args=None):
    """Return the bootstrap action config for installing Impala.

    The action always receives the base path, the region's S3 link and the
    ``constants.LATEST`` version marker; extra configuration is appended
    only when ``args`` is supplied.

    :param region: Region passed to ``emrutils.build_s3_link``.
    :param args: Optional iterable of strings, comma-joined into a single
        argument that follows ``constants.IMPALA_CONF``.
    :return: The config built by ``emrutils.build_bootstrap_action``.
    """
    base_link = emrutils.build_s3_link(region=region)
    ba_args = [
        constants.BASE_PATH_ARG,
        base_link,
        constants.IMPALA_VERSION,
        constants.LATEST,
    ]
    if args is not None:
        ba_args += [constants.IMPALA_CONF, ','.join(args)]

    script_link = emrutils.build_s3_link(
        relative_path=constants.IMPALA_INSTALL_PATH,
        region=region,
    )
    return emrutils.build_bootstrap_action(
        name=constants.INSTALL_IMPALA_NAME,
        path=script_link,
        args=ba_args,
    )
Beispiel #5
0
def build_impala_install_bootstrap_action(region, version, args=None):
    """Build the bootstrap action config that installs Impala.

    :param region: Region used for both the base S3 link and the install
        script link.
    :param version: Impala version to install; ``None`` selects
        ``'latest'``.
    :param args: Optional iterable of extra arguments, appended one by one
        after ``constants.IMPALA_CONF``.
    :return: Whatever ``emrutils.build_bootstrap_action`` returns.
    """
    effective_version = 'latest' if version is None else version

    install_args = [
        constants.BASE_PATH_ARG,
        emrutils.build_s3_link(region=region),
        constants.IMPALA_VERSION,
        effective_version,
    ]
    if args is not None:
        install_args.append(constants.IMPALA_CONF)
        install_args.extend(args)

    install_script = emrutils.build_s3_link(
        relative_path=constants.IMPALA_INSTALL_PATH, region=region)
    return emrutils.build_bootstrap_action(
        name=constants.INSTALL_IMPALA_NAME,
        path=install_script,
        args=install_args)
Beispiel #6
0
def build_impala_install_bootstrap_action(region, version, args=None):
    """Return the bootstrap action config for installing Impala.

    :param region: Region passed to ``emrutils.build_s3_link``.
    :param version: Requested Impala version; falls back to ``'latest'``
        when ``None``.
    :param args: Optional iterable of extra arguments; each element becomes
        its own entry after ``constants.IMPALA_CONF``.
    :return: The config built by ``emrutils.build_bootstrap_action``.
    """
    if version is None:
        requested_version = 'latest'
    else:
        requested_version = version

    base_link = emrutils.build_s3_link(region=region)
    ba_args = [
        constants.BASE_PATH_ARG,
        base_link,
        constants.IMPALA_VERSION,
        requested_version,
    ]
    if args is not None:
        ba_args.append(constants.IMPALA_CONF)
        ba_args.extend(args)

    script_link = emrutils.build_s3_link(
        relative_path=constants.IMPALA_INSTALL_PATH,
        region=region,
    )
    return emrutils.build_bootstrap_action(
        name=constants.INSTALL_IMPALA_NAME,
        path=script_link,
        args=ba_args,
    )
    def _run_main(self, parsed_args, parsed_globals):
        """Build a RunJobFlow request from the parsed options and send it.

        The request parameters are assembled step by step from
        ``parsed_args`` (name, instance groups, AMI version, roles,
        termination/visibility flags, tags, EC2 attributes, debugging,
        applications, HBase restore, bootstrap actions, EMRFS and steps),
        the ``RunJobFlow`` operation is called, and the constructed result
        is displayed.

        Note that ``parsed_args`` is mutated in place when defaults are
        applied (service role, instance profile, auto-terminate and
        visibility flags).

        :param parsed_args: Parsed command-specific arguments.
        :param parsed_globals: Parsed global arguments; ``region``,
            ``endpoint_url`` and ``verify_ssl`` are read from it.
        :raises exceptions.InvalidAmiVersionError: if ``ami_version`` does
            not match the expected version pattern.
        :raises exceptions.LogUriError: if debugging is enabled without a
            log URI.
        :return: ``0`` once the response has been displayed.
        """
        emr = self._session.get_service('emr')
        params = {}
        params['Name'] = parsed_args.name

        instances_config = {}
        instances_config['InstanceGroups'] = \
            instancegroupsutils.validate_and_build_instance_groups(
                instance_groups=parsed_args.instance_groups,
                instance_type=parsed_args.instance_type,
                instance_count=parsed_args.instance_count)

        # Raw string so the regex escapes are not interpreted (and warned
        # about) as string escapes; the pattern itself is unchanged.
        # NOTE(review): the leading digit is optional, so a value that
        # starts with '.' is accepted -- confirm this is intended.
        is_valid_ami_version = re.match(r'\d?\..*', parsed_args.ami_version)
        if is_valid_ami_version is None:
            raise exceptions.InvalidAmiVersionError(
                ami_version=parsed_args.ami_version)
        params['AmiVersion'] = parsed_args.ami_version
        emrutils.apply_dict(params, 'AdditionalInfo',
                            parsed_args.additional_info)
        emrutils.apply_dict(params, 'LogUri', parsed_args.log_uri)

        # --use-default-roles overrides both the service role and the
        # instance profile with the module-level default names.
        if parsed_args.use_default_roles is True:
            parsed_args.service_role = EMR_ROLE_NAME
            if parsed_args.ec2_attributes is None:
                parsed_args.ec2_attributes = {}
            parsed_args.ec2_attributes['InstanceProfile'] = EC2_ROLE_NAME

        emrutils.apply_dict(params, 'ServiceRole', parsed_args.service_role)

        # Neither flag given: default to --no-auto-terminate.
        if (parsed_args.no_auto_terminate is False
                and parsed_args.auto_terminate is False):
            parsed_args.no_auto_terminate = True

        instances_config['KeepJobFlowAliveWhenNoSteps'] = \
            emrutils.apply_boolean_options(
                parsed_args.no_auto_terminate,
                '--no-auto-terminate',
                parsed_args.auto_terminate,
                '--auto-terminate')

        instances_config['TerminationProtected'] = \
            emrutils.apply_boolean_options(
                parsed_args.termination_protected,
                '--termination-protected',
                parsed_args.no_termination_protected,
                '--no-termination-protected')

        # Neither flag given: default to --visible-to-all-users.
        if (parsed_args.visible_to_all_users is False
                and parsed_args.no_visible_to_all_users is False):
            parsed_args.visible_to_all_users = True

        params['VisibleToAllUsers'] = \
            emrutils.apply_boolean_options(
                parsed_args.visible_to_all_users,
                '--visible-to-all-users',
                parsed_args.no_visible_to_all_users,
                '--no-visible-to-all-users')

        params['Tags'] = emrutils.parse_tags(parsed_args.tags)
        params['Instances'] = instances_config

        if parsed_args.ec2_attributes is not None:
            self._build_ec2_attributes(cluster=params,
                                       parsed_attrs=parsed_args.ec2_attributes)

        debugging_enabled = emrutils.apply_boolean_options(
            parsed_args.enable_debugging, '--enable-debugging',
            parsed_args.no_enable_debugging, '--no-enable-debugging')

        # Debugging cannot be enabled without a log URI.
        if parsed_args.log_uri is None and debugging_enabled is True:
            raise exceptions.LogUriError

        if debugging_enabled is True:
            self._update_cluster_dict(
                cluster=params,
                key='Steps',
                value=[self._build_enable_debugging(parsed_globals)])

        if parsed_args.applications is not None:
            app_list, ba_list, step_list = applicationutils.build_applications(
                session=self._session,
                parsed_applications=parsed_args.applications,
                parsed_globals=parsed_globals,
                ami_version=params['AmiVersion'])
            self._update_cluster_dict(params, 'NewSupportedProducts', app_list)
            self._update_cluster_dict(params, 'BootstrapActions', ba_list)
            self._update_cluster_dict(params, 'Steps', step_list)

        hbase_restore_config = parsed_args.restore_from_hbase_backup
        if hbase_restore_config is not None:
            args = hbaseutils.build_hbase_restore_from_backup_args(
                dir=hbase_restore_config.get('Dir'),
                backup_version=hbase_restore_config.get('BackupVersion'))
            step_config = emrutils.build_step(
                jar=constants.HBASE_JAR_PATH,
                name=constants.HBASE_RESTORE_STEP_NAME,
                action_on_failure=constants.CANCEL_AND_WAIT,
                args=args)
            self._update_cluster_dict(params, 'Steps', [step_config])

        if parsed_args.bootstrap_actions is not None:
            self._build_bootstrap_actions(
                cluster=params,
                parsed_boostrap_actions=parsed_args.bootstrap_actions)

        if parsed_args.emrfs is not None:
            emr_fs_ba_args = self._build_emr_fs_args(parsed_args.emrfs)
            emr_fs_ba_config = \
                emrutils.build_bootstrap_action(
                    path=emrutils.build_s3_link(
                        relative_path=constants.CONFIG_HADOOP_PATH,
                        region=parsed_globals.region),
                    name=constants.EMR_FS_BA_NAME,
                    args=emr_fs_ba_args)
            self._update_cluster_dict(cluster=params,
                                      key='BootstrapActions',
                                      value=[emr_fs_ba_config])

        if parsed_args.steps is not None:
            steps_list = steputils.build_step_config_list(
                parsed_step_list=parsed_args.steps,
                region=parsed_globals.region)
            self._update_cluster_dict(cluster=params,
                                      key='Steps',
                                      value=steps_list)

        self._validate_required_applications(parsed_args)

        run_job_flow = emr.get_operation('RunJobFlow')
        run_job_flow_response = emrutils.call(self._session, run_job_flow,
                                              params, parsed_globals.region,
                                              parsed_globals.endpoint_url,
                                              parsed_globals.verify_ssl)

        # Only element [1] of the call result is used for the output
        # (presumably the parsed response body -- confirm in emrutils.call).
        constructed_result = self._construct_result(run_job_flow_response[1])
        emrutils.display_response(self._session, run_job_flow,
                                  constructed_result, parsed_globals)

        return 0
def _build_hbase_install_bootstrap_action(region):
    """Return the bootstrap action config that installs HBase.

    :param region: Region used to build the S3 link of the install script.
    :return: The config built by ``emrutils.build_bootstrap_action``.
    """
    install_script = emrutils.build_s3_link(
        relative_path=constants.HBASE_INSTALL_BA_PATH, region=region)
    return emrutils.build_bootstrap_action(
        name=constants.INSTALL_HBASE_NAME, path=install_script)
def _build_ganglia_install_bootstrap_action(region):
    """Return the bootstrap action config that installs Ganglia.

    :param region: Region used to build the S3 link of the install script.
    :return: The config built by ``emrutils.build_bootstrap_action``.
    """
    install_script = emrutils.build_s3_link(
        relative_path=constants.GANGLIA_INSTALL_BA_PATH, region=region)
    return emrutils.build_bootstrap_action(
        name=constants.INSTALL_GANGLIA_NAME, path=install_script)
Beispiel #10
0
    def _run_main(self, parsed_args, parsed_globals):
        """Build a RunJobFlow request from the parsed options and send it.

        Role options are checked for mutual exclusivity first; the request
        parameters are then assembled from ``parsed_args`` (name, instance
        groups, AMI version, roles, termination/visibility flags, tags, EC2
        attributes, debugging, applications, HBase restore, bootstrap
        actions, EMRFS and steps), the ``RunJobFlow`` operation is called,
        and the constructed result is displayed.

        Note that ``parsed_args`` is mutated in place when defaults are
        applied (service role, instance profile, auto-terminate and
        visibility flags).

        :param parsed_args: Parsed command-specific arguments.
        :param parsed_globals: Parsed global arguments; ``region``,
            ``endpoint_url`` and ``verify_ssl`` are read from it.
        :raises exceptions.MutualExclusiveOptionError: if
            ``--use-default-roles`` is combined with ``--service-role`` or
            with an ``InstanceProfile`` in ``--ec2-attributes``.
        :raises exceptions.InvalidAmiVersionError: if ``ami_version`` does
            not match the expected version pattern.
        :raises exceptions.LogUriError: if debugging is enabled without a
            log URI.
        :return: ``0`` once the response has been displayed.
        """
        emr = self._session.get_service("emr")
        params = {}
        params["Name"] = parsed_args.name

        service_role_validation_message = (
            " Either choose --use-default-roles or use both --service-role "
            "<roleName> and --ec2-attributes InstanceProfile=<profileName>."
        )

        # --use-default-roles would silently overwrite explicitly supplied
        # roles further down, so reject the combinations up front.
        if parsed_args.use_default_roles is True and parsed_args.service_role is not None:
            raise exceptions.MutualExclusiveOptionError(
                option1="--use-default-roles", option2="--service-role", message=service_role_validation_message
            )

        if (
            parsed_args.use_default_roles is True
            and parsed_args.ec2_attributes is not None
            and "InstanceProfile" in parsed_args.ec2_attributes
        ):
            raise exceptions.MutualExclusiveOptionError(
                option1="--use-default-roles",
                option2="--ec2-attributes InstanceProfile",
                message=service_role_validation_message,
            )

        instances_config = {}
        instances_config["InstanceGroups"] = instancegroupsutils.validate_and_build_instance_groups(
            instance_groups=parsed_args.instance_groups,
            instance_type=parsed_args.instance_type,
            instance_count=parsed_args.instance_count,
        )

        # Raw string so the regex escapes are not interpreted (and warned
        # about) as string escapes; the pattern itself is unchanged.
        # NOTE(review): the leading digit is optional, so a value that
        # starts with '.' is accepted -- confirm this is intended.
        is_valid_ami_version = re.match(r"\d?\..*", parsed_args.ami_version)
        if is_valid_ami_version is None:
            raise exceptions.InvalidAmiVersionError(ami_version=parsed_args.ami_version)
        params["AmiVersion"] = parsed_args.ami_version
        emrutils.apply_dict(params, "AdditionalInfo", parsed_args.additional_info)
        emrutils.apply_dict(params, "LogUri", parsed_args.log_uri)

        # Apply the module-level default role names.
        if parsed_args.use_default_roles is True:
            parsed_args.service_role = EMR_ROLE_NAME
            if parsed_args.ec2_attributes is None:
                parsed_args.ec2_attributes = {}
            parsed_args.ec2_attributes["InstanceProfile"] = EC2_ROLE_NAME

        emrutils.apply_dict(params, "ServiceRole", parsed_args.service_role)

        # Neither flag given: default to --no-auto-terminate.
        if parsed_args.no_auto_terminate is False and parsed_args.auto_terminate is False:
            parsed_args.no_auto_terminate = True

        instances_config["KeepJobFlowAliveWhenNoSteps"] = emrutils.apply_boolean_options(
            parsed_args.no_auto_terminate, "--no-auto-terminate", parsed_args.auto_terminate, "--auto-terminate"
        )

        instances_config["TerminationProtected"] = emrutils.apply_boolean_options(
            parsed_args.termination_protected,
            "--termination-protected",
            parsed_args.no_termination_protected,
            "--no-termination-protected",
        )

        # Neither flag given: default to --visible-to-all-users.
        if parsed_args.visible_to_all_users is False and parsed_args.no_visible_to_all_users is False:
            parsed_args.visible_to_all_users = True

        params["VisibleToAllUsers"] = emrutils.apply_boolean_options(
            parsed_args.visible_to_all_users,
            "--visible-to-all-users",
            parsed_args.no_visible_to_all_users,
            "--no-visible-to-all-users",
        )

        params["Tags"] = emrutils.parse_tags(parsed_args.tags)
        params["Instances"] = instances_config

        if parsed_args.ec2_attributes is not None:
            self._build_ec2_attributes(cluster=params, parsed_attrs=parsed_args.ec2_attributes)

        debugging_enabled = emrutils.apply_boolean_options(
            parsed_args.enable_debugging, "--enable-debugging", parsed_args.no_enable_debugging, "--no-enable-debugging"
        )

        # Debugging cannot be enabled without a log URI.
        if parsed_args.log_uri is None and debugging_enabled is True:
            raise exceptions.LogUriError

        if debugging_enabled is True:
            self._update_cluster_dict(cluster=params, key="Steps", value=[self._build_enable_debugging(parsed_globals)])

        if parsed_args.applications is not None:
            app_list, ba_list, step_list = applicationutils.build_applications(
                session=self._session,
                parsed_applications=parsed_args.applications,
                parsed_globals=parsed_globals,
                ami_version=params["AmiVersion"],
            )
            self._update_cluster_dict(params, "NewSupportedProducts", app_list)
            self._update_cluster_dict(params, "BootstrapActions", ba_list)
            self._update_cluster_dict(params, "Steps", step_list)

        hbase_restore_config = parsed_args.restore_from_hbase_backup
        if hbase_restore_config is not None:
            args = hbaseutils.build_hbase_restore_from_backup_args(
                dir=hbase_restore_config.get("Dir"), backup_version=hbase_restore_config.get("BackupVersion")
            )
            step_config = emrutils.build_step(
                jar=constants.HBASE_JAR_PATH,
                name=constants.HBASE_RESTORE_STEP_NAME,
                action_on_failure=constants.CANCEL_AND_WAIT,
                args=args,
            )
            self._update_cluster_dict(params, "Steps", [step_config])

        if parsed_args.bootstrap_actions is not None:
            self._build_bootstrap_actions(cluster=params, parsed_boostrap_actions=parsed_args.bootstrap_actions)

        if parsed_args.emrfs is not None:
            emr_fs_ba_args = self._build_emr_fs_args(parsed_args.emrfs)
            emr_fs_ba_config = emrutils.build_bootstrap_action(
                path=emrutils.build_s3_link(relative_path=constants.CONFIG_HADOOP_PATH, region=parsed_globals.region),
                name=constants.EMR_FS_BA_NAME,
                args=emr_fs_ba_args,
            )
            self._update_cluster_dict(cluster=params, key="BootstrapActions", value=[emr_fs_ba_config])

        if parsed_args.steps is not None:
            steps_list = steputils.build_step_config_list(
                parsed_step_list=parsed_args.steps, region=parsed_globals.region
            )
            self._update_cluster_dict(cluster=params, key="Steps", value=steps_list)

        self._validate_required_applications(parsed_args)

        run_job_flow = emr.get_operation("RunJobFlow")
        run_job_flow_response = emrutils.call(
            self._session,
            run_job_flow,
            params,
            parsed_globals.region,
            parsed_globals.endpoint_url,
            parsed_globals.verify_ssl,
        )

        # Only element [1] of the call result is used for the output
        # (presumably the parsed response body -- confirm in emrutils.call).
        constructed_result = self._construct_result(run_job_flow_response[1])
        emrutils.display_response(self._session, run_job_flow, constructed_result, parsed_globals)

        return 0
    def _run_main(self, parsed_args, parsed_globals):
        """Build a RunJobFlow request from the parsed options and send it.

        The request parameters are assembled step by step from
        ``parsed_args`` (name, instance groups, AMI version, roles,
        termination/visibility flags, tags, EC2 attributes, debugging,
        applications, HBase restore, bootstrap actions, EMRFS and steps),
        the ``RunJobFlow`` operation is called, and the constructed result
        is displayed.

        Note that ``parsed_args`` is mutated in place when defaults are
        applied (service role, instance profile, auto-terminate and
        visibility flags).

        :param parsed_args: Parsed command-specific arguments.
        :param parsed_globals: Parsed global arguments; ``region``,
            ``endpoint_url`` and ``verify_ssl`` are read from it.
        :raises exceptions.InvalidAmiVersionError: if ``ami_version`` does
            not match the expected version pattern.
        :raises exceptions.LogUriError: if debugging is enabled without a
            log URI.
        :return: ``0`` once the response has been displayed.
        """
        emr = self._session.get_service('emr')
        params = {}
        params['Name'] = parsed_args.name

        instances_config = {}
        instances_config['InstanceGroups'] = \
            instancegroupsutils.validate_and_build_instance_groups(
                instance_groups=parsed_args.instance_groups,
                instance_type=parsed_args.instance_type,
                instance_count=parsed_args.instance_count)

        # Raw string so the regex escapes are not interpreted (and warned
        # about) as string escapes; the pattern itself is unchanged.
        # NOTE(review): the leading digit is optional, so a value that
        # starts with '.' is accepted -- confirm this is intended.
        is_valid_ami_version = re.match(r'\d?\..*', parsed_args.ami_version)
        if is_valid_ami_version is None:
            raise exceptions.InvalidAmiVersionError(
                ami_version=parsed_args.ami_version)
        params['AmiVersion'] = parsed_args.ami_version
        emrutils.apply_dict(
            params, 'AdditionalInfo', parsed_args.additional_info)
        emrutils.apply_dict(params, 'LogUri', parsed_args.log_uri)

        # --use-default-roles overrides both the service role and the
        # instance profile with the module-level default names.
        if parsed_args.use_default_roles is True:
            parsed_args.service_role = EMR_ROLE_NAME
            if parsed_args.ec2_attributes is None:
                parsed_args.ec2_attributes = {}
            parsed_args.ec2_attributes['InstanceProfile'] = EC2_ROLE_NAME

        emrutils.apply_dict(params, 'ServiceRole', parsed_args.service_role)

        # Neither flag given: default to --no-auto-terminate.
        if (
                parsed_args.no_auto_terminate is False and
                parsed_args.auto_terminate is False):
            parsed_args.no_auto_terminate = True

        instances_config['KeepJobFlowAliveWhenNoSteps'] = \
            emrutils.apply_boolean_options(
                parsed_args.no_auto_terminate,
                '--no-auto-terminate',
                parsed_args.auto_terminate,
                '--auto-terminate')

        instances_config['TerminationProtected'] = \
            emrutils.apply_boolean_options(
                parsed_args.termination_protected,
                '--termination-protected',
                parsed_args.no_termination_protected,
                '--no-termination-protected')

        # Neither flag given: default to --visible-to-all-users.
        if (
                parsed_args.visible_to_all_users is False and
                parsed_args.no_visible_to_all_users is False):
            parsed_args.visible_to_all_users = True

        params['VisibleToAllUsers'] = \
            emrutils.apply_boolean_options(
                parsed_args.visible_to_all_users,
                '--visible-to-all-users',
                parsed_args.no_visible_to_all_users,
                '--no-visible-to-all-users')

        params['Tags'] = emrutils.parse_tags(parsed_args.tags)
        params['Instances'] = instances_config

        if parsed_args.ec2_attributes is not None:
            self._build_ec2_attributes(
                cluster=params, parsed_attrs=parsed_args.ec2_attributes)

        debugging_enabled = emrutils.apply_boolean_options(
            parsed_args.enable_debugging,
            '--enable-debugging',
            parsed_args.no_enable_debugging,
            '--no-enable-debugging')

        # Debugging cannot be enabled without a log URI.
        if parsed_args.log_uri is None and debugging_enabled is True:
            raise exceptions.LogUriError

        if debugging_enabled is True:
            self._update_cluster_dict(
                cluster=params,
                key='Steps',
                value=[self._build_enable_debugging(parsed_globals)])

        if parsed_args.applications is not None:
            app_list, ba_list, step_list = applicationutils.build_applications(
                session=self._session,
                parsed_applications=parsed_args.applications,
                parsed_globals=parsed_globals,
                ami_version=params['AmiVersion'])
            self._update_cluster_dict(
                params, 'NewSupportedProducts', app_list)
            self._update_cluster_dict(
                params, 'BootstrapActions', ba_list)
            self._update_cluster_dict(
                params, 'Steps', step_list)

        hbase_restore_config = parsed_args.restore_from_hbase_backup
        if hbase_restore_config is not None:
            args = hbaseutils.build_hbase_restore_from_backup_args(
                dir=hbase_restore_config.get('Dir'),
                backup_version=hbase_restore_config.get('BackupVersion'))
            step_config = emrutils.build_step(
                jar=constants.HBASE_JAR_PATH,
                name=constants.HBASE_RESTORE_STEP_NAME,
                action_on_failure=constants.CANCEL_AND_WAIT,
                args=args)
            self._update_cluster_dict(
                params, 'Steps', [step_config])

        if parsed_args.bootstrap_actions is not None:
            self._build_bootstrap_actions(
                cluster=params,
                parsed_boostrap_actions=parsed_args.bootstrap_actions)

        if parsed_args.emrfs is not None:
            emr_fs_ba_args = self._build_emr_fs_args(parsed_args.emrfs)
            emr_fs_ba_config = \
                emrutils.build_bootstrap_action(
                    path=emrutils.build_s3_link(
                        relative_path=constants.CONFIG_HADOOP_PATH,
                        region=parsed_globals.region),
                    name=constants.EMR_FS_BA_NAME,
                    args=emr_fs_ba_args)
            self._update_cluster_dict(
                cluster=params, key='BootstrapActions',
                value=[emr_fs_ba_config])

        if parsed_args.steps is not None:
            steps_list = steputils.build_step_config_list(
                parsed_step_list=parsed_args.steps,
                region=parsed_globals.region)
            self._update_cluster_dict(
                cluster=params, key='Steps', value=steps_list)

        self._validate_required_applications(parsed_args)

        run_job_flow = emr.get_operation('RunJobFlow')
        run_job_flow_response = emrutils.call(
            self._session, run_job_flow, params,
            parsed_globals.region, parsed_globals.endpoint_url,
            parsed_globals.verify_ssl)

        # Only element [1] of the call result is used for the output
        # (presumably the parsed response body -- confirm in emrutils.call).
        constructed_result = self._construct_result(run_job_flow_response[1])
        emrutils.display_response(self._session, run_job_flow,
                                  constructed_result, parsed_globals)

        return 0