def load_target(config, target_name):
    """Build a temporary profile configuration for a named target.

    Looks through ``config`` for a section whose key has the form
    ``"target <name> [<alias> ...]"`` and whose whitespace-separated names
    include ``target_name``. The first matching section wins.

    Args:
        config: Mapping of section names to section mappings. It must
            contain a ``"profiles"`` entry mapping profile names to their
            settings. ``config`` is not modified.
        target_name: Name (or alias) of the target to look up.

    Returns:
        A tuple ``(profile_name, fn_key, temporary_config)`` where
        ``profile_name`` is the value of the target's ``"profile"`` setting,
        ``fn_key`` is the first name listed in the matching section key, and
        ``temporary_config`` is an ``OrderedDict`` holding, in order: the
        ``"default"`` profile (if one exists), ``"profile <source>"`` (if the
        profile names a ``source_profile``), and ``"profile <profile_name>"``
        containing the profile's settings overlaid with the target's own
        settings (minus ``"profile"``).

    Raises:
        exceptions.MissingParametersError: The target section has no
            ``"profile"`` setting.
        exceptions.ProfileNotFound: No target section lists ``target_name``,
            or the referenced profile / source profile does not exist.
    """
    prefix = "target "
    target_key = None
    fn_key = None
    for key in config:
        if not key.startswith(prefix):
            continue
        names = key[len(prefix):].split()
        if target_name in names:
            target_key = key
            fn_key = names[0]
            break

    if target_key is None:
        # Report the name that was asked for, not whichever key the loop
        # happened to end on (which is also unbound for an empty config).
        raise exceptions.ProfileNotFound(profile=target_name)

    target = config[target_key]
    if "profile" not in target:
        raise exceptions.MissingParametersError(object_name=target_name, missing="profile")

    # Work on a copy so that the caller's config keeps its "profile" entry and
    # the same target can be loaded more than once.
    overrides = dict(target)
    profile_name = overrides.pop("profile")

    profiles = config["profiles"]
    if profile_name not in profiles:
        raise exceptions.ProfileNotFound(profile=profile_name)

    temporary_config = OrderedDict()

    if "default" in profiles:
        temporary_config["default"] = profiles["default"]

    profile = profiles[profile_name]
    if "source_profile" in profile:
        source_profile_name = profile["source_profile"]
        if source_profile_name not in profiles:
            raise exceptions.ProfileNotFound(profile=source_profile_name)

        source_profile_key = "profile {}".format(source_profile_name)
        temporary_config[source_profile_key] = profiles[source_profile_name]

    # Target-level settings take precedence over the profile's own settings.
    profile_copy = profile.copy()
    profile_copy.update(overrides)
    temporary_config["profile {}".format(profile_name)] = profile_copy

    return profile_name, fn_key, temporary_config
# Example #2
# 0
def check_required_field(structure, name, value):
    """Ensure a required field carries a truthy value.

    Args:
        structure: Name of the object the field belongs to; reported as
            ``object_name`` in the raised error.
        name: Name of the field being checked; reported as ``missing``.
        value: The field's value. Any falsy value (``None``, ``""``, ``0``,
            empty containers, ...) is treated as missing.

    Raises:
        exceptions.MissingParametersError: If ``value`` is falsy.
    """
    if value:
        return
    raise exceptions.MissingParametersError(missing=name, object_name=structure)
# Example #3
# 0
def get_emr_cluster_settings(user_inputs: dict, config_file: str,
                             metadata_file: str, role: str, cluster_name: str):
    """
    Reads the EMR config and account metadata json files, fills in defaults
    where appropriate, then returns a standard dictionary structure
    that can be used further to run various operations on EMR clusters,
    like launch the cluster, add steps to it, etc.

    Args:
        user_inputs (dict): user entered inputs via Quickfabric UI.
        config_file (str): full path to the file containing cluster settings.
            Its top-level 'Cluster-Configurations' object is read; the entry
            named after ``role`` is used, falling back to 'default' when the
            role is not defined there.
        metadata_file (str): full path to the cluster metadata setting. Must
            define 'vpc_id', 'private_subnet_ids', 'emr_version' and 'keypair'.
        role (str): emr role name
        cluster_name (str): emr cluster name
    Returns:
        a dictionary containing cluster parameters. A value should be provided
        for each setting, whether it's a default value or empty, unless it is a
        required setting, in which case an exception will be raised if it is
        missing from the file
    Raises:
        KeyError: a required key is missing from the metadata file (also
            raised by the plain dictionary lookups if the config file lacks
            'Cluster-Configurations', the selected role entry or
            'InstanceGroup-Configurations').
        exceptions.MissingParametersError: any error occurred while assembling
            the parameter dictionary; the original error is attached as the
            cause.
    """

    # Read emr config file
    with open(config_file, 'r') as emr_conf_file:
        emr_config = json.load(emr_conf_file)['Cluster-Configurations']

    # Check if the passed role name exists in the emr-config file
    if role in emr_config:
        emr_role = role
    else:
        logger.info(
            f"Role name '{role}' not defined in the EMR config...using default config"
        )
        emr_role = 'default'

    role_config = emr_config[emr_role]
    default_config = emr_config['InstanceGroup-Configurations']

    # Read emr metadata file, which has account level settings like vpc, subnet
    with open(metadata_file) as account_metadata_file:
        metadata_config = json.load(account_metadata_file)

    required_keys = ['vpc_id', "private_subnet_ids", 'emr_version', 'keypair']

    # Collect every missing key so that a single error reports them all
    missing_keys = [key for key in required_keys if key not in metadata_config]
    if missing_keys:
        raise KeyError('Required settings not found in file: ', metadata_file,
                       missing_keys)

    # Load the common EMR software configurations, located under conf/common
    # relative to the parent of this module's directory
    src_dir = os.path.dirname(os.path.dirname(__file__))
    emr_sw_config_file = f'{src_dir}/conf/common/configurations.json'
    with open(emr_sw_config_file) as config_json:
        configuration = json.load(config_json)

    try:
        # EMR construct config parameters. For most settings the role-level
        # value wins, then the metadata value or a project constant.
        parameters = {
            "Name":
            cluster_name,
            "LogUri":
            role_config.get('EMRS3LogPath')
            or metadata_config.get('emr_s3_log_path'),
            "Configurations":
            configuration,
            "ReleaseLabel":
            role_config.get('emr_version')
            or metadata_config.get('emr_version'),
            "Instances":
            get_instance_configuration(user_inputs, role_config,
                                       default_config, metadata_config,
                                       cluster_name),
            "Applications":
            get_app_list(role_config.get('app_list')),
            "JobFlowRole":
            role_config.get('EC2Role') or constants.EC2_ROLE_NAME,
            "ServiceRole":
            role_config.get('EMRRole') or constants.EMR_ROLE_NAME,
            # NOTE(review): this reads 'EC2Role' (same key as JobFlowRole)
            # for the autoscaling role -- possibly a copy-paste slip; confirm
            # the intended config key before changing it.
            "AutoScalingRole":
            role_config.get('EC2Role') or constants.EMR_AUTOSCALING_ROLE_NAME,
            "BootstrapActions":
            get_bootstrap_actions(user_inputs, role_config, metadata_config),
            "Steps":
            get_emr_steps(role_config, metadata_config),
            "Tags":
            tag_list(role_config, metadata_config, cluster_name),
            "VisibleToAllUsers":
            True,
            "CustomAmiId":
            user_inputs.get('custom_ami_id')
            or metadata_config.get('custom_ami') or get_amazon_linux_ami()
        }
    except Exception as exception:
        # The message needs a %s placeholder: passing the exception as a bare
        # extra argument makes the logging module fail to format the record.
        logger.error("Error building EMR cluster creation parameters: %s",
                     exception)
        raise exceptions.MissingParametersError(
            f"Error building EMR cluster creation parameters {str(exception)}"
        ) from exception

    return parameters