def load_target(config, target_name):
    """Build a temporary profile configuration for a named target.

    Sections of ``config`` whose key starts with ``"target "`` carry one or
    more whitespace-separated target names after that prefix. The first such
    section listing ``target_name`` is selected; its settings (minus the
    ``"profile"`` entry) are layered on top of a copy of the profile it
    refers to.

    Args:
        config: mapping containing ``"target ..."`` sections and a
            ``"profiles"`` mapping of profile name -> settings. It is not
            modified by this function.
        target_name: name of the target to look up.

    Returns:
        A tuple ``(profile_name, fn_key, temporary_config)`` where
        ``fn_key`` is the first name listed in the matching target section
        and ``temporary_config`` is an ``OrderedDict`` holding, in order:
        the ``"default"`` profile (if any), the source profile (if the
        profile declares ``"source_profile"``) under ``"profile <name>"``,
        and the merged profile under ``"profile <profile_name>"``.

    Raises:
        exceptions.ProfileNotFound: if no target section lists
            ``target_name``, or the referenced profile / source profile is
            not present in ``config["profiles"]``.
        exceptions.MissingParametersError: if the target section has no
            ``"profile"`` entry.
    """
    target_key = None
    fn_key = None
    for key in config:
        if not key.startswith("target "):
            continue
        targets = key[7:].split()
        if target_name in targets:
            target_key = key
            fn_key = targets[0]
            break

    if target_key is None:
        # Report the name that was actually requested, not whichever config
        # key the loop happened to visit last (unbound for an empty config).
        raise exceptions.ProfileNotFound(profile=target_name)

    # Work on a copy so popping "profile" does not alter the caller's config;
    # otherwise a second lookup of the same target would fail.
    target = dict(config[target_key])
    if "profile" not in target:
        raise exceptions.MissingParametersError(object_name=target_name, missing="profile")
    profile_name = target.pop("profile")

    profiles = config["profiles"]
    if profile_name not in profiles:
        raise exceptions.ProfileNotFound(profile=profile_name)

    temporary_config = OrderedDict()
    if "default" in profiles:
        temporary_config["default"] = profiles["default"]

    profile = profiles[profile_name]
    if "source_profile" in profile:
        source_profile_name = profile["source_profile"]
        if source_profile_name not in profiles:
            raise exceptions.ProfileNotFound(profile=source_profile_name)
        source_profile_key = "profile {}".format(source_profile_name)
        temporary_config[source_profile_key] = profiles[source_profile_name]

    # Target-level settings override the profile's own settings.
    profile_copy = profile.copy()
    profile_copy.update(target)
    profile_key = "profile {}".format(profile_name)
    temporary_config[profile_key] = profile_copy

    return profile_name, fn_key, temporary_config
def check_required_field(structure, name, value):
    """Ensure a required field carries a usable value.

    Args:
        structure: name of the object the field belongs to; reported as
            ``object_name`` in the raised error.
        name: name of the field; reported as ``missing`` in the raised error.
        value: the field's value. Any falsy value (``None``, empty string,
            ``0``, empty container, ...) is treated as missing.

    Raises:
        exceptions.MissingParametersError: if ``value`` is falsy.
    """
    if value:
        return
    raise exceptions.MissingParametersError(object_name=structure, missing=name)
def get_emr_cluster_settings(user_inputs: dict,
                             config_file: str,
                             metadata_file: str,
                             role: str,
                             cluster_name: str):
    """
    Reads a json file passed in as a parameter, fills in defaults where
    appropriate, then returns a standard dictionary structure that can be
    used further to run various operations on EMR clusters, like launch the
    cluster, add steps to it, etc.

    Args:
        user_inputs (dict): user entered inputs via Quickfabric UI.
        config_file (str): full path to the file containing cluster settings
        metadata_file (str): full path to the cluster metadata setting
        role (str): emr role name; when it is not a key of the
            'Cluster-Configurations' section, the 'default' entry is used
        cluster_name (str): emr cluster name

    Returns:
        a dictionary containing cluster parameters. A value should be
        provided for each setting, whether it's a default value or empty,
        unless it is a required setting, in which case an exception will be
        raised if it is missing from the file

    Raises:
        KeyError: if any of 'vpc_id', 'private_subnet_ids', 'emr_version' or
            'keypair' is absent from the metadata file.
        exceptions.MissingParametersError: if building the parameter
            dictionary fails for any reason; the original error is chained
            as the cause.
    """
    # Read emr config file
    with open(config_file, 'r') as emr_conf_file:
        emr_config = json.load(emr_conf_file)['Cluster-Configurations']

    # Check if the passed role name exists in the emr-config file
    if role in emr_config:
        emr_role = role
    else:
        logger.info(
            f"Role name '{role}' not defined in the EMR config...using default config"
        )
        emr_role = 'default'

    role_config = emr_config[emr_role]
    default_config = emr_config['InstanceGroup-Configurations']

    # Read emr metadata file, which has account level settings like vpc, subnet
    with open(metadata_file) as account_metadata_file:
        metadata_config = json.load(account_metadata_file)

    required_keys = ['vpc_id', "private_subnet_ids", 'emr_version', 'keypair']
    missing_keys = [key for key in required_keys if key not in metadata_config]
    if missing_keys:
        raise KeyError('Required settings not found in file: ', metadata_file, missing_keys)

    # BOOTSTRAP ACTIONS are going to be a list, similar to steps.
    # Required settings/configurations should be inserted at the start of
    # that list (the original comment was cut off here -- TODO confirm).
    src_dir = os.path.dirname(os.path.dirname(__file__))
    emr_sw_config_file = f'{src_dir}/conf/common/configurations.json'

    with open(emr_sw_config_file) as config_json:
        configuration = json.load(config_json)

    try:
        # EMR construct config parameters; role-level settings take
        # precedence over account metadata / project constants.
        parameters = {
            "Name": cluster_name,
            "LogUri": role_config.get('EMRS3LogPath') or metadata_config.get('emr_s3_log_path'),
            "Configurations": configuration,
            "ReleaseLabel": role_config.get('emr_version') or metadata_config.get('emr_version'),
            "Instances": get_instance_configuration(user_inputs,
                                                    role_config,
                                                    default_config,
                                                    metadata_config,
                                                    cluster_name),
            "Applications": get_app_list(role_config.get('app_list')),
            "JobFlowRole": role_config.get('EC2Role') or constants.EC2_ROLE_NAME,
            "ServiceRole": role_config.get('EMRRole') or constants.EMR_ROLE_NAME,
            # NOTE(review): this reads 'EC2Role', the same key as JobFlowRole;
            # confirm whether a dedicated autoscaling-role key was intended.
            "AutoScalingRole": role_config.get('EC2Role') or constants.EMR_AUTOSCALING_ROLE_NAME,
            "BootstrapActions": get_bootstrap_actions(user_inputs, role_config, metadata_config),
            "Steps": get_emr_steps(role_config, metadata_config),
            "Tags": tag_list(role_config, metadata_config, cluster_name),
            "VisibleToAllUsers": True,
            "CustomAmiId": user_inputs.get('custom_ami_id')
                           or metadata_config.get('custom_ami')
                           or get_amazon_linux_ami()
        }
    except Exception as exception:
        # logger.exception records the message plus the traceback; the
        # previous call passed the exception as a %-format argument without
        # a placeholder, which makes the logging module report a formatting
        # error instead of the intended message.
        logger.exception("Error building EMR cluster creation parameters")
        # NOTE(review): MissingParametersError is called with keyword
        # arguments (object_name=, missing=) elsewhere in this file; confirm
        # it also accepts a single positional message.
        raise exceptions.MissingParametersError(
            f"Error building EMR cluster creation parameters {str(exception)}") from exception

    return parameters