def lambda_handler(event, context):
    """
    Return the steps of the EMR cluster identified by ``cluster_id``.

    :param event: Invocation payload; ``api_request_id`` and ``cluster_id`` are read from it.
    :param context: Lambda context object; ``aws_request_id`` is read from it.
    :return: Response dict carrying the step list (``steps``) and its length (``steps_count``).
    :raises exceptions.EMRTestRunException: When ``get_emr_steps`` raises.
    """
    request_id = event.get('api_request_id')
    lambda_request_id = context.aws_request_id
    logger = setup_logging(request_id, lambda_request_id)
    cluster_id = event.get('cluster_id')

    logger.info("Getting steps by cluster_id")
    try:
        steps = get_emr_steps(cluster_id)
    except Exception as e:
        logger.error(e)
        failure = {
            "statusCode": 500,
            "errorType": "NoClusterFound",
            "errorMessage": "Unable to fetch cluster step details."
        }
        raise exceptions.EMRTestRunException(failure)

    # Built only on the success path; key order matches the response callers already receive.
    return {
        'statusCode': 201,
        "api_request_id": request_id,
        "lambda_request_id": lambda_request_id,
        'cluster_id': cluster_id,
        'steps_count': len(steps),
        'steps': steps
    }
def lambda_handler(event, context):
    """
    Return the bootstrap action count and names of an EMR cluster.

    :param event: Invocation payload; ``api_request_id`` and ``cluster_id`` are read from it.
    :param context: Lambda context object; ``aws_request_id`` is read from it.
    :return: Response dict with ``bootstrap_count`` and ``bootstrap_names`` taken from the
             ``Count`` and ``Names`` entries returned by ``get_bootstrap_actions``.
    :raises exceptions.EMRTestRunException: When ``get_bootstrap_actions`` raises.
    """
    request_id = event.get('api_request_id')
    lambda_request_id = context.aws_request_id
    logger = setup_logging(request_id, lambda_request_id)
    cluster_id = event.get('cluster_id')

    logger.info("Getting bootstraps by cluster_id")
    try:
        bootstraps = get_bootstrap_actions(cluster_id)
    except Exception as e:
        logger.error(e)
        failure = {
            "statusCode": 500,
            "errorType": "NoClusterFound",
            "errorMessage": "Unable to fetch cluster bootstrap information."
        }
        raise exceptions.EMRTestRunException(failure)

    # NOTE(review): 'count' is always returned as 0; only bootstrap_count reflects the result.
    return {
        'statusCode': 201,
        "api_request_id": request_id,
        "lambda_request_id": lambda_request_id,
        'cluster_id': cluster_id,
        'count': 0,
        'bootstrap_count': bootstraps.get('Count'),
        'bootstrap_names': bootstraps.get('Names')
    }
def lambda_handler(event, context):
    """
    Validate a list of EMR steps and return the validation result of each.

    :param event: Invocation payload; ``api_request_id``, ``cluster_name``, ``cluster_id``
                  and ``step_ids`` are read from it.
    :param context: Lambda context object.
    :return: Response dict whose ``steps`` entry holds one ``emr_validate_step`` result per
             step id, in input order (empty when no step ids were passed).
    :raises exceptions.EMRClusterValidateStepException: When validating any step fails.
    """
    api_request_id = event.get('api_request_id', 'null')
    logger = setup_logging(api_request_id, context.aws_request_id)
    cluster_name = event.get('cluster_name')
    cluster_id = event.get('cluster_id')
    # A missing or null step_ids means "nothing to validate" instead of a TypeError in the loop
    step_ids = event.get('step_ids') or []

    # Define error json response for APIs
    error_response = construct_error_response(context, api_request_id)

    emr_step_list = []
    for step_id in step_ids:
        try:
            emr_step_list.append(emr_validate_step(cluster_id, step_id))
        except Exception as error:
            logger.error(error)
            logger.error(f"Unable to validate EMR cluster step {step_id}")
            error_response.update(
                message='Unable to validate EMR cluster step')
            # Chain the cause so the original traceback stays visible in the logs
            raise exceptions.EMRClusterValidateStepException(
                error_response) from error

    return {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_name": cluster_name,
        "cluster_id": cluster_id,
        "steps": emr_step_list
    }
def lambda_handler(event, context):
    """
    Initiate termination of an EMR cluster.

    :param event: Invocation payload; ``api_request_id``, ``cluster_name``, ``cluster_id``
                  and ``force`` (default ``False``) are read from it.
    :param context: Lambda context object.
    :return: Response dict with status ``TERMINATION_INITIATED``.
    :raises exceptions.EMRClusterTerminateException: When ``terminate_emr_cluster`` raises.
    """
    # API request id is present when the function is triggered through the API gateway
    request_id = event.get('api_request_id', 'null')
    logger = setup_logging(request_id, context.aws_request_id)

    name = event.get('cluster_name', None)
    cid = event.get('cluster_id', None)
    force_flag = event.get('force', False)

    try:
        terminate_emr_cluster(name, cid, force_flag)
    except Exception as error:
        # The error payload is only built when termination could not be initiated
        failure = construct_error_response(context, request_id)
        failure.update(status='TERMINATION_INITIATION_FAILED',
                       message=str(error))
        logger.error(
            f"Unable to terminate EMR cluster:{name} ...exiting \n error: {error}"
        )
        raise exceptions.EMRClusterTerminateException(failure)

    return {
        "api_request_id": request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_name": name,
        "cluster_id": cid,
        "status": "TERMINATION_INITIATED",
        "message": "EMR termination initiated successfully"
    }
def lambda_handler(event, context):
    """
    Remove the auto-scaling policy from an instance group of an EMR cluster.

    :param event: Invocation payload; ``api_request_id``, ``cluster_id`` and
                  ``instance_group`` (defaults to ``"TASK"``) are read from it.
    :param context: Lambda context object.
    :return: Response dict with status ``SUCCEEDED``.
    :raises exceptions.EMRClusterAutoScalingException: When the instance group lookup or the
             policy removal fails.
    """
    api_request_id = event.get('api_request_id', 'null')
    logger = setup_logging(api_request_id, context.aws_request_id)

    cluster_id = event.get('cluster_id')
    instance_group = event.get('instance_group') or "TASK"

    # Define error json response for APIs
    error_response = {
        "statusCode": 500,
        "lambda_function_name": context.function_name,
        "log_group_name": context.log_group_name,
        "log_stream_name": context.log_stream_name,
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id
    }

    try:
        emr_instance_group = get_instance_group_by_name(
            cluster_id, instance_group)
    except Exception as error:
        missing_group = (f"EMR ClusterId:{cluster_id} does not contain a "
                         f"{instance_group} instance group")
        # The error is passed through a %s placeholder; an extra positional argument
        # without a placeholder makes the logging module fail to format the record.
        logger.error("%s: %s", missing_group, error)
        error_response.update(
            Message=f"{missing_group} , error: {str(error)}")
        raise exceptions.EMRClusterAutoScalingException(
            error_response) from error

    try:
        remove_emr_auto_scaling(cluster_id, emr_instance_group)
    except Exception as error:
        logger.error(
            "Exception occurred while attempting to remove auto-scaling policy ...exiting: %s",
            error)
        error_response.update(
            Message='Removal of Auto-Scaling policies failed')
        raise exceptions.EMRClusterAutoScalingException(
            error_response) from error

    return {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_id": cluster_id,
        'status': "SUCCEEDED"
    }
def lambda_handler(event, context):
    """
    Report the auto-scaling state of an instance group of an EMR cluster.

    :param event: Invocation payload; ``api_request_id``, ``cluster_id`` and
                  ``instance_group`` (defaults to ``"TASK"``) are read from it.
    :param context: Lambda context object.
    :return: Response dict with ``state`` (``DETACHED`` when the group has no policy status),
             ``min``/``max`` capacity (0 when absent) and the number of ``rules``.
    :raises exceptions.EMRTestRunException: When the instance group lookup fails.
    """
    api_request_id = event.get('api_request_id', 'null')
    logger = setup_logging(api_request_id, context.aws_request_id)

    cluster_id = event.get('cluster_id')
    instance_group = event.get('instance_group') or "TASK"

    # Define error json response for APIs
    error_response = {
        "statusCode": 500,
        "lambda_function_name": context.function_name,
        "log_group_name": context.log_group_name,
        "log_stream_name": context.log_stream_name,
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id
    }

    try:
        emr_instance_group = get_instance_group_by_name(
            cluster_id, instance_group)
    except Exception as error:
        missing_group = (f"EMR ClusterId:{cluster_id} does not contain a "
                         f"{instance_group} instance group")
        # The error is passed through a %s placeholder; an extra positional argument
        # without a placeholder makes the logging module fail to format the record.
        logger.error("%s: %s", missing_group, error)
        error_response.update(
            Message=f"{missing_group} , error: {str(error)}")
        raise exceptions.EMRTestRunException(error_response) from error

    asg_policy = emr_instance_group.get('AutoScalingPolicy', {})
    constraints = asg_policy.get('Constraints', {})

    return {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_id": cluster_id,
        "instance_group": instance_group,
        "state": asg_policy.get('Status', {}).get('State', 'DETACHED'),
        "min": constraints.get('MinCapacity', 0),
        "max": constraints.get('MaxCapacity', 0),
        "rules": len(asg_policy.get('Rules', []))
    }
def lambda_handler(event, context):
    """
    Add custom steps to an EMR cluster one at a time and validate each after submission.

    :param event: Invocation payload; ``api_request_id``, ``cluster_name``, ``cluster_id``
                  and ``steps`` are read from it.
    :param context: Lambda context object.
    :return: Response dict whose ``steps`` entry holds the ``emr_validate_step`` result of
             every submitted step, in input order (empty when no steps were passed).
    :raises exceptions.EMRClusterAddStepException: When adding or validating a step fails.
    """
    api_request_id = event.get('api_request_id', 'null')
    logger = setup_logging(api_request_id, context.aws_request_id)

    cluster_name = event.get('cluster_name')
    cluster_id = event.get('cluster_id')
    # A missing or null steps entry means "nothing to add" instead of a TypeError in the loop
    step_list = event.get('steps') or []

    logger.info("Executing EMR add-step")

    # Add custom step
    added_step_list = []
    for step in step_list:
        try:
            step_id_response = emr_add_step(cluster_id, [step])
            added_step_list.append(
                emr_validate_step(cluster_id, step_id_response))
        except Exception as error:
            # Log the underlying failure too; the generic message alone hides the cause
            logger.error(error)
            logger.error(f"Unable to add custom step {step}")
            # Define error json response for APIs
            error_response = construct_error_response(context, api_request_id)
            error_response.update(message='Unable to add custom step')
            raise exceptions.EMRClusterAddStepException(
                error_response) from error

    logger.info("EMR custom add-step response")

    return {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_name": cluster_name,
        "cluster_id": cluster_id,
        "steps": added_step_list
    }
def lambda_handler(event, context):
    """
    Pre-check whether an EMR cluster with the given name already exists.

    :param event: Invocation payload; ``api_request_id``, ``cluster_name`` and ``type``
                  (lower-cased, default ``unknown``) are read from it.
    :param context: Lambda context object.
    :return: Response dict with status ``ClusterAlreadyExists`` (plus ``cluster_id``) or
             ``ClusterNotPresent``.
    :raises ClusterPreCheckException: When no cluster name was passed.
    """
    request_id = event.get('api_request_id', 'null')
    logger = setup_logging(request_id, context.aws_request_id)

    name = event.get('cluster_name', None)
    kind = event.get('type', 'unknown').lower()

    if name is None or name == '':
        logger.error("Cluster name argument not passed ...exiting")
        # Error payload for API callers
        failure = construct_error_response(context, request_id)
        failure.update(status='ClusterPreCheckFailed',
                       message='Cluster name argument not passed')
        raise ClusterPreCheckException(failure)

    result = {
        "api_request_id": request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_name": name,
        "type": kind
    }

    try:
        cluster_id = get_cluster_id(name)
        logger.info(f"Cluster with id {cluster_id} already exists")
        result.update(cluster_id=cluster_id,
                      status='ClusterAlreadyExists',
                      message='Cluster already exists')
    except MissingEMRCluster as error:
        # A missing cluster is a valid pre-check outcome, reported in the normal response
        logger.error(error)
        result.update(status='ClusterNotPresent',
                      message='Cluster does not exist')

    return result
def lambda_handler(event, context):
    """
    Submit a test step (spark, hive or custom jar) to an EMR cluster and validate it.

    :param event: Invocation payload; ``api_request_id``, ``cluster_name``, ``cluster_id``,
                  ``app`` (lower-cased, default ``hive``) and ``cluster_type`` (lower-cased,
                  default ``unknown``) are read from it.
    :param context: Lambda context object.
    :return: Response dict with the ``step_id``, ``status`` and ``message`` entries of the
             ``emr_validate_step`` result.
    :raises exceptions.EMRTestRunException: When the app type is unsupported or the step
             cannot be added.
    """
    # Fetch API requestId if triggered via API g/w
    api_request_id = event.get('api_request_id', 'null')
    logger = setup_logging(api_request_id, context.aws_request_id)

    cluster_name = event.get('cluster_name', None)
    cluster_id = event.get('cluster_id', None)
    app = event.get('app', 'hive').lower()
    cluster_type = event.get('cluster_type', 'unknown').lower()

    # Define error json response for APIs
    error_response = {
        "statusCode": 500,
        "cluster_type": cluster_type,
        "lambda_function_name": context.function_name,
        "log_group_name": context.log_group_name,
        "log_stream_name": context.log_stream_name,
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id
    }

    if app not in ['spark', 'hive', 'customjar']:
        logger.error(f"Unsupported app type {app}")
        error_response.update(Message=f'Unsupported app type {app}')
        raise exceptions.EMRTestRunException(error_response)

    step_config = {'ActionOnFailure': constants.DEFAULT_FAILURE_ACTION}

    if app == 'spark':
        step_config['Name'] = constants.DEFAULT_TEST_SPARK_STEP_NAME
        jar = constants.SCRIPT_RUNNER_JAR
        args_list = [
            constants.SPARK_SUBMIT_COMMAND,
            *constants.DEFAULT_TEST_SPARK_ARGS
        ]
    elif app == 'hive':
        step_config['Name'] = constants.DEFAULT_TEST_HIVE_STEP_NAME
        jar = constants.COMMAND_RUNNER
        args_list = [
            constants.HIVE_SCRIPT_COMMAND, constants.RUN_HIVE_SCRIPT,
            constants.ARGS, *constants.DEFAULT_TEST_HIVE_ARGS
        ]
    else:  # customjar
        step_config['Name'] = constants.DEFAULT_TEST_CUSTOM_JAR_STEP_NAME
        # Step might fail same output dir is used, generate random output directory
        dir_suffix = ''.join(
            choice(string.ascii_lowercase + string.digits) for _ in range(5))
        output_dir = constants.CUSTOM_JAT_TEST_ARGS_OUTPUT_PREFIX + dir_suffix
        jar = constants.CUSTOM_TEST_JAR
        # Copy the constant: appending to it directly would grow the shared module-level
        # list on every warm invocation and pass stale output dirs to later steps.
        args_list = [*constants.CUSTOM_JAR_TEST_ARGS, output_dir]

    step_config['HadoopJarStep'] = {'Jar': jar, 'Args': args_list}

    try:
        step_id = emr_add_step(cluster_id, [step_config])
    except Exception as error:
        # Values go through %s placeholders; an extra positional argument without a
        # placeholder makes the logging module fail to format the record.
        logger.error("Unable to test app %s on emr cluster %s: %s", app,
                     cluster_id, error)
        error_response.update(Message='EMR test app failed')
        raise exceptions.EMRTestRunException(error_response) from error

    validation_status = emr_validate_step(cluster_id, step_id)

    return {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_name": cluster_name,
        "app": app,
        "cluster_id": cluster_id,
        "step_id": validation_status.get('step_id'),
        "status": validation_status.get('status'),
        "message": validation_status.get('message')
    }
def lambda_handler(event, context):
    """
    Add a named, pre-configured step to an EMR cluster.

    The step definition is looked up by name in the ``steps`` lists of the role section of
    the EMR config file and of the account metadata file.

    :param event: Invocation payload; ``api_request_id``, ``account``, ``cluster_name``,
                  ``cluster_id``, ``step`` and ``cluster_type`` (lower-cased, default
                  ``unknown``) are read from it.
    :param context: Lambda context object.
    :return: Response dict with the submitted ``step_id`` and status ``PENDING``.
    :raises EMRClusterAddStepException: When the role cannot be derived from the cluster
             name, a config file is missing, the step name is unknown or adding the step fails.
    """
    # Fetch API requestId if triggered via API g/w
    api_request_id = event.get('api_request_id', 'null')
    account = event.get('account')
    logger = setup_logging(api_request_id, context.aws_request_id)

    cluster_name = event.get('cluster_name', None)
    cluster_id = event.get('cluster_id', None)
    step_name = event.get('step', None)
    cluster_type = event.get('cluster_type', 'unknown').lower()

    # Define error json response for APIs
    error_response = construct_error_response(context, api_request_id)

    # cluster name nonkerb-testing-prd1, role name would be testing
    name_parts = (cluster_name or '').split('-')
    if len(name_parts) < 2:
        # A missing or dash-less name used to crash with AttributeError/IndexError
        logger.error(
            f"Unable to derive role from cluster name {cluster_name}")
        error_response.update(
            Message='Unable to derive role from cluster name')
        raise EMRClusterAddStepException(error_response)
    role = name_parts[1]

    src_dir = os.path.dirname(os.path.dirname(__file__))
    metadata_file = f'{src_dir}/conf/{account}/emr-metadata.json'
    emr_config_file = f'{src_dir}/conf/{account}/emr-{cluster_type}-config.json'

    if not all(
        [check_file_exist(metadata_file),
         check_file_exist(emr_config_file)]):
        error_response.update(
            Message='Metadata or config one of the file not found')
        # Report both candidate paths through the request-scoped logger rather than print
        for missing_path in (metadata_file, emr_config_file):
            logger.error(
                FileReadError(
                    path=missing_path,
                    message='Metadata or config one of the file not found'))
        raise EMRClusterAddStepException(error_response)

    # Read emr config file
    with open(emr_config_file, 'r') as emr_conf_file:
        emr_config = json.load(emr_conf_file)['Cluster-Configurations']

    # Check if the passed role name exist the emr-config file
    if role in emr_config:
        emr_role = role
    else:
        logger.warning(
            f"Role name '{role}' not defined in the EMR config...using default config"
        )
        emr_role = 'default'

    role_config = emr_config[emr_role]

    # Read emr metadata file, which has account level settings like vpc, subnet
    with open(metadata_file) as account_metadata_file:
        metadata_config = json.load(account_metadata_file)

    # Role specific steps first, then the steps from the metadata file
    emr_steps = [
        *role_config.get('steps', []), *metadata_config.get('steps', [])
    ]

    # First step whose Name matches the requested step name
    matching_step = next(
        (step for step in emr_steps if step_name == step.get('Name')), None)
    if matching_step is None:
        logger.error(
            f"Unable to find step with name {step_name} in the metadata file or role config file"
        )
        error_response.update(Message='No step found')
        raise EMRClusterAddStepException(error_response)

    # Only the selected step is submitted; the metadata side is passed as an empty step list
    emr_step_config_list = get_emr_steps(
        role_config={'steps': [matching_step]},
        metadata_config={'steps': []})

    try:
        step_id = emr_add_step(cluster_id, emr_step_config_list)
    except Exception as error:
        logger.error(error)
        logger.error(
            f"Unable to add step with name {step_name} to emr cluster {cluster_id}"
        )
        error_response.update(Message='EMR add step failed')
        raise EMRClusterAddStepException(error_response) from error

    return {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_name": cluster_name,
        "cluster_type": cluster_type,
        "cluster_id": cluster_id,
        "step_id": step_id,
        "status": 'PENDING',
        "message": "Step has been submitted"
    }
def lambda_handler(event, context):
    """
    Attach an auto-scaling policy to an instance group of an EMR cluster.

    Min/max capacity is resolved in this order, later sources overriding earlier ones:
    the named auto-scaling profile, explicit ``min``/``max`` (both must be > 0), and
    ``instance_count`` (> 0, used for both bounds). If a bound is still the ``-1`` sentinel
    afterwards, the current size of the instance group is used.

    :param event: Invocation payload; ``api_request_id``, ``cluster_id``, ``instance_group``
                  (default ``"TASK"``), ``autoscaling_profile`` (default ``Default``), ``min``,
                  ``max`` and ``instance_count`` are read from it.
    :param context: Lambda context object.
    :return: Response dict whose ``status`` is the JSON-encoded ``AutoScalingPolicy.Status``
             of the ``add_emr_auto_scaling`` response.
    :raises exceptions.EMRClusterAutoScalingException: When the config cannot be read, the
             instance group or policy template cannot be found, or attaching the policy fails.
    """
    api_request_id = event.get('api_request_id', 'null')
    logger = setup_logging(api_request_id, context.aws_request_id)

    cluster_id = event.get('cluster_id')
    instance_group = event.get('instance_group') or "TASK"
    autoscaling_profile = event.get('autoscaling_profile') or 'Default'
    min_count = event.get('min') or 0
    max_count = event.get('max') or 0
    instance_count = event.get('instance_count') or 0

    # Define error json response for APIs
    error_response = {
        "statusCode": 500,
        "lambda_function_name": context.function_name,
        "log_group_name": context.log_group_name,
        "log_stream_name": context.log_stream_name,
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id
    }

    src_dir = os.path.dirname(os.path.dirname(__file__))
    asg_config = f'{src_dir}/conf/autoscaling-config.json'

    # Parse autoscaling config json file
    try:
        with open(asg_config) as json_file:
            emr = json.load(json_file)
        emr_as_template = emr['Autoscaling_Policy_Template']
        emr_as_profiles = emr['Autoscaling_Profiles']
    except Exception as error:
        # The error goes through a %s placeholder; an extra positional argument without
        # a placeholder makes the logging module fail to format the record.
        logger.error("Unable to parse emr config json. %s", error)
        logger.error(
            exceptions.FileReadError(
                path=asg_config,
                message='Metadata or config one of the file not found'))
        error_response.update(
            Message='Creation of Auto-Scaling policies failed')
        raise exceptions.EMRClusterAutoScalingException(
            error_response) from error

    if autoscaling_profile not in emr_as_profiles:
        autoscaling_profile = 'Default'

    # The capitalized key is preferred (as before); fall back to the exact key and finally to
    # "no profile" so a key that only exists in its original case no longer crashes on None.
    as_profile = (emr_as_profiles.get(autoscaling_profile.capitalize())
                  or emr_as_profiles.get(autoscaling_profile) or {})
    # -1 marks "not decided yet" and triggers the cluster-size fallback further down
    as_minsize = as_profile.get('min', -1)
    as_maxsize = as_profile.get('max', -1)

    # Explicit min and max values override the profile when both are positive
    if (int(min_count) > 0) and (int(max_count) > 0):
        as_minsize = min_count
        as_maxsize = max_count

    # A passed-in number of task nodes overrides both bounds
    if int(instance_count) > 0:
        as_minsize, as_maxsize = instance_count, instance_count

    # Attempt to locate the TASK group with the specified name (default name: "TASK")
    try:
        emr_instance_group = get_instance_group_by_name(
            cluster_id, instance_group)
    except Exception as error:
        missing_group = (f"EMR ClusterId:{cluster_id} does not contain a "
                         f"{instance_group} instance group")
        logger.error("%s: %s", missing_group, error)
        error_response.update(
            Message=f"{missing_group} , error: {str(error)}")
        raise exceptions.EMRClusterAutoScalingException(
            error_response) from error

    # If we didn't identify any specific user requested auto-scaling parameters, use the current cluster configuration as a guide
    # For minimum use the current task group size, or maximum use the max(current task size, 1)
    if as_minsize == -1 or as_maxsize == -1:
        if 'RequestedInstanceCount' in emr_instance_group:
            as_minsize = emr_instance_group['RequestedInstanceCount']
        elif 'RunningInstanceCount' in emr_instance_group:
            as_minsize = emr_instance_group['RunningInstanceCount']
        else:
            logger.error(
                "Exception occurred while attempting to attach auto-scaling policy"
            )
            error_response.update(
                Message='Creation of Auto-Scaling policies failed')
            raise exceptions.EMRClusterAutoScalingException(error_response)
        # Previously the maximum stayed at the -1 sentinel here and was sent as MaxCapacity
        if as_maxsize == -1:
            as_maxsize = max(int(as_minsize), 1)

    # Prepare the auto-scaling policy based on the configuration template
    # .get() keeps the check below reachable when the key is absent (was a KeyError)
    autoscaling_template = emr_as_template.get('AutoScalingPolicy')
    if not autoscaling_template:
        logger.error(
            "Fatal error: Could not locate the Autoscaling policy template in the configuration"
        )
        error_response.update(
            Message=
            'Fatal error: Could not locate the Autoscaling policy template in the configuration'
        )
        raise exceptions.EMRClusterAutoScalingException(error_response)

    # Modify the policy
    autoscaling_template['Constraints']['MinCapacity'] = int(as_minsize)
    autoscaling_template['Constraints']['MaxCapacity'] = int(as_maxsize)

    try:
        response = add_emr_auto_scaling(cluster_id, emr_instance_group,
                                        autoscaling_template)
    except Exception as error:
        logger.error(
            "Exception occurred while attempting to attach auto-scaling policy ...exiting: %s",
            error)
        error_response.update(
            Message='Auto-Scaling policies attachment failed')
        raise exceptions.EMRClusterAutoScalingException(
            error_response) from error

    return {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_id": cluster_id,
        'status': json.dumps(response['AutoScalingPolicy']['Status'])
    }
def lambda_handler(event, context):
    """
    Create an EMR cluster unless one with the same name already exists.

    The cluster name is ``name`` from the event, or ``<sub_type>-<segment>`` when no name is
    passed. The resolved name is written back into ``event`` as ``cluster_name`` before the
    event is handed to ``create_emr_cluster``.

    :param event: Invocation payload; ``api_request_id``, ``account``, ``sub_type``,
                  ``segment`` (default ``testing``) and ``name`` are read from it.
    :param context: Lambda context object.
    :return: Response dict with the new ``cluster_id``, its ``status`` and ``dns_name``
             (``None`` when the hosted zone cannot be read from the metadata file).
    :raises EMRClusterCreationException: When the cluster already exists, creation fails or
             the status of the new cluster cannot be fetched.
    """
    # Fetch API requestId if triggered via API g/w
    api_request_id = event.get('api_request_id')
    account = event.get('account')
    cluster_type = event.get('sub_type')
    role = event.get('segment', 'testing')

    # Form cluster name
    cluster_name = event.get('name') or cluster_type + '-' + role
    event.update({'cluster_name': cluster_name})

    logger = setup_logging(api_request_id, context.aws_request_id)

    # Define error json response for APIs
    error_response = construct_error_response(context, api_request_id)

    # Check if cluster with same name exist?
    cluster_id = ""
    try:
        cluster_id = get_cluster_id(cluster_name)
    except Exception as error:
        # A failed lookup is not fatal here; creation proceeds
        logger.error(error)
        error_response.update(message=str(error))

    # isinstance guard: a non-string lookup result must not crash on .startswith
    if isinstance(cluster_id, str) and cluster_id.startswith('j-'):
        logger.info(f"Cluster with id {cluster_id} already exists")
        error_response.update(status='ClusterAlreadyExists')
        error_response.update(
            message=f'Cluster with name {cluster_name} already exists, exiting..'
        )
        raise EMRClusterCreationException(error_response)

    src_dir = os.path.dirname(os.path.dirname(__file__))
    emr_metadata = f'{src_dir}/conf/{account}/emr-metadata.json'
    emr_config_file = f'{src_dir}/conf/{account}/emr-{cluster_type}-config.json'

    try:
        cluster_id = create_emr_cluster(event, emr_config_file, emr_metadata,
                                        role, cluster_type, cluster_name)
    except Exception as error:
        # The error goes through a %s placeholder; an extra positional argument without
        # a placeholder makes the logging module fail to format the record.
        logger.error("Cluster creation failed: %s", error)
        error_response.update(message=str(error))
        raise EMRClusterCreationException(error_response) from error

    try:
        response = get_emr_cluster_status(cluster_id)
    except Exception as error:
        logger.error("Cluster creation failed: %s", error)
        # Without this the payload would still carry the message of the earlier name lookup
        error_response.update(message=str(error))
        raise EMRClusterCreationException(error_response) from error

    dns_record = None
    try:
        # Context manager closes the metadata file, which was previously leaked
        with open(emr_metadata) as metadata_file:
            r53_zone = json.load(metadata_file).get('r53_hosted_zone')
    except Exception as error:
        logger.error(
            "error occured while fetching route53 zone name from metadata file"
        )
        logger.error(error)
    else:
        # A missing zone leaves dns_name as None instead of producing "<name>.None"
        if r53_zone:
            dns_record = f"{cluster_name}.{r53_zone}"

    return {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "account": account,
        "segment": role,
        "cluster_name": cluster_name,
        "cluster_type": cluster_type,
        "cluster_id": cluster_id,
        "status": response.get('status'),
        "message": "EMR cluster launch initiated",
        "dns_name": dns_record
    }
def lambda_handler(event, context):
    """
    Enable, disable or report termination protection of an EMR cluster.

    The cluster id resolved from ``cluster_name`` must match the passed ``cluster_id``.

    :param event: Invocation payload; ``api_request_id``, ``cluster_name``, ``cluster_id`` and
                  ``termination_protected`` (``enable``/``disable``/``status``, lower-cased,
                  default ``status``) are read from it.
    :param context: Lambda context object.
    :return: Response dict whose ``terminationProtected`` is ``enabled`` or ``disabled``.
    :raises exceptions.EMRClusterTerminationProtectionException: On an invalid action, a
             failed or mismatching cluster lookup, a request for the state the cluster is
             already in, or a failure to change the protection.
    """
    # Fetch API requestId if triggered via API g/w
    api_request_id = event.get('api_request_id', 'null')
    logger = setup_logging(api_request_id, context.aws_request_id)

    cluster_name = event.get('cluster_name')
    cluster_id = event.get('cluster_id')
    termination_protection = event.get(
        'termination_protected', 'status').lower()  # enable/disable/status

    # Define error json response for APIs
    error_response = construct_error_response(context, api_request_id)

    if termination_protection not in {'enable', 'disable', 'status'}:
        logger.error(
            "Invalid action argument. Must be either 'enable', 'disable' or 'status' ...exiting"
        )
        error_response.update(Message='Invalid action argument passed')
        raise exceptions.EMRClusterTerminationProtectionException(
            error_response)

    # Resolve the cluster id from the name and make sure it is the cluster the caller meant
    try:
        response_cluster_id = get_cluster_id(cluster_name)
    except Exception as error:
        logger.error(error)
        error_response.update(
            message=
            f"Unable to fetch EMR cluster information cluster name {cluster_name}"
        )
        raise exceptions.EMRClusterTerminationProtectionException(
            error_response) from error

    if response_cluster_id != cluster_id:
        logger.error(
            f"EMR cluster id:{response_cluster_id} fetched from cluster_name and given cluster_id:{cluster_id} not matching"
        )
        error_response.update(
            message=
            f"EMR cluster id: {response_cluster_id} fetched from cluster_name and given cluster_id: {cluster_id} not matching"
        )
        raise exceptions.EMRClusterTerminationProtectionException(
            error_response)

    cluster_status = get_emr_cluster_status(cluster_id)
    currently_protected = bool(cluster_status.get('protection'))

    if termination_protection == "status":
        # Report the cluster's actual protection flag. The previous code tested the action
        # string itself, which is always truthy, so "status" always answered 'enabled'.
        return {
            "api_request_id": api_request_id,
            "lambda_request_id": context.aws_request_id,
            "cluster_name": cluster_name,
            "cluster_id": cluster_id,
            "terminationProtected":
            'enabled' if currently_protected else 'disabled'
        }

    set_protection = termination_protection == 'enable'

    # Requesting the state the cluster is already in is reported as an error
    if set_protection and currently_protected:
        logger.error(
            f"Termination protection already enabled for EMR cluster:{cluster_id} @@@"
        )
        error_response.update(
            message='Termination protection already enabled for EMR cluster')
        raise exceptions.EMRClusterTerminationProtectionException(
            error_response)
    if not set_protection and not currently_protected:
        logger.error(
            f"Termination protection already disabled for EMR cluster:{cluster_id}"
        )
        error_response.update(
            message='Termination protection already disabled for EMR cluster')
        raise exceptions.EMRClusterTerminationProtectionException(
            error_response)

    # Set/Remove termination protection
    try:
        set_emr_termination_protection(cluster_id, set_protection)
    except Exception as error:
        logger.error(error)
        logger.error(
            f"Failed to enable/disable termination protection for clusterId:{cluster_id}"
        )
        error_response.update(
            message='Failed to enable/disable termination protection')
        error_response.update(terminationProtected='FAILED')
        raise exceptions.EMRClusterTerminationProtectionException(
            error_response) from error

    return {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_name": cluster_name,
        "cluster_id": cluster_id,
        "terminationProtected": 'enabled' if set_protection else 'disabled'
    }
def lambda_handler(event, context):
    """
    Performs DNS flip based on dns_name and cluster_name

    The hosted zone is everything after the first dot of ``dns_name``. ``create`` requires
    that no record exists yet; ``update`` and ``delete`` require an existing record.

    :param event: Invocation payload; ``api_request_id``, ``action`` (create/update/delete),
                  ``dns_name``, ``cluster_name`` and ``master_ip`` are read from it.
    :param context: Lambda context object.
    :return: JSON string of the success response, including ``dnsName`` and ``message``.
    :raises exceptions.EMRDNSOperationsException: On missing or invalid input, an unknown
             hosted zone, an unmet record precondition or a failed DNS change.
    """
    api_request_id = event.get('api_request_id', 'null')
    # "or ''" keeps a missing value from crashing on .lower() before it can be validated
    action = (event.get('action') or '').lower()  # DNS Action [ create, delete, update ]
    record = (event.get('dns_name') or '').lower()  # DNS record name
    cluster_name = event.get('cluster_name')  # EMR cluster name
    master_ip = event.get('master_ip')  # IP of Master Node of Cluster

    logger = setup_logging(api_request_id, context.aws_request_id)

    success_response = {
        "statusCode": 200,
        "status": "Completed",
        "message": "DNS record changed successfully."
    }

    error_response = construct_error_response(context, api_request_id)
    error_response.update(message="Unable to update DNS record.")

    # Fetch the DNS name and retrieve hosted zone out of it, if DNS name is not provided throw an exception
    if not record:
        error_response.update(message='DNS Name is not passed for DNS Flip')
        raise exceptions.EMRDNSOperationsException(error_response)

    _, _, hosted_zone = record.partition(".")
    if not hosted_zone:
        # A name without a zone part used to fail with IndexError
        error_response.update(
            message='DNS Name does not contain a hosted zone')
        raise exceptions.EMRDNSOperationsException(error_response)

    # Checking if non-empty Master IP is passed
    if not master_ip:
        error_response.update(
            message="Empty Master IP is passed for DNS Operation. Exiting.")
        raise exceptions.EMRDNSOperationsException(error_response)

    if action not in ("create", "update", "delete"):
        # An unknown action used to fall through every branch and return None
        logger.error("Unsupported DNS action: %s", action)
        error_response.update(message='Unsupported DNS action passed')
        raise exceptions.EMRDNSOperationsException(error_response)

    logger.info(record)
    logger.info(master_ip)
    logger.info(cluster_name)
    logger.info(hosted_zone)

    # Fetching Hosted Zone ID
    zone_id = get_dns_hostedid(hosted_zone)
    if zone_id is None:
        logger.error(
            "Unable to fetch the Hosted zone id of the given zone name :" +
            hosted_zone)
        error_response.update(
            message='Unable to fetch the Hosted zone id of the given zone name.'
        )
        raise exceptions.EMRDNSOperationsException(error_response)

    # Fetching DNS record for Hosted Zone
    record_exist, record_response = get_dns_records(record, hosted_zone)

    # Preconditions on the current state of the record
    if action == "create" and record_response:
        logger.error("record already exist.. %s" % record_response)
        error_response.update(
            message='Record Already Exist. It can not be created again.')
        raise exceptions.EMRDNSOperationsException(error_response)
    if action == "update" and not record_response:
        logger.error("Unable to update record. It does not exist.. %s" %
                     record_response)
        error_response.update(
            message='Unable to update record. Record does not exist.')
        raise exceptions.EMRDNSOperationsException(error_response)
    if action == "delete":
        if not record_response:
            logger.error(
                "Unable to delete record %s DNS entry does not exist." %
                record)
            error_response.update(
                message='Unable to delete record %s DNS entry does not exist.'
                % record)
            raise exceptions.EMRDNSOperationsException(error_response)
        logger.info("record already exist.. %s" % record_response)

    dns_name, message = dns_deupsert(action, cluster_name, record, master_ip,
                                     zone_id)

    # NOTE(review): create judges success by the returned dns_name, update/delete by the
    # record_exist flag fetched before the change (unchanged from the original) - confirm
    # whether update/delete should also check dns_name.
    succeeded = dns_name if action == "create" else record_exist
    if not succeeded:
        error_response['message'] = message
        logger.error(error_response)
        raise exceptions.EMRDNSOperationsException(error_response)

    success_response['message'] = message
    success_response['dnsName'] = dns_name
    logger.info(success_response)
    return json.dumps(success_response)
""" This module implements the common functions required for EMR cluster operations """ import botocore.exceptions from boto3 import Session from src.util.log import setup_logging from src.util import exceptions logger = setup_logging('emrlib', '') session = Session() emr = session.client('emr') def get_instance_group_by_name(cluster_id: str, task_group_name: str): """ Retrieve the group id of a task instance group based on its name :param cluster_id: The ClusterId :param task_group_name: The name of the task instance group :return: The instance group JSON object or None (if not found) """ try: response = emr.list_instance_groups(ClusterId=cluster_id) except Exception as error: raise botocore.exceptions.ClientError(error, 'emr_list_instance_groups') groups = response.get('InstanceGroups', []) for group in groups:
""" This module implements the common functions required for DNS flip operations """ import botocore.exceptions from boto3 import Session from src.util.log import setup_logging logger = setup_logging('dnslib', '') def get_dns_records(record, zone_name): """ Returns only the records of given cluster record. Args: record (dns name), zone_name (route 53 zone) Returns: boolean (true or false), record domain """ session = Session() route53 = session.client('route53') zone_id = get_dns_hostedid(zone_name) record = record + '.' if not record.endswith('.') else record logger.info("Retrieved Zone ID: ", zone_id) if zone_id is None: logger.info(
def _remove_detached_security_groups(response, logger):
    """
    Empty and delete the master and service security groups of a cluster,
    skipping any group that is still associated with a network interface.

    :param response: cluster status details; its 'ec2_attributes' entry is
        read for the 'master_sg' and 'service_sg' ids
    :param logger: logger used to report skipped groups
    """
    for sg_attribute in ('master_sg', 'service_sg'):
        # Get the security group from EMR cluster ec2 attributes
        sg_id = response.get('ec2_attributes').get(sg_attribute)
        if not sg_id:
            continue
        if get_network_interface_association(sg_id):
            logger.info(
                "Security ID is attached to an interface, skipping the deletion.."
            )
            continue
        # Remove all the rules for security group, before deleting
        empty_sg_rules(sg_id)
        delete_security_group(sg_id)


def lambda_handler(event, context):
    """
    Validate an EMR cluster and report its current state.

    :param event: Lambda event; 'cluster_id' and (optionally)
        'api_request_id' are read from it
    :param context: Lambda context; 'aws_request_id' is read from it
    :return: dict with api_request_id, lambda_request_id, cluster_id,
        cluster_name and status. 'rm_url' and 'master_ip' are added when the
        cluster is WAITING or RUNNING.
    :raises exceptions.EMRClusterValidationException: when the cluster name
        cannot be resolved for the given cluster id
    """
    # Fetch API requestId if triggered via API g/w
    api_request_id = event.get('api_request_id', 'null')
    cluster_id = event.get('cluster_id')

    logger = setup_logging(api_request_id, context.aws_request_id)
    logger.info(f"Validating EMR cluster {cluster_id}")

    # Define generic response for Lambda fns & APIs
    success_response = {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_id": cluster_id,
    }

    try:
        cluster_name = get_cluster_name(cluster_id, terminated=True)
    except Exception as error:
        logger.info("Cluster does not exist.")
        # Keep the underlying failure visible: the lookup can fail for reasons
        # other than a missing cluster.
        logger.error(error)
        # Define error json response for APIs
        error_response = construct_error_response(context, api_request_id)
        error_response.update(status='ClusterNotPresent')
        error_response.update(
            message=f'Cluster ID {cluster_id} does not exist')
        raise exceptions.EMRClusterValidationException(
            error_response) from error

    success_response.update(cluster_name=cluster_name)

    response = get_emr_cluster_status(cluster_id, detail=True)
    status = response.get('status').upper()

    if status in ('WAITING', 'RUNNING'):
        logger.info("EMR cluster is up and running..")
        success_response.update(status=status)
        # Fetching RM url and master IP as cluster has been created successfully
        cluster_metadata = get_cluster_metadata(cluster_id)
        success_response.update(rm_url=cluster_metadata.get('rm_url'))
        success_response.update(master_ip=cluster_metadata.get('master_ip'))
    elif status in constants.LIST_CLUSTERS_PROVISION_STATES:
        logger.info("EMR cluster creation inprogress...")
        success_response.update(status=status)
    else:
        logger.info(
            f"EMR cluster failed with {response.get('status')} error \n message: {response.get('message')}"
        )
        success_response.update(status="FAILED")
        try:
            # Delete the master and service security group of the emr if
            # cluster is in terminated state. Best effort: a cleanup failure
            # is logged and does not change the reported status.
            _remove_detached_security_groups(response, logger)
        except Exception as error:
            logger.error(error)

    return success_response
def lambda_handler(event, context):
    """
    Proxy a metrics request to the cluster Resource Manager.

    :param event: Lambda event; 'api_request_id', 'metric_type' and 'rm_url'
        are read from it (the event is also passed on to the stats fetcher)
    :param context: Lambda context; 'aws_request_id' is read from it
    :return: dict with rm_url, metric_type, metric_stats (the fetched payload
        decoded as UTF-8), api_request_id and lambda_request_id
    :raises exceptions.EMRRMProxyException: when 'metric_type' is not one of
        metrics, apps, drElephant, or when fetching the stats fails
    """
    # Fetch API requestId if triggered via API g/w
    api_request_id = event.get('api_request_id')
    metric_type = event.get('metric_type')

    logger = setup_logging(api_request_id, context.aws_request_id)

    error_response = {
        "statusCode": 500,
        "status": "Failed",
        "message": "Unable to call RM Url for fetching metrics."
    }

    # Pick the fetcher and the wording used in log/error messages.
    if metric_type == "metrics":
        fetch_stats, subject = get_metrics_stats, "cluster metrics"
    elif metric_type == "apps":
        fetch_stats, subject = get_apps_stats, "application metrics"
    elif metric_type == "drElephant":
        fetch_stats, subject = get_drelephant_stats, "Dr Elephant results"
    else:
        logger.warning(
            "Improper Type of Metrics Passed (%s). Proper types are: metrics, apps, drElephant",
            metric_type)
        # NOTE(review): the 'Message' key (capital M) is kept as in the
        # original payload; it is added next to the default 'message' entry
        # rather than replacing it - confirm which key consumers read.
        error_response.update(
            Message=
            "Improper Type of Metrics Passed. Proper types are: metrics, apps, drElephant"
        )
        raise exceptions.EMRRMProxyException(error_response)

    try:
        response = fetch_stats(event)
    except Exception as fetch_err:
        # The placeholder is required: an extra logging argument without one
        # makes the logging module fail to format the record.
        logger.error("Error getting %s from RM: %s", subject, fetch_err)
        error_response.update(Message=f"Error getting {subject} from RM.")
        raise exceptions.EMRRMProxyException(error_response) from fetch_err

    rm_response = {
        "rm_url": event.get('rm_url'),
        "metric_type": metric_type,
        "metric_stats": response.decode("utf-8"),
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id
    }
    return rm_response
""" This module implements the common functions required for EMR cluster Resource Manager Operations """ import botocore.exceptions from botocore.vendored import requests from botocore.vendored.requests.exceptions import HTTPError from src.util.log import setup_logging logger = setup_logging('rmlib', '') def get_metrics_stats(event): """ Makes call to provided resource manager Url in event to fetch Cluster Metrics :param event: Lambda event containing request params :type event: lambda event :return: response content :rtype: string """ url_suffix = "/ws/v1/cluster/metrics" rm_url = event.get('rm_url') + url_suffix try: response = requests.get(rm_url, timeout=(5, 10)) print(response) response.raise_for_status() except HTTPError as http_err: logger.error("HTTP error occurred at get_metrics_stats:", http_err) raise botocore.exceptions.HTTPClientError(http_err, 'get_metrics_stats')
def lambda_handler(event, context):
    """
    Report the status of an EMR cluster and, when the cluster is in a
    terminated state, try to remove its master and service security groups.

    :param event: Lambda event; 'cluster_id' and (optionally)
        'api_request_id' are read from it
    :param context: Lambda context; 'aws_request_id' is read from it
    :return: dict with api_request_id, lambda_request_id, cluster_name,
        cluster_id, status and message
    :raises exceptions.ClusterStatusCheckException: when 'cluster_id' is
        missing or the cluster status lookup fails
    """
    # API request id is present when the call comes through the API g/w
    api_request_id = event.get('api_request_id', 'null')
    logger = setup_logging(api_request_id, context.aws_request_id)
    cluster_id = event.get('cluster_id', None)

    # Error payload shared by both failure paths below
    error_response = construct_error_response(context, api_request_id)

    if cluster_id is None:
        logger.error("ClusterId argument not passed ...exiting")
        error_response.update(status='ClusterStatusCheckFailed')
        error_response.update(message='ClusterId argument not passed')
        raise exceptions.ClusterStatusCheckException(
            json.dumps(error_response))

    try:
        response = get_emr_cluster_status(cluster_id, detail=True)
    except Exception as error:
        logger.error("An error occurred ...exiting \n" + str(error))
        error_response.update(status='ClusterStatusCheckFailed')
        error_response.update(message=str(error))
        raise exceptions.ClusterStatusCheckException(error_response)

    success_response = {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_name": response.get('cluster_name'),
        "cluster_id": response.get('cluster_id'),
        "status": response.get('status'),
        "message": response.get('message')
    }

    # Best-effort cleanup: any failure is logged and the status is still returned
    try:
        if response.get('status') in constants.TERMINATED_STATES:
            # Master group first, then the service group
            for sg_attribute in ('master_sg', 'service_sg'):
                group_id = response.get('ec2_attributes').get(sg_attribute)
                if not group_id:
                    continue
                attached = get_network_interface_association(group_id)
                if attached:
                    logger.info(
                        "Security ID is attached to an interface, skipping the deletion.."
                    )
                else:
                    # Rules have to be removed before the group is deleted
                    empty_sg_rules(group_id)
                    delete_security_group(group_id)
    except Exception as error:
        logger.error(error)

    logger.info(
        f"ClusterName: {response.get('cluster_name')} ClusterId: {response.get('cluster_id')}: Status: {response.get('status')}"
    )
    return success_response