Esempio n. 1
0
def lambda_handler(event, context):
    """Report the steps of an EMR cluster and how many there are.

    Reads ``api_request_id`` and ``cluster_id`` from ``event`` and asks
    ``get_emr_steps`` for the steps of that cluster.

    Returns:
        dict with ``statusCode`` 201, the API and Lambda request ids, the
        ``cluster_id``, ``steps_count`` (length of the fetched result) and
        ``steps`` (the fetched result itself).

    Raises:
        exceptions.EMRTestRunException: when ``get_emr_steps`` raises; the
            original error is logged and a fixed 500 payload is attached.
    """
    request_id = event.get('api_request_id')
    logger = setup_logging(request_id, context.aws_request_id)
    cluster_id = event.get('cluster_id')

    logger.info("Getting steps by cluster_id")
    try:
        cluster_steps = get_emr_steps(cluster_id)
    except Exception as e:
        logger.error(e)
        # Callers only ever see this fixed payload; the real cause is in the log.
        failure_payload = {
            "statusCode": 500,
            "errorType": "NoClusterFound",
            "errorMessage": "Unable to fetch cluster step details."
        }
        raise exceptions.EMRTestRunException(failure_payload)

    return {
        'statusCode': 201,
        "api_request_id": request_id,
        "lambda_request_id": context.aws_request_id,
        'cluster_id': cluster_id,
        'steps_count': len(cluster_steps),
        'steps': cluster_steps
    }
def lambda_handler(event, context):
    """Report the bootstrap actions configured on an EMR cluster.

    Reads ``api_request_id`` and ``cluster_id`` from ``event`` and queries
    ``get_bootstrap_actions`` for that cluster.

    Returns:
        dict with ``statusCode`` 201, the API and Lambda request ids, the
        ``cluster_id``, a ``count`` field that is always 0, plus
        ``bootstrap_count`` and ``bootstrap_names`` copied from the
        ``'Count'`` and ``'Names'`` entries of the lookup result.

    Raises:
        exceptions.EMRTestRunException: when ``get_bootstrap_actions``
            raises; the original error is logged and a fixed 500 payload is
            attached.
    """
    request_id = event.get('api_request_id')
    logger = setup_logging(request_id, context.aws_request_id)
    cluster_id = event.get('cluster_id')

    logger.info("Getting bootstraps by cluster_id")
    try:
        bootstrap_info = get_bootstrap_actions(cluster_id)
    except Exception as e:
        logger.error(e)
        failure_payload = {
            "statusCode": 500,
            "errorType": "NoClusterFound",
            "errorMessage": "Unable to fetch cluster bootstrap information."
        }
        raise exceptions.EMRTestRunException(failure_payload)

    return {
        'statusCode': 201,
        "api_request_id": request_id,
        "lambda_request_id": context.aws_request_id,
        'cluster_id': cluster_id,
        # NOTE(review): 'count' is never updated; the real figure is in
        # 'bootstrap_count'. Kept because it is part of the response shape.
        'count': 0,
        'bootstrap_count': bootstrap_info.get('Count'),
        'bootstrap_names': bootstrap_info.get('Names')
    }
def lambda_handler(event, context):
    """Validate a list of EMR steps and return the per-step results.

    Reads ``cluster_name``, ``cluster_id`` and ``step_ids`` from ``event``
    and calls ``emr_validate_step`` once per step id, in order.

    Returns:
        dict with the API and Lambda request ids, ``cluster_name``,
        ``cluster_id`` and ``steps`` (one validation result per step id).

    Raises:
        exceptions.EMRClusterValidateStepException: on the first step whose
            validation raises; remaining steps are not validated.
    """
    request_id = event.get('api_request_id', 'null')
    logger = setup_logging(request_id, context.aws_request_id)

    cluster_name = event.get('cluster_name')
    cluster_id = event.get('cluster_id')
    step_ids = event.get('step_ids')

    # Error payload is prepared up front and only completed on failure.
    failure_payload = construct_error_response(context, request_id)

    validated_steps = []
    for step_id in step_ids:
        try:
            validated_steps.append(emr_validate_step(cluster_id, step_id))
        except Exception as error:
            logger.error(error)
            logger.error(f"Unable to validate EMR cluster step {step_id}")
            failure_payload.update(
                message='Unable to validate EMR cluster step')
            raise exceptions.EMRClusterValidateStepException(failure_payload)

    return {
        "api_request_id": request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_name": cluster_name,
        "cluster_id": cluster_id,
        "steps": validated_steps
    }
Esempio n. 4
0
def lambda_handler(event, context):
    """Start termination of an EMR cluster.

    Reads ``cluster_name``, ``cluster_id`` and ``force`` (default ``False``)
    from ``event`` and forwards them to ``terminate_emr_cluster``.

    Returns:
        dict with the API and Lambda request ids, ``cluster_name``,
        ``cluster_id``, ``status`` ``"TERMINATION_INITIATED"`` and a
        human-readable ``message``.

    Raises:
        exceptions.EMRClusterTerminateException: when
            ``terminate_emr_cluster`` raises; the payload carries status
            ``TERMINATION_INITIATION_FAILED`` and the error text.
    """
    # 'null' stands in for the API request id when not triggered via API g/w
    request_id = event.get('api_request_id', 'null')
    logger = setup_logging(request_id, context.aws_request_id)

    cluster_name = event.get('cluster_name', None)
    cluster_id = event.get('cluster_id', None)
    force_flag = event.get('force', False)

    try:
        terminate_emr_cluster(cluster_name, cluster_id, force_flag)
    except Exception as error:
        failure_payload = construct_error_response(context, request_id)
        failure_payload.update(status='TERMINATION_INITIATION_FAILED')
        failure_payload.update(message=str(error))
        logger.error(
            f"Unable to terminate EMR cluster:{cluster_name} ...exiting \n error: {error}"
        )
        raise exceptions.EMRClusterTerminateException(failure_payload)

    return {
        "api_request_id": request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_name": cluster_name,
        "cluster_id": cluster_id,
        "status": "TERMINATION_INITIATED",
        "message": "EMR termination initiated successfully"
    }
Esempio n. 5
0
def lambda_handler(event, context):
    """Remove the auto-scaling policy from an EMR instance group.

    Reads ``cluster_id`` and ``instance_group`` from ``event``
    (``instance_group`` falls back to ``"TASK"`` when missing or falsy),
    resolves the group through ``get_instance_group_by_name`` and passes it
    to ``remove_emr_auto_scaling``.

    Returns:
        dict with the API and Lambda request ids, ``cluster_id`` and
        ``status`` set to ``"SUCCEEDED"``.

    Raises:
        exceptions.EMRClusterAutoScalingException: when the instance group
            cannot be resolved or the policy removal raises. The payload is
            the 500 error response with a ``Message`` describing the step
            that failed.
    """
    api_request_id = event.get('api_request_id', 'null')
    logger = setup_logging(api_request_id, context.aws_request_id)

    cluster_id = event.get('cluster_id')
    instance_group = event.get('instance_group') or "TASK"

    # Define error json response for APIs
    error_response = {
        "statusCode": 500,
        "lambda_function_name": context.function_name,
        "log_group_name": context.log_group_name,
        "log_stream_name": context.log_stream_name,
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id
    }

    try:
        emr_instance_group = get_instance_group_by_name(
            cluster_id, instance_group)
    except Exception as error:
        # The exception is logged on its own line. Passing it as an extra
        # positional argument to a message without a %-placeholder makes the
        # standard logging module fail while formatting the record
        # (assumes setup_logging returns a stdlib logger - TODO confirm).
        logger.error(
            f"EMR ClusterId:{cluster_id} does not contain a {instance_group} instance group"
        )
        logger.error(error)
        error_response.update(
            Message=
            f"EMR ClusterId:{cluster_id} does not contain a {instance_group} instance group , error: {str(error)}"
        )
        raise exceptions.EMRClusterAutoScalingException(error_response)

    try:
        # The return value is not needed; only success or failure matters.
        remove_emr_auto_scaling(cluster_id, emr_instance_group)
    except Exception as error:
        logger.error(
            "Exception occurred while attempting to remove auto-scaling policy ...exiting"
        )
        logger.error(error)
        error_response.update(
            Message='Removal of Auto-Scaling policies failed')
        raise exceptions.EMRClusterAutoScalingException(error_response)

    return {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_id": cluster_id,
        'status': "SUCCEEDED"
    }
def lambda_handler(event, context):
    """Report the auto-scaling state of an EMR instance group.

    Reads ``cluster_id`` and ``instance_group`` from ``event``
    (``instance_group`` falls back to ``"TASK"`` when missing or falsy) and
    inspects the ``AutoScalingPolicy`` entry of the group returned by
    ``get_instance_group_by_name``.

    Returns:
        dict with the API and Lambda request ids, ``cluster_id``,
        ``instance_group``, ``state`` (``'DETACHED'`` when the group has no
        policy status), ``min`` / ``max`` capacity (0 when absent) and
        ``rules`` (number of scaling rules).

    Raises:
        exceptions.EMRTestRunException: when the instance group cannot be
            resolved.
    """
    api_request_id = event.get('api_request_id', 'null')
    logger = setup_logging(api_request_id, context.aws_request_id)

    cluster_id = event.get('cluster_id')
    instance_group = event.get('instance_group') or "TASK"

    # Define error json response for APIs
    error_response = {
        "statusCode": 500,
        "lambda_function_name": context.function_name,
        "log_group_name": context.log_group_name,
        "log_stream_name": context.log_stream_name,
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id
    }

    try:
        emr_instance_group = get_instance_group_by_name(
            cluster_id, instance_group)
    except Exception as error:
        # The exception is logged on its own line. Passing it as an extra
        # positional argument to a message without a %-placeholder makes the
        # standard logging module fail while formatting the record
        # (assumes setup_logging returns a stdlib logger - TODO confirm).
        logger.error(
            f"EMR ClusterId:{cluster_id} does not contain a {instance_group} instance group"
        )
        logger.error(error)
        error_response.update(
            Message=
            f"EMR ClusterId:{cluster_id} does not contain a {instance_group} instance group , error: {str(error)}"
        )
        raise exceptions.EMRTestRunException(error_response)

    # Every level is optional; a group without a policy reads as DETACHED/0/0/0.
    asg_policy = emr_instance_group.get('AutoScalingPolicy', {})
    constraints = asg_policy.get('Constraints', {})

    return {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_id": cluster_id,
        "instance_group": instance_group,
        "state": asg_policy.get('Status', {}).get('State', 'DETACHED'),
        "min": constraints.get('MinCapacity', 0),
        "max": constraints.get('MaxCapacity', 0),
        "rules": len(asg_policy.get('Rules', []))
    }
def lambda_handler(event, context):
    """Submit custom steps to an EMR cluster one at a time and validate each.

    Reads ``cluster_name``, ``cluster_id`` and ``steps`` from ``event``.
    Each step is submitted on its own through ``emr_add_step`` and the
    returned value is passed to ``emr_validate_step``.

    Returns:
        dict with the API and Lambda request ids, ``cluster_name``,
        ``cluster_id`` and ``steps`` (one validation result per submitted
        step, in submission order).

    Raises:
        exceptions.EMRClusterAddStepException: on the first step whose
            submission or validation raises; later steps are not submitted.
    """
    api_request_id = event.get('api_request_id', 'null')
    logger = setup_logging(api_request_id, context.aws_request_id)

    cluster_name = event.get('cluster_name')
    cluster_id = event.get('cluster_id')
    step_list = event.get('steps')

    logger.info("Executing EMR add-step")

    # Add custom step
    added_step_list = []

    for step in step_list:
        try:
            step_id_response = emr_add_step(cluster_id, [step])
            emr_validate_response = emr_validate_step(cluster_id,
                                                      step_id_response)
            added_step_list.append(emr_validate_response)
        except Exception as error:
            # Log the underlying cause as well; the exception raised below
            # only carries a generic message.
            logger.error(error)
            logger.error(f"Unable to add custom step {step}")
            # Define error json response for APIs
            error_response = construct_error_response(context, api_request_id)
            error_response.update(message='Unable to add custom step')
            raise exceptions.EMRClusterAddStepException(error_response)

    logger.info("EMR custom add-step response")

    return {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_name": cluster_name,
        "cluster_id": cluster_id,
        "steps": added_step_list
    }
Esempio n. 8
0
def lambda_handler(event, context):
    """Check whether an EMR cluster with the given name already exists.

    Reads ``cluster_name`` and ``type`` (lower-cased, default ``'unknown'``)
    from ``event`` and looks the cluster up with ``get_cluster_id``.

    Returns:
        dict with the API and Lambda request ids, ``cluster_name`` and
        ``type``, plus either ``cluster_id`` with status
        ``'ClusterAlreadyExists'`` or status ``'ClusterNotPresent'`` when
        the lookup raises ``MissingEMRCluster``.

    Raises:
        ClusterPreCheckException: when ``cluster_name`` is ``None`` or empty.
    """
    request_id = event.get('api_request_id', 'null')

    logger = setup_logging(request_id, context.aws_request_id)

    cluster_name = event.get('cluster_name', None)
    cluster_type = event.get('type', 'unknown').lower()

    if cluster_name in (None, ''):
        logger.error("Cluster name argument not passed ...exiting")
        failure_payload = construct_error_response(context, request_id)
        failure_payload.update(status='ClusterPreCheckFailed')
        failure_payload.update(message='Cluster name argument not passed')
        raise ClusterPreCheckException(failure_payload)

    # Common part of both outcomes; status/message are filled in below.
    result = {
        "api_request_id": request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_name": cluster_name,
        "type": cluster_type
    }

    try:
        existing_id = get_cluster_id(cluster_name)
    except MissingEMRCluster as error:
        # A missing cluster is a normal answer here, not a failure.
        logger.error(error)
        result.update(status='ClusterNotPresent')
        result.update(message='Cluster does not exist')
        return result

    logger.info(f"Cluster with id {existing_id} already exists")
    result.update(cluster_id=existing_id)
    result.update(status='ClusterAlreadyExists')
    result.update(message='Cluster already exists')
    return result
Esempio n. 9
0
def lambda_handler(event, context):
    """Submit a predefined test step (spark, hive or custom jar) to a cluster.

    Reads ``cluster_name``, ``cluster_id``, ``app`` (lower-cased, default
    ``'hive'``) and ``cluster_type`` (lower-cased, default ``'unknown'``)
    from ``event``, builds the matching step configuration from
    ``constants``, submits it with ``emr_add_step`` and validates it with
    ``emr_validate_step``.

    Returns:
        dict with the API and Lambda request ids, ``cluster_name``, ``app``,
        ``cluster_id`` and the ``step_id`` / ``status`` / ``message`` entries
        of the validation result.

    Raises:
        exceptions.EMRTestRunException: when ``app`` is not one of
            ``spark`` / ``hive`` / ``customjar`` or when submitting the step
            raises.
    """
    # Fetch API requestId if triggered via API g/w
    api_request_id = event.get('api_request_id', 'null')

    logger = setup_logging(api_request_id, context.aws_request_id)

    cluster_name = event.get('cluster_name', None)
    cluster_id = event.get('cluster_id', None)
    app = event.get('app', 'hive').lower()
    cluster_type = event.get('cluster_type', 'unknown').lower()

    # Define error json response for APIs
    error_response = {
        "statusCode": 500,
        "cluster_type": cluster_type,
        "lambda_function_name": context.function_name,
        "log_group_name": context.log_group_name,
        "log_stream_name": context.log_stream_name,
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id
    }

    if app not in ['spark', 'hive', 'customjar']:
        logger.error(f"Unsupported app type {app}")
        error_response.update(Message=f'Unsupported app type {app}')
        raise exceptions.EMRTestRunException(error_response)

    step_config = {}

    if app == 'spark':
        step_config['Name'] = constants.DEFAULT_TEST_SPARK_STEP_NAME
        step_config['ActionOnFailure'] = constants.DEFAULT_FAILURE_ACTION
        args_list = [constants.SPARK_SUBMIT_COMMAND]
        args_list += constants.DEFAULT_TEST_SPARK_ARGS
        jar_config = {'Jar': constants.SCRIPT_RUNNER_JAR, 'Args': args_list}
        step_config['HadoopJarStep'] = jar_config

    elif app == 'hive':
        step_config['Name'] = constants.DEFAULT_TEST_HIVE_STEP_NAME
        step_config['ActionOnFailure'] = constants.DEFAULT_FAILURE_ACTION
        args_list = [
            constants.HIVE_SCRIPT_COMMAND, constants.RUN_HIVE_SCRIPT,
            constants.ARGS
        ]
        args_list += constants.DEFAULT_TEST_HIVE_ARGS
        jar_config = {'Jar': constants.COMMAND_RUNNER, 'Args': args_list}
        step_config['HadoopJarStep'] = jar_config

    elif app == 'customjar':
        step_config['Name'] = constants.DEFAULT_TEST_CUSTOM_JAR_STEP_NAME
        step_config['ActionOnFailure'] = constants.DEFAULT_FAILURE_ACTION

        # The step might fail if the same output dir is reused, so generate
        # a random output directory for every run.
        dir_suffix = ''.join(
            choice(string.ascii_lowercase + string.digits) for _ in range(5))
        output_dir = constants.CUSTOM_JAT_TEST_ARGS_OUTPUT_PREFIX + dir_suffix

        # Work on a copy: appending to the constant itself would add one more
        # output directory on every invocation served by the same warm
        # Lambda container.
        args_list = list(constants.CUSTOM_JAR_TEST_ARGS)
        args_list.append(output_dir)
        jar_config = {'Jar': constants.CUSTOM_TEST_JAR, 'Args': args_list}
        step_config['HadoopJarStep'] = jar_config

    step_config_list = [step_config]

    try:
        step_id = emr_add_step(cluster_id, step_config_list)
    except Exception as error:
        # The exception is logged on its own line. Passing it as an extra
        # positional argument to a message without a %-placeholder makes the
        # standard logging module fail while formatting the record
        # (assumes setup_logging returns a stdlib logger - TODO confirm).
        logger.error(f"Unable to test app  {app} on emr cluster {cluster_id}")
        logger.error(error)
        error_response.update(Message='EMR test app failed')
        raise exceptions.EMRTestRunException(error_response)
    else:
        # Validation errors are intentionally not wrapped; they propagate.
        validation_status = emr_validate_step(cluster_id, step_id)

    return {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_name": cluster_name,
        "app": app,
        "cluster_id": cluster_id,
        "step_id": validation_status.get('step_id'),
        "status": validation_status.get('status'),
        "message": validation_status.get('message')
    }
Esempio n. 10
0
def lambda_handler(event, context):
    """Submit one named, preconfigured step to an EMR cluster.

    Reads ``account``, ``cluster_name``, ``cluster_id``, ``step`` and
    ``cluster_type`` (lower-cased, default ``'unknown'``) from ``event``.
    The step definition is looked up by ``Name`` among the ``steps`` of the
    role section of ``emr-<cluster_type>-config.json`` and of
    ``emr-metadata.json`` (both under ``conf/<account>/``), then submitted
    through ``emr_add_step``.

    Returns:
        dict with the API and Lambda request ids, ``cluster_name``,
        ``cluster_type``, ``cluster_id``, the submitted ``step_id``,
        ``status`` ``'PENDING'`` and a ``message``.

    Raises:
        EMRClusterAddStepException: when the cluster name has no role
            segment, a config file is missing, no step with the requested
            name exists, or the submission raises.
    """
    # Fetch API requestId if triggered via API g/w
    api_request_id = event.get('api_request_id', 'null')
    account = event.get('account')

    logger = setup_logging(api_request_id, context.aws_request_id)

    cluster_name = event.get('cluster_name', None)
    cluster_id = event.get('cluster_id', None)
    step_name = event.get('step', None)
    cluster_type = event.get('cluster_type', 'unknown').lower()

    # Define error json response for APIs
    error_response = construct_error_response(context, api_request_id)

    # cluster name nonkerb-testing-prd1, role name would be testing
    name_parts = cluster_name.split('-') if isinstance(cluster_name,
                                                       str) else []
    if len(name_parts) < 2:
        # Fail with the structured error instead of a bare
        # AttributeError/IndexError when the name is missing or has no '-'.
        logger.error(
            f"Unable to derive role from cluster name {cluster_name!r}")
        error_response.update(
            Message='Cluster name is missing or does not contain a role')
        raise EMRClusterAddStepException(error_response)
    role = name_parts[1]

    src_dir = os.path.dirname(os.path.dirname(__file__))
    metadata_file = f'{src_dir}/conf/{account}/emr-metadata.json'
    emr_config_file = f'{src_dir}/conf/{account}/emr-{cluster_type}-config.json'

    # Both checks always run, as before, so each path is probed exactly once.
    metadata_found = check_file_exist(metadata_file)
    config_found = check_file_exist(emr_config_file)
    if not (metadata_found and config_found):
        error_response.update(
            Message='Metadata or config one of the file not found')
        logger.error(
            FileReadError(
                path=metadata_file,
                message='Metadata or config one of the file not found'))
        logger.error(
            FileReadError(
                path=emr_config_file,
                message='Metadata or config one of the file not found'))
        raise EMRClusterAddStepException(error_response)

    # Read emr config file
    with open(emr_config_file, 'r') as emr_conf_file:
        emr_config = json.load(emr_conf_file)['Cluster-Configurations']

    # Check if the passed role name exists in the emr-config file
    if role in emr_config:
        emr_role = role
    else:
        logger.warning(
            f"Role name '{role}' not defined in the EMR config...using default config"
        )
        emr_role = 'default'

    role_config = emr_config[emr_role]

    # Read emr metadata file, which has account level settings like vpc, subnet
    with open(metadata_file) as account_metadata_file:
        metadata_config = json.load(account_metadata_file)

    # Role specific steps first, then the account level ones, so a role
    # definition wins when both declare a step with the same name.
    emr_steps = []
    emr_steps.extend(role_config.get('steps', []))
    emr_steps.extend(metadata_config.get('steps', []))

    # Pick the first step whose name matches the requested one
    selected_step = next(
        (step for step in emr_steps if step_name == step.get('Name')), None)

    if selected_step is None:
        logger.error(
            f"Unable to find step with name {step_name} in the metadata file or role config file"
        )
        error_response.update(Message='No step found')
        raise EMRClusterAddStepException(error_response)

    # Only the selected step is handed over: it goes in as the role config
    # and the metadata side is given an empty step list.
    emr_step_config_list = get_emr_steps(
        role_config={'steps': [selected_step]},
        metadata_config={'steps': []})

    try:
        step_id = emr_add_step(cluster_id, emr_step_config_list)
    except Exception as error:
        # Log the underlying cause as well; the exception raised below only
        # carries a generic message.
        logger.error(error)
        logger.error(
            f"Unable to add step with name {step_name} to emr cluster {cluster_id}"
        )
        error_response.update(Message='EMR add step failed')
        raise EMRClusterAddStepException(error_response)

    return {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_name": cluster_name,
        "cluster_type": cluster_type,
        "cluster_id": cluster_id,
        "step_id": step_id,
        "status": 'PENDING',
        "message": "Step has been submitted"
    }
def lambda_handler(event, context):
    """Attach an auto-scaling policy to an EMR instance group.

    Reads ``cluster_id``, ``instance_group`` (default ``"TASK"``),
    ``autoscaling_profile`` (default ``'Default'``), ``min``, ``max`` and
    ``instance_count`` from ``event``. The policy template and the named
    profiles come from ``conf/autoscaling-config.json``.

    Min/max capacity is chosen in this order, later entries winning:
      1. the ``min`` / ``max`` values of the selected profile;
      2. explicit ``min`` and ``max`` from the event, when both are > 0;
      3. ``instance_count`` from the event (used for both), when > 0.
    When either value is still -1 afterwards, the current size of the
    instance group is used instead.

    Returns:
        dict with the API and Lambda request ids, ``cluster_id`` and
        ``status`` (JSON string of the policy status returned by
        ``add_emr_auto_scaling``).

    Raises:
        exceptions.EMRClusterAutoScalingException: when the config file
            cannot be read, no usable profile or template exists, the
            instance group cannot be resolved or sized, or attaching the
            policy raises.
    """
    api_request_id = event.get('api_request_id', 'null')
    logger = setup_logging(api_request_id, context.aws_request_id)

    cluster_id = event.get('cluster_id')
    instance_group = event.get('instance_group') or "TASK"
    autoscaling_profile = event.get('autoscaling_profile') or 'Default'
    min_count = event.get('min') or 0
    max_count = event.get('max') or 0
    instance_count = event.get('instance_count') or 0

    # Define error json response for APIs
    error_response = {
        "statusCode": 500,
        "lambda_function_name": context.function_name,
        "log_group_name": context.log_group_name,
        "log_stream_name": context.log_stream_name,
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id
    }

    src_dir = os.path.dirname(os.path.dirname(__file__))
    asg_config = f'{src_dir}/conf/autoscaling-config.json'

    # Parse autoscaling config json file
    try:
        with open(asg_config) as json_file:
            emr = json.load(json_file)
        emr_as_template = emr['Autoscaling_Policy_Template']
        emr_as_profiles = emr['Autoscaling_Profiles']
    except Exception as error:
        # The exception is logged on its own line. Passing it as an extra
        # positional argument to a message without a %-placeholder makes the
        # standard logging module fail while formatting the record
        # (assumes setup_logging returns a stdlib logger - TODO confirm).
        logger.error("Unable to parse emr config json.")
        logger.error(error)
        logger.error(
            exceptions.FileReadError(
                path=asg_config,
                message='Metadata or config one of the file not found'))
        error_response.update(
            Message='Creation of Auto-Scaling policies failed')
        raise exceptions.EMRClusterAutoScalingException(error_response)

    # Resolve the profile: capitalised spelling first (the form the lookup
    # has always used), then the name exactly as passed, then 'Default'.
    # Checking membership and doing the lookup with the same key avoids
    # ending up with no profile for a name that is present in the config.
    profile_name = str(autoscaling_profile)
    as_profile = None
    for candidate in (profile_name.capitalize(), profile_name, 'Default'):
        if candidate in emr_as_profiles:
            as_profile = emr_as_profiles[candidate]
            break

    if as_profile is None:
        logger.error(
            f"Autoscaling profile {profile_name} and the Default profile are both missing from the configuration"
        )
        error_response.update(
            Message='Creation of Auto-Scaling policies failed')
        raise exceptions.EMRClusterAutoScalingException(error_response)

    # -1 means "not configured" and triggers the cluster-size fallback below.
    as_minsize = as_profile.get('min', -1)
    as_maxsize = as_profile.get('max', -1)

    # Explicit min and max values override the profile ...
    if (int(min_count) > 0) and (int(max_count) > 0):
        as_minsize = min_count
        as_maxsize = max_count

    # ... and an explicit number of task nodes overrides both.
    if int(instance_count) > 0:
        as_minsize, as_maxsize = instance_count, instance_count

    # Attempt to locate the TASK group with the specified name (default name: "TASK")
    try:
        emr_instance_group = get_instance_group_by_name(
            cluster_id, instance_group)
    except Exception as error:
        logger.error(
            f"EMR ClusterId:{cluster_id} does not contain a {instance_group} instance group"
        )
        logger.error(error)
        error_response.update(
            Message=
            f"EMR ClusterId:{cluster_id} does not contain a {instance_group} instance group , error: {str(error)}"
        )
        raise exceptions.EMRClusterAutoScalingException(error_response)

    # If we didn't identify any specific user requested auto-scaling
    # parameters, use the current cluster configuration as a guide:
    # minimum = current task group size, maximum = max(current size, 1).
    if as_minsize == -1 or as_maxsize == -1:
        if 'RequestedInstanceCount' in emr_instance_group:
            current_size = emr_instance_group['RequestedInstanceCount']
        elif 'RunningInstanceCount' in emr_instance_group:
            current_size = emr_instance_group['RunningInstanceCount']
        else:
            logger.error(
                "Exception occurred while attempting to attach auto-scaling policy"
            )
            error_response.update(
                Message='Creation of Auto-Scaling policies failed')
            raise exceptions.EMRClusterAutoScalingException(error_response)

        as_minsize = current_size
        # Without this the unset maximum would reach the policy as -1.
        if as_maxsize == -1:
            as_maxsize = max(int(current_size), 1)

    # Prepare the auto-scaling policy based on the configuration template.
    # .get() so that a missing key reaches the explicit error below instead
    # of raising KeyError.
    autoscaling_template = emr_as_template.get('AutoScalingPolicy')
    if not autoscaling_template:
        logger.error(
            "Fatal error: Could not locate the Autoscaling policy template in the configuration"
        )
        error_response.update(
            Message=
            'Fatal error: Could not locate the Autoscaling policy template in the configuration'
        )
        raise exceptions.EMRClusterAutoScalingException(error_response)

    # Modify the policy
    autoscaling_template['Constraints']['MinCapacity'] = int(as_minsize)
    autoscaling_template['Constraints']['MaxCapacity'] = int(as_maxsize)

    try:
        response = add_emr_auto_scaling(cluster_id, emr_instance_group,
                                        autoscaling_template)
    except Exception as error:
        logger.error(
            "Exception occurred while attempting to attach auto-scaling policy ...exiting"
        )
        logger.error(error)
        error_response.update(
            Message='Auto-Scaling policies attachment failed')
        raise exceptions.EMRClusterAutoScalingException(error_response)

    return {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_id": cluster_id,
        'status': json.dumps(response['AutoScalingPolicy']['Status'])
    }
Esempio n. 12
0
def lambda_handler(event, context):
    """Launch a new EMR cluster unless one with the same name already exists.

    Reads ``account``, ``created_by``, ``sub_type`` (cluster type),
    ``segment`` (role, default ``'testing'``) and ``name`` from ``event``.
    The cluster name is ``name`` when given, otherwise
    ``<sub_type>-<segment>``; it is written back into ``event`` as
    ``cluster_name`` before the event is passed to ``create_emr_cluster``.

    Returns:
        dict with the API and Lambda request ids, ``account``, ``segment``,
        ``cluster_name``, ``cluster_type``, the new ``cluster_id``, its
        ``status``, a ``message`` and ``dns_name``
        (``<cluster_name>.<r53_hosted_zone>``, or ``None`` when the hosted
        zone cannot be read from the metadata file).

    Raises:
        EMRClusterCreationException: when a cluster with that name already
            exists, cluster creation raises, or the status lookup of the
            new cluster raises.
    """
    # Fetch API requestId if triggered via API g/w
    api_request_id = event.get('api_request_id')
    account = event.get('account')
    created_by = event.get('created_by', 'unknown')
    cluster_type = event.get('sub_type')
    role = event.get('segment', 'testing')
    name = event.get('name')

    # Form cluster name
    cluster_name = name if name else cluster_type + '-' + role
    event.update({'cluster_name': cluster_name})

    logger = setup_logging(api_request_id, context.aws_request_id)

    # Define error json response for APIs
    error_response = construct_error_response(context, api_request_id)

    # Check if cluster with same name exist?
    # A failing lookup is the normal case here (no such cluster yet), so it
    # is logged but does not abort the handler.
    cluster_id = ""
    try:
        cluster_id = get_cluster_id(cluster_name)
    except Exception as error:
        logger.error(error)
        error_response.update(message=str(error))

    if cluster_id.startswith('j-'):
        logger.info(f"Cluster with id {cluster_id} already exists")
        error_response.update(status='ClusterAlreadyExists')
        error_response.update(
            message=
            f'Cluster with name {cluster_name} already exists, exiting..')
        raise EMRClusterCreationException(error_response)

    src_dir = os.path.dirname(os.path.dirname(__file__))
    emr_metadata = f'{src_dir}/conf/{account}/emr-metadata.json'
    emr_config_file = f'{src_dir}/conf/{account}/emr-{cluster_type}-config.json'
    user_inputs = event

    try:
        cluster_id = create_emr_cluster(user_inputs, emr_config_file,
                                        emr_metadata, role, cluster_type,
                                        cluster_name)
    except Exception as error:
        # The exception is logged on its own line. Passing it as an extra
        # positional argument to a message without a %-placeholder makes the
        # standard logging module fail while formatting the record
        # (assumes setup_logging returns a stdlib logger - TODO confirm).
        logger.error("Cluster creation failed")
        logger.error(error)
        error_response.update(message=str(error))
        raise EMRClusterCreationException(error_response)

    try:
        response = get_emr_cluster_status(cluster_id)
    except Exception as error:
        logger.error("Cluster creation failed")
        logger.error(error)
        # Replace the message left over from the existence check above,
        # which would otherwise describe the wrong failure.
        error_response.update(message=str(error))
        raise EMRClusterCreationException(error_response)

    # The DNS name is best effort: a missing or unreadable zone leaves it None.
    dns_record = None
    try:
        with open(emr_metadata) as metadata_file:
            r53_zone = json.load(metadata_file).get('r53_hosted_zone')
    except Exception:
        logger.error(
            "error occured while fetching route53 zone name from metadata file"
        )
    else:
        # Avoid producing "<cluster_name>.None" when the key is absent.
        if r53_zone:
            dns_record = f"{cluster_name}.{r53_zone}"

    return {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "account": account,
        "segment": role,
        "cluster_name": cluster_name,
        "cluster_type": cluster_type,
        "cluster_id": cluster_id,
        "status": response.get('status'),
        "message": "EMR cluster launch initiated",
        "dns_name": dns_record
    }
Esempio n. 13
0
def lambda_handler(event, context):
    """Enable, disable or report termination protection of an EMR cluster.

    Reads ``cluster_name``, ``cluster_id`` and ``termination_protected``
    (lower-cased action: ``enable`` / ``disable`` / ``status``, default
    ``status``) from ``event``. The cluster id resolved from
    ``cluster_name`` must equal the given ``cluster_id`` before anything is
    read or changed.

    Returns:
        dict with the API and Lambda request ids, ``cluster_name``,
        ``cluster_id`` and ``terminationProtected`` (``'enabled'`` or
        ``'disabled'``). For ``status`` this is the current state of the
        cluster; for ``enable`` / ``disable`` it is the state just set.

    Raises:
        exceptions.EMRClusterTerminationProtectionException: on an invalid
            action, a failed or mismatching cluster lookup, a request for
            the state the cluster is already in, or a failure while
            changing the protection.
    """
    # Fetch API requestId if triggered via API g/w
    api_request_id = event.get('api_request_id', 'null')
    logger = setup_logging(api_request_id, context.aws_request_id)

    cluster_name = event.get('cluster_name')
    cluster_id = event.get('cluster_id')
    termination_protection = event.get(
        'termination_protected', 'status').lower()  # enable/disable/status

    # Define error json response for APIs
    error_response = construct_error_response(context, api_request_id)

    if termination_protection not in {'enable', 'disable', 'status'}:
        logger.error(
            "Invalid action argument. Must be either 'enable', 'disable' or 'status' ...exiting"
        )
        error_response.update(Message='Invalid action argument passed')
        raise exceptions.EMRClusterTerminationProtectionException(
            error_response)

    # Resolve the cluster id from the name and make sure it is the cluster
    # the caller is talking about.
    try:
        response_cluster_id = get_cluster_id(cluster_name)
    except Exception as error:
        logger.error(error)
        error_response.update(
            message=
            f"Unable to fetch EMR cluster information cluster name {cluster_name}"
        )
        raise exceptions.EMRClusterTerminationProtectionException(
            error_response)

    if response_cluster_id != cluster_id:
        logger.error(
            f"EMR cluster id:{response_cluster_id} fetched from cluster_name and given cluster_id:{cluster_id} not matching"
        )
        error_response.update(
            message=
            f"EMR cluster id: {response_cluster_id} fetched from cluster_name and given cluster_id: {cluster_id} not matching"
        )
        raise exceptions.EMRClusterTerminationProtectionException(
            error_response)

    cluster_status = get_emr_cluster_status(cluster_id)
    currently_protected = cluster_status.get('protection')

    if termination_protection == "status":
        # Report the cluster's actual protection flag. Testing the action
        # string itself is always truthy and would always yield 'enabled'.
        return {
            "api_request_id": api_request_id,
            "lambda_request_id": context.aws_request_id,
            "cluster_name": cluster_name,
            "cluster_id": cluster_id,
            "terminationProtected":
            'enabled' if currently_protected else 'disabled'
        }

    set_protection = termination_protection == 'enable'

    # Asking for the state the cluster is already in is treated as an error.
    if set_protection and currently_protected:
        logger.error(
            f"Termination protection already enabled for EMR cluster:{cluster_id} @@@"
        )
        error_response.update(
            message='Termination protection already enabled for EMR cluster')
        raise exceptions.EMRClusterTerminationProtectionException(
            error_response)

    if not set_protection and not currently_protected:
        logger.error(
            f"Termination protection already disabled for EMR cluster:{cluster_id}"
        )
        error_response.update(
            message='Termination protection already disabled for EMR cluster')
        raise exceptions.EMRClusterTerminationProtectionException(
            error_response)

    # Set/Remove termination protection
    try:
        set_emr_termination_protection(cluster_id, set_protection)
    except Exception as error:
        logger.error(error)
        logger.error(
            f"Failed to enable/disable termination protection for clusterId:{cluster_id}"
        )
        error_response.update(
            message='Failed to enable/disable termination protection')
        error_response.update(terminationProtected='FAILED')
        raise exceptions.EMRClusterTerminationProtectionException(
            error_response)

    return {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_name": cluster_name,
        "cluster_id": cluster_id,
        "terminationProtected": 'enabled' if set_protection else 'disabled'
    }
Esempio n. 14
0
def lambda_handler(event, context):
    """
    Performs DNS flip based on dns_name and cluster_name

    The request is validated first, then the hosted zone is derived from the
    DNS name, the current record is looked up, and finally the record is
    created, updated or deleted through ``dns_deupsert``.

    :param event: Lambda event. Keys read: ``api_request_id``, ``action``
        (create, update or delete), ``dns_name``, ``cluster_name`` and
        ``master_ip``.
    :param context: Lambda context; ``aws_request_id`` is used for logging.
    :return: JSON string of the success response, including ``dnsName``.
    :raises exceptions.EMRDNSOperationsException: for missing or malformed
        input, an unsupported action, an unresolvable hosted zone, a record
        state that does not permit the action, or a failed record change.
    """
    api_request_id = event.get('api_request_id', 'null')
    logger = setup_logging(api_request_id, context.aws_request_id)

    # Default to '' before lower-casing so a missing key ends in a descriptive
    # EMRDNSOperationsException below instead of an AttributeError here.
    action = (event.get('action') or '').lower()  # DNS Action [ create, delete, update ]
    record = (event.get('dns_name') or '').lower()  # DNS record name
    cluster_name = event.get('cluster_name')  # EMR cluster name
    master_ip = event.get('master_ip')  # IP of Master Node of Cluster

    success_response = {
        "statusCode": 200,
        "status": "Completed",
        "message": "DNS record changed successfully."
    }

    error_response = construct_error_response(context, api_request_id)
    error_response.update(message="Unable to update DNS record.")

    # The DNS name is mandatory; the hosted zone is derived from it.
    if not record:
        error_response.update(message='DNS Name is not passed for DNS Flip')
        raise exceptions.EMRDNSOperationsException(error_response)

    # Hosted zone is everything after the first label ("host.zone.tld" ->
    # "zone.tld"); partition() never raises when the dot is missing.
    hosted_zone = record.partition(".")[2]
    if not hosted_zone:
        error_response.update(
            message='DNS Name does not contain a hosted zone.')
        raise exceptions.EMRDNSOperationsException(error_response)

    # Checking if non-empty Master IP is passed
    if not master_ip:
        error_response.update(
            message="Empty Master IP is passed for DNS Operation. Exiting.")
        raise exceptions.EMRDNSOperationsException(error_response)

    # Reject unknown actions before any lookup is made, instead of silently
    # returning None after doing nothing.
    if action not in ("create", "update", "delete"):
        logger.error("Unsupported DNS action: %s", action)
        error_response.update(
            message='Unsupported DNS action. Supported actions are: '
            'create, update, delete.')
        raise exceptions.EMRDNSOperationsException(error_response)

    logger.info(record)
    logger.info(master_ip)
    logger.info(cluster_name)
    logger.info(hosted_zone)

    # Fetching Hosted Zone ID
    zone_id = get_dns_hostedid(hosted_zone)

    if zone_id is None:
        logger.error(
            "Unable to fetch the Hosted zone id of the given zone name :" +
            hosted_zone)
        error_response.update(
            message='Unable to fetch the Hosted zone id of the given zone name.'
        )
        raise exceptions.EMRDNSOperationsException(error_response)

    # Fetching DNS record for Hosted Zone
    _, record_response = get_dns_records(record, hosted_zone)

    # Pre-conditions: create needs the record to be absent, update and delete
    # need it to be present.
    if action == "create":
        if record_response:
            logger.error("record already exist.. %s", record_response)
            error_response.update(
                message='Record Already Exist. It can not be created again.')
            raise exceptions.EMRDNSOperationsException(error_response)
    elif action == "update":
        if not record_response:
            logger.error("Unable to update record. It does not exist.. %s",
                         record_response)
            error_response.update(
                message='Unable to update record. Record does not exist.')
            raise exceptions.EMRDNSOperationsException(error_response)
    else:  # delete
        if not record_response:
            logger.error(
                "Unable to delete record %s DNS entry does not exist.", record)
            error_response.update(
                message='Unable to delete record %s DNS entry does not exist.'
                % record)
            raise exceptions.EMRDNSOperationsException(error_response)
        logger.info("record already exist.. %s", record_response)

    dns_name, message = dns_deupsert(action, cluster_name, record, master_ip,
                                     zone_id)

    # The outcome of the change is judged by the value dns_deupsert returns,
    # for every action. Previously update/delete re-checked the pre-call
    # lookup flag, so a failed change was still reported as success.
    # NOTE(review): assumes dns_deupsert returns a truthy dns_name on success
    # for update/delete just as the create path already relied on - confirm.
    if not dns_name:
        error_response['message'] = message
        logger.error(error_response)
        raise exceptions.EMRDNSOperationsException(error_response)

    success_response['message'] = message
    success_response['dnsName'] = dns_name
    logger.info(success_response)
    return json.dumps(success_response)
Esempio n. 15
0
"""
This module implements the common functions required for EMR cluster operations
"""

import botocore.exceptions
from boto3 import Session

from src.util.log import setup_logging
from src.util import exceptions

# Module-level logger tagged 'emrlib'; the second setup_logging argument is
# left empty here.
logger = setup_logging('emrlib', '')
# One boto3 session and EMR client are created at import time and used by the
# functions in this module.
session = Session()
emr = session.client('emr')


def get_instance_group_by_name(cluster_id: str, task_group_name: str):
    """
        Retrieve the group id of a task instance group based on its name

        :param cluster_id: The ClusterId
        :param task_group_name: The name of the task instance group
        :return: The instance group JSON object or None (if not found)
        """
    try:
        response = emr.list_instance_groups(ClusterId=cluster_id)
    except Exception as error:
        raise botocore.exceptions.ClientError(error,
                                              'emr_list_instance_groups')

    groups = response.get('InstanceGroups', [])
    for group in groups:
Esempio n. 16
0
"""
This module implements the common functions required for DNS flip operations
"""

import botocore.exceptions
from boto3 import Session

from src.util.log import setup_logging

# Module-level logger tagged 'dnslib'; the second setup_logging argument is
# left empty here.
logger = setup_logging('dnslib', '')


def get_dns_records(record, zone_name):
    """
    Returns only the records of given cluster record.
    Args:
        record (dns name), zone_name (route 53 zone)

    Returns:
        boolean (true or false), record domain
    """
    session = Session()
    route53 = session.client('route53')

    zone_id = get_dns_hostedid(zone_name)
    record = record + '.' if not record.endswith('.') else record

    logger.info("Retrieved Zone ID: ", zone_id)

    if zone_id is None:
        logger.info(
Esempio n. 17
0
def lambda_handler(event, context):
    """
    Validate an EMR cluster and report its current state.

    Looks up the cluster name, reads the cluster status and builds a response
    from it. For a WAITING/RUNNING cluster the Resource Manager URL and master
    IP are added. For a status that is neither running nor provisioning the
    response status is "FAILED" and the master and service security groups are
    removed on a best-effort basis.

    :param event: Lambda event. Keys read: ``api_request_id``, ``cluster_id``.
    :param context: Lambda context; ``aws_request_id`` is used for logging and
        echoed in the response.
    :return: dict with ``api_request_id``, ``lambda_request_id``,
        ``cluster_id``, ``cluster_name``, ``status`` and, for a running
        cluster, ``rm_url`` and ``master_ip``.
    :raises exceptions.EMRClusterValidationException: when the cluster name
        lookup fails.
    """
    # Fetch API requestId if triggered via API g/w
    api_request_id = event.get('api_request_id', 'null')
    cluster_id = event.get('cluster_id')

    logger = setup_logging(api_request_id, context.aws_request_id)

    logger.info(f"Validating EMR cluster {cluster_id}")

    # Define generic response for Lambda fns & APIs
    success_response = {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_id": cluster_id,
    }

    try:
        cluster_name = get_cluster_name(cluster_id, terminated=True)
    except Exception as error:
        # Keep the root cause visible: the lookup may fail for reasons other
        # than a missing cluster, and the response below cannot tell them apart.
        logger.error(error)
        logger.info("Cluster does not exist.")
        # Define error json response for APIs
        error_response = construct_error_response(context, api_request_id)
        error_response.update(status='ClusterNotPresent')
        error_response.update(
            message=f'Cluster ID {cluster_id} does not exist')
        raise exceptions.EMRClusterValidationException(
            error_response) from error

    success_response.update(cluster_name=cluster_name)

    response = get_emr_cluster_status(cluster_id, detail=True)
    status = response.get('status').upper()

    if status in ['WAITING', 'RUNNING']:
        logger.info("EMR cluster is up and running..")
        success_response.update(status=status)

        # Fetching RM url and master IP as cluster has been created successfully
        cluster_metadata = get_cluster_metadata(cluster_id)
        success_response.update(rm_url=cluster_metadata.get('rm_url'))
        success_response.update(master_ip=cluster_metadata.get('master_ip'))

    elif status in constants.LIST_CLUSTERS_PROVISION_STATES:
        logger.info("EMR cluster creation inprogress...")
        success_response.update(status=status)

    else:
        logger.info(
            f"EMR cluster failed with {response.get('status')} error \n message: {response.get('message')}"
        )
        success_response.update(status="FAILED")

        # Best-effort cleanup: any failure is logged and the FAILED response
        # is still returned. The master group is handled before the service
        # group, and an error on the first skips the second.
        try:
            for sg_key in ('master_sg', 'service_sg'):
                # Get the security group from EMR cluster ec2 attributes
                sg_id = response.get('ec2_attributes').get(sg_key)
                if not sg_id:
                    continue

                if get_network_interface_association(sg_id):
                    logger.info(
                        "Security ID is attached to an interface, skipping the deletion.."
                    )
                    continue

                # Remove all the rules for security group, before deleting
                empty_sg_rules(sg_id)
                delete_security_group(sg_id)

        except Exception as error:
            logger.error(error)

    return success_response
Esempio n. 18
0
def lambda_handler(event, context):
    """
    Fetch statistics from the cluster Resource Manager for the requested type.

    :param event: Lambda event. Keys read here: ``api_request_id``,
        ``metric_type`` (one of ``metrics``, ``apps``, ``drElephant``) and
        ``rm_url``; the whole event is forwarded to the selected fetch
        function.
    :param context: Lambda context; ``aws_request_id`` is used for logging and
        echoed in the response.
    :return: dict with ``rm_url``, ``metric_type``, ``metric_stats`` (the
        fetched payload decoded as UTF-8), ``api_request_id`` and
        ``lambda_request_id``.
    :raises exceptions.EMRRMProxyException: when ``metric_type`` is not
        supported or the fetch fails.
    """
    # Fetch API requestId if triggered via API g/w
    api_request_id = event.get('api_request_id')
    metric_type = event.get('metric_type')

    logger = setup_logging(api_request_id, context.aws_request_id)

    error_response = {
        "statusCode": 500,
        "status": "Failed",
        "message": "Unable to call RM Url for fetching metrics."
    }

    # metric_type -> (fetch function, failure description)
    fetchers = {
        "metrics": (get_metrics_stats,
                    "Error getting cluster metrics from RM"),
        "apps": (get_apps_stats,
                 "Error getting application metrics from RM"),
        "drElephant": (get_drelephant_stats,
                       "Error getting Dr Elephant results from RM"),
    }

    # isinstance guard keeps an unhashable metric_type on the "improper type"
    # path instead of failing inside the dict lookup.
    selected = fetchers.get(metric_type) if isinstance(metric_type,
                                                       str) else None

    if selected is None:
        improper_type = "Improper Type of Metrics Passed. Proper types are: metrics, apps, drElephant"
        # Extra values need a %s placeholder, otherwise logging cannot format
        # the record and the message is lost.
        logger.warning("%s (received: %s)", improper_type, metric_type)
        # Lower-case key so the default "message" is actually replaced.
        error_response.update(message=improper_type)
        raise exceptions.EMRRMProxyException(error_response)

    fetch_stats, failure_text = selected
    try:
        response = fetch_stats(event)
    except Exception as fetch_err:
        logger.error("%s: %s", failure_text, fetch_err)
        error_response.update(message=failure_text + ".")
        raise exceptions.EMRRMProxyException(error_response) from fetch_err

    rm_response = {
        "rm_url": event.get('rm_url'),
        "metric_type": metric_type,
        "metric_stats": response.decode("utf-8"),
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id
    }

    return rm_response
Esempio n. 19
0
"""
This module implements the common functions required for EMR cluster Resource Manager Operations
"""

import botocore.exceptions

from botocore.vendored import requests
from botocore.vendored.requests.exceptions import HTTPError
from src.util.log import setup_logging

# Module-level logger tagged 'rmlib'; the second setup_logging argument is
# left empty here.
logger = setup_logging('rmlib', '')


def get_metrics_stats(event):
    """
    Makes call to provided resource manager Url in event to fetch Cluster Metrics
    :param event: Lambda event containing request params
    :type event: lambda event
    :return: response content
    :rtype: string
    """
    url_suffix = "/ws/v1/cluster/metrics"
    rm_url = event.get('rm_url') + url_suffix

    try:
        response = requests.get(rm_url, timeout=(5, 10))
        print(response)
        response.raise_for_status()
    except HTTPError as http_err:
        logger.error("HTTP error occurred at get_metrics_stats:", http_err)
        raise botocore.exceptions.HTTPClientError(http_err, 'get_metrics_stats')
Esempio n. 20
0
def lambda_handler(event, context):
    """
    Report the status of an EMR cluster.

    Reads the cluster status and returns it together with the request ids.
    When the status is one of ``constants.TERMINATED_STATES`` the master and
    service security groups are emptied and deleted on a best-effort basis,
    unless they are still associated with a network interface.

    :param event: Lambda event. Keys read: ``api_request_id``, ``cluster_id``.
    :param context: Lambda context; ``aws_request_id`` is used for logging and
        echoed in the response.
    :return: dict with ``api_request_id``, ``lambda_request_id``,
        ``cluster_name``, ``cluster_id``, ``status`` and ``message``.
    :raises exceptions.ClusterStatusCheckException: when ``cluster_id`` is
        missing or the status lookup fails.
    """
    # Fetch API requestId if triggered via API g/w
    api_request_id = event.get('api_request_id', 'null')

    logger = setup_logging(api_request_id, context.aws_request_id)

    cluster_id = event.get('cluster_id', None)

    # Define error json response for APIs
    error_response = construct_error_response(context, api_request_id)

    if cluster_id is None:
        logger.error("ClusterId argument not passed ...exiting")
        error_response.update(status='ClusterStatusCheckFailed')
        error_response.update(message='ClusterId argument not passed')
        raise exceptions.ClusterStatusCheckException(
            json.dumps(error_response))

    try:
        response = get_emr_cluster_status(cluster_id, detail=True)
    except Exception as lookup_error:
        logger.error("An error occurred ...exiting \n" + str(lookup_error))
        error_response.update(status='ClusterStatusCheckFailed')
        error_response.update(message=f'{str(lookup_error)}')
        raise exceptions.ClusterStatusCheckException(error_response)

    # Reached only when the lookup succeeded, since the handler above raises.
    success_response = {
        "api_request_id": api_request_id,
        "lambda_request_id": context.aws_request_id,
        "cluster_name": response.get('cluster_name'),
        "cluster_id": response.get('cluster_id'),
        "status": response.get('status'),
        "message": response.get('message')
    }

    # Best-effort cleanup: failures are logged and never change the response.
    try:
        if response.get('status') in constants.TERMINATED_STATES:
            # Master group first, then service group; an error while handling
            # the first skips the second.
            for sg_key in ('master_sg', 'service_sg'):
                # Get the security group from EMR cluster ec2 attributes
                sg_id = response.get('ec2_attributes').get(sg_key)
                if not sg_id:
                    continue

                if get_network_interface_association(sg_id):
                    logger.info(
                        "Security ID is attached to an interface, skipping the deletion.."
                    )
                    continue

                # Remove all the rules for security group, before deleting
                empty_sg_rules(sg_id)
                delete_security_group(sg_id)

    except Exception as cleanup_error:
        logger.error(cleanup_error)

    logger.info(
        f"ClusterName: {response.get('cluster_name')}  ClusterId: {response.get('cluster_id')}:  Status: {response.get('status')}"
    )
    return success_response