Example #1
def update_ddb_stage_control(item, file_status, timestamp):
    try:
        table_stage = dynamodb_resource.Table(DYNAMO_DB_STAGE_TABLE)

        response = table_stage.update_item(
            TableName=DYNAMO_DB_STAGE_TABLE,
            Key={
                's3_object_name_stage': item
            },
            UpdateExpression="set file_status = :file_status, timestamp_step_finished = :timestamp_step_finished",
            ExpressionAttributeValues={
                ':file_status': file_status,
                ':timestamp_step_finished': timestamp}
        )
        logger.debug('DynamoDB update_item response: {}'.format(response))
    except Exception as e:
        msg_exception = "DynamoDB Exception: {}".format(e)
        logger.error(msg_exception)
        send_notification(
            SNS_TOPIC_ARN,
            'AWS Lambda: ValidateJobSubmit'
            ' error: Unable to update DynamoDB Item.\nError: {}'.format(e),
            'Datalake:{} Lambda Error'.format(ENVIRONMENT)
        )
        return 'Unable to update Item from table'
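This and the following snippets rely on module-level objects that are defined elsewhere in their source projects: the boto3 handles, the logger, the environment-driven constants and the send_notification helper. A minimal sketch of what that setup might look like, with everything beyond the referenced names being an assumption (note that the exact argument order of send_notification appears to vary between snippets):

import logging
import os

import boto3

logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Assumed AWS handles; the snippets reference these names directly.
dynamodb_resource = boto3.resource('dynamodb')
sns_client = boto3.client('sns')
emr_client = boto3.client('emr')
s3_client = boto3.client('s3')
events_client = boto3.client('events')

# Assumed configuration pulled from the Lambda environment.
DYNAMO_DB_STAGE_TABLE = os.environ.get('DYNAMO_DB_STAGE_TABLE')
SNS_TOPIC_ARN = os.environ.get('SNS_TOPIC_ARN')
ENVIRONMENT = os.environ.get('ENVIRONMENT', 'dev')


def send_notification(sns_arn, message, subject):
    # Hypothetical helper: publish a plain-text message to the SNS topic.
    return sns_client.publish(TopicArn=sns_arn, Message=message, Subject=subject)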
Example #2
def check_files_shutdown_emr(event_cluster_id, context):
    # check if there are files pending to be processed
    try:
        table_stage = dynamodb_resource.Table(DYNAMO_DB_STAGE_TABLE)
        results_stage = table_stage.scan()
    except Exception as e:
        logger.error("Error Reading DynamoDB Table: {}".format(e))
        send_notification(
            SNS_TOPIC_ARN,
            'AWS Lambda: {function_name}'
            ' error: Failed shutdown EMR cluster.\nError: {error}'.format(
                function_name=context.function_name,
                error=e
            ),
            'Datalake:{} Lambda Error'.format(ENVIRONMENT)
        )
        return 'Unable to Scan table'

    if not results_stage.get('Items'):
        msg = 'DynamoDB Stage Table {} is empty'.format(DYNAMO_DB_STAGE_TABLE)
        logger.info(msg)
        # EMR shutdown
        response = emr_client.terminate_job_flows(
            JobFlowIds=[
                event_cluster_id,
            ]
        )
        return response
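A single table.scan() call returns at most 1 MB of data, so an empty 'Items' list does not by itself prove the table is empty once it grows large. A hedged sketch of a paginated emptiness check that follows LastEvaluatedKey:

def table_has_items(table):
    # Page through the scan until an item shows up or the pages run out.
    resp = table.scan(Limit=1)
    while True:
        if resp.get('Items'):
            return True
        last_key = resp.get('LastEvaluatedKey')
        if not last_key:
            return False
        resp = table.scan(Limit=1, ExclusiveStartKey=last_key)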
def generate_test_notification_simple_string_notif(value):
    send_notification({
        "xpath":
        "/test-notifications:string-container-simple-string-changed",
        "values": [{
            "xpath":
            ("/test-notifications:string-container-simple-string-changed"
             "/test-notifications:new-value"),
            "value":
            value,
        }],
    })
def generate_test_notification_container_string_notif(value):
    send_notification({
        "xpath": ("/test-notifications:notification-from-container"
                  "/test-notifications:container-notification-string-changed"),
        "values": [{
            "xpath":
            ("/test-notifications:notification-from-container"
             "/test-notifications:container-notification-string-changed"
             "/test-notifications:new-value"),
            "value":
            value,
        }],
    })
def check_spark_submit_rule_enabled():
    try:
        resp = events_client.describe_rule(Name=EVENT_SPARK_SUBMIT)
        logger.debug('Describe Rule response: {}'.format(resp))
        rule_status = resp.get('State', 'DISABLED')
    except ClientError as er:
        msg_exception = "Describing Events Rule Exception: {}".format(er)
        logger.error(msg_exception)
        send_notification(
            SNS_TOPIC_ARN,
            "Data Lake:{} Spark Submit Exception".format(ENVIRONMENT),
            "CloudWatch Describe Rule request error: {}".format(msg_exception))
        raise Exception('Unable to request API events:DescribeRule')
    return rule_status
Example #6
def main():
    config.load_config()

    (options, args) = parser.parse_args()

    if not options.to:
        print "Please specify target address with --to [email protected]"
        sys.exit(1)

    print "Generating templates for {}".format(options.to)

    common.send_notification(options.to, config.this_email(),
                             config.get_random_dataset())
    common.generate_schedule(options.to)
def generate_test_notification_list_foo_string_notif(key, value):
    send_notification({
        "xpath": ("/test-notifications:notification-from-list"
                  "/test-notifications:notification-from-list[name='{}']"
                  "/test-notifications:list-foo-changed").format(key),
        "values": [{
            "xpath": ("/test-notifications:notification-from-list"
                      "/test-notifications:notification-from-list[name='{}']"
                      "/test-notifications:list-foo-changed"
                      "/test-notifications:new-value").format(key),
            "value":
            value,
        }],
    })
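For reference, a hedged usage example of these test-notification generators (the values are chosen arbitrarily):

# Emit a change notification for the list entry named "foo" with a new value of "bar",
# and a simple string-changed notification with the value "hello".
generate_test_notification_list_foo_string_notif("foo", "bar")
generate_test_notification_simple_string_notif("hello")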
Example #8
def test_basic_notification(mgr):
    mgr.dispatch(
        to_ele("""
            <create-subscription xmlns="urn:ietf:params:xml:ns:netconf:notification:1.0">
              <filter>
                <hardware-state-change xmlns="urn:ietf:params:xml:ns:yang:ietf-hardware" />
              </filter>
            </create-subscription>
            """))
    send_notification({
        "xpath": "/ietf-hardware:hardware-state-change",
        "values": []
    })
    n = mgr.take_notification(timeout=10)
    assert n.notification_ele.xpath("//ietf-hw:hardware-state-change",
                                    namespaces=NS_MAP)
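The assertion relies on an NS_MAP prefix mapping that is not shown; presumably something along these lines, where the 'ietf-hw' prefix binding is an assumption based on the namespace used in the subscription filter:

NS_MAP = {
    # Hypothetical prefix-to-namespace map for the XPath assertions.
    "ietf-hw": "urn:ietf:params:xml:ns:yang:ietf-hardware",
}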
Example #9
def get_object_metadata(bucket, key):
    s3_object_name_raw = None
    try:
        resp = s3_client.head_object(Bucket=bucket, Key=key)
        logger.debug('S3 object head: {}'.format(resp))
        for meta_key, meta_value in resp.get('Metadata', {}).items():
            if meta_key == METADATA_OBJECT_NAME_RAW:
                s3_object_name_raw = meta_value
                break

        return s3_object_name_raw
    except Exception as e:
        logger.error("S3 Exception: {}".format(e))
        send_notification(SNS_TOPIC_ARN,
                          "Data Lake: Update DynamoDB Stage Exception",
                          "S3 Head object error: {}".format(e))
        raise e
Example #10
def get_object_tag(bucket, key):
    s3_object_name_raw = None
    try:
        resp = s3_client.get_object_tagging(Bucket=bucket, Key=key)
        logger.debug('S3 object tags: {}'.format(resp))
        for tag in resp.get('TagSet', []):
            if tag.get('Key') == TAG_OBJECT_NAME_RAW:
                s3_object_name_raw = tag.get('Value')

        return s3_object_name_raw
    except Exception as e:
        logger.error("S3 Exception: {}".format(e))
        send_notification(SNS_TOPIC_ARN,
                          "Data Lake: Update DynamoDB Stage Exception",
                          "S3 Get Tags error: {}".format(e))
        raise e
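get_object_metadata and get_object_tag recover the same attribute from two different places on the S3 object; a hedged sketch of combining them, preferring the tag and falling back to the head-object metadata (the wrapper name is hypothetical):

def resolve_object_name_raw(bucket, key):
    # Prefer the object tag; fall back to the head-object metadata field.
    return get_object_tag(bucket, key) or get_object_metadata(bucket, key)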
def generate_test_notification_embedded_list_string_notif(key1, key2, value):
    send_notification({
        "xpath":
        ("/test-notifications:notification-from-list"
         "/test-notifications:notification-from-list[name='{}']"
         "/test-notifications:embedded-list[name='{}']"
         "/test-notifications:embedded-foo-changed").format(key1, key2),
        "values": [{
            "xpath": ("/test-notifications:notification-from-list"
                      "/test-notifications:notification-from-list[name='{}']"
                      "/test-notifications:embedded-list[name='{}']"
                      "/test-notifications:embedded-foo-changed"
                      "/test-notifications:new-value").format(key1, key2),
            "value":
            value,
        }],
    })
def stage_is_empty():
    try:
        table_stage = dynamodb_client.Table(DYNAMO_DB_STAGE_TABLE)
        results_stage = table_stage.scan()
        return not results_stage.get('Items')

    except Exception as e:
        logger.error("Error Reading DynamoDB Table: {}".format(e))
        send_notification(
            sns_arn=SNS_TOPIC_ARN,
            subject='Datalake:{} Create EMR Cluster error'.format(ENVIRONMENT),
            message=str(e)
        )
        raise e
def main():
    config.load_config()
    print "Notifying users of upcoming items"

    for user in config.get_users():
        dataset = config.get_random_dataset()

        from common import random_date
        from datetime import datetime, timedelta

        start = datetime.now()
        end = datetime.now() + timedelta(days=60)
        date = random_date(start, end)

        address = common.generate_token_and_address(user, dataset.get('name'),
                                                    date)
        common.send_notification(user, "data.gov.uk <{}>".format(address),
                                 dataset)
def set_spark_submit_rule_status(status):
    if status not in ['ENABLED', 'DISABLED']:
        logger.error(
            'Invalid status parameter. Must be ENABLED or DISABLED')
        raise Exception('Invalid status for the event rule')
    logger.info(
        'Setting the scheduled Spark Submit rule to {}'.format(status))
    try:
        if status == 'ENABLED':
            api_request = 'events:EnableRule'
            resp = events_client.enable_rule(Name=EVENT_SPARK_SUBMIT)
        else:
            api_request = 'events:DisableRule'
            resp = events_client.disable_rule(Name=EVENT_SPARK_SUBMIT)
        logger.debug('{} Rule response: {}'.format(status, resp))
    except ClientError as er:
        msg_exception = "{} Events Rule Exception: {}".format(status, er)
        logger.error(msg_exception)
        send_notification(
            SNS_TOPIC_ARN, "Data Lake: Spark Submit Exception",
            "CloudWatch {} request error: {}".format(api_request, msg_exception))
        raise Exception('Unable to request API {}'.format(api_request))
    return
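check_spark_submit_rule_enabled and set_spark_submit_rule_status pair naturally; a hedged convenience wrapper that only touches the rule when its state actually needs to change (the wrapper itself is an assumption, not part of the source):

def ensure_spark_submit_rule(desired_status):
    # Only call the Enable/Disable Rule API when the rule is not already
    # in the desired state; returns the state found before any change.
    current = check_spark_submit_rule_enabled()
    if current != desired_status:
        set_spark_submit_rule_status(desired_status)
    return current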
Example #15
# -*- coding: utf-8 -*-
"""The watcher script."""

from common import send_notification
import subprocess
import os
import re
import time

send_notification("Waiting...", "Waiting for containers to start...")

my_env = os.environ.copy()

polling_interval = 10  # 10 seconds
max_polling_duration = 10 * 60  # 10 minutes
duration = 0
while True:
    duration += polling_interval
    running_containers = subprocess.run(["docker", "compose", "top"],
                                        stdout=subprocess.PIPE,
                                        env=my_env)
    print(running_containers.stdout)
    if len(running_containers.stdout) > 10:
        break

    if duration > max_polling_duration:
        send_notification("Failed...", "Container failed to start on time",
                          "Hero")
        exit(1)

    time.sleep(polling_interval)
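Measuring the length of the `docker compose top` output is fragile; a hedged alternative, assuming a Compose v2 CLI, is to poll `docker compose ps -q` for container IDs (the helper name and structure are assumptions):

import subprocess
import time

def wait_for_containers(timeout=600, interval=10, env=None):
    # Poll `docker compose ps -q` until at least one container ID is reported,
    # or give up once `timeout` seconds have elapsed.
    waited = 0
    while waited < timeout:
        result = subprocess.run(["docker", "compose", "ps", "-q"],
                                stdout=subprocess.PIPE, env=env)
        if result.stdout.strip():
            return True
        time.sleep(interval)
        waited += interval
    return False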
def create_cluster():
    logger.info('There is no cluster available to execute the jobs')
    logger.info('Creating a new one to run the jobs.')

    # Base arguments for the RunJobFlow request
    args = {
        "Name": label,
        "LogUri": "s3://{}".format(S3_LOG_URI),
        "ReleaseLabel": EMR_RELEASE,
    }
    if EMR_CUSTOM_AMI:
        args.update({"CustomAmiId": EMR_CUSTOM_AMI_ID})

    args.update({
        "Instances": {
            "InstanceGroups": [
                {
                    "InstanceRole": "MASTER",
                    "InstanceType": str(INSTANCE_TYPE_MASTER),
                    "Name": "Master instance group",
                    "InstanceCount": 1
                }, {
                    "InstanceRole": "CORE",
                    "InstanceType": str(INSTANCE_TYPE_CORE),
                    "Name": "Core instance group",
                    "InstanceCount": int(INSTANCE_COUNT_CORE_NODE),
                    "EbsConfiguration": {
                        "EbsBlockDeviceConfigs": [{
                            "VolumeSpecification": {
                                "SizeInGB": 500,
                                "VolumeType": "gp2"
                            },
                            "VolumesPerInstance": 1
                        }
                        ],
                        "EbsOptimized": True
                    }
                }, {
                    "InstanceRole": "TASK",
                    "InstanceType": str(INSTANCE_TYPE_TASK),
                    "Name": "Task instance group",
                    "InstanceCount": int(INSTANCE_COUNT_TASK_NODE)
                }
            ],
            "Ec2KeyName": EC2_KEYPAIR,
            "KeepJobFlowAliveWhenNoSteps": True,
            "TerminationProtected": False,
            "Ec2SubnetId": EC2_SUBNET_ID
        },
        "BootstrapActions": [{
            'Name': 'Install Libs and Bootstrap Scripts',
            'ScriptBootstrapAction': {
                'Path': 's3://{}/bootstrap/emr-bootstrap/install_libs.sh'.format(S3_BOOTSTRAP_BUCKET),
                'Args': [S3_BOOTSTRAP_BUCKET]
            }
        },
        {
            'Name': 'Bootstrap ENI MASTER',
            'ScriptBootstrapAction': {
                'Path': 's3://{}/bootstrap/emr-bootstrap/emr-eni-proc.sh'.format(S3_BOOTSTRAP_BUCKET),
                'Args': [ENI_MASTER]
            }
        }],
        "Steps": [{
            'Name': 'Install the Manage cron job to terminate EMR cluster',
            'ActionOnFailure': 'CONTINUE',
            'HadoopJarStep': {
                'Jar': 'command-runner.jar',
                'Args': [
                    '{}/manage_emr_shutdown_install.sh'.format(EMR_HOME_SCRIPTS),
                    '{}/manage_emr_shutdown.sh'.format(EMR_HOME_SCRIPTS),
                    S3_BOOTSTRAP_BUCKET,
                    SNS_TOPIC_ARN
                ]
            }
        }],
        "Applications": [{
            'Name': 'Hadoop'
        }, {
            'Name': 'Hive'
        }, {
            'Name': 'Oozie'
        }, {
            'Name': 'Ganglia'
        }, {
            'Name': 'Tez'
        }, {
            'Name': 'Hue'
        }, {
            'Name': 'Spark'
        }],
        "Configurations": [
            {
                "Classification": "emrfs-site",
                "Properties": {
                    "fs.s3.consistent.retryPeriodSeconds": "10",
                    "fs.s3.consistent": "true",
                    "fs.s3.consistent.retryCount": "5",
                    "fs.s3.consistent.metadata.tableName": "EmrFSMetadata"
                },
                "Configurations": [

                ]
            },
            {
                "Classification": "hive-site",
                "Properties": {
                    "hive.metastore.client.factory.class":
                        "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"
                },
                "Configurations": [
                ]
            },
            {
                "Classification": "spark-hive-site",
                "Properties": {
                    "hive.metastore.client.factory.class":
                        "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"
                },
                "Configurations": [
                ]
            },
        ],
        "VisibleToAllUsers":
        True,
        "JobFlowRole":
        EMR_EC2_ROLE,
        "ServiceRole":
        EMR_ROLE,
        "Tags": [
            {
                'Key': 'Role',
                'Value': 'EMR Data Lake'
            },
            {
                'Key': 'Environment',
                'Value': ENVIRONMENT
            },
            {
                'Key': 'Label',
                'Value': label
            },
            {
                'Key': 'Name',
                'Value': label
            }
        ]
    })

    # Create new EMR cluster
    emr_launch_message = 'Launching new EMR cluster: {}'.format(label)
    logger.info(emr_launch_message)
    send_notification(
        sns_arn=SNS_TOPIC_ARN,
        subject='Datalake:{} Create EMR Cluster message'.format(ENVIRONMENT),
        message=emr_launch_message
    )

    try:
        response = emr_client.run_job_flow(**args)
        return response
    except Exception as e:
        logger.error("RunJobFlow Exception: {}".format(e))
        send_notification(
            sns_arn=SNS_TOPIC_ARN,
            subject='Datalake:{} Create EMR Cluster Error'.format(ENVIRONMENT),
            message='Lambda Create EMR Cluster Error\nError message: {}'.format(e)
        )
        raise e
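A hedged sketch of how create_cluster might be driven, launching a cluster only when none of the expected clusters is running and the stage table still has work (the driver function is an assumption):

def ensure_cluster():
    # Reuse an existing cluster when one is starting, running or waiting.
    clusters = emr_client.list_clusters(
        ClusterStates=['STARTING', 'RUNNING', 'WAITING'])
    if clusters.get('Clusters'):
        return clusters['Clusters'][0]['Id']
    # Nothing to launch if the stage table has no pending files.
    if stage_is_empty():
        return None
    # Otherwise launch a new cluster and return its JobFlowId.
    return create_cluster().get('JobFlowId')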
def lambda_handler(event, context):
    # chooses the first cluster which is Running or Waiting
    # possibly can also choose by name or already have the cluster id
    skip = None
    if isinstance(event, dict):
        skip = event.get('skip')

    clusters = emr_client.list_clusters(
        ClusterStates=['STARTING', 'RUNNING', 'WAITING'])
    logger.info(clusters)

    # ClusterName
    logger.info(event.values())

    logger.info(event.get('detail', {}).get('name'))
    clusterValue = event.get('detail', {}).get('name')

    if clusterValue != CLUSTER_NAME:
        logger.error("No valid cluster")
        return 'No valid cluster'

    # choose the correct cluster
    for cluster in clusters.get('Clusters', []):
        if cluster['Name'] == CLUSTER_NAME:
            # take the first relevant cluster
            cluster_id = cluster['Id']
            break
    else:
        logger.error("No valid clusters")
        return 'No valid clusters'

    step_args = [
        SETUP_JOBS, "s3://{}/{}".format(S3_BUCKET_PROGRAMS, S3_KEY_PROGRAMS)
    ]

    step = {
        "Name": 'Setup_jobs',
        'ActionOnFailure': 'CONTINUE',
        'HadoopJarStep': {
            'Jar':
            's3://{}.elasticmapreduce/libs/script-runner/script-runner.jar'.
            format(REGION),
            'Args':
            step_args
        }
    }

    logger.debug("### Debug mode enabled ###")
    logger.debug("EMR Step: {}".format(step))
    logger.debug("EMR Cluster_id: {}".format(cluster_id))

    try:
        action = emr_client.add_job_flow_steps(JobFlowId=cluster_id,
                                               Steps=[step])
        logger.info('EMR action: {}'.format(action))

    except Exception as e:
        msg_exception = "EMR Exception: " + str(e)
        logger.error(msg_exception)
        send_notification(
            SNS_TOPIC_ARN, "Data Lake: Spark Submit Exception",
            "Lambda Function Name: {}\n{}".format(context.function_name,
                                                  msg_exception))
        return

    try:
        table_stage = dynamodb_client.Table(DYNAMO_DB_STAGE_TABLE)
        table_job = dynamodb_client.Table(DYNAMO_DB_JOB_CATALOG)
        results = table_stage.scan(
            FilterExpression=Attr('file_status').ne(skip))

    except Exception as e:
        msg_exception = "DynamoDB Scan Exception: {}".format(e)
        logger.error(msg_exception)
        send_notification(
            SNS_TOPIC_ARN, "Data Lake: Spark Submit Exception",
            "Lambda Function Name: {}\n{}".format(context.function_name,
                                                  msg_exception))
        return

    for item in results.get('Items', []):
        s3_object_name_stage = item.get('s3_object_name_stage')
        partition_date = item.get('partition')
        s3_dir_stage = item.get('s3_dir_stage')

        logger.debug("### Debug mode enabled ###")
        logger.debug("Items: {}".format(item))
        logger.debug("partition_date: {}".format(partition_date))
        logger.debug("s3_dir_stage: {}".format(s3_dir_stage))

        try:
            responses = table_job.get_item(
                Key={'s3_data_source': str(s3_dir_stage)})
        except Exception as e:
            msg_exception = "DynamoDB Job GetItem Exception: {}".format(e)
            logger.error(msg_exception)
            send_notification(
                SNS_TOPIC_ARN, "Data Lake: Spark Submit Exception",
                "Lambda Function Name: {}\n{}".format(context.function_name,
                                                      msg_exception))
            return

        if responses.get('Item'):
            spark_program_s3_path = responses['Item']['programs']
            spark_program = spark_program_s3_path.split("/")[-1]
            hive_database_raw = responses['Item']['hive_database_raw']
            hive_database_analytics = responses['Item'][
                'hive_database_analytics']
            hive_table_raw = responses['Item']['hive_table_raw']
            hive_table_analytics = responses['Item']['hive_table_analytics']
            s3_target = responses['Item']['s3_target']
            partition_name_stage = responses['Item']['partition_name_stage']
            status_enabled = responses['Item']['Enabled']
            params_type = responses.get('Item', {}).get('params_type')
            params = responses.get('Item', {}).get('params')

            logger.debug("responses: {}".format(responses['Item']))
            logger.debug(
                "spark_program_s3_path: {}".format(spark_program_s3_path))
            logger.debug("spark_program: {}".format(spark_program))
            logger.debug("hive_database_raw: {}".format(hive_database_raw))
            logger.debug(
                "hive_database_analytics: {}".format(hive_database_analytics))
            logger.debug("hive_table_raw: {}".format(hive_table_raw))
            logger.debug(
                "hive_table_analytycs: {}".format(hive_table_analytics))
            logger.debug("s3_target: {}".format(s3_target))
            logger.debug(
                "partition_name_stage: {}".format(partition_name_stage))
            logger.debug("status_enabled: {}".format(status_enabled))

            # code location on your emr master node
            code_path = "/home/hadoop/code/"

            # spark configuration example
            # step_args = ["/usr/bin/spark-submit", "--spark-conf", "your-configuration",
            #             code_path + "your_file.py", '--your-parameters', 'parameters']
            step_args = [
                "/usr/bin/spark-submit", "--conf",
                "spark.yarn.appMasterEnv.PYTHONIOENCODING=utf8"
            ]
            if params_type and params_type == 'json':
                step_args.append(code_path + spark_program)
            elif params_type and params_type == 'cli':
                step_args.append(code_path + spark_program)
                for param in params.split(' '):
                    step_args.append(param)
            else:
                step_args.append(code_path + spark_program)
                step_args.append(hive_database_raw)
                step_args.append(hive_table_raw)
                step_args.append(s3_dir_stage)
                step_args.append(hive_database_analytics)
                step_args.append(hive_table_analytics)
                step_args.append(s3_target)
                if partition_name_stage != 'false':
                    step_args.append('{}={}'.format(partition_name_stage,
                                                    partition_date))

            step = {
                "Name": s3_object_name_stage,
                'ActionOnFailure': 'CONTINUE',
                'HadoopJarStep': {
                    'Jar': 'command-runner.jar',
                    'Args': step_args
                }
            }

            if status_enabled == "True":
                timestamp_step_submitted = time.strftime(
                    "%Y-%m-%dT%H:%M:%S-%Z")
                try:
                    action = emr_client.add_job_flow_steps(
                        JobFlowId=cluster_id, Steps=[step])

                    logger.debug("### Debug mode enabled ###")
                    logger.debug("EMR Step: {}".format(step))
                    logger.debug("EMR Step timestamp_submitted: {}".format(
                        timestamp_step_submitted))
                    logger.debug("EMR Cluster_id: {}".format(cluster_id))
                    logger.debug("Added step:  {}".format(action))
                except ClientError as e:
                    if e.operation_name == 'AddJobFlowSteps' and e.response[
                            'Error']['Message'] == STEPS_EXCEEDED:
                        logger.info(
                            'The maximum number of steps for cluster exceeded')
                        rule_status = check_spark_submit_rule_enabled()
                        if rule_status == 'DISABLED':
                            set_spark_submit_rule_status('ENABLED')
                        else:
                            logger.info(
                                'The Spark Submit Rule is enabled, exiting')
                            return
                    else:
                        msg_exception = "EMR Add Steps Exception: {}".format(e)
                        logger.error(msg_exception)
                        send_notification(
                            SNS_TOPIC_ARN, "Data Lake: Spark Submit Exception",
                            "Lambda Function Name: {}\n{}".format(
                                context.function_name, msg_exception))
                        return 'Error Sending Job Flow Steps'
                    return 'Finished sending step Jobs but with more on queue'

                # If we were able to send the job we need to update the DDB table with the new Status
                try:
                    response = table_stage.update_item(
                        TableName=DYNAMO_DB_STAGE_TABLE,
                        Key={'s3_object_name_stage': s3_object_name_stage},
                        UpdateExpression=
                        "set hive_table_analytics = :hive_table_analytics,"
                        "hive_database_analytics = :hive_database_analytics,"
                        "s3_target = :s3_target,"
                        "timestamp_step_submitted = :timestamp_step_submitted,"
                        "file_status = :file_status",
                        ExpressionAttributeValues={
                            ':hive_table_analytics': hive_table_analytics,
                            ':hive_database_analytics':
                            hive_database_analytics,
                            ':s3_target': s3_target,
                            ':timestamp_step_submitted':
                            timestamp_step_submitted,
                            ':file_status': DatalakeStatus.PROCESSING
                        })
                    logger.info(
                        'DynamoDB update response: {}'.format(response))
                except Exception as e:
                    msg_exception = "DynamoDB Stage Update Item Exception: {}".format(
                        e)
                    logger.error(msg_exception)
                    send_notification(
                        SNS_TOPIC_ARN, "Data Lake: Spark Submit Exception",
                        "Lambda Function Name: {}\n{}".format(
                            context.function_name, msg_exception))
                    return

            else:
                logger.info("The program is not enabled: {}".format(
                    spark_program_s3_path))
        else:
            logger.info('There are no items returned from DynamoDB')

    if skip:
        # We are running from a scheduled rule and there are no more jobs to submit,
        # so disable the scheduled rule
        logger.info(
            'Disabling Scheduled Event Rule due to no more jobs to submit')
        set_spark_submit_rule_status('DISABLED')

    logger.info('Finished processing the Spark Submit function')
    return
Example #18
def lambda_handler(event, context):
    step_name = event.get('detail', {}).get('name')
    event_step_message = event.get('detail', {}).get('message')
    event_step_state = event.get('detail', {}).get('state')
    event_step_id = event.get('detail', {}).get('stepId')
    event_cluster_id = event.get('detail', {}).get('clusterId')

    if not step_name or 's3://' not in step_name:
        logger.info("It is not a catalog job.")
        return

    cluster_info = emr_client.describe_cluster(ClusterId=event_cluster_id)
    logger.info(cluster_info)
    cluster_name = cluster_info.get("Cluster", {}).get("Name")
    logger.info(cluster_name)

    timestamp_step_finished = time.strftime("%Y-%m-%dT%H:%M:%S-%Z")

    logger.debug("### Debug mode enabled ###")
    logger.debug("Received event: {}".format(json.dumps(event, indent=2)))
    logger.debug("Step Name: {}".format(step_name))
    logger.debug("Cluster Name: {}".format(event_cluster_id))
    logger.debug("Message event step changed: {}".format(event_step_message))

    if 'COMPLETED' in event_step_state:

        message_step_completed = "job execution completed: Name: {}; ID: {}".format(step_name, event_step_id)
        logger.info(message_step_completed)

        try:
            table_stage = dynamodb_resource.Table(DYNAMO_DB_STAGE_TABLE)

            response = table_stage.get_item(Key={'s3_object_name_stage': str(step_name)})
            logger.info(response)

        except Exception as e:
            msg_exception = "DynamoDB Exception: {}".format(e)
            logger.error(msg_exception)
            logger.debug(traceback.print_exc())
            send_notification(
                SNS_TOPIC_ARN,
                'AWS Lambda: {function_name}'
                ' error: Unable to get DynamoDB Item.\nError: {error}'.format(
                    function_name=context.function_name,
                    error=e
                ),
                'Datalake:{} Lambda Error'.format(ENVIRONMENT)
            )
            return 'Unable to Get Item from table'

        if response.get('Item'):
            hive_database_analytics = response['Item']['hive_database_analytics']
            hive_table_analytics = response['Item']['hive_table_analytics']
            s3_target = response['Item']['s3_target']
            s3_object_name_raw = response['Item']['s3_object_name_raw']
            logger.debug("### Debug mode enabled ###")
            logger.debug("Updating Table: {}".format(DYNAMO_DB_CONTROL))
            logger.debug("s3_object_name_raw: {}".format(s3_object_name_raw))
            try:
                table_control = dynamodb_resource.Table(DYNAMO_DB_CONTROL)
                response_control = table_control.update_item(
                    TableName=DYNAMO_DB_CONTROL,
                    Key={
                        's3_object_name': str(s3_object_name_raw)
                    },
                    UpdateExpression="set file_status = :file_status, "
                                     "timestamp_step_finished = :timestamp_step_finished, "
                                     "hive_table_analytics = :hive_table_analytics, "
                                     "hive_database_analytics = :hive_database_analytics, "
                                     "s3_target = :s3_target",
                    ExpressionAttributeValues={
                        ':file_status': DatalakeStatus.LOADED,
                        ':timestamp_step_finished': str(timestamp_step_finished),
                        ':hive_table_analytics': str(hive_table_analytics),
                        ':hive_database_analytics': str(hive_database_analytics),
                        ':s3_target': str(s3_target)}
                )
                logger.debug('DDB update_item response: {}'.format(response_control))
            except Exception as e:
                msg_exception = "DynamoDB Exception: {}".format(e)
                logger.error(msg_exception)
                logger.debug(traceback.print_exc())
                send_notification(
                    SNS_TOPIC_ARN,
                    'AWS Lambda: {function_name}'
                    ' error: Unable to update DynamoDB Item.\nError: {error}'.format(
                        function_name=context.function_name,
                        error=e
                    ),
                    'Datalake:{} Lambda Error'.format(ENVIRONMENT)
                )
                return 'Unable to Update Item from table'
        else:
            logger.info('There are no items returned from DynamoDB!')
            return 'No items to process'

        # TODO: Create a parameter to Delete or Keep the item in the DynamoDB StageControl
        logger.info("Cleaning Table DynamoDB: {}; s3_object_name_stage: {}".format(DYNAMO_DB_STAGE_TABLE, step_name))
        try:
            response_stage = table_stage.delete_item(Key={'s3_object_name_stage': step_name})
            http_status_code_delete_stage = response_stage['ResponseMetadata']['HTTPStatusCode']
            logger.debug("### Debug mode enabled ###")
            logger.debug(response_stage)
            logger.debug("HTTPStatusCode: {}".format(http_status_code_delete_stage))
        except Exception as e:
            msg_exception = "DynamoDB Exception: {}".format(e)
            logger.error(msg_exception)
            send_notification(
                SNS_TOPIC_ARN,
                'AWS Lambda: {function_name}'
                ' error: Unable to delete DynamoDB Item.\nError: {error}'.format(
                    function_name=context.function_name,
                    error=e
                ),
                'Datalake:{} Lambda Error'.format(ENVIRONMENT)
            )
            return 'Unable to delete Item from table'

        logger.info("Cleaning s3 object stage: {}".format(step_name))
        bucket_stage = step_name.split("/")[2]
        logger.info("bucket: {}".format(bucket_stage))
        key_stage = step_name.split('/', 3)[3]
        logger.info("Key: {}".format(key_stage))
        try:
            s3_client.delete_object(Bucket=bucket_stage, Key=key_stage)
            # check if there are files pending to be processed
            # This step shutdown the cluster if there are no items in the StageControl Table
            check_files_shutdown_emr(event_cluster_id, context)
        except Exception as e:
            logger.error("S3 Exception: {}".format(e))
            return

    elif 'FAILED' in event_step_state:
        message_step_failed = "job execution failed: Name: {}; ID: {}".format(step_name, event_step_id)
        logger.info(message_step_failed)
        update_ddb_stage_control(step_name, DatalakeStatus.FAILED, timestamp_step_finished)

    elif 'CANCELLED' in event_step_state:
        message_step_cancelled = "job execution cancelled: Name: {}; ID: {}".format(step_name, event_step_id)
        logger.info(message_step_cancelled)
        update_ddb_stage_control(step_name, DatalakeStatus.CANCELED, timestamp_step_finished)
    else:
        return
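Several of the handlers above reference a DatalakeStatus holder that is not shown; a minimal sketch of what it might contain (the literal values are assumptions):

class DatalakeStatus:
    # Hypothetical status constants used when updating the control tables.
    PROCESSING = 'PROCESSING'
    LOADED = 'LOADED'
    FAILED = 'FAILED'
    CANCELED = 'CANCELED'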