Example #1
import logging

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Connection, S3Manager, AthenaManager, execute_scripts, send_response, and the
# SUCCESS/FAILED status constants are assumed to be defined elsewhere in this module.


def lambda_handler(event, context):
    """
    The entry point for the Lambda function
    :param event: A dictionary containing all the information about the event that triggered the Lambda function
    :param context: This object contains information about the state of the Lambda function itself
    :return: N/A
    """
    # set environment variables
    environment_name = event['ResourceProperties']['EnvnameProperty']
    micro_environment = event['ResourceProperties']['MicroenvProperty']
    client = event['ResourceProperties']['ClientProperty']
    account = event['ResourceProperties']['AccountProperty']
    ddl_bucket = event['ResourceProperties']['DDLBucketProperty']
    ddl_create_key = event['ResourceProperties']['CreateSqlS3KeyProperty']
    ddl_delete_key = event['ResourceProperties']['DeleteSqlS3KeyProperty']
    data_bucket = event['ResourceProperties']['DataBucketProperty']
    test_output_location = event['ResourceProperties'][
        'AthenaSqlOutputUriProperty']
    physical_resource_id = context.log_stream_name

    # Create a dictionary of placeholders and their values that will be replaced in sql commands.
    dict_of_replacements = {
        "__bucket__": data_bucket,
        "__envname__": environment_name,
        "__microenv__": micro_environment,
        "__client__": client,
        "__account__": account
    }

    # Set AWS-specific variables
    # Instantiate Connection
    conn = Connection()
    # Create an Athena connection
    conn_athena = conn.athena_connection()
    # Create an S3 connection
    conn_s3 = conn.s3_connection()
    # Instantiate S3Manager
    s3_manager = S3Manager(conn_s3)

    # Instantiate Athena Manager
    athena_manager = AthenaManager(conn_athena)

    try:
        if event['RequestType'] != 'Delete':
            # Download file that has the SQL Create Commands from s3
            ddl_filename = s3_manager.download_object(ddl_bucket,
                                                      ddl_create_key)
            logger.info('Downloaded {}'.format(ddl_filename))

            response = execute_scripts(athena_manager, ddl_filename,
                                       test_output_location,
                                       dict_of_replacements, 'Create')
            send_response(event, context, SUCCESS, response,
                          physical_resource_id)
            return response

        else:
            # Download file that has the SQL Delete Commands from s3
            ddl_filename = s3_manager.download_object(ddl_bucket,
                                                      ddl_delete_key)
            logger.info('Downloaded {}'.format(ddl_filename))

            response = execute_scripts(athena_manager, ddl_filename,
                                       test_output_location,
                                       dict_of_replacements, 'Delete')
            send_response(event, context, SUCCESS, response,
                          physical_resource_id)
            return response

    except Exception as error:
        logger.error(error.args)
        response_data = {'Reason': error.args}
        send_response(event, context, FAILED, response_data,
                      physical_resource_id)
        return error.args
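The helper functions used above are not shown in this example. For orientation, here is a minimal sketch of what they might look like, assuming `execute_scripts` substitutes the placeholder dictionary into the downloaded SQL file and runs each statement through Athena, and assuming `send_response` follows the standard CloudFormation custom-resource callback pattern (the signatures below mirror the calls above, but everything else is hypothetical):

import json
import urllib.request

SUCCESS = 'SUCCESS'
FAILED = 'FAILED'


def execute_scripts(athena_client, sql_path, output_location, replacements, action):
    # Hypothetical sketch: read the DDL file, substitute placeholders, and run
    # each semicolon-separated statement through Athena.
    with open(sql_path) as sql_file:
        sql_text = sql_file.read()
    for placeholder, value in replacements.items():
        sql_text = sql_text.replace(placeholder, value)
    query_ids = []
    for statement in (s.strip() for s in sql_text.split(';')):
        if statement:
            execution = athena_client.start_query_execution(
                QueryString=statement,
                ResultConfiguration={'OutputLocation': output_location})
            query_ids.append(execution['QueryExecutionId'])
    return {'Action': action, 'QueryExecutionIds': query_ids}


def send_response(event, context, status, response_data, physical_resource_id):
    # Hypothetical sketch: reply to CloudFormation through the pre-signed
    # ResponseURL, as the standard cfnresponse module does.
    body = json.dumps({
        'Status': status,
        'Reason': 'See CloudWatch log stream: {}'.format(context.log_stream_name),
        'PhysicalResourceId': physical_resource_id,
        'StackId': event['StackId'],
        'RequestId': event['RequestId'],
        'LogicalResourceId': event['LogicalResourceId'],
        'Data': response_data,
    }).encode('utf-8')
    request = urllib.request.Request(
        event['ResponseURL'], data=body, method='PUT',
        headers={'Content-Type': ''})
    urllib.request.urlopen(request)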
Example #2
import logging
import os
import sys
import time

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Connection, S3Manager, ManifestParser, EMRInstance, and get_manifest_file are
# assumed to be defined elsewhere in this module.


def lambda_handler(event, context):
    """
    The main entry point for the Lambda function
    :param event: Event object that contains the bucket name and key of the manifest file that triggered the Lambda
    :param context: Contains context information about the Lambda function itself, e.g. remaining time
    :return: Does not return anything
    """
    # set environment
    exec_environment = os.environ['exec_environment']
    etl_file_path = os.environ['etl_file_path']
    manifest_file_path = os.environ['manifest_file_path']
    manifest_file = os.environ['manifest_file']
    log_uri = os.environ['log_uri']

    # Download manifest file
    get_manifest_file(event, manifest_file_path, manifest_file)

    # Instantiate Connection
    conn = Connection()
    # Create an EMR connection
    conn_emr = conn.emr_connection()
    # Create an S3 connection
    conn_s3 = conn.s3_connection()
    # Instantiate S3Manager
    s3_manager = S3Manager()

    # Parse the manifest file and generate the ETL from the ETL template with placeholder values filled in.
    # Also gets the details about the EMR cluster to create.
    try:
        # Instantiate ManifestParser
        manifest_parser = ManifestParser()
        logger.info(
            "Generating new ETL file from ETL template with placeholder values filled in"
        )
        dest_etl_file = manifest_parser.parse_manifest_file(
            manifest_file_path, manifest_file, conn_s3, s3_manager,
            exec_environment)
        logger.info("Generated: {}".format(dest_etl_file))
    except Exception:
        logger.error(
            'Failed while trying to generate a new ETL from s3://{}/{}'.format(
                manifest_parser.script_s3_bucket,
                manifest_parser.script_s3_key))
        logger.error(sys.exc_info())
        raise

    # Copy generated ETL to S3. This will then be submitted to EMR
    try:
        s3_manager.upload_object(conn_s3, etl_file_path, dest_etl_file,
                                 manifest_parser.script_s3_bucket,
                                 'generated-etls', dest_etl_file)
    except Exception:
        logger.error("Failed while trying to upload the generated ETL to S3")
        raise

    # Launch and submit jobs to EMR
    try:
        # Instantiate EMR Instance
        emr = EMRInstance()

        cluster_name = "{}_{}".format(exec_environment,
                                      manifest_parser.script_s3_key)
        cluster_id = emr.get_first_available_cluster(conn_emr)

        if manifest_parser.use_existing_cluster and cluster_id:
            instance_groups = emr.get_instance_groups(conn_emr, cluster_id)
            group_id = instance_groups['CORE']

            instance_groups_count = emr.get_instance_groups_count(
                conn_emr, cluster_id)
            current_instance_count = instance_groups_count[group_id]

            if manifest_parser.instance_count > current_instance_count:
                emr.set_instance_count(conn_emr, cluster_id, group_id,
                                       manifest_parser.instance_count)
                # Allow 10 secs for resizing to start
                time.sleep(10)

            # Submit job
            emr.submit_job(
                conn_emr, cluster_id, 's3://{}/generated-etls/{}'.format(
                    manifest_parser.script_s3_bucket, dest_etl_file),
                dest_etl_file, 'cluster', 'CONTINUE')
        else:
            # Launch EMR cluster
            emr.launch_emr_and_submit_job(
                conn_emr, log_uri, 's3://{}/generated-etls/{}'.format(
                    manifest_parser.script_s3_bucket,
                    dest_etl_file), dest_etl_file, 'cluster', 'CONTINUE',
                '{}'.format(cluster_name), manifest_parser.terminate_cluster,
                manifest_parser.instance_type, manifest_parser.instance_count)

        logger.info("Submitted s3://{}/{} to process_{}".format(
            manifest_parser.script, dest_etl_file, cluster_name))
    except Exception:
        logger.error(
            "Failed while trying to launch the EMR cluster or submit the job. Details below:")
        raise
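As with the first example, the EMR helper class is defined elsewhere. For orientation, here is a minimal sketch of what `emr.submit_job` might wrap, assuming it adds a Spark step to a running cluster via boto3's `add_job_flow_steps` (the function name, parameters, and usage values below are hypothetical):

import boto3


def submit_spark_step(emr_client, cluster_id, script_s3_uri, step_name,
                      deploy_mode='cluster', action_on_failure='CONTINUE'):
    # Hypothetical sketch: run a PySpark script on an existing cluster by
    # adding a step that invokes spark-submit through command-runner.jar.
    response = emr_client.add_job_flow_steps(
        JobFlowId=cluster_id,
        Steps=[{
            'Name': step_name,
            'ActionOnFailure': action_on_failure,
            'HadoopJarStep': {
                'Jar': 'command-runner.jar',
                'Args': ['spark-submit', '--deploy-mode', deploy_mode,
                         script_s3_uri],
            },
        }])
    return response['StepIds'][0]


# Usage (cluster id and S3 URI are placeholders):
# emr_client = boto3.client('emr')
# step_id = submit_spark_step(emr_client, 'j-XXXXXXXXXXXXX',
#                             's3://my-bucket/generated-etls/my_etl.py',
#                             'my_etl.py')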