def lambda_handler(event, context):
    '''
    The entry point for the Lambda function.
    :param event: A dictionary containing all the information about the event
                  that triggered the Lambda function
    :param context: This object contains information about the state of the
                    Lambda function itself.
    :return: N/A
    '''
    # Set environment variables
    environment_name = event['ResourceProperties']['EnvnameProperty']
    micro_environment = event['ResourceProperties']['MicroenvProperty']
    client = event['ResourceProperties']['ClientProperty']
    account = event['ResourceProperties']['AccountProperty']
    ddl_bucket = event['ResourceProperties']['DDLBucketProperty']
    ddl_create_key = event['ResourceProperties']['CreateSqlS3KeyProperty']
    ddl_delete_key = event['ResourceProperties']['DeleteSqlS3KeyProperty']
    data_bucket = event['ResourceProperties']['DataBucketProperty']
    test_output_location = event['ResourceProperties']['AthenaSqlOutputUriProperty']
    physical_resource_id = context.log_stream_name

    # Create a dictionary of placeholders and their values that will be
    # replaced in the SQL commands.
    dict_of_replacements = {
        "__bucket__": data_bucket,
        "__envname__": environment_name,
        "__microenv__": micro_environment,
        "__client__": client,
        "__account__": account
    }

    ### Set AWS specific variables ###
    # Instantiate Connection
    conn = Connection()
    # Create an Athena connection
    conn_athena = conn.athena_connection()
    # Create an S3 connection
    conn_s3 = conn.s3_connection()
    # Instantiate S3Manager
    s3_manager = S3Manager(conn_s3)
    # Instantiate AthenaManager
    athena_manager = AthenaManager(conn_athena)

    try:
        if event['RequestType'] != 'Delete':
            # Download the file that has the SQL create commands from S3
            ddl_filename = s3_manager.download_object(ddl_bucket, ddl_create_key)
            logger.info('Downloaded {}'.format(ddl_filename))
            response = execute_scripts(athena_manager, ddl_filename,
                                       test_output_location,
                                       dict_of_replacements, 'Create')
            send_response(event, context, SUCCESS, response, physical_resource_id)
            return response
        elif event['RequestType'] == 'Delete':
            # Download the file that has the SQL delete commands from S3
            ddl_filename = s3_manager.download_object(ddl_bucket, ddl_delete_key)
            logger.info('Downloaded {}'.format(ddl_filename))
            response = execute_scripts(athena_manager, ddl_filename,
                                       test_output_location,
                                       dict_of_replacements, 'Delete')
            send_response(event, context, SUCCESS, response, physical_resource_id)
            return response
    except Exception as error:
        logger.info(error.args)
        response_data = {'Reason': error.args}
        send_response(event, context, FAILED, response_data, physical_resource_id)
        return error.args
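For local testing it helps to see the shape of the CloudFormation custom resource event this handler expects. The sketch below is only illustrative: the bucket names, keys, and property values are placeholders, and ResourceProperties simply mirrors the keys the handler reads above.

# Hypothetical custom resource event for a local smoke test; in production
# CloudFormation sends this payload. All values below are placeholders.
sample_event = {
    'RequestType': 'Create',  # 'Delete' routes to the delete DDL script instead
    'ResourceProperties': {
        'EnvnameProperty': 'dev',
        'MicroenvProperty': 'analytics',
        'ClientProperty': 'example-client',
        'AccountProperty': '123456789012',
        'DDLBucketProperty': 'example-ddl-bucket',
        'CreateSqlS3KeyProperty': 'ddl/create_tables.sql',
        'DeleteSqlS3KeyProperty': 'ddl/delete_tables.sql',
        'DataBucketProperty': 'example-data-bucket',
        'AthenaSqlOutputUriProperty': 's3://example-athena-results/output/'
    }
}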
def lambda_handler(event, context): """ This is the main entry point for lambda function :param event: Event object that contains the bucket name and key of the manifest file that triggered the lambda :param context: Contains context information about the lambda function itself e.g. remaining time :return: Does not return anything """ # set environment exec_environment = os.environ['exec_environment'] etl_file_path = os.environ['etl_file_path'] manifest_file_path = os.environ['manifest_file_path'] manifest_file = os.environ['manifest_file'] log_uri = os.environ['log_uri'] # Download manifest file get_manifest_file(event, manifest_file_path, manifest_file) # Instantiate Connection conn = Connection() # Create an EMR connection conn_emr = conn.emr_connection() # Create an S3 connection conn_s3 = conn.s3_connection() # Instantiate S3Manager s3_manager = S3Manager() # Parse the manifest file and generate etl from ETL template wth placeholder values filled in. # Also gets the details about the EMR cluster to create. try: # Instantiate ManifestParser manifest_parser = ManifestParser() logger.info( "Generating new ETL file from ETL template wth placeholder values filled in" ) dest_etl_file = manifest_parser.parse_manifest_file( manifest_file_path, manifest_file, conn_s3, s3_manager, exec_environment) logger.info("Generated: {}".format(dest_etl_file)) except: logger.error( 'Failed while trying to generate a new ETL from s3://{}/{}'.format( manifest_parser.script_s3_bucket, manifest_parser.script_s3_key)) logging.error(sys.exc_info()) raise # Copy generated ETL to S3. This will then be submitted to EMR try: s3_manager.upload_object(conn_s3, etl_file_path, dest_etl_file, manifest_parser.script_s3_bucket, 'generated-etls', dest_etl_file) except: raise # Launch and submit jobs to EMR try: # Instantiate EMR Instance emr = EMRInstance() cluster_name = "{}_{}".format(exec_environment, manifest_parser.script_s3_key) cluster_id = emr.get_first_available_cluster(conn_emr) if manifest_parser.use_existing_cluster and cluster_id: instance_groups = emr.get_instance_groups(conn_emr, cluster_id) group_id = instance_groups['CORE'] instance_groups_count = emr.get_instance_groups_count( conn_emr, cluster_id) current_instance_count = instance_groups_count[group_id] if manifest_parser.instance_count > current_instance_count: emr.set_instance_count(conn_emr, cluster_id, group_id, manifest_parser.instance_count) # Allow 10 secs for resizing to start time.sleep(10) #submit job emr.submit_job( conn_emr, cluster_id, 's3://{}/generated-etls/{}'.format( manifest_parser.script_s3_bucket, dest_etl_file), dest_etl_file, 'cluster', 'CONTINUE') else: # Launch EMR cluster emr.launch_emr_and_submit_job( conn_emr, log_uri, 's3://{}/generated-etls/{}'.format( manifest_parser.script_s3_bucket, dest_etl_file), dest_etl_file, 'cluster', 'CONTINUE', '{}'.format(cluster_name), manifest_parser.terminate_cluster, manifest_parser.instance_type, manifest_parser.instance_count) logger.info("Submitted s3://{}/{} to process_{}".format( manifest_parser.script, dest_etl_file, cluster_name)) except: logger.error( "Failed while trying to launch EMR cluster. Details below:") raise