def handler(event=None, context=None) -> dict:
    """Entry point: routes the incoming event and launches an EMR cluster."""
    payload = get_payload(event)
    logger = configure_log()
    logger.info(payload)

    if PAYLOAD_CORRELATION_ID in payload and PAYLOAD_S3_PREFIX in payload:
        raise ValueError(
            "The data passed in triggers the old handler, which has been "
            "deprecated. Please use the new handler."
        )

    # SQS-wrapped event: the real payload is a JSON string in the first record's body.
    if (PAYLOAD_EVENT_NOTIFICATION_RECORDS in payload
            and PAYLOAD_BODY in payload[PAYLOAD_EVENT_NOTIFICATION_RECORDS][0]):
        message = payload[PAYLOAD_EVENT_NOTIFICATION_RECORDS][0]
        loaded_payload_body = json.loads(message[PAYLOAD_BODY])
        logger.info(
            f'Processing payload from SQS", "payload": "{loaded_payload_body}')

        # S3 event notification wrapped inside the SQS message body.
        if (PAYLOAD_EVENT_NOTIFICATION_RECORDS in loaded_payload_body
                and PAYLOAD_S3
                in loaded_payload_body[PAYLOAD_EVENT_NOTIFICATION_RECORDS][0]):
            logger.info(
                f'Using S3 event notification handler", "payload": "{payload}')
            correlation_id = (message["messageId"]
                              if "messageId" in message else str(uuid.uuid4()))
            logger.info(
                f'Correlation id set", "correlation_id": "{correlation_id}')
            return s3_event_notification_handler(
                correlation_id,
                loaded_payload_body[PAYLOAD_EVENT_NOTIFICATION_RECORDS][0],
            )

    # Direct invocation: validate the payload and build the cluster configuration.
    try:
        payload = Payload(**payload)
    except Exception as ex:
        raise TypeError("Invalid request payload") from ex

    cluster_config = build_config(
        payload.s3_overrides,
        payload.overrides,
        payload.extend,
        payload.additional_step_args,
    )

    if payload.copy_secconfig:
        secconfig_orig = cluster_config.get("SecurityConfiguration", "")
        if secconfig_orig != "":
            secconfig = dup_security_configuration(secconfig_orig)
            cluster_config["SecurityConfiguration"] = secconfig

    return emr_launch_cluster(cluster_config)
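# Illustrative only: a minimal sketch of the two payload shapes handler() routes on,
# assuming the PAYLOAD_* constants map to the standard AWS keys ("Records", "body",
# "s3", ...). Values and field optionality are assumptions, not project fixtures.
import json

# SQS-wrapped S3 event notification: Records[0]["body"] is a JSON string that itself
# contains an S3 "Records" list, so handler() delegates to s3_event_notification_handler.
sqs_wrapped_s3_event = {
    "Records": [
        {
            "messageId": "11111111-2222-3333-4444-555555555555",
            "body": json.dumps({
                "Records": [
                    {
                        "eventTime": "2021-06-01T12:00:00.000Z",
                        "s3": {
                            "bucket": {"name": "example-bucket"},
                            "object": {"key": "data/example/prefix/"},
                        },
                    }
                ]
            }),
        }
    ]
}

# Direct invocation payload: anything without the SQS envelope falls through to
# Payload(**payload) and build_config(). Field names come from the attributes the
# handler reads; whether None is accepted for each is an assumption.
direct_payload = {
    "s3_overrides": None,
    "overrides": None,
    "extend": None,
    "additional_step_args": None,
    "copy_secconfig": False,
}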
def add_command_line_params(
    cluster_config,
    correlation_id,
    s3_prefix,
    snapshot_type,
    export_date,
    skip_pdm_trigger,
):
    """Add command line arguments to the ADG and PDM EMR step scripts.

    Every named step receives the correlation id, S3 prefix, snapshot type
    and export date; the PDM trigger step additionally receives the
    skip-PDM-trigger flag.
    """
    logger = configure_log()
    try:
        for step_name in [
            SEND_NOTIFICATION_STEP,
            COURTESY_FLUSH_STEP_NAME,
            SUBMIT_JOB,
            CREATE_CLIVE_DATABASES,
            CREATE_UC_FEATURE_DATABASES,
            CREATE_HIVE_DYNAMO_TABLE,
            SOURCE,
        ]:
            add_command_line_args_to_step(
                cluster_config,
                correlation_id,
                s3_prefix,
                snapshot_type,
                export_date,
                step_name,
            )
        add_command_line_args_to_step(
            cluster_config,
            correlation_id,
            s3_prefix,
            snapshot_type,
            export_date,
            CREATE_PDM_TRIGGER_STEP_NAME,
            skip_pdm_trigger,
        )
    except Exception as ex:
        logger.error(ex)
        raise
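# A minimal sketch, not the project's actual implementation, of what a helper like
# add_command_line_args_to_step() plausibly does for each named step: find the step by
# name in cluster_config["Steps"] and append the shared CLI arguments, plus the optional
# skip-PDM-trigger flag. The flag literals "--snapshot_type" and "--skip_pdm_trigger"
# and the default for skip_pdm_trigger are assumptions.
def add_command_line_args_to_step_sketch(
    cluster_config,
    correlation_id,
    s3_prefix,
    snapshot_type,
    export_date,
    step_name,
    skip_pdm_trigger=None,
):
    # Locate the step by name; silently skip steps not present in this config.
    step = next(
        (s for s in cluster_config["Steps"] if s["Name"] == step_name), None
    )
    if step is None:
        return
    args = step["HadoopJarStep"]["Args"]
    args += [
        "--correlation_id", correlation_id,
        "--s3_prefix", s3_prefix,
        "--snapshot_type", snapshot_type,
        "--export_date", export_date,
    ]
    if skip_pdm_trigger is not None:
        args += ["--skip_pdm_trigger", skip_pdm_trigger]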
from emr_launcher.logger import configure_log
from emr_launcher.handler import handler

logger = configure_log()

try:
    handler()
except Exception as e:
    logger.error(e)
def add_command_line_params(
    cluster_config, correlation_id, s3_prefix, snapshot_type, export_date
):
    """Add command line arguments to the ADG and PDM EMR step scripts.

    The first try block covers the PDM steps; the remaining blocks cover
    the ADG steps.
    """
    logger = configure_log()
    logger.debug(
        f'Adding command line params", "correlation_id": "{correlation_id}", '
        f'"s3_prefix": "{s3_prefix}')

    # PDM steps
    try:
        source_step = next(
            (sub for sub in cluster_config[STEPS] if sub[NAME_KEY] == SOURCE),
            None,
        )
        if source_step is not None:
            pdm_script_args = source_step[HADOOP_JAR_STEP][ARGS]
            pdm_script_args.extend([
                CORRELATION_ID, correlation_id,
                S3_PREFIX, s3_prefix,
            ])
            source_step[HADOOP_JAR_STEP][ARGS] = pdm_script_args

        hive_dynamo_step = next(
            (sub for sub in cluster_config[STEPS]
             if sub[NAME_KEY] == CREATE_HIVE_DYNAMO_TABLE),
            None,
        )
        if hive_dynamo_step is not None:
            pdm_script_args = hive_dynamo_step[HADOOP_JAR_STEP][ARGS]
            pdm_script_args.extend([
                CORRELATION_ID, correlation_id,
                S3_PREFIX, s3_prefix,
                SNAPSHOT_TYPE, snapshot_type,
                EXPORT_DATE_COMMAND, export_date,
            ])
            hive_dynamo_step[HADOOP_JAR_STEP][ARGS] = pdm_script_args
    except Exception as e:
        logger.error(e)

    # ADG steps
    try:
        submit_job_step = next(
            (sub for sub in cluster_config[STEPS]
             if sub[NAME_KEY] == SUBMIT_JOB),
            None,
        )
        if submit_job_step is not None:
            adg_script_args = submit_job_step[HADOOP_JAR_STEP][ARGS]
            adg_script_args.extend([
                CORRELATION_ID, correlation_id,
                S3_PREFIX, s3_prefix,
                SNAPSHOT_TYPE, snapshot_type,
                EXPORT_DATE_COMMAND, export_date,
            ])
            logger.debug(adg_script_args)
            submit_job_step[HADOOP_JAR_STEP][ARGS] = adg_script_args
    except Exception as e:
        logger.error(e)

    try:
        courtesy_flush_step = next(
            (sub for sub in cluster_config[STEPS]
             if sub[NAME_KEY] == COURTESY_FLUSH_STEP_NAME),
            None,
        )
        if courtesy_flush_step is not None:
            adg_script_args = courtesy_flush_step[HADOOP_JAR_STEP][ARGS]
            adg_script_args.extend([
                CORRELATION_ID, correlation_id,
                S3_PREFIX, s3_prefix,
                SNAPSHOT_TYPE, snapshot_type,
                EXPORT_DATE_COMMAND, export_date,
            ])
            logger.debug(adg_script_args)
            courtesy_flush_step[HADOOP_JAR_STEP][ARGS] = adg_script_args
    except Exception as e:
        logger.error(e)

    try:
        pdm_trigger_step = next(
            (sub for sub in cluster_config[STEPS]
             if sub[NAME_KEY] == CREATE_PDM_TRIGGER_STEP_NAME),
            None,
        )
        if pdm_trigger_step is not None:
            adg_script_args = pdm_trigger_step[HADOOP_JAR_STEP][ARGS]
            adg_script_args.extend([
                CORRELATION_ID, correlation_id,
                S3_PREFIX, s3_prefix,
                SNAPSHOT_TYPE, snapshot_type,
                EXPORT_DATE_COMMAND, export_date,
            ])
            logger.debug(adg_script_args)
            pdm_trigger_step[HADOOP_JAR_STEP][ARGS] = adg_script_args
    except Exception as e:
        logger.error(e)
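# Illustrative only: a toy cluster_config showing the "Steps" shape this function
# mutates (EMR RunJobFlow uses Name / HadoopJarStep / Args) and the effect of the
# lookup-and-append pattern above. Step names and argument values are made up.
toy_cluster_config = {
    "Steps": [
        {
            "Name": "submit-job",
            "HadoopJarStep": {
                "Jar": "command-runner.jar",
                "Args": ["spark-submit", "/opt/emr/job.py"],
            },
        }
    ]
}

step = next(
    (s for s in toy_cluster_config["Steps"] if s["Name"] == "submit-job"), None
)
if step is not None:
    step["HadoopJarStep"]["Args"] += ["--correlation_id", "abc-123"]

# toy_cluster_config["Steps"][0]["HadoopJarStep"]["Args"] is now
# ["spark-submit", "/opt/emr/job.py", "--correlation_id", "abc-123"]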
def old_handler(event=None) -> dict:
    """Launches an EMR cluster with the provided configuration."""
    logger = configure_log()
    correlation_id_necessary = False

    # Correlation id, S3 prefix, snapshot type and export date are supplied
    # when this lambda is triggered via the API or via SNS.
    correlation_id = get_value(PAYLOAD_CORRELATION_ID, event)
    s3_prefix = get_value(PAYLOAD_S3_PREFIX, event)
    snapshot_type = get_value(PAYLOAD_SNAPSHOT_TYPE, event)
    export_date = get_value(PAYLOAD_EXPORT_DATE, event)

    if "Records" in event or (PAYLOAD_CORRELATION_ID in event
                              and PAYLOAD_S3_PREFIX in event):
        correlation_id_necessary = True

    cluster_config = read_config("cluster")
    cluster_name = cluster_config["Name"]

    # For ADG, an incremental snapshot type selects "configurations_incremental".
    config_yml_name = get_config_file_name(cluster_name, snapshot_type)
    cluster_config.update(
        read_config(config_type=config_yml_name, s3_overrides=None,
                    required=False))

    # Replace the metastore password secret name with its value for the
    # spark-hive-site and hive-site classifications, where present.
    try:
        spark_hive_site = next(
            (sub for sub in cluster_config["Configurations"]
             if sub["Classification"] == "spark-hive-site"),
            None,
        )
        if spark_hive_site is not None:
            secret_name = spark_hive_site["Properties"][
                "javax.jdo.option.ConnectionPassword"]
            secret_value = sm_retrieve_secrets(secret_name)
            spark_hive_site["Properties"][
                "javax.jdo.option.ConnectionPassword"] = secret_value
    except Exception as e:
        logger.info(e)

    try:
        hive_site = next(
            (sub for sub in cluster_config["Configurations"]
             if sub["Classification"] == "hive-site"),
            None,
        )
        if hive_site is not None:
            secret_name = hive_site["Properties"][
                "javax.jdo.option.ConnectionPassword"]
            secret_value = sm_retrieve_secrets(secret_name)
            hive_site["Properties"][
                "javax.jdo.option.ConnectionPassword"] = secret_value
    except Exception as e:
        logger.info(e)

    cluster_config.update(read_config("instances"))
    cluster_config.update(
        read_config(config_type="steps", s3_overrides=None, required=False))

    if correlation_id_necessary:
        add_command_line_params(cluster_config, correlation_id, s3_prefix,
                                snapshot_type, export_date)

    adg_trim_steps_for_incremental(cluster_config, snapshot_type)
    adg_trim_steps_for_full(cluster_config, snapshot_type)

    # Rename the ADG cluster based on snapshot type (full/incremental).
    if cluster_name == ADG_NAME:
        update_adg_cluster_name(cluster_config, snapshot_type)

    logger.debug("Requested cluster parameters", extra=cluster_config)

    resp = emr_launch_cluster(cluster_config)
    job_flow_id = resp["JobFlowId"]

    additional_tags = {
        "Correlation_Id": correlation_id,
        "snapshot_type": snapshot_type,
        "export_date": export_date,
    }
    logger.debug(resp)
    emr_cluster_add_tags(job_flow_id, additional_tags)

    return resp
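# Illustrative only: the metastore-password injection above assumes the YAML config
# stores a secret *name* in the standard Hive property, which is swapped for the secret
# *value* at launch time. Classification and Properties are real EMR Configurations
# keys; the values below and the resolved secret are made up.
toy_configurations = [
    {
        "Classification": "spark-hive-site",
        "Properties": {
            "javax.jdo.option.ConnectionPassword": "metastore-password-secret-name"
        },
    }
]

spark_hive_site = next(
    (c for c in toy_configurations if c["Classification"] == "spark-hive-site"), None
)
if spark_hive_site is not None:
    secret_name = spark_hive_site["Properties"]["javax.jdo.option.ConnectionPassword"]
    # In the handler this value would come from sm_retrieve_secrets(secret_name).
    spark_hive_site["Properties"][
        "javax.jdo.option.ConnectionPassword"] = "resolved-secret-value"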
def s3_event_notification_handler(correlation_id, record=None) -> dict:
    """Launches an EMR cluster with the provided configuration."""
    logger = configure_log()
    logger.info(record)

    export_date = get_event_time_as_date_string(
        get_value(PAYLOAD_EVENT_TIME, record))
    s3_object = get_value(PAYLOAD_S3, record)
    s3_bucket_object = get_value(PAYLOAD_BUCKET, s3_object)
    s3_object_object = get_value(PAYLOAD_OBJECT, s3_object)
    s3_prefix = get_value(PAYLOAD_KEY, s3_object_object)
    s3_bucket_name = get_value(PAYLOAD_NAME, s3_bucket_object)

    cluster_config = read_config("cluster")
    configurations_config_yml_name = "configurations"
    cluster_config.update(
        read_config(
            config_type=configurations_config_yml_name,
            s3_overrides=None,
            required=False,
        ))

    # Replace the metastore password secret name with its value for the
    # spark-hive-site and hive-site classifications, where present.
    try:
        spark_hive_site = next(
            (sub for sub in cluster_config["Configurations"]
             if sub["Classification"] == "spark-hive-site"),
            None,
        )
        if spark_hive_site is not None:
            secret_name = spark_hive_site["Properties"][
                "javax.jdo.option.ConnectionPassword"]
            secret_value = sm_retrieve_secrets(secret_name)
            spark_hive_site["Properties"][
                "javax.jdo.option.ConnectionPassword"] = secret_value
    except Exception as e:
        logger.info(e)

    try:
        hive_site = next(
            (sub for sub in cluster_config["Configurations"]
             if sub["Classification"] == "hive-site"),
            None,
        )
        if hive_site is not None:
            secret_name = hive_site["Properties"][
                "javax.jdo.option.ConnectionPassword"]
            secret_value = sm_retrieve_secrets(secret_name)
            hive_site["Properties"][
                "javax.jdo.option.ConnectionPassword"] = secret_value
    except Exception as e:
        logger.info(e)

    cluster_config.update(read_config("instances"))
    cluster_config.update(
        read_config(config_type="steps", s3_overrides=None, required=False))

    HADOOP_JAR_STEP = "HadoopJarStep"
    ARGS = "Args"
    STEPS = "Steps"

    # Append the shared command line arguments to every Hadoop JAR step.
    for sub in cluster_config[STEPS]:
        if HADOOP_JAR_STEP in sub:
            script_args = sub[HADOOP_JAR_STEP][ARGS]
            script_args.extend([
                "--correlation_id", correlation_id,
                "--s3_bucket_name", s3_bucket_name,
                "--s3_prefix", s3_prefix,
                "--export_date", export_date,
            ])
            sub[HADOOP_JAR_STEP][ARGS] = script_args

    resp = emr_launch_cluster(cluster_config)
    job_flow_id = resp["JobFlowId"]
    logger.debug(resp)

    additional_tags = {
        "Correlation_Id": correlation_id,
        "export_date": export_date,
    }
    emr_cluster_add_tags(job_flow_id, additional_tags)

    return resp
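# Illustrative only: a single S3 event notification record of the shape this handler
# reads, assuming the PAYLOAD_* constants map to the standard AWS notification keys.
# With this record, every HadoopJarStep would receive arguments roughly equivalent to:
#   --correlation_id <id> --s3_bucket_name example-bucket
#   --s3_prefix data/example/prefix/file.csv --export_date <date from eventTime>
# (the exact export_date format produced by get_event_time_as_date_string is assumed).
example_record = {
    "eventTime": "2021-06-01T12:00:00.000Z",
    "s3": {
        "bucket": {"name": "example-bucket"},
        "object": {"key": "data/example/prefix/file.csv"},
    },
}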