def template_url(inference_type, custom_container, pipeline_type):
    """
    template_url is a helper function that determines the cloudformation template's file name based on inputs

    :inference_type: type of inference from lambda event input. Possible values: 'batch' or 'realtime'
    :custom_container: path to a .zip file in the assets bucket containing custom model assets,
        or an empty string when no custom container build is needed in the pipeline
    :pipeline_type: type of pipeline (e.g. 'byom'); used to construct the blueprint path
    :return: returns a link to the appropriate cloudformation template file, which can be one of these values:
        byom_realtime_build_container.yaml
        byom_realtime_builtin_container.yaml
        byom_batch_build_container.yaml
        byom_batch_builtin_container.yaml
    """
    url = (
        "https://"
        + os.environ["BLUEPRINT_BUCKET_URL"]
        + "/blueprints/"
        + pipeline_type
        + "/"
        + pipeline_type
    )

    # append the inference type to the template's name
    if inference_type.lower() == "realtime":
        url = url + "_realtime"
    elif inference_type.lower() == "batch":
        url = url + "_batch"
    else:
        raise BadRequest("Bad request format. Inference type must be 'realtime' or 'batch'")

    # a non-empty custom_container (a .zip of model assets) selects the "build container" template
    if len(custom_container) > 0 and custom_container.endswith(".zip"):
        url = url + "_build_container.yaml"
    elif len(custom_container) == 0:
        url = url + "_builtin_container.yaml"
    else:
        raise BadRequest(
            "Bad request. Custom container should point to a path to .zip file containing custom model assets."
        )
    return url
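# Illustrative only: a minimal usage sketch for template_url above. The bucket URL
# and the .zip key are hypothetical placeholders, and BLUEPRINT_BUCKET_URL is
# assumed to be set by the stack that deploys this Lambda.
#
#   os.environ["BLUEPRINT_BUCKET_URL"] = "my-blueprint-bucket.s3.amazonaws.com"  # hypothetical
#   template_url("realtime", "my_container.zip", "byom")
#     -> "https://my-blueprint-bucket.s3.amazonaws.com/blueprints/byom/byom_realtime_build_container.yaml"
#   template_url("batch", "", "byom")
#     -> "https://my-blueprint-bucket.s3.amazonaws.com/blueprints/byom/byom_batch_builtin_container.yaml"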
def handler(event, context): if "httpMethod" in event and event["httpMethod"] == "POST": # Lambda is being invoked from API Gateway if event["path"] == "/provisionpipeline": return provision_pipeline(json.loads(event["body"])) elif event["path"] == "/pipelinestatus": return pipeline_status(json.loads(event["body"])) else: raise BadRequest("Unacceptable event path. Path must be /provisionpipeline or /pipelinestatus") elif "pipeline_type" in event: # Lambda is being invoked from codepipeline/codebuild return provision_pipeline(event) else: raise BadRequest( "Bad request format. Expected httpMethod or pipeline_type, recevied none. Check documentation " + "for API & config formats." )
def validate(event):
    """
    validate is a helper function that checks if all required input parameters are
    present in the handler's event object

    :event: Lambda function's event object
    :return: returns the event back if it passes the validation, otherwise it raises a bad request exception
    :raises: BadRequest Exception
    """
    # get the required keys to validate the event
    required_keys = get_required_keys(event)
    for key in required_keys:
        if key not in event:
            logger.error(f"Request event did not have parameter: {key}")
            raise BadRequest(f"Bad request. API body does not have the necessary parameter: {key}")

    return event
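# Illustrative only: assuming get_required_keys resolves the required keys for the
# event's pipeline type, a payload missing one of them (here, a hypothetical request
# without "model_artifact_location") is logged and rejected by validate above:
#
#   incomplete_event = {
#       "pipeline_type": "byom_realtime_builtin",
#       "model_name": "my-model",             # hypothetical
#       "inference_instance": "ml.m5.large",
#   }
#   validate(incomplete_event)  # -> raises BadRequest("... model_artifact_location")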
def template_url(pipeline_type):
    """
    template_url is a helper function that determines the cloudformation template's file name based on inputs

    :pipeline_type: type of pipeline. Supported values:
        "byom_realtime_builtin"|"byom_realtime_custom"|"byom_batch_builtin"|"byom_batch_custom"|
        "byom_model_monitor"|"byom_image_builder"|"single_account_codepipeline"|
        "multi_account_codepipeline"
    :return: returns a link to the appropriate cloudformation template file, which can be one of these values:
        byom_realtime_inference_pipeline.yaml
        byom_batch_pipeline.yaml
        byom_model_monitor.yaml
        byom_custom_algorithm_image_builder.yaml
        single_account_codepipeline.yaml
        multi_account_codepipeline.yaml
    """
    url = "https://" + os.environ["BLUEPRINT_BUCKET_URL"] + "/blueprints/byom"
    realtime_inference_template = "blueprints/byom/byom_realtime_inference_pipeline.yaml"
    batch_inference_template = "blueprints/byom/byom_batch_pipeline.yaml"

    templates_map = {
        "byom_realtime_builtin": realtime_inference_template,
        "byom_realtime_custom": realtime_inference_template,
        "byom_batch_builtin": batch_inference_template,
        "byom_batch_custom": batch_inference_template,
        "byom_model_monitor": "blueprints/byom/byom_model_monitor.yaml",
        "byom_image_builder": f"{url}/byom_custom_algorithm_image_builder.yaml",
        "single_account_codepipeline": f"{url}/single_account_codepipeline.yaml",
        "multi_account_codepipeline": f"{url}/multi_account_codepipeline.yaml",
    }

    if pipeline_type in templates_map:
        return templates_map[pipeline_type]
    else:
        raise BadRequest(f"Bad request. Pipeline type: {pipeline_type} is not supported.")
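# Illustrative only: with a hypothetical BLUEPRINT_BUCKET_URL of
# "my-blueprint-bucket.s3.amazonaws.com", this variant of template_url resolves e.g.:
#
#   template_url("byom_realtime_custom")
#     -> "blueprints/byom/byom_realtime_inference_pipeline.yaml"
#   template_url("byom_image_builder")
#     -> "https://my-blueprint-bucket.s3.amazonaws.com/blueprints/byom/byom_custom_algorithm_image_builder.yaml"
#   template_url("unknown_type")  # -> raises BadRequest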
def get_template_parameters(event, is_multi_account, stage=None):
    pipeline_type = event.get("pipeline_type")
    region = os.environ["REGION"]
    kms_key_arn = get_stage_param(event, "kms_key_arn", stage)

    # parameters shared by all pipeline types
    common_params = [
        ("ASSETSBUCKET", os.environ["ASSETS_BUCKET"]),
        ("KMSKEYARN", kms_key_arn),
        ("BLUEPRINTBUCKET", os.environ["BLUEPRINT_BUCKET"]),
    ]

    if pipeline_type in [
        "byom_realtime_builtin",
        "byom_realtime_custom",
        "byom_batch_builtin",
        "byom_batch_custom",
    ]:
        common_params.extend(get_common_realtime_batch_params(event, region, stage))

        # add realtime specific parameters
        if pipeline_type in ["byom_realtime_builtin", "byom_realtime_custom"]:
            common_params.extend(get_realtime_specific_params(event, stage))
        # else add batch params
        else:
            common_params.extend(get_bacth_specific_params(event, stage))

        return common_params

    elif pipeline_type == "byom_model_monitor":
        common_params.extend(get_model_monitor_params(event, region, stage))
        return common_params

    elif pipeline_type == "byom_image_builder":
        return get_image_builder_params(event)

    else:
        raise BadRequest("Bad request format. Please provide a supported pipeline")
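# Illustrative only: a hypothetical event for a single-account realtime pipeline.
# The helpers referenced above (get_stage_param, get_common_realtime_batch_params,
# get_realtime_specific_params, etc.) are defined elsewhere in this module and are
# assumed to return (ParameterKey, ParameterValue) tuples in the same format.
#
#   realtime_event = {
#       "pipeline_type": "byom_realtime_custom",
#       "model_name": "my-model",                    # hypothetical
#       "custom_image_uri": "<account>.dkr.ecr.<region>.amazonaws.com/my-image:latest",
#       "inference_instance": "ml.m5.large",
#       "data_capture_location": "bucket/datacapture",
#   }
#   get_template_parameters(realtime_event, is_multi_account="False")
#     -> [("ASSETSBUCKET", ...), ("KMSKEYARN", ...), ("BLUEPRINTBUCKET", ...),
#         ...realtime-specific parameters...]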
def validate(event):
    """
    validate is a helper function that checks if all required input parameters are
    present in the handler's event object

    :event: Lambda function's event object
    :return: returns the event back if it passes the validation, otherwise it raises a bad request exception
    :raises: BadRequest Exception
    """
    required_keys = [
        "pipeline_type",
        "custom_model_container",
        "model_framework",
        "model_framework_version",
        "model_name",
        "model_artifact_location",
        "training_data",
        "inference_instance",
        "inference_type",
        "batch_inference_data",
    ]
    for key in required_keys:
        if key not in event:
            logger.error(f"Request event did not have parameter: {key}")
            raise BadRequest(f"Bad request. API body does not have the necessary parameter: {key}")

    return event
def provision_pipeline(event, client=cloudformation_client):
    """
    provision_pipeline takes the lambda event object and creates a cloudformation stack

    :event: event object from lambda function. It must contain: pipeline_type, custom_model_container,
        model_framework, model_framework_version, model_name, model_artifact_location, training_data,
        inference_instance, inference_type, batch_inference_data
    :client: boto3 cloudformation client. Not required; it is only added for unit testing purposes
    :return: an object that has statusCode, body, isBase64Encoded, and headers. The body contains
        the arn of the stack this function has created
    """
    response = {}
    new_event = validate(event)
    pipeline_type = new_event["pipeline_type"]
    custom_container = new_event["custom_model_container"]
    model_framework = new_event["model_framework"]
    model_framework_version = new_event["model_framework_version"]
    model_name = new_event["model_name"]
    model_artifact_location = new_event["model_artifact_location"]
    training_data = new_event["training_data"]
    inference_instance = new_event["inference_instance"]
    inference_type = new_event["inference_type"]
    batch_inference_data = new_event["batch_inference_data"]
    pipeline_template_url = template_url(inference_type, custom_container, pipeline_type)

    template_parameters = []
    if pipeline_type == "byom":
        template_parameters = [
            {
                "ParameterKey": "NOTIFICATIONEMAIL",
                "ParameterValue": os.environ["NOTIFICATION_EMAIL"],
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "BLUEPRINTBUCKET",
                "ParameterValue": os.environ["BLUEPRINT_BUCKET"],
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "ACCESSBUCKET",
                "ParameterValue": os.environ["ACCESS_BUCKET"],
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "CUSTOMCONTAINER",
                "ParameterValue": custom_container,
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "MODELFRAMEWORK",
                "ParameterValue": model_framework,
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "MODELFRAMEWORKVERSION",
                "ParameterValue": model_framework_version,
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "MODELNAME",
                "ParameterValue": model_name,
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "MODELARTIFACTLOCATION",
                "ParameterValue": model_artifact_location,
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "TRAININGDATA",
                "ParameterValue": training_data,
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "INFERENCEINSTANCE",
                "ParameterValue": inference_instance,
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "INFERENCETYPE",
                "ParameterValue": inference_type,
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "BATCHINFERENCEDATA",
                "ParameterValue": batch_inference_data,
                "UsePreviousValue": True,
            },
        ]
    # add elif (else if) here to add more pipeline types to the solution
    else:
        raise BadRequest(
            "Bad request format. Pipeline type not supported. Check documentation for API & config formats."
        )

    # create a pipeline stack using user parameters and specified blueprint
    stack_response = client.create_stack(
        StackName="{}-{}".format(os.environ["PIPELINE_STACK_NAME"], model_name),
        TemplateURL=pipeline_template_url,
        Parameters=template_parameters,
        Capabilities=["CAPABILITY_IAM"],
        OnFailure="DO_NOTHING",
        RoleARN=os.environ["CFN_ROLE_ARN"],
        Tags=[
            {"Key": "purpose", "Value": "test"},
        ],
    )
    logger.info("New pipeline stack created")
    logger.debug(stack_response)
    response = {
        "statusCode": 200,
        "isBase64Encoded": False,
        "body": json.dumps(
            {
                "message": "success: stack creation started",
                "pipeline_id": stack_response["StackId"],
            }
        ),
        "headers": {"Content-Type": "text/plain"},
    }
    return response
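# Illustrative only: a hypothetical direct invocation (e.g., from CodePipeline) of
# provision_pipeline above. All values are placeholders; the call would create a
# CloudFormation stack named "<PIPELINE_STACK_NAME>-my-model" and return a 200
# response whose body carries the new stack's id.
#
#   byom_event = {
#       "pipeline_type": "byom",
#       "custom_model_container": "",
#       "model_framework": "xgboost",
#       "model_framework_version": "0.90-1",
#       "model_name": "my-model",                         # hypothetical
#       "model_artifact_location": "model/model.tar.gz",  # hypothetical
#       "training_data": "training/data.csv",             # hypothetical
#       "inference_instance": "ml.m5.large",
#       "inference_type": "realtime",
#       "batch_inference_data": "",
#   }
#   provision_pipeline(byom_event)
#     -> {"statusCode": 200,
#         "body": '{"message": "success: stack creation started", "pipeline_id": "arn:aws:cloudformation:..."}',
#         ...}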
def get_required_keys(pipeline_type):
    # Realtime/batch pipelines
    if pipeline_type in [
        "byom_realtime_builtin",
        "byom_realtime_custom",
        "byom_batch_builtin",
        "byom_batch_custom",
    ]:
        common_keys = [
            "pipeline_type",
            "model_name",
            "model_artifact_location",
            "inference_instance",
        ]
        builtin_model_keys = [
            "model_framework",
            "model_framework_version",
        ]
        custom_model_keys = ["custom_image_uri"]
        realtime_specific_keys = ["data_capture_location"]
        batch_specific_keys = ["batch_inference_data", "batch_job_output_location"]

        keys_map = {
            "byom_realtime_builtin": common_keys + builtin_model_keys + realtime_specific_keys,
            "byom_realtime_custom": common_keys + custom_model_keys + realtime_specific_keys,
            "byom_batch_builtin": common_keys + builtin_model_keys + batch_specific_keys,
            "byom_batch_custom": common_keys + custom_model_keys + batch_specific_keys,
        }

        return keys_map[pipeline_type]

    # Model Monitor pipeline
    elif pipeline_type == "byom_model_monitor":
        return [
            "pipeline_type",
            "model_name",
            "endpoint_name",
            "training_data",
            "baseline_job_output_location",
            "data_capture_location",
            "monitoring_output_location",
            "schedule_expression",
            "instance_type",
            "instance_volume_size",
        ]
    # Image Builder pipeline
    elif pipeline_type == "byom_image_builder":
        return [
            "pipeline_type",
            "custom_algorithm_docker",
            "ecr_repo_name",
            "image_tag",
        ]
    else:
        raise BadRequest(
            "Bad request format. Pipeline type not supported. Check documentation for API & config formats"
        )
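# Illustrative only: the required keys resolved by the variant above for two pipeline types.
#
#   get_required_keys("byom_realtime_custom")
#     -> ["pipeline_type", "model_name", "model_artifact_location",
#         "inference_instance", "custom_image_uri", "data_capture_location"]
#   get_required_keys("byom_image_builder")
#     -> ["pipeline_type", "custom_algorithm_docker", "ecr_repo_name", "image_tag"]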
def get_required_keys(event):
    required_keys = []
    if event.get("pipeline_type", "").lower() == "byom":
        # common keys
        common_keys = [
            "pipeline_type",
            "model_name",
            "model_artifact_location",
            "inference_instance",
            "inference_type",
        ]
        # built-in algorithm + realtime inference
        if (
            event.get("inference_type", "").lower().strip() == "realtime"
            and event.get("model_framework", "").strip() != ""
        ):
            required_keys = common_keys + [
                "model_framework",
                "model_framework_version",
            ]
        # built-in algorithm + batch inference
        elif (
            event.get("inference_type", "").lower().strip() == "batch"
            and event.get("model_framework", "").strip() != ""
        ):
            required_keys = common_keys + [
                "model_framework",
                "model_framework_version",
                "batch_inference_data",
            ]
        # custom container + realtime inference
        elif (
            event.get("inference_type", "").lower().strip() == "realtime"
            and event.get("model_framework", "").strip() == ""
        ):
            required_keys = common_keys + [
                "custom_model_container",
            ]
        # custom container + batch inference
        elif (
            event.get("inference_type", "").lower().strip() == "batch"
            and event.get("model_framework", "").strip() == ""
        ):
            required_keys = common_keys + [
                "custom_model_container",
                "batch_inference_data",
            ]
        else:
            raise BadRequest("Bad request. missing keys for byom")

    elif event.get("pipeline_type", "").lower().strip() == "model_monitor":
        required_keys = [
            "pipeline_type",
            "endpoint_name",
            "baseline_job_output_location",
            "monitoring_output_location",
            "schedule_expression",
            "training_data",
            "instance_type",
            "instance_volume_size",
        ]
        if event.get("monitoring_type", "").lower().strip() in ["modelquality", "modelbias", "modelexplainability"]:
            required_keys = required_keys + [
                "features_attribute",
                "inference_attribute",
                "probability_attribute",
                "probability_threshold_attribute",
            ]
        # monitoring_type is optional, but if the client provided a value not in the allowed values, raise an exception
        elif event.get("monitoring_type", "").lower().strip() not in [
            "",
            "dataquality",
            "modelquality",
            "modelbias",
            "modelexplainability",
        ]:
            raise BadRequest(
                "Bad request. MonitoringType supported are 'DataQuality'|'ModelQuality'|'ModelBias'|'ModelExplainability'"
            )
    else:
        raise BadRequest(
            "Bad request format. Pipeline type not supported. Check documentation for API & config formats"
        )

    return required_keys
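# Illustrative only: for a hypothetical model_monitor request asking for ModelQuality
# monitoring, the event-based variant above adds the attribute-related keys:
#
#   get_required_keys({"pipeline_type": "model_monitor", "monitoring_type": "ModelQuality"})
#     -> ["pipeline_type", "endpoint_name", "baseline_job_output_location",
#         "monitoring_output_location", "schedule_expression", "training_data",
#         "instance_type", "instance_volume_size", "features_attribute",
#         "inference_attribute", "probability_attribute", "probability_threshold_attribute"]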
def get_template_parameters(event):
    pipeline_type = event.get("pipeline_type", "")
    model_framework = event.get("model_framework", "")
    model_framework_version = event.get("model_framework_version", "")
    model_name = event.get("model_name", "").lower().strip()
    model_artifact_location = event.get("model_artifact_location", "")
    inference_instance = event.get("inference_instance", "")
    custom_container = event.get("custom_model_container", "")
    batch_inference_data = event.get("batch_inference_data", "")
    pipeline_stack_name = os.environ["PIPELINE_STACK_NAME"]
    endpoint_name = event.get("endpoint_name", "")

    # parameters shared by all pipeline types
    template_parameters = [
        {
            "ParameterKey": "NOTIFICATIONEMAIL",
            "ParameterValue": os.environ["NOTIFICATION_EMAIL"],
            "UsePreviousValue": True,
        },
        {
            "ParameterKey": "BLUEPRINTBUCKET",
            "ParameterValue": os.environ["BLUEPRINT_BUCKET"],
            "UsePreviousValue": True,
        },
        {
            "ParameterKey": "ASSETSBUCKET",
            "ParameterValue": os.environ["ASSETS_BUCKET"],
            "UsePreviousValue": True,
        },
    ]

    if pipeline_type == "byom":
        provisioned_pipeline_stack_name = f"{pipeline_stack_name}-{model_name}"
        # construct common parameters across byom builtin/custom and realtime/batch
        template_parameters.extend(
            [
                {
                    "ParameterKey": "MODELNAME",
                    "ParameterValue": model_name,
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "MODELARTIFACTLOCATION",
                    "ParameterValue": model_artifact_location,
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "INFERENCEINSTANCE",
                    "ParameterValue": inference_instance,
                    "UsePreviousValue": True,
                },
            ]
        )

        if (
            event.get("inference_type", "").lower().strip() == "realtime"
            and event.get("model_framework", "").strip() != ""
        ):
            # update stack name
            provisioned_pipeline_stack_name = f"{provisioned_pipeline_stack_name}-BYOMPipelineRealtimeBuiltIn"
            # add builtin/realtime parameters
            template_parameters.extend(
                [
                    {
                        "ParameterKey": "MODELFRAMEWORK",
                        "ParameterValue": model_framework,
                        "UsePreviousValue": True,
                    },
                    {
                        "ParameterKey": "MODELFRAMEWORKVERSION",
                        "ParameterValue": model_framework_version,
                        "UsePreviousValue": True,
                    },
                ]
            )
        elif (
            event.get("inference_type", "").lower().strip() == "batch"
            and event.get("model_framework", "").strip() != ""
        ):
            # update stack name
            provisioned_pipeline_stack_name = f"{provisioned_pipeline_stack_name}-BYOMPipelineBatchBuiltIn"
            # add builtin/batch parameters
            template_parameters.extend(
                [
                    {
                        "ParameterKey": "MODELFRAMEWORK",
                        "ParameterValue": model_framework,
                        "UsePreviousValue": True,
                    },
                    {
                        "ParameterKey": "MODELFRAMEWORKVERSION",
                        "ParameterValue": model_framework_version,
                        "UsePreviousValue": True,
                    },
                    {
                        "ParameterKey": "BATCHINFERENCEDATA",
                        "ParameterValue": batch_inference_data,
                        "UsePreviousValue": True,
                    },
                ]
            )
        elif (
            event.get("inference_type", "").lower().strip() == "realtime"
            and event.get("model_framework", "").strip() == ""
        ):
            # update stack name
            provisioned_pipeline_stack_name = f"{provisioned_pipeline_stack_name}-BYOMPipelineRealtimeBuild"
            # add custom/realtime parameters
            template_parameters.extend(
                [
                    {
                        "ParameterKey": "CUSTOMCONTAINER",
                        "ParameterValue": custom_container,
                        "UsePreviousValue": True,
                    },
                ]
            )
        elif (
            event.get("inference_type", "").lower().strip() == "batch"
            and event.get("model_framework", "").strip() == ""
        ):
            # update stack name
            provisioned_pipeline_stack_name = f"{provisioned_pipeline_stack_name}-BYOMPipelineBatchBuild"
            # add custom/batch parameters
            template_parameters.extend(
                [
                    {
                        "ParameterKey": "CUSTOMCONTAINER",
                        "ParameterValue": custom_container,
                        "UsePreviousValue": True,
                    },
                    {
                        "ParameterKey": "BATCHINFERENCEDATA",
                        "ParameterValue": batch_inference_data,
                        "UsePreviousValue": True,
                    },
                ]
            )
        else:
            raise BadRequest(
                "Bad request format. Pipeline type not supported. Check documentation for API & config formats."
            )

    elif pipeline_type == "model_monitor":
        provisioned_pipeline_stack_name = f"{pipeline_stack_name}-{endpoint_name}-model-monitor"
        # get the optional monitoring type
        monitoring_type = event.get("monitoring_type", "dataquality").lower().strip()
        # create unique names for the data baseline job and monitoring schedule. The names need to be unique
        # because old jobs are not deleted, and there is a high possibility that the client creates a job
        # with the same name, which would throw an error.
        baseline_job_name = f"{endpoint_name}-baseline-job-{str(uuid.uuid4())[:8]}"
        monitoring_schedule_name = f"{endpoint_name}-monitor-{monitoring_type}-{str(uuid.uuid4())[:8]}"
        # add model monitor parameters
        template_parameters.extend(
            [
                {
                    "ParameterKey": "BASELINEJOBOUTPUTLOCATION",
                    "ParameterValue": event.get("baseline_job_output_location"),
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "ENDPOINTNAME",
                    "ParameterValue": endpoint_name,
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "BASELINEJOBNAME",
                    "ParameterValue": baseline_job_name,
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "MONITORINGSCHEDULENAME",
                    "ParameterValue": monitoring_schedule_name,
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "MONITORINGOUTPUTLOCATION",
                    "ParameterValue": event.get("monitoring_output_location"),
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "SCHEDULEEXPRESSION",
                    "ParameterValue": event.get("schedule_expression"),
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "TRAININGDATA",
                    "ParameterValue": event.get("training_data"),
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "INSTANCETYPE",
                    "ParameterValue": event.get("instance_type"),
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "INSTANCEVOLUMESIZE",
                    "ParameterValue": event.get("instance_volume_size"),
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "MONITORINGTYPE",
                    "ParameterValue": event.get("monitoring_type", "dataquality"),
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "MAXRUNTIMESIZE",
                    "ParameterValue": event.get("max_runtime_seconds", "-1"),
                    "UsePreviousValue": True,
                },
            ]
        )
    else:
        raise BadRequest(
            "Bad request format. Pipeline type not supported. Check documentation for API & config formats."
        )

    return (provisioned_pipeline_stack_name, template_parameters)
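# Illustrative only: for a hypothetical model_monitor event, the function above returns
# a (stack_name, parameters) tuple; the uuid suffixes keep the baseline job and
# monitoring schedule names unique across re-provisioning.
#
#   monitor_event = {
#       "pipeline_type": "model_monitor",
#       "endpoint_name": "my-endpoint",                     # hypothetical
#       "baseline_job_output_location": "bucket/baseline",  # hypothetical
#       "monitoring_output_location": "bucket/monitor",     # hypothetical
#       "schedule_expression": "cron(0 * ? * * *)",
#       "training_data": "training/data.csv",
#       "instance_type": "ml.m5.large",
#       "instance_volume_size": "20",
#   }
#   get_template_parameters(monitor_event)
#     -> ("<PIPELINE_STACK_NAME>-my-endpoint-model-monitor", [ ...CloudFormation parameters... ])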