Example #1
def template_url(inference_type, custom_container, pipeline_type):
    """
    template_url is a helper function that determines the cloudformation stack's file name based on
    inputs

    :inference_type: type of inference from lambda event input. Possible values: 'batch' or 'realtime'
    :custom_container: whether a custom container build is needed in the pipeline or no. Possible values: 'True' or 'False'

    :return: returns a link to the appropriate coudformation template files which can be one of these values:
    byom_realtime_build_container.yaml
    byom_realtime_builtin_container.yaml
    byom_batch_build_container.yaml
    byom_batch_builtin_container.yaml
    """
    url = ("https://" + os.environ["BLUEPRINT_BUCKET_URL"] + "/blueprints/" +
           pipeline_type + "/" + pipeline_type)
    if inference_type.lower() == "realtime":
        url = url + "_realtime"
    elif inference_type.lower() == "batch":
        url = url + "_batch"
    else:
        raise BadRequest(
            "Bad request format. Inference type must be 'realtime' or 'batch'")

    if len(custom_container) > 0 and custom_container.endswith('.zip'):
        url = url + "_build_container.yaml"
    elif len(custom_container) == 0:
        url = url + "_builtin_container.yaml"
    else:
        raise BadRequest(
            'Bad request. Custom container should point to a .zip file containing custom model assets.'
        )
    return url
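
A minimal usage sketch of the helper above. The bucket URL and event values are hypothetical, and BadRequest is assumed to be a simple Exception subclass defined elsewhere in the module:

import os

os.environ["BLUEPRINT_BUCKET_URL"] = "example-bucket.s3.amazonaws.com"  # hypothetical bucket URL

# an empty custom_container selects the built-in container template
print(template_url("realtime", "", "byom"))
# -> https://example-bucket.s3.amazonaws.com/blueprints/byom/byom_realtime_builtin_container.yaml

# a path to a .zip of model assets selects the build-container template
print(template_url("batch", "assets/custom_image.zip", "byom"))
# -> https://example-bucket.s3.amazonaws.com/blueprints/byom/byom_batch_build_container.yaml
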
def handler(event, context):
    if "httpMethod" in event and event["httpMethod"] == "POST":  # Lambda is being invoked from API Gateway
        if event["path"] == "/provisionpipeline":
            return provision_pipeline(json.loads(event["body"]))
        elif event["path"] == "/pipelinestatus":
            return pipeline_status(json.loads(event["body"]))
        else:
            raise BadRequest("Unacceptable event path. Path must be /provisionpipeline or /pipelinestatus")
    elif "pipeline_type" in event:  # Lambda is being invoked from codepipeline/codebuild
        return provision_pipeline(event)
    else:
        raise BadRequest(
            "Bad request format. Expected httpMethod or pipeline_type, recevied none. Check documentation "
            + "for API & config formats."
        )
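
The handler can be exercised locally with a synthetic API Gateway event; a sketch, assuming provision_pipeline and pipeline_status are importable or stubbed:

import json

api_event = {
    "httpMethod": "POST",
    "path": "/provisionpipeline",
    "body": json.dumps({"pipeline_type": "byom"}),  # hypothetical minimal body
}
handler(api_event, context=None)  # routed to provision_pipeline

# direct invocation from codepipeline/codebuild skips the API Gateway fields
handler({"pipeline_type": "byom"}, context=None)
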
Example #3
def handler(event, context):
    if 'httpMethod' in event and event['httpMethod'] == 'POST':  # Lambda is being invoked from API Gateway
        if event['path'] == '/provisionpipeline':
            return provision_pipeline(json.loads(event['body']))
        elif event['path'] == '/pipelinestatus':
            return pipeline_status(json.loads(event['body']))
        else:
            raise BadRequest(
                "Unacceptable event path. Path must be /provisionpipeline or /pipelinestatus"
            )
    elif "pipeline_type" in event:  # Lambda is being invoked from codepipeline/codebuild
        return provision_pipeline(event)
    else:
        raise BadRequest(
            "Bad request format. Expected httpMethod or pipeline_type, received none. Check documentation for API & config formats."
        )
def validate(event):
    """
    validate is a helper function that checks if all required input parameters are present in the handler's event object

    :event: Lambda function's event object

    :return: returns the event back if it passes the validation, otherwise it raises a bad request exception
    :raises: BadRequest Exception
    """
    # get the required keys to validate the event
    required_keys = get_required_keys(event)
    for key in required_keys:
        if key not in event:
            logger.error(f"Request event did not have parameter: {key}")
            raise BadRequest(f"Bad request. API body does not have the necessary parameter: {key}")

    return event
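
A sketch of the validation failure path, assuming get_required_keys resolves the required keys for the given pipeline type and BadRequest is the module's exception; the event below is hypothetical and intentionally incomplete:

incomplete_event = {"pipeline_type": "byom_image_builder", "ecr_repo_name": "example-repo"}
try:
    validate(incomplete_event)
except BadRequest as exc:
    # e.g. "Bad request. API body does not have the necessary parameter: custom_algorithm_docker"
    print(exc)
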
def template_url(pipeline_type):
    """
    template_url is a helper function that determines the cloudformation stack's file name based on
    inputs

    :pipeline_type: type of pipeline. Supported values:
    "byom_realtime_builtin"|"byom_realtime_custom"|"byom_batch_builtin"|"byom_batch_custom"|
    "byom_model_monitor"|"byom_image_builder"|"single_account_codepipeline"|
    "multi_account_codepipeline"

    :return: returns a link to the appropriate cloudformation template file, which can be one of these values:
    byom_realtime_inference_pipeline.yaml
    byom_batch_pipeline.yaml
    byom_model_monitor.yaml
    byom_custom_algorithm_image_builder.yaml
    single_account_codepipeline.yaml
    multi_account_codepipeline.yaml
    """
    url = "https://" + os.environ["BLUEPRINT_BUCKET_URL"] + "/blueprints/byom"
    realtime_inference_template = "blueprints/byom/byom_realtime_inference_pipeline.yaml"
    batch_inference_template = "blueprints/byom/byom_batch_pipeline.yaml"

    templates_map = {
        "byom_realtime_builtin": realtime_inference_template,
        "byom_realtime_custom": realtime_inference_template,
        "byom_batch_builtin": batch_inference_template,
        "byom_batch_custom": batch_inference_template,
        "byom_model_monitor": "blueprints/byom/byom_model_monitor.yaml",
        "byom_image_builder": f"{url}/byom_custom_algorithm_image_builder.yaml",
        "single_account_codepipeline": f"{url}/single_account_codepipeline.yaml",
        "multi_account_codepipeline": f"{url}/multi_account_codepipeline.yaml",
    }

    if pipeline_type in templates_map:
        return templates_map[pipeline_type]

    raise BadRequest(f"Bad request. Pipeline type: {pipeline_type} is not supported.")
def get_template_parameters(event, is_multi_account, stage=None):
    pipeline_type = event.get("pipeline_type")
    region = os.environ["REGION"]

    kms_key_arn = get_stage_param(event, "kms_key_arn", stage)
    common_params = [
        ("ASSETSBUCKET", os.environ["ASSETS_BUCKET"]),
        ("KMSKEYARN", kms_key_arn),
        ("BLUEPRINTBUCKET", os.environ["BLUEPRINT_BUCKET"]),
    ]
    if pipeline_type in [
        "byom_realtime_builtin",
        "byom_realtime_custom",
        "byom_batch_builtin",
        "byom_batch_custom",
    ]:
        common_params.extend(get_common_realtime_batch_params(event, region, stage))

        # add realtime specific parameters
        if pipeline_type in ["byom_realtime_builtin", "byom_realtime_custom"]:
            common_params.extend(get_realtime_specific_params(event, stage))
        # else add batch params
        else:
            common_params.extend(get_batch_specific_params(event, stage))

        return common_params

    elif pipeline_type == "byom_model_monitor":
        common_params.extend(get_model_monitor_params(event, region, stage))
        return common_params

    elif pipeline_type == "byom_image_builder":
        return get_image_builder_params(event)

    else:
        raise BadRequest("Bad request format. Please provide a supported pipeline type")
Example #7
def validate(event):
    """
    validate is a helper function that checks if all required input parameters are present in the handler's event object

    :event: Lambda function's event object

    :return: returns the event back if it passes the validation, otherwise it raises a bad request exception
    :raises: BadRequest Exception
    """
    required_keys = [
        'pipeline_type', 'custom_model_container', 'model_framework',
        'model_framework_version', 'model_name', 'model_artifact_location',
        'training_data', 'inference_instance', 'inference_type',
        'batch_inference_data'
    ]
    for key in required_keys:
        if key not in event:
            logger.error(f"Request event did not have parameter: {key}")
            raise BadRequest(
                f'Bad request. API body does not have the necessary parameter: {key}'
            )

    return event
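
A sketch of both validation outcomes for this fixed-key variant; all field values are hypothetical placeholders:

complete_event = {
    "pipeline_type": "byom",
    "custom_model_container": "",
    "model_framework": "xgboost",
    "model_framework_version": "1",
    "model_name": "example-model",
    "model_artifact_location": "models/model.tar.gz",
    "training_data": "data/train.csv",
    "inference_instance": "ml.m5.large",
    "inference_type": "realtime",
    "batch_inference_data": "",
}
assert validate(complete_event) is complete_event  # all ten keys present

try:
    validate({"pipeline_type": "byom"})  # nine keys missing
except BadRequest as exc:
    print(exc)
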
Example #8
def provision_pipeline(event, client=cloudformation_client):
    """
    provision_pipeline takes the lambda event object and creates a cloudformation stack

    :event: event object from lambda function. It must contain: pipeline_type, custom_model_container,
    model_framework, model_framework_version, model_name, model_artifact_location, training_data,
    inference_instance, inference_type, batch_inference_data
    :client: boto3 cloudformation client. Not needed; it is only added for unit testing purposes
    :return: an object that has statusCode, body, isBase64Encoded, and headers. The body contains
    the arn of the stack this function has created
    """
    response = {}
    new_event = validate(event)
    pipeline_type = new_event["pipeline_type"]
    custom_container = new_event["custom_model_container"]
    model_framework = new_event["model_framework"]
    model_framework_version = new_event["model_framework_version"]
    model_name = new_event["model_name"]
    model_artifact_location = new_event["model_artifact_location"]
    training_data = new_event["training_data"]
    inference_instance = new_event["inference_instance"]
    inference_type = new_event["inference_type"]
    batch_inference_data = new_event["batch_inference_data"]

    pipeline_template_url = template_url(inference_type, custom_container,
                                         pipeline_type)

    template_parameters = []
    if pipeline_type == "byom":
        template_parameters = [
            {
                "ParameterKey": "NOTIFICATIONEMAIL",
                "ParameterValue": os.environ["NOTIFICATION_EMAIL"],
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "BLUEPRINTBUCKET",
                "ParameterValue": os.environ["BLUEPRINT_BUCKET"],
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "ACCESSBUCKET",
                "ParameterValue": os.environ["ACCESS_BUCKET"],
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "CUSTOMCONTAINER",
                "ParameterValue": custom_container,
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "MODELFRAMEWORK",
                "ParameterValue": model_framework,
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "MODELFRAMEWORKVERSION",
                "ParameterValue": model_framework_version,
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "MODELNAME",
                "ParameterValue": model_name,
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "MODELARTIFACTLOCATION",
                "ParameterValue": model_artifact_location,
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "TRAININGDATA",
                "ParameterValue": training_data,
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "INFERENCEINSTANCE",
                "ParameterValue": inference_instance,
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "INFERENCETYPE",
                "ParameterValue": inference_type,
                "UsePreviousValue": True,
            },
            {
                "ParameterKey": "BATCHINFERENCEDATA",
                "ParameterValue": batch_inference_data,
                "UsePreviousValue": True,
            },
        ]
    # add elif (else if) here to add more pipeline types to the solution
    else:
        raise BadRequest(
            "Bad request format. Pipeline type not supported. Check documentation for API & config formats."
        )

    # create a pipeline stack using user parameters and specified blueprint
    stack_response = client.create_stack(
        StackName="{}-{}".format(os.environ["PIPELINE_STACK_NAME"],
                                 model_name),
        TemplateURL=pipeline_template_url,
        Parameters=template_parameters,
        Capabilities=["CAPABILITY_IAM"],
        OnFailure="DO_NOTHING",
        RoleARN=os.environ["CFN_ROLE_ARN"],
        Tags=[
            {
                "Key": "purpose",
                "Value": "test"
            },
        ],
    )
    logger.info("New pipelin stack created")
    logger.debug(stack_response)
    response = {
        "statusCode": 200,
        "isBase64Encoded": False,
        "body": json.dumps({
            "message": "success: stack creation started",
            "pipeline_id": stack_response["StackId"],
        }),
        "headers": {"Content-Type": "text/plain"},
    }
    return response
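
Because the CloudFormation client is injectable, provision_pipeline can be exercised without touching AWS. A minimal sketch with a hand-rolled fake client; every environment value below is hypothetical:

import json
import os

os.environ.update({
    "PIPELINE_STACK_NAME": "mlops-pipeline",
    "NOTIFICATION_EMAIL": "owner@example.com",
    "BLUEPRINT_BUCKET": "example-blueprint-bucket",
    "BLUEPRINT_BUCKET_URL": "example-bucket.s3.amazonaws.com",
    "ACCESS_BUCKET": "example-access-bucket",
    "CFN_ROLE_ARN": "arn:aws:iam::111122223333:role/example-cfn-role",
})

class FakeCloudFormation:
    """Stands in for the boto3 client; records the call and returns a canned stack id."""

    def create_stack(self, **kwargs):
        self.last_call = kwargs
        return {"StackId": "arn:aws:cloudformation:us-east-1:111122223333:stack/example/1"}

event = {
    "pipeline_type": "byom",
    "custom_model_container": "",
    "model_framework": "xgboost",
    "model_framework_version": "1",
    "model_name": "example-model",
    "model_artifact_location": "models/model.tar.gz",
    "training_data": "data/train.csv",
    "inference_instance": "ml.m5.large",
    "inference_type": "realtime",
    "batch_inference_data": "",
}
response = provision_pipeline(event, client=FakeCloudFormation())
assert response["statusCode"] == 200
print(json.loads(response["body"])["pipeline_id"])
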
def get_required_keys(pipeline_type):
    # Realtime/batch pipelines
    if pipeline_type in [
            "byom_realtime_builtin",
            "byom_realtime_custom",
            "byom_batch_builtin",
            "byom_batch_custom",
    ]:
        common_keys = [
            "pipeline_type",
            "model_name",
            "model_artifact_location",
            "inference_instance",
        ]
        builtin_model_keys = [
            "model_framework",
            "model_framework_version",
        ]
        custom_model_keys = ["custom_image_uri"]
        realtime_specific_keys = ["data_capture_location"]
        batch_specific_keys = [
            "batch_inference_data",
            "batch_job_output_location",
        ]

        keys_map = {
            "byom_realtime_builtin": common_keys + builtin_model_keys + realtime_specific_keys,
            "byom_realtime_custom": common_keys + custom_model_keys + realtime_specific_keys,
            "byom_batch_builtin": common_keys + builtin_model_keys + batch_specific_keys,
            "byom_batch_custom": common_keys + custom_model_keys + batch_specific_keys,
        }

        return keys_map[pipeline_type]

    # Model Monitor pipeline
    elif pipeline_type == "byom_model_monitor":
        return [
            "pipeline_type",
            "model_name",
            "endpoint_name",
            "training_data",
            "baseline_job_output_location",
            "data_capture_location",
            "monitoring_output_location",
            "schedule_expression",
            "instance_type",
            "instance_volume_size",
        ]
    # Image Builder pipeline
    elif pipeline_type == "byom_image_builder":
        return [
            "pipeline_type",
            "custom_algorithm_docker",
            "ecr_repo_name",
            "image_tag",
        ]

    else:
        raise BadRequest(
            "Bad request format. Pipeline type not supported. Check documentation for API & config formats"
        )
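
A short sketch of the key lookup; for a custom real-time pipeline the common keys are combined with the custom-model and realtime-specific keys:

print(get_required_keys("byom_realtime_custom"))
# ['pipeline_type', 'model_name', 'model_artifact_location',
#  'inference_instance', 'custom_image_uri', 'data_capture_location']
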
def get_required_keys(event):
    required_keys = []
    if event.get("pipeline_type", "").lower() == "byom":
        # common keys
        common_keys = [
            "pipeline_type",
            "model_name",
            "model_artifact_location",
            "inference_instance",
            "inference_type",
        ]

        if (
            event.get("inference_type", "").lower().strip() == "realtime"
            and event.get("model_framework", "").strip() != ""
        ):
            required_keys = common_keys + [
                "model_framework",
                "model_framework_version",
            ]
        elif (
            event.get("inference_type", "").lower().strip() == "batch"
            and event.get("model_framework", "").strip() != ""
        ):
            required_keys = common_keys + [
                "model_framework",
                "model_framework_version",
                "batch_inference_data",
            ]
        elif (
            event.get("inference_type", "").lower().strip() == "realtime"
            and event.get("model_framework", "").strip() == ""
        ):
            required_keys = common_keys + [
                "custom_model_container",
            ]
        elif (
            event.get("inference_type", "").lower().strip() == "batch"
            and event.get("model_framework", "").strip() == ""
        ):
            required_keys = common_keys + [
                "custom_model_container",
                "batch_inference_data",
            ]
        else:
            raise BadRequest("Bad request. Inference type must be 'realtime' or 'batch' for byom pipelines")
    elif event.get("pipeline_type", "").lower().strip() == "model_monitor":
        required_keys = [
            "pipeline_type",
            "endpoint_name",
            "baseline_job_output_location",
            "monitoring_output_location",
            "schedule_expression",
            "training_data",
            "instance_type",
            "instance_volume_size",
        ]

        if event.get("monitoring_type", "").lower().strip() in ["modelquality", "modelbias", "modelexplainability"]:
            required_keys = required_keys + [
                "features_attribute",
                "inference_attribute",
                "probability_attribute",
                "probability_threshold_attribute",
            ]
        # monitoring_type is optional, but if the client provided a value not in the allowed values, raise an exception
        elif event.get("monitoring_type", "").lower().strip() not in [
            "",
            "dataquality",
            "modelquality",
            "modelbias",
            "modelexplainability",
        ]:
            raise BadRequest(
                "Bad request. MonitoringType supported are 'DataQuality'|'ModelQuality'|'ModelBias'|'ModelExplainability'"
            )
    else:
        raise BadRequest(
            "Bad request format. Pipeline type not supported. Check documentation for API & config formats"
        )

    return required_keys
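
In this event-driven variant the key list depends on inference_type and on whether model_framework is set (empty means a custom container). A sketch with hypothetical events:

realtime_custom = {"pipeline_type": "byom", "inference_type": "realtime", "model_framework": ""}
print(get_required_keys(realtime_custom))
# common keys plus 'custom_model_container'

monitor = {"pipeline_type": "model_monitor", "monitoring_type": "ModelQuality"}
print(get_required_keys(monitor))
# the model_monitor keys plus the four *_attribute keys
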
def get_template_parameters(event):
    pipeline_type = event.get("pipeline_type", "")
    model_framework = event.get("model_framework", "")
    model_framework_version = event.get("model_framework_version", "")
    model_name = event.get("model_name", "").lower().strip()
    model_artifact_location = event.get("model_artifact_location", "")
    inference_instance = event.get("inference_instance", "")
    custom_container = event.get("custom_model_container", "")
    batch_inference_data = event.get("batch_inference_data", "")
    pipeline_stack_name = os.environ["PIPELINE_STACK_NAME"]
    endpoint_name = event.get("endpoint_name", "")
    template_parameters = [
        {
            "ParameterKey": "NOTIFICATIONEMAIL",
            "ParameterValue": os.environ["NOTIFICATION_EMAIL"],
            "UsePreviousValue": True,
        },
        {
            "ParameterKey": "BLUEPRINTBUCKET",
            "ParameterValue": os.environ["BLUEPRINT_BUCKET"],
            "UsePreviousValue": True,
        },
        {
            "ParameterKey": "ASSETSBUCKET",
            "ParameterValue": os.environ["ASSETS_BUCKET"],
            "UsePreviousValue": True,
        },
    ]
    if pipeline_type == "byom":
        provisioned_pipeline_stack_name = f"{pipeline_stack_name}-{model_name}"
        # construct common parameters across byom builtin/custom and realtime/batch
        template_parameters.extend(
            [
                {
                    "ParameterKey": "MODELNAME",
                    "ParameterValue": model_name,
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "MODELARTIFACTLOCATION",
                    "ParameterValue": model_artifact_location,
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "INFERENCEINSTANCE",
                    "ParameterValue": inference_instance,
                    "UsePreviousValue": True,
                },
            ]
        )
        if (
            event.get("inference_type", "").lower().strip() == "realtime"
            and event.get("model_framework", "").strip() != ""
        ):
            # update stack name
            provisioned_pipeline_stack_name = f"{provisioned_pipeline_stack_name}-BYOMPipelineReatimeBuiltIn"
            # add builtin/realtime parameters
            template_parameters.extend(
                [
                    {
                        "ParameterKey": "MODELFRAMEWORK",
                        "ParameterValue": model_framework,
                        "UsePreviousValue": True,
                    },
                    {
                        "ParameterKey": "MODELFRAMEWORKVERSION",
                        "ParameterValue": model_framework_version,
                        "UsePreviousValue": True,
                    },
                ]
            )
        elif (
            event.get("inference_type", "").lower().strip() == "batch"
            and event.get("model_framework", "").strip() != ""
        ):
            # update stack name
            provisioned_pipeline_stack_name = f"{provisioned_pipeline_stack_name}-BYOMPipelineBatchBuiltIn"
            # add builtin/batch parameters
            template_parameters.extend(
                [
                    {
                        "ParameterKey": "MODELFRAMEWORK",
                        "ParameterValue": model_framework,
                        "UsePreviousValue": True,
                    },
                    {
                        "ParameterKey": "MODELFRAMEWORKVERSION",
                        "ParameterValue": model_framework_version,
                        "UsePreviousValue": True,
                    },
                    {
                        "ParameterKey": "BATCHINFERENCEDATA",
                        "ParameterValue": batch_inference_data,
                        "UsePreviousValue": True,
                    },
                ]
            )
        elif (
            event.get("inference_type", "").lower().strip() == "realtime"
            and event.get("model_framework", "").strip() == ""
        ):
            # update stack name
            provisioned_pipeline_stack_name = f"{provisioned_pipeline_stack_name}-BYOMPipelineRealtimeBuild"
            # add custom/realtime parameters
            template_parameters.extend(
                [
                    {
                        "ParameterKey": "CUSTOMCONTAINER",
                        "ParameterValue": custom_container,
                        "UsePreviousValue": True,
                    },
                ]
            )
        elif (
            event.get("inference_type", "").lower().strip() == "batch"
            and event.get("model_framework", "").strip() == ""
        ):
            # update stack name
            provisioned_pipeline_stack_name = f"{provisioned_pipeline_stack_name}-BYOMPipelineBatchBuild"
            # add custom/batch parameters
            template_parameters.extend(
                [
                    {
                        "ParameterKey": "CUSTOMCONTAINER",
                        "ParameterValue": custom_container,
                        "UsePreviousValue": True,
                    },
                    {
                        "ParameterKey": "BATCHINFERENCEDATA",
                        "ParameterValue": batch_inference_data,
                        "UsePreviousValue": True,
                    },
                ]
            )
        else:
            raise BadRequest(
                "Bad request format. Pipeline type not supported. Check documentation for API & config formats."
            )

    elif pipeline_type == "model_monitor":
        provisioned_pipeline_stack_name = f"{pipeline_stack_name}-{endpoint_name}-model-monitor"
        # get the optional monitoring type
        monitoring_type = event.get("monitoring_type", "dataquality").lower().strip()
        # create unique names for the data baseline job and monitoring schedule. The names need to be unique
        # because old jobs are not deleted, and there is a high possibility that the client creates a job
        # with the same name, which would throw an error.
        baseline_job_name = f"{endpoint_name}-baseline-job-{str(uuid.uuid4())[:8]}"
        monitoring_schedule_name = f"{endpoint_name}-monitor-{monitoring_type}-{str(uuid.uuid4())[:8]}"
        # add model monitor parameters
        template_parameters.extend(
            [
                {
                    "ParameterKey": "BASELINEJOBOUTPUTLOCATION",
                    "ParameterValue": event.get("baseline_job_output_location"),
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "ENDPOINTNAME",
                    "ParameterValue": endpoint_name,
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "BASELINEJOBNAME",
                    "ParameterValue": baseline_job_name,
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "MONITORINGSCHEDULENAME",
                    "ParameterValue": monitoring_schedule_name,
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "MONITORINGOUTPUTLOCATION",
                    "ParameterValue": event.get("monitoring_output_location"),
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "SCHEDULEEXPRESSION",
                    "ParameterValue": event.get("schedule_expression"),
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "TRAININGDATA",
                    "ParameterValue": event.get("training_data"),
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "INSTANCETYPE",
                    "ParameterValue": event.get("instance_type"),
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "INSTANCEVOLUMESIZE",
                    "ParameterValue": event.get("instance_volume_size"),
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "MONITORINGTYPE",
                    "ParameterValue": event.get("monitoring_type", "dataquality"),
                    "UsePreviousValue": True,
                },
                {
                    "ParameterKey": "MAXRUNTIMESIZE",
                    "ParameterValue": event.get("max_runtime_seconds", "-1"),
                    "UsePreviousValue": True,
                },
            ]
        )
    else:
        raise BadRequest(
            "Bad request format. Pipeline type not supported. Check documentation for API & config formats."
        )

    return (provisioned_pipeline_stack_name, template_parameters)
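
A sketch of the model-monitor branch, which also shows the returned stack name; environment values and event fields are hypothetical, and the module is assumed to import uuid:

import os

os.environ.update({
    "PIPELINE_STACK_NAME": "mlops-pipeline",      # hypothetical values
    "NOTIFICATION_EMAIL": "owner@example.com",
    "BLUEPRINT_BUCKET": "example-blueprint-bucket",
    "ASSETS_BUCKET": "example-assets-bucket",
})

event = {
    "pipeline_type": "model_monitor",
    "endpoint_name": "example-endpoint",
    "baseline_job_output_location": "example-bucket/baseline",
    "monitoring_output_location": "example-bucket/monitor",
    "schedule_expression": "cron(0 * ? * * *)",
    "training_data": "data/train.csv",
    "instance_type": "ml.m5.large",
    "instance_volume_size": "20",
}
stack_name, params = get_template_parameters(event)
print(stack_name)  # mlops-pipeline-example-endpoint-model-monitor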