Exemple #1
0
    def add(self, deployment_pb):
        try:
            deployment_spec = deployment_pb.spec
            sagemaker_config = deployment_spec.sagemaker_operator_config
            sagemaker_config.region = (sagemaker_config.region
                                       or get_default_aws_region())
            if not sagemaker_config.region:
                raise InvalidArgument('AWS region is missing')

            ensure_docker_available_or_raise()
            if sagemaker_config is None:
                raise YataiDeploymentException(
                    'Sagemaker configuration is missing.')

            bento_pb = self.yatai_service.GetBento(
                GetBentoRequest(
                    bento_name=deployment_spec.bento_name,
                    bento_version=deployment_spec.bento_version,
                ))
            if bento_pb.bento.uri.type not in (BentoUri.LOCAL, BentoUri.S3):
                raise BentoMLException(
                    'BentoML currently not support {} repository'.format(
                        BentoUri.StorageType.Name(bento_pb.bento.uri.type)))
            return self._add(deployment_pb, bento_pb, bento_pb.bento.uri.uri)

        except BentoMLException as error:
            deployment_pb.state.state = DeploymentState.ERROR
            deployment_pb.state.error_message = (
                f'Error creating SageMaker deployment: {str(error)}')
            return ApplyDeploymentResponse(status=error.status_proto,
                                           deployment=deployment_pb)
Exemple #2
0
    def delete(self, deployment_pb):
        try:
            logger.debug('Deleting AWS Lambda deployment')

            deployment_spec = deployment_pb.spec
            lambda_deployment_config = deployment_spec.aws_lambda_operator_config
            lambda_deployment_config.region = (
                lambda_deployment_config.region or get_default_aws_region()
            )
            if not lambda_deployment_config.region:
                raise InvalidArgument('AWS region is missing')

            cf_client = boto3.client('cloudformation', lambda_deployment_config.region)
            stack_name = generate_aws_compatible_string(
                deployment_pb.namespace, deployment_pb.name
            )
            if deployment_pb.state.info_json:
                deployment_info_json = json.loads(deployment_pb.state.info_json)
                bucket_name = deployment_info_json.get('s3_bucket')
                if bucket_name:
                    _cleanup_s3_bucket_if_exist(
                        bucket_name, lambda_deployment_config.region
                    )

            logger.debug(
                'Deleting AWS CloudFormation: %s that includes Lambda function '
                'and related resources',
                stack_name,
            )
            cf_client.delete_stack(StackName=stack_name)
            return DeleteDeploymentResponse(status=Status.OK())

        except BentoMLException as error:
            return DeleteDeploymentResponse(status=error.status_proto)
Exemple #3
0
    def delete(self, deployment_pb):
        try:
            deployment_spec = deployment_pb.spec
            sagemaker_config = deployment_spec.sagemaker_operator_config
            sagemaker_config.region = (sagemaker_config.region
                                       or get_default_aws_region())
            if not sagemaker_config.region:
                raise InvalidArgument('AWS region is missing')

            delete_sagemaker_deployment_resources_if_exist(deployment_pb)

            return DeleteDeploymentResponse(status=Status.OK())
        except BentoMLException as error:
            return DeleteDeploymentResponse(status=error.status_proto)
Exemple #4
0
    def describe(self, deployment_pb):
        try:
            deployment_spec = deployment_pb.spec
            sagemaker_config = deployment_spec.sagemaker_operator_config
            sagemaker_config.region = (sagemaker_config.region
                                       or get_default_aws_region())
            if not sagemaker_config.region:
                raise InvalidArgument('AWS region is missing')
            sagemaker_client = boto3.client('sagemaker',
                                            sagemaker_config.region)
            _, _, sagemaker_endpoint_name = _get_sagemaker_resource_names(
                deployment_pb)

            try:
                endpoint_status_response = sagemaker_client.describe_endpoint(
                    EndpointName=sagemaker_endpoint_name)
            except ClientError as e:
                raise _aws_client_error_to_bentoml_exception(
                    e,
                    f"Failed to fetch current status of sagemaker endpoint "
                    f"'{sagemaker_endpoint_name}'",
                )

            logger.debug("AWS describe endpoint response: %s",
                         endpoint_status_response)
            endpoint_status = endpoint_status_response["EndpointStatus"]

            service_state = ENDPOINT_STATUS_TO_STATE[endpoint_status]

            deployment_state = DeploymentState(
                state=service_state,
                info_json=json.dumps(endpoint_status_response, default=str),
            )
            deployment_state.timestamp.GetCurrentTime()

            return DescribeDeploymentResponse(state=deployment_state,
                                              status=Status.OK())
        except BentoMLException as error:
            return DescribeDeploymentResponse(status=error.status_proto)
Exemple #5
0
    def describe(self, deployment_pb):
        try:
            deployment_spec = deployment_pb.spec
            lambda_deployment_config = deployment_spec.aws_lambda_operator_config
            lambda_deployment_config.region = (
                lambda_deployment_config.region or get_default_aws_region()
            )
            if not lambda_deployment_config.region:
                raise InvalidArgument('AWS region is missing')

            bento_pb = self.yatai_service.GetBento(
                GetBentoRequest(
                    bento_name=deployment_spec.bento_name,
                    bento_version=deployment_spec.bento_version,
                )
            )
            bento_service_metadata = bento_pb.bento.bento_service_metadata
            api_names = (
                [lambda_deployment_config.api_name]
                if lambda_deployment_config.api_name
                else [api.name for api in bento_service_metadata.apis]
            )

            try:
                cf_client = boto3.client(
                    'cloudformation', lambda_deployment_config.region
                )
                cloud_formation_stack_result = cf_client.describe_stacks(
                    StackName='{ns}-{name}'.format(
                        ns=deployment_pb.namespace, name=deployment_pb.name
                    )
                )
                stack_result = cloud_formation_stack_result.get('Stacks')[0]
                # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/\
                # using-cfn-describing-stacks.html
                success_status = ['CREATE_COMPLETE', 'UPDATE_COMPLETE']
                if stack_result['StackStatus'] in success_status:
                    if stack_result.get('Outputs'):
                        outputs = stack_result['Outputs']
                    else:
                        return DescribeDeploymentResponse(
                            status=Status.ABORTED('"Outputs" field is not present'),
                            state=DeploymentState(
                                state=DeploymentState.ERROR,
                                error_message='"Outputs" field is not present',
                            ),
                        )
                elif stack_result['StackStatus'] in FAILED_CLOUDFORMATION_STACK_STATUS:
                    state = DeploymentState(state=DeploymentState.FAILED)
                    state.timestamp.GetCurrentTime()
                    return DescribeDeploymentResponse(status=Status.OK(), state=state)
                else:
                    state = DeploymentState(state=DeploymentState.PENDING)
                    state.timestamp.GetCurrentTime()
                    return DescribeDeploymentResponse(status=Status.OK(), state=state)
            except Exception as error:  # pylint: disable=broad-except
                state = DeploymentState(
                    state=DeploymentState.ERROR, error_message=str(error)
                )
                state.timestamp.GetCurrentTime()
                return DescribeDeploymentResponse(
                    status=Status.INTERNAL(str(error)), state=state
                )
            outputs = {o['OutputKey']: o['OutputValue'] for o in outputs}
            info_json = {}

            if 'EndpointUrl' in outputs:
                info_json['endpoints'] = [
                    outputs['EndpointUrl'] + '/' + api_name for api_name in api_names
                ]
            if 'S3Bucket' in outputs:
                info_json['s3_bucket'] = outputs['S3Bucket']

            state = DeploymentState(
                state=DeploymentState.RUNNING, info_json=json.dumps(info_json)
            )
            state.timestamp.GetCurrentTime()
            return DescribeDeploymentResponse(status=Status.OK(), state=state)
        except BentoMLException as error:
            return DescribeDeploymentResponse(status=error.status_proto)
Exemple #6
0
    def _update(self, deployment_pb, current_deployment, bento_pb, bento_path):
        if loader._is_remote_path(bento_path):
            with loader._resolve_remote_bundle_path(bento_path) as local_path:
                return self._update(
                    deployment_pb, current_deployment, bento_pb, local_path
                )
        updated_deployment_spec = deployment_pb.spec
        updated_sagemaker_config = updated_deployment_spec.sagemaker_operator_config
        sagemaker_client = boto3.client(
            'sagemaker', updated_sagemaker_config.region or get_default_aws_region()
        )

        try:
            raise_if_api_names_not_found_in_bento_service_metadata(
                bento_pb.bento.bento_service_metadata,
                [updated_sagemaker_config.api_name],
            )
            describe_latest_deployment_state = self.describe(deployment_pb)
            current_deployment_spec = current_deployment.spec
            current_sagemaker_config = current_deployment_spec.sagemaker_operator_config
            latest_deployment_state = json.loads(
                describe_latest_deployment_state.state.info_json
            )

            current_ecr_image_tag = latest_deployment_state['ProductionVariants'][0][
                'DeployedImages'
            ][0]['SpecifiedImage']
            if (
                updated_deployment_spec.bento_name != current_deployment_spec.bento_name
                or updated_deployment_spec.bento_version
                != current_deployment_spec.bento_version
            ):
                logger.debug(
                    'BentoService tag is different from current deployment, '
                    'creating new docker image and push to ECR'
                )
                with TempDirectory() as temp_dir:
                    sagemaker_project_dir = os.path.join(
                        temp_dir, updated_deployment_spec.bento_name
                    )
                    _init_sagemaker_project(
                        sagemaker_project_dir,
                        bento_path,
                        bento_pb.bento.bento_service_metadata.env.docker_base_image,
                    )
                    ecr_image_path = create_and_push_docker_image_to_ecr(
                        updated_sagemaker_config.region,
                        updated_deployment_spec.bento_name,
                        updated_deployment_spec.bento_version,
                        sagemaker_project_dir,
                    )
            else:
                logger.debug('Using existing ECR image for Sagemaker model')
                ecr_image_path = current_ecr_image_tag

            (
                updated_sagemaker_model_name,
                updated_sagemaker_endpoint_config_name,
                sagemaker_endpoint_name,
            ) = _get_sagemaker_resource_names(deployment_pb)
            (
                current_sagemaker_model_name,
                current_sagemaker_endpoint_config_name,
                _,
            ) = _get_sagemaker_resource_names(current_deployment)

            if (
                updated_sagemaker_config.api_name != current_sagemaker_config.api_name
                or updated_sagemaker_config.num_of_gunicorn_workers_per_instance
                != current_sagemaker_config.num_of_gunicorn_workers_per_instance
                or ecr_image_path != current_ecr_image_tag
            ):
                logger.debug(
                    'Sagemaker model requires update. Delete current sagemaker model %s'
                    'and creating new model %s',
                    current_sagemaker_model_name,
                    updated_sagemaker_model_name,
                )
                _delete_sagemaker_model_if_exist(
                    sagemaker_client, current_sagemaker_model_name
                )
                _create_sagemaker_model(
                    sagemaker_client,
                    updated_sagemaker_model_name,
                    ecr_image_path,
                    updated_sagemaker_config,
                )
            # When bento service tag is not changed, we need to delete the current
            # endpoint configuration in order to create new one to avoid name collation
            if (
                current_sagemaker_endpoint_config_name
                == updated_sagemaker_endpoint_config_name
            ):
                logger.debug(
                    'Current sagemaker config name %s is same as updated one, '
                    'delete it before create new endpoint config',
                    current_sagemaker_endpoint_config_name,
                )
                _delete_sagemaker_endpoint_config_if_exist(
                    sagemaker_client, current_sagemaker_endpoint_config_name
                )
            logger.debug(
                'Create new endpoint configuration %s',
                updated_sagemaker_endpoint_config_name,
            )
            _create_sagemaker_endpoint_config(
                sagemaker_client,
                updated_sagemaker_model_name,
                updated_sagemaker_endpoint_config_name,
                updated_sagemaker_config,
            )
            logger.debug(
                'Updating endpoint to new endpoint configuration %s',
                updated_sagemaker_endpoint_config_name,
            )
            _update_sagemaker_endpoint(
                sagemaker_client,
                sagemaker_endpoint_name,
                updated_sagemaker_endpoint_config_name,
            )
            logger.debug(
                'Delete old sagemaker endpoint config %s',
                current_sagemaker_endpoint_config_name,
            )
            _delete_sagemaker_endpoint_config_if_exist(
                sagemaker_client, current_sagemaker_endpoint_config_name
            )
        except AWSServiceError as e:
            delete_sagemaker_deployment_resources_if_exist(deployment_pb)
            raise e

        return ApplyDeploymentResponse(status=Status.OK(), deployment=deployment_pb)