def add(self, deployment_pb):
    """Create a SageMaker deployment for the BentoService named in the spec.

    The SageMaker operator config is validated, the AWS region is resolved
    (falling back to ``get_default_aws_region()`` and written back onto the
    config), Docker availability is checked, and the bento is fetched from
    ``self.yatai_service`` before the work is handed to ``self._add``.

    Args:
        deployment_pb: deployment message; ``spec.sagemaker_operator_config``,
            ``spec.bento_name`` and ``spec.bento_version`` are read, and
            ``state`` is overwritten when an error is reported.

    Returns:
        Whatever ``self._add`` returns on success. If a ``BentoMLException``
        is raised along the way, an ``ApplyDeploymentResponse`` carrying the
        exception's ``status_proto`` and ``deployment_pb`` with its state set
        to ``DeploymentState.ERROR``.
    """
    try:
        deployment_spec = deployment_pb.spec
        sagemaker_config = deployment_spec.sagemaker_operator_config
        # Check for a missing config *before* touching its attributes;
        # otherwise a None config fails with AttributeError, which the
        # handler below does not catch.
        # NOTE(review): assumes YataiDeploymentException derives from
        # BentoMLException so it becomes an error response - confirm.
        if sagemaker_config is None:
            raise YataiDeploymentException('Sagemaker configuration is missing.')

        sagemaker_config.region = (
            sagemaker_config.region or get_default_aws_region()
        )
        if not sagemaker_config.region:
            raise InvalidArgument('AWS region is missing')

        ensure_docker_available_or_raise()

        bento_pb = self.yatai_service.GetBento(
            GetBentoRequest(
                bento_name=deployment_spec.bento_name,
                bento_version=deployment_spec.bento_version,
            )
        )
        # Only bentos stored locally or on S3 are accepted here.
        if bento_pb.bento.uri.type not in (BentoUri.LOCAL, BentoUri.S3):
            raise BentoMLException(
                'BentoML currently not support {} repository'.format(
                    BentoUri.StorageType.Name(bento_pb.bento.uri.type)
                )
            )

        return self._add(deployment_pb, bento_pb, bento_pb.bento.uri.uri)
    except BentoMLException as error:
        # Record the failure on the deployment itself so the caller gets
        # both the status and the annotated deployment back.
        deployment_pb.state.state = DeploymentState.ERROR
        deployment_pb.state.error_message = (
            f'Error creating SageMaker deployment: {str(error)}'
        )
        return ApplyDeploymentResponse(
            status=error.status_proto, deployment=deployment_pb
        )
def delete(self, deployment_pb):
    """Delete the CloudFormation stack behind an AWS Lambda deployment.

    The region is taken from the Lambda operator config, falling back to
    ``get_default_aws_region()`` (the resolved value is written back onto
    the config). If the deployment's recorded ``state.info_json`` names an
    ``s3_bucket``, that bucket is cleaned up before the stack is deleted.

    Args:
        deployment_pb: deployment message whose namespace, name, spec and
            state are read.

    Returns:
        ``DeleteDeploymentResponse`` with ``Status.OK()`` on success, or with
        the exception's ``status_proto`` when a ``BentoMLException`` is raised.
    """
    try:
        logger.debug('Deleting AWS Lambda deployment')
        lambda_config = deployment_pb.spec.aws_lambda_operator_config
        lambda_config.region = lambda_config.region or get_default_aws_region()
        if not lambda_config.region:
            raise InvalidArgument('AWS region is missing')

        cloudformation = boto3.client('cloudformation', lambda_config.region)
        stack_name = generate_aws_compatible_string(
            deployment_pb.namespace, deployment_pb.name
        )

        # The bucket name, when known, lives in the JSON blob stored on the
        # deployment state.
        recorded_info = deployment_pb.state.info_json
        bucket_name = (
            json.loads(recorded_info).get('s3_bucket') if recorded_info else None
        )
        if bucket_name:
            _cleanup_s3_bucket_if_exist(bucket_name, lambda_config.region)

        logger.debug(
            'Deleting AWS CloudFormation: %s that includes Lambda function '
            'and related resources',
            stack_name,
        )
        cloudformation.delete_stack(StackName=stack_name)
        return DeleteDeploymentResponse(status=Status.OK())
    except BentoMLException as exc:
        return DeleteDeploymentResponse(status=exc.status_proto)
def delete(self, deployment_pb):
    """Remove the SageMaker resources that belong to a deployment.

    The region on the SageMaker operator config is resolved first (falling
    back to ``get_default_aws_region()`` and stored back on the config),
    then ``delete_sagemaker_deployment_resources_if_exist`` does the work.

    Args:
        deployment_pb: deployment message to tear down.

    Returns:
        ``DeleteDeploymentResponse`` with ``Status.OK()`` on success, or with
        the exception's ``status_proto`` when a ``BentoMLException`` is raised.
    """
    try:
        config = deployment_pb.spec.sagemaker_operator_config
        resolved_region = config.region or get_default_aws_region()
        config.region = resolved_region
        if not config.region:
            raise InvalidArgument('AWS region is missing')

        delete_sagemaker_deployment_resources_if_exist(deployment_pb)
        return DeleteDeploymentResponse(status=Status.OK())
    except BentoMLException as exc:
        return DeleteDeploymentResponse(status=exc.status_proto)
def describe(self, deployment_pb):
    """Look up the current state of a deployment's SageMaker endpoint.

    Resolves the region (falling back to ``get_default_aws_region()`` and
    writing it back onto the config), calls ``describe_endpoint`` for the
    deployment's endpoint name, and maps the reported ``EndpointStatus``
    through ``ENDPOINT_STATUS_TO_STATE``.

    Args:
        deployment_pb: deployment message to describe.

    Returns:
        ``DescribeDeploymentResponse`` with ``Status.OK()`` and a
        ``DeploymentState`` whose ``info_json`` is the JSON-serialized
        endpoint response; or, when a ``BentoMLException`` is raised, a
        response carrying only that exception's ``status_proto``.
    """
    try:
        config = deployment_pb.spec.sagemaker_operator_config
        config.region = config.region or get_default_aws_region()
        if not config.region:
            raise InvalidArgument('AWS region is missing')

        client = boto3.client('sagemaker', config.region)
        # Only the third resource name (the endpoint) is needed here.
        _model_name, _config_name, endpoint_name = _get_sagemaker_resource_names(
            deployment_pb
        )

        try:
            response = client.describe_endpoint(EndpointName=endpoint_name)
        except ClientError as e:
            raise _aws_client_error_to_bentoml_exception(
                e,
                f"Failed to fetch current status of sagemaker endpoint "
                f"'{endpoint_name}'",
            )

        logger.debug("AWS describe endpoint response: %s", response)
        mapped_state = ENDPOINT_STATUS_TO_STATE[response["EndpointStatus"]]
        # default=str stringifies values json cannot serialize natively.
        serialized_response = json.dumps(response, default=str)

        deployment_state = DeploymentState(
            state=mapped_state, info_json=serialized_response
        )
        deployment_state.timestamp.GetCurrentTime()
        return DescribeDeploymentResponse(
            state=deployment_state, status=Status.OK()
        )
    except BentoMLException as exc:
        return DescribeDeploymentResponse(status=exc.status_proto)
def describe(self, deployment_pb):
    """Report the state of an AWS Lambda deployment from its CloudFormation stack.

    The region is resolved (falling back to ``get_default_aws_region()`` and
    written back onto the config), the bento is fetched from
    ``self.yatai_service`` to learn which API names to expose, and the
    deployment's CloudFormation stack is queried.

    Args:
        deployment_pb: deployment message; its namespace, name and spec are
            read.

    Returns:
        A ``DescribeDeploymentResponse``:

        * state ``RUNNING`` with ``info_json`` holding ``endpoints`` (one URL
          per API name, when the stack outputs ``EndpointUrl``) and
          ``s3_bucket`` (when the stack outputs ``S3Bucket``) if the stack
          status is ``CREATE_COMPLETE`` or ``UPDATE_COMPLETE``;
        * state ``ERROR`` with an ``ABORTED`` status if such a stack has no
          ``Outputs``;
        * state ``FAILED`` if the status is in
          ``FAILED_CLOUDFORMATION_STACK_STATUS``;
        * state ``PENDING`` for any other stack status;
        * state ``ERROR`` with an ``INTERNAL`` status if the stack lookup
          raises anything;
        * only ``status=error.status_proto`` if a ``BentoMLException`` is
          raised outside the stack lookup.
    """
    try:
        deployment_spec = deployment_pb.spec
        lambda_deployment_config = deployment_spec.aws_lambda_operator_config
        lambda_deployment_config.region = (
            lambda_deployment_config.region or get_default_aws_region()
        )
        if not lambda_deployment_config.region:
            raise InvalidArgument('AWS region is missing')

        bento_pb = self.yatai_service.GetBento(
            GetBentoRequest(
                bento_name=deployment_spec.bento_name,
                bento_version=deployment_spec.bento_version,
            )
        )
        bento_service_metadata = bento_pb.bento.bento_service_metadata
        # A configured api_name restricts the deployment to that one API;
        # otherwise every API declared by the bento is listed.
        api_names = (
            [lambda_deployment_config.api_name]
            if lambda_deployment_config.api_name
            else [api.name for api in bento_service_metadata.apis]
        )

        try:
            cf_client = boto3.client(
                'cloudformation', lambda_deployment_config.region
            )
            # Build the stack name with the same helper delete() uses, so
            # both operations always address the same stack.
            stack_name = generate_aws_compatible_string(
                deployment_pb.namespace, deployment_pb.name
            )
            cloud_formation_stack_result = cf_client.describe_stacks(
                StackName=stack_name
            )
            stack_result = cloud_formation_stack_result.get('Stacks')[0]
            # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/\
            # using-cfn-describing-stacks.html
            success_status = ['CREATE_COMPLETE', 'UPDATE_COMPLETE']
            if stack_result['StackStatus'] in success_status:
                if stack_result.get('Outputs'):
                    outputs = stack_result['Outputs']
                else:
                    state = DeploymentState(
                        state=DeploymentState.ERROR,
                        error_message='"Outputs" field is not present',
                    )
                    # Timestamp this state like every other returned state.
                    state.timestamp.GetCurrentTime()
                    return DescribeDeploymentResponse(
                        status=Status.ABORTED('"Outputs" field is not present'),
                        state=state,
                    )
            elif stack_result['StackStatus'] in FAILED_CLOUDFORMATION_STACK_STATUS:
                state = DeploymentState(state=DeploymentState.FAILED)
                state.timestamp.GetCurrentTime()
                return DescribeDeploymentResponse(status=Status.OK(), state=state)
            else:
                # Neither finished nor failed: treat as still in progress.
                state = DeploymentState(state=DeploymentState.PENDING)
                state.timestamp.GetCurrentTime()
                return DescribeDeploymentResponse(status=Status.OK(), state=state)
        except Exception as error:  # pylint: disable=broad-except
            # Deliberately broad: any failure while querying the stack is
            # reported as an ERROR state rather than propagated.
            state = DeploymentState(
                state=DeploymentState.ERROR, error_message=str(error)
            )
            state.timestamp.GetCurrentTime()
            return DescribeDeploymentResponse(
                status=Status.INTERNAL(str(error)), state=state
            )

        # Flatten the list of {'OutputKey', 'OutputValue'} dicts into a map.
        outputs = {o['OutputKey']: o['OutputValue'] for o in outputs}
        info_json = {}

        if 'EndpointUrl' in outputs:
            info_json['endpoints'] = [
                outputs['EndpointUrl'] + '/' + api_name for api_name in api_names
            ]
        if 'S3Bucket' in outputs:
            info_json['s3_bucket'] = outputs['S3Bucket']

        state = DeploymentState(
            state=DeploymentState.RUNNING, info_json=json.dumps(info_json)
        )
        state.timestamp.GetCurrentTime()
        return DescribeDeploymentResponse(status=Status.OK(), state=state)
    except BentoMLException as error:
        return DescribeDeploymentResponse(status=error.status_proto)
def _update(self, deployment_pb, current_deployment, bento_pb, bento_path):
    """Apply an updated spec to an existing SageMaker deployment.

    Steps, in order:

    1. If ``bento_path`` is remote, resolve it to a local path and recurse.
    2. Read the currently deployed image from ``self.describe``'s
       ``info_json``.
    3. If the bento name or version changed, build and push a new image to
       ECR; otherwise reuse the deployed image.
    4. If the API name, gunicorn worker count or image changed, replace the
       SageMaker model.
    5. Create a new endpoint config (deleting the current one first when it
       has the same name), point the endpoint at it, and finally delete the
       old endpoint config when it is a different one.

    Args:
        deployment_pb: the updated deployment message.
        current_deployment: the deployment message as currently deployed.
        bento_pb: bento message; its ``bento_service_metadata`` is read.
        bento_path: local or remote path of the bento bundle.

    Returns:
        ``ApplyDeploymentResponse`` with ``Status.OK()`` and ``deployment_pb``.

    Raises:
        AWSServiceError: re-raised after the deployment's SageMaker
            resources have been deleted via
            ``delete_sagemaker_deployment_resources_if_exist``.
    """
    if loader._is_remote_path(bento_path):
        with loader._resolve_remote_bundle_path(bento_path) as local_path:
            return self._update(
                deployment_pb, current_deployment, bento_pb, local_path
            )
    updated_deployment_spec = deployment_pb.spec
    updated_sagemaker_config = updated_deployment_spec.sagemaker_operator_config
    sagemaker_client = boto3.client(
        'sagemaker', updated_sagemaker_config.region or get_default_aws_region()
    )

    try:
        raise_if_api_names_not_found_in_bento_service_metadata(
            bento_pb.bento.bento_service_metadata,
            [updated_sagemaker_config.api_name],
        )
        describe_latest_deployment_state = self.describe(deployment_pb)
        current_deployment_spec = current_deployment.spec
        current_sagemaker_config = current_deployment_spec.sagemaker_operator_config
        latest_deployment_state = json.loads(
            describe_latest_deployment_state.state.info_json
        )

        # Image currently served by the first production variant.
        current_ecr_image_tag = latest_deployment_state['ProductionVariants'][0][
            'DeployedImages'
        ][0]['SpecifiedImage']
        if (
            updated_deployment_spec.bento_name != current_deployment_spec.bento_name
            or updated_deployment_spec.bento_version
            != current_deployment_spec.bento_version
        ):
            logger.debug(
                'BentoService tag is different from current deployment, '
                'creating new docker image and push to ECR'
            )
            with TempDirectory() as temp_dir:
                sagemaker_project_dir = os.path.join(
                    temp_dir, updated_deployment_spec.bento_name
                )
                _init_sagemaker_project(
                    sagemaker_project_dir,
                    bento_path,
                    bento_pb.bento.bento_service_metadata.env.docker_base_image,
                )
                ecr_image_path = create_and_push_docker_image_to_ecr(
                    updated_sagemaker_config.region,
                    updated_deployment_spec.bento_name,
                    updated_deployment_spec.bento_version,
                    sagemaker_project_dir,
                )
        else:
            logger.debug('Using existing ECR image for Sagemaker model')
            ecr_image_path = current_ecr_image_tag

        (
            updated_sagemaker_model_name,
            updated_sagemaker_endpoint_config_name,
            sagemaker_endpoint_name,
        ) = _get_sagemaker_resource_names(deployment_pb)
        (
            current_sagemaker_model_name,
            current_sagemaker_endpoint_config_name,
            _,
        ) = _get_sagemaker_resource_names(current_deployment)

        if (
            updated_sagemaker_config.api_name != current_sagemaker_config.api_name
            or updated_sagemaker_config.num_of_gunicorn_workers_per_instance
            != current_sagemaker_config.num_of_gunicorn_workers_per_instance
            or ecr_image_path != current_ecr_image_tag
        ):
            logger.debug(
                'Sagemaker model requires update. Delete current sagemaker model %s '
                'and creating new model %s',
                current_sagemaker_model_name,
                updated_sagemaker_model_name,
            )
            _delete_sagemaker_model_if_exist(
                sagemaker_client, current_sagemaker_model_name
            )
            _create_sagemaker_model(
                sagemaker_client,
                updated_sagemaker_model_name,
                ecr_image_path,
                updated_sagemaker_config,
            )

        # When bento service tag is not changed, we need to delete the current
        # endpoint configuration in order to create new one to avoid name
        # collision
        endpoint_config_name_reused = (
            current_sagemaker_endpoint_config_name
            == updated_sagemaker_endpoint_config_name
        )
        if endpoint_config_name_reused:
            logger.debug(
                'Current sagemaker config name %s is same as updated one, '
                'delete it before create new endpoint config',
                current_sagemaker_endpoint_config_name,
            )
            _delete_sagemaker_endpoint_config_if_exist(
                sagemaker_client, current_sagemaker_endpoint_config_name
            )
        logger.debug(
            'Create new endpoint configuration %s',
            updated_sagemaker_endpoint_config_name,
        )
        _create_sagemaker_endpoint_config(
            sagemaker_client,
            updated_sagemaker_model_name,
            updated_sagemaker_endpoint_config_name,
            updated_sagemaker_config,
        )
        logger.debug(
            'Updating endpoint to new endpoint configuration %s',
            updated_sagemaker_endpoint_config_name,
        )
        _update_sagemaker_endpoint(
            sagemaker_client,
            sagemaker_endpoint_name,
            updated_sagemaker_endpoint_config_name,
        )
        # Only remove the old config when it is a different one. If the name
        # was reused, the old config is already gone and this name now
        # refers to the config the endpoint was just switched to.
        if not endpoint_config_name_reused:
            logger.debug(
                'Delete old sagemaker endpoint config %s',
                current_sagemaker_endpoint_config_name,
            )
            _delete_sagemaker_endpoint_config_if_exist(
                sagemaker_client, current_sagemaker_endpoint_config_name
            )
    except AWSServiceError:
        # Roll back by removing the deployment's SageMaker resources, then
        # let the original error propagate with its traceback intact.
        delete_sagemaker_deployment_resources_if_exist(deployment_pb)
        raise

    return ApplyDeploymentResponse(status=Status.OK(), deployment=deployment_pb)