def generate_ec2_resource_names(namespace, name):
    """Derive the set of AWS resource names used by one EC2 deployment.

    Args:
        namespace: Deployment namespace.
        name: Deployment name.

    Returns:
        Tuple of (sam_template_name, deployment_stack_name, repo_name,
        elb_name), each sanitized by ``generate_aws_compatible_string``.
        The ELB name is additionally capped at 32 characters (AWS limit).
    """
    base = f"{namespace}-{name}"
    sam_template_name = generate_aws_compatible_string("btml-template-" + base)
    deployment_stack_name = generate_aws_compatible_string("btml-stack-" + base)
    repo_name = generate_aws_compatible_string("btml-repo-" + base)
    elb_name = generate_aws_compatible_string(base, max_length=32)
    return sam_template_name, deployment_stack_name, repo_name, elb_name
def _get_sagemaker_resource_names(deployment_pb):
    """Derive the SageMaker model / endpoint-config / endpoint names.

    The model and endpoint-config names are built from the same
    (value, max_length) pairs so that each component is truncated
    consistently before being sanitized.

    Returns:
        Tuple of (sagemaker_model_name, sagemaker_endpoint_config_name,
        sagemaker_endpoint_name).
    """
    spec = deployment_pb.spec
    truncated_parts = (
        (deployment_pb.namespace, 10),
        (deployment_pb.name, 12),
        (spec.bento_name, 20),
        (spec.bento_version, 18),
    )
    sagemaker_model_name = generate_aws_compatible_string(*truncated_parts)
    sagemaker_endpoint_config_name = generate_aws_compatible_string(
        *truncated_parts)
    sagemaker_endpoint_name = generate_aws_compatible_string(
        deployment_pb.namespace, deployment_pb.name)
    return (sagemaker_model_name, sagemaker_endpoint_config_name,
            sagemaker_endpoint_name)
def delete(self, deployment_pb):
    """Tear down an AWS Lambda deployment.

    Removes the S3 artifact bucket recorded in the deployment's
    info_json (if any) and then deletes the CloudFormation stack that
    owns the Lambda function and its related resources.

    Returns:
        DeleteDeploymentResponse with OK status, or the error's status
        proto when a BentoMLException is raised.
    """
    try:
        logger.debug('Deleting AWS Lambda deployment')

        spec = deployment_pb.spec
        config = spec.aws_lambda_operator_config
        config.region = config.region or get_default_aws_region()
        if not config.region:
            raise InvalidArgument('AWS region is missing')

        cf_client = boto3.client('cloudformation', config.region)
        stack_name = generate_aws_compatible_string(
            deployment_pb.namespace, deployment_pb.name)

        # Best-effort removal of the artifact bucket recorded at deploy time.
        if deployment_pb.state.info_json:
            info = json.loads(deployment_pb.state.info_json)
            bucket = info.get('s3_bucket')
            if bucket:
                cleanup_s3_bucket_if_exist(bucket, config.region)

        logger.debug(
            'Deleting AWS CloudFormation: %s that includes Lambda function '
            'and related resources',
            stack_name,
        )
        cf_client.delete_stack(StackName=stack_name)
        return DeleteDeploymentResponse(status=Status.OK())
    except BentoMLException as error:
        return DeleteDeploymentResponse(status=error.status_proto)
def _add(self, deployment_pb, bento_pb, bento_path):
    """Create the AWS Lambda deployment for a bento bundle.

    A remote bundle path is first resolved to a local copy, then this
    method re-dispatches to itself with the local path. A uniquely
    suffixed S3 bucket is created to hold deployment artifacts; on
    failure the bucket is cleaned up and the exception re-raised.
    """
    if loader._is_remote_path(bento_path):
        with loader._resolve_remote_bundle_path(bento_path) as local_path:
            return self._add(deployment_pb, bento_pb, local_path)

    spec = deployment_pb.spec
    config = spec.aws_lambda_operator_config
    metadata = bento_pb.bento.bento_service_metadata

    # Random suffix keeps the bucket name unique across re-deployments.
    suffix = uuid.uuid4().hex[:6].lower()
    bucket = generate_aws_compatible_string(
        'btml-{namespace}-{name}-{random_string}'.format(
            namespace=deployment_pb.namespace,
            name=deployment_pb.name,
            random_string=suffix,
        ))
    try:
        create_s3_bucket_if_not_exists(bucket, config.region)
        _deploy_lambda_function(
            deployment_pb=deployment_pb,
            bento_service_metadata=metadata,
            deployment_spec=spec,
            lambda_s3_bucket=bucket,
            lambda_deployment_config=config,
            bento_path=bento_path,
        )
        return ApplyDeploymentResponse(status=Status.OK(),
                                       deployment=deployment_pb)
    except BentoMLException as error:
        # Best-effort cleanup of the bucket created above.
        if bucket and config:
            cleanup_s3_bucket_if_exist(bucket, config.region)
        raise error
def _add(self, deployment_pb, bento_pb, bento_path):
    """Create the AWS EC2 deployment for a bento bundle.

    Remote bundle paths are resolved to a local copy first, then this
    method re-dispatches to itself. An artifact S3 bucket named after
    the AWS user id and namespace is created, then the service is
    containerized and deployed via ``deploy_service``.

    Returns:
        ApplyDeploymentResponse with OK status on success.

    Raises:
        BentoMLException: re-raised after best-effort bucket cleanup.
    """
    # BUGFIX: pre-bind the bucket name so the except-handler below cannot
    # hit an UnboundLocalError (masking the real error) when the exception
    # is raised before the bucket name is generated, e.g. while resolving
    # a remote bundle path.
    artifact_s3_bucket_name = None
    try:
        if loader._is_remote_path(bento_path):
            with loader._resolve_remote_bundle_path(
                    bento_path) as local_path:
                return self._add(deployment_pb, bento_pb, local_path)

        deployment_spec = deployment_pb.spec
        aws_ec2_deployment_config = deployment_spec.aws_ec2_operator_config

        user_id = get_aws_user_id()
        artifact_s3_bucket_name = generate_aws_compatible_string(
            "btml-{user_id}-{namespace}".format(
                user_id=user_id,
                namespace=deployment_pb.namespace,
            ))
        create_s3_bucket_if_not_exists(artifact_s3_bucket_name,
                                       aws_ec2_deployment_config.region)
        self.deploy_service(
            deployment_pb,
            deployment_spec,
            bento_path,
            aws_ec2_deployment_config,
            artifact_s3_bucket_name,
            aws_ec2_deployment_config.region,
        )
    except BentoMLException as error:
        # Cleanup is only possible once the bucket name exists; the
        # short-circuit also guards against the config being unbound.
        if artifact_s3_bucket_name and aws_ec2_deployment_config.region:
            cleanup_s3_bucket_if_exist(artifact_s3_bucket_name,
                                       aws_ec2_deployment_config.region)
        raise error
    return ApplyDeploymentResponse(status=Status.OK(),
                                   deployment=deployment_pb)
def delete(self, deployment_pb):
    """Tear down an AWS EC2 deployment.

    Deletes, in order: the CloudFormation stack, the ECR repository,
    and the artifact S3 bucket recorded in the deployment's info_json.

    Returns:
        DeleteDeploymentResponse with OK status, or the error's status
        proto when a BentoMLException is raised.
    """
    try:
        spec = deployment_pb.spec
        config = spec.aws_ec2_operator_config
        config.region = config.region or get_default_aws_region()
        if not config.region:
            raise InvalidArgument("AWS region is missing")

        namespace = deployment_pb.namespace
        name = deployment_pb.name

        # CloudFormation stack
        stack_name = generate_aws_compatible_string(
            "btml-stack-{namespace}-{name}".format(namespace=namespace,
                                                   name=name))
        delete_cloudformation_stack(stack_name, config.region)

        # ECR repository
        repo_name = generate_aws_compatible_string(
            "btml-repo-{namespace}-{name}".format(namespace=namespace,
                                                  name=name))
        delete_ecr_repository(repo_name, config.region)

        # Artifact bucket recorded at deploy time, if any.
        if deployment_pb.state.info_json:
            info = json.loads(deployment_pb.state.info_json)
            bucket = info.get('S3Bucket')
            if bucket:
                cleanup_s3_bucket_if_exist(bucket, config.region)

        return DeleteDeploymentResponse(status=Status.OK())
    except BentoMLException as error:
        return DeleteDeploymentResponse(status=error.status_proto)
def describe(self, deployment_pb):
    """Report the current state of an AWS Lambda deployment.

    Queries CloudFormation for the deployment's stack and maps its
    StackStatus onto a DeploymentState: CREATE/UPDATE_COMPLETE ->
    RUNNING (with endpoint/bucket info), statuses in
    FAILED_CLOUDFORMATION_STACK_STATUS -> FAILED, anything else ->
    PENDING. Any boto/parse error inside the inner try is reported as
    an INTERNAL error state.
    """
    try:
        deployment_spec = deployment_pb.spec
        lambda_deployment_config = deployment_spec.aws_lambda_operator_config
        # Fall back to the configured default region when none was given.
        lambda_deployment_config.region = (lambda_deployment_config.region
                                           or get_default_aws_region())
        if not lambda_deployment_config.region:
            raise InvalidArgument('AWS region is missing')
        bento_pb = self.yatai_service.GetBento(
            GetBentoRequest(
                bento_name=deployment_spec.bento_name,
                bento_version=deployment_spec.bento_version,
            ))
        bento_service_metadata = bento_pb.bento.bento_service_metadata
        # A configured api_name narrows reporting to that one API;
        # otherwise all of the bento service's APIs are listed.
        api_names = ([lambda_deployment_config.api_name]
                     if lambda_deployment_config.api_name else
                     [api.name for api in bento_service_metadata.apis])
        try:
            cf_client = boto3.client('cloudformation',
                                     lambda_deployment_config.region)
            stack_name = generate_aws_compatible_string(
                '{ns}-{name}'.format(ns=deployment_pb.namespace,
                                     name=deployment_pb.name))
            cloud_formation_stack_result = cf_client.describe_stacks(
                StackName=stack_name)
            stack_result = cloud_formation_stack_result.get('Stacks')[0]
            # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/\
            # using-cfn-describing-stacks.html
            success_status = ['CREATE_COMPLETE', 'UPDATE_COMPLETE']
            if stack_result['StackStatus'] in success_status:
                # `outputs` must be present on success; it is consumed
                # after this try block to build the RUNNING state.
                if stack_result.get('Outputs'):
                    outputs = stack_result['Outputs']
                else:
                    return DescribeDeploymentResponse(
                        status=Status.ABORTED(
                            '"Outputs" field is not present'),
                        state=DeploymentState(
                            state=DeploymentState.ERROR,
                            error_message='"Outputs" field is not present',
                        ),
                    )
            elif stack_result[
                    'StackStatus'] in FAILED_CLOUDFORMATION_STACK_STATUS:
                state = DeploymentState(state=DeploymentState.FAILED)
                state.timestamp.GetCurrentTime()
                return DescribeDeploymentResponse(status=Status.OK(),
                                                  state=state)
            else:
                # Stack exists but is still transitioning.
                state = DeploymentState(state=DeploymentState.PENDING)
                state.timestamp.GetCurrentTime()
                return DescribeDeploymentResponse(status=Status.OK(),
                                                  state=state)
        except Exception as error:  # pylint: disable=broad-except
            # Broad catch: boto client errors, missing stack, index errors
            # from an empty Stacks list are all surfaced as INTERNAL.
            state = DeploymentState(state=DeploymentState.ERROR,
                                    error_message=str(error))
            state.timestamp.GetCurrentTime()
            return DescribeDeploymentResponse(status=Status.INTERNAL(
                str(error)), state=state)
        # Reaching here means the stack completed successfully and
        # `outputs` was populated above.
        outputs = {o['OutputKey']: o['OutputValue'] for o in outputs}
        info_json = {}
        if 'EndpointUrl' in outputs:
            info_json['endpoints'] = [
                outputs['EndpointUrl'] + '/' + api_name
                for api_name in api_names
            ]
        if 'S3Bucket' in outputs:
            info_json['s3_bucket'] = outputs['S3Bucket']
        state = DeploymentState(state=DeploymentState.RUNNING,
                                info_json=json.dumps(info_json))
        state.timestamp.GetCurrentTime()
        return DescribeDeploymentResponse(status=Status.OK(), state=state)
    except BentoMLException as error:
        return DescribeDeploymentResponse(status=error.status_proto)
def _deploy_lambda_function(
    deployment_pb,
    bento_service_metadata,
    deployment_spec,
    lambda_s3_bucket,
    lambda_deployment_config,
    bento_path,
):
    """Build, package, and deploy a bento service as AWS Lambda functions.

    Generates a SAM/CloudFormation template for the deployment, builds one
    Lambda function per API, trims the bundles to fit Lambda's size limits
    (off-loading extra resources to S3 when needed), then packages and
    deploys the stack.

    Raises:
        BentoMLException: for Python 2 bundles, unsupported artifact/runtime
            combinations, or bundles exceeding the hard 700MB limit.
    """
    deployment_path_prefix = os.path.join(deployment_pb.namespace,
                                          deployment_pb.name)

    py_major, py_minor, _ = bento_service_metadata.env.python_version.split(
        '.')
    if py_major != '3':
        raise BentoMLException(
            'Python 2 is not supported for Lambda Deployment')
    python_runtime = 'python{}.{}'.format(py_major, py_minor)

    artifact_types = [
        item.artifact_type for item in bento_service_metadata.artifacts
    ]
    if any(i in ['TensorflowSavedModelArtifact', 'KerasModelArtifact']
           for i in artifact_types) and (py_major, py_minor) != ('3', '6'):
        # BUGFIX: the original adjacent string literals joined without
        # separating spaces, yielding "servicesbuilt", "yourservice" and
        # "version(hint" in the user-facing error message.
        raise BentoMLException(
            'AWS Lambda Deployment only supports BentoML services '
            'built with Python 3.6.x. To fix this, repack your '
            'service with the right Python version '
            '(hint: pyenv/anaconda) and try again')

    api_names = ([lambda_deployment_config.api_name]
                 if lambda_deployment_config.api_name else
                 [api.name for api in bento_service_metadata.apis])
    raise_if_api_names_not_found_in_bento_service_metadata(
        bento_service_metadata, api_names)

    with TempDirectory() as lambda_project_dir:
        logger.debug(
            'Generating cloudformation template.yaml for lambda project at %s',
            lambda_project_dir,
        )
        template_file_path = _create_aws_lambda_cloudformation_template_file(
            project_dir=lambda_project_dir,
            namespace=deployment_pb.namespace,
            deployment_name=deployment_pb.name,
            deployment_path_prefix=deployment_path_prefix,
            api_names=api_names,
            bento_service_name=deployment_spec.bento_name,
            s3_bucket_name=lambda_s3_bucket,
            py_runtime=python_runtime,
            memory_size=lambda_deployment_config.memory_size,
            timeout=lambda_deployment_config.timeout,
        )
        logger.debug('Validating generated template.yaml')
        validate_sam_template(
            template_file_path,
            lambda_deployment_config.region,
            lambda_project_dir,
        )
        logger.debug(
            'Initializing lambda project in directory: %s ...',
            lambda_project_dir,
        )
        init_sam_project(
            lambda_project_dir,
            bento_path,
            deployment_pb.name,
            deployment_spec.bento_name,
            api_names,
            aws_region=lambda_deployment_config.region,
        )
        for api_name in api_names:
            build_directory = os.path.join(lambda_project_dir, '.aws-sam',
                                           'build', api_name)
            logger.debug(
                'Checking is function "%s" bundle under lambda size '
                'limit',
                api_name,
            )
            # Since we only use s3 get object in lambda function, and
            # lambda function pack their own boto3/botocore modules,
            # we will just delete those modules from function bundle
            # directory
            delete_list = ['boto3', 'botocore']
            for name in delete_list:
                logger.debug('Remove module "%s" from build directory',
                             name)
                shutil.rmtree(os.path.join(build_directory, name))
            total_build_dir_size = total_file_or_directory_size(
                build_directory)
            if total_build_dir_size > LAMBDA_FUNCTION_MAX_LIMIT:
                raise BentoMLException(
                    'Build function size is over 700MB, max size '
                    'capable for AWS Lambda function')
            if total_build_dir_size >= LAMBDA_FUNCTION_LIMIT:
                logger.debug(
                    'Function %s is over lambda size limit, attempting '
                    'reduce it',
                    api_name,
                )
                reduce_bundle_size_and_upload_extra_resources_to_s3(
                    build_directory=build_directory,
                    region=lambda_deployment_config.region,
                    s3_bucket=lambda_s3_bucket,
                    deployment_prefix=deployment_path_prefix,
                    function_name=api_name,
                    lambda_project_dir=lambda_project_dir,
                )
            else:
                # Bundle fits; the S3 fallback loader is not needed.
                logger.debug(
                    'Function bundle is within Lambda limit, removing '
                    'download_extra_resources.py file from function bundle')
                os.remove(
                    os.path.join(build_directory,
                                 'download_extra_resources.py'))
        logger.info('Packaging AWS Lambda project at %s ...',
                    lambda_project_dir)
        lambda_package(
            lambda_project_dir,
            lambda_deployment_config.region,
            lambda_s3_bucket,
            deployment_path_prefix,
        )
        logger.info('Deploying lambda project')
        stack_name = generate_aws_compatible_string(deployment_pb.namespace +
                                                    '-' + deployment_pb.name)
        lambda_deploy(
            lambda_project_dir,
            lambda_deployment_config.region,
            stack_name=stack_name,
        )
def describe(self, deployment_pb):
    """Report the current state of an AWS EC2 deployment.

    Queries CloudFormation for the deployment stack, collects its
    Outputs (auto-scaling group instances, bucket, target group, URL),
    and derives RUNNING vs PENDING from whether the ELB target group
    has at least one healthy target.
    """
    try:
        deployment_spec = deployment_pb.spec
        ec2_deployment_config = deployment_spec.aws_ec2_operator_config
        # Fall back to the configured default region when none was given.
        ec2_deployment_config.region = (ec2_deployment_config.region
                                        or get_default_aws_region())
        if not ec2_deployment_config.region:
            raise InvalidArgument("AWS region is missing")
        bento_pb = self.yatai_service.GetBento(
            GetBentoRequest(
                bento_name=deployment_spec.bento_name,
                bento_version=deployment_spec.bento_version,
            ))
        bento_service_metadata = bento_pb.bento.bento_service_metadata
        api_names = [api.name for api in bento_service_metadata.apis]
        deployment_stack_name = generate_aws_compatible_string(
            "btml-stack-{namespace}-{name}".format(
                namespace=deployment_pb.namespace, name=deployment_pb.name))
        try:
            cf_client = boto3.client("cloudformation",
                                     ec2_deployment_config.region)
            cloudformation_stack_result = cf_client.describe_stacks(
                StackName=deployment_stack_name)
            stack_result = cloudformation_stack_result.get("Stacks")[0]
            # `outputs` is consumed after this try block to build info_json.
            if stack_result.get("Outputs"):
                outputs = stack_result.get("Outputs")
            else:
                return DescribeDeploymentResponse(
                    status=Status.ABORTED('"Outputs" field is not present'),
                    state=DeploymentState(
                        state=DeploymentState.ERROR,
                        error_message='"Outputs" field is not present',
                    ),
                )
            if stack_result[
                    "StackStatus"] in FAILED_CLOUDFORMATION_STACK_STATUS:
                state = DeploymentState(state=DeploymentState.FAILED)
                return DescribeDeploymentResponse(status=Status.OK(),
                                                  state=state)
        except Exception as error:  # pylint: disable=broad-except
            # Broad catch: boto client errors and missing-stack errors are
            # surfaced as an INTERNAL error state.
            state = DeploymentState(state=DeploymentState.ERROR,
                                    error_message=str(error))
            return DescribeDeploymentResponse(status=Status.INTERNAL(
                str(error)), state=state)
        info_json = {}
        outputs = {o["OutputKey"]: o["OutputValue"] for o in outputs}
        if "AutoScalingGroup" in outputs:
            info_json[
                "InstanceDetails"] = get_instance_ip_from_scaling_group(
                    [outputs["AutoScalingGroup"]],
                    ec2_deployment_config.region)
            info_json["Endpoints"] = get_endpoints_from_instance_address(
                info_json["InstanceDetails"], api_names)
        if "S3Bucket" in outputs:
            info_json["S3Bucket"] = outputs["S3Bucket"]
        if "TargetGroup" in outputs:
            info_json["TargetGroup"] = outputs["TargetGroup"]
        if "Url" in outputs:
            info_json["Url"] = outputs["Url"]
        # NOTE(review): this access is unconditional while the assignment
        # above is guarded by `if "TargetGroup" in outputs` — if the stack
        # can ever lack a TargetGroup output this raises KeyError, which the
        # outer handler (BentoMLException only) does not catch. Confirm the
        # template always emits TargetGroup.
        healthy_target = get_healthy_target(outputs["TargetGroup"],
                                            ec2_deployment_config.region)
        if healthy_target:
            deployment_state = DeploymentState.RUNNING
        else:
            deployment_state = DeploymentState.PENDING
        state = DeploymentState(state=deployment_state,
                                info_json=json.dumps(info_json))
        return DescribeDeploymentResponse(status=Status.OK(), state=state)
    except BentoMLException as error:
        return DescribeDeploymentResponse(status=error.status_proto)
def deploy_service(
    self,
    deployment_pb,
    deployment_spec,
    bento_path,
    aws_ec2_deployment_config,
    s3_bucket_name,
    region,
):
    """Containerize a bento bundle and deploy it to EC2 via CloudFormation.

    Creates an ECR repository, pushes the containerized service to it,
    renders a CloudFormation template wiring the image into an
    auto-scaling group behind an ELB, then validates, builds, packages
    and deploys that template.
    """
    namespace = deployment_pb.namespace
    name = deployment_pb.name

    sam_template_name = generate_aws_compatible_string(
        "btml-template-{namespace}-{name}".format(namespace=namespace,
                                                  name=name))
    deployment_stack_name = generate_aws_compatible_string(
        "btml-stack-{namespace}-{name}".format(namespace=namespace,
                                               name=name))
    repo_name = generate_aws_compatible_string(
        "btml-repo-{namespace}-{name}".format(namespace=namespace,
                                              name=name))
    # ELB names are limited to 32 characters by AWS.
    elb_name = generate_aws_compatible_string(
        "{namespace}-{name}".format(namespace=namespace, name=name),
        max_length=32,
    )

    with TempDirectory() as project_path:
        registry_id = _create_ecr_repo(repo_name, region)
        registry_token, registry_url = _get_ecr_password(registry_id, region)
        registry_username, registry_password = _get_creds_from_token(
            registry_token)

        registry_domain = registry_url.replace("https://", "")
        push_tag = f"{registry_domain}/{repo_name}"
        pull_tag = f"{push_tag}:{deployment_spec.bento_version}"

        logger.info("Containerizing service")
        containerize_bento_service(
            bento_name=deployment_spec.bento_name,
            bento_version=deployment_spec.bento_version,
            saved_bundle_path=bento_path,
            push=True,
            tag=push_tag,
            build_arg={},
            username=registry_username,
            password=registry_password,
        )

        logger.info("Generating user data")
        encoded_user_data = _make_user_data(registry_url, pull_tag, region)

        logger.info("Making template")
        template_file_path = _make_cloudformation_template(
            project_path,
            encoded_user_data,
            s3_bucket_name,
            sam_template_name,
            elb_name,
            aws_ec2_deployment_config.ami_id,
            aws_ec2_deployment_config.instance_type,
            aws_ec2_deployment_config.autoscale_min_size,
            aws_ec2_deployment_config.autoscale_desired_capacity,
            aws_ec2_deployment_config.autoscale_max_size,
        )
        validate_sam_template(sam_template_name,
                              aws_ec2_deployment_config.region, project_path)

        logger.info("Building service")
        build_template(template_file_path, project_path,
                       aws_ec2_deployment_config.region)

        logger.info("Packaging service")
        package_template(s3_bucket_name, project_path,
                         aws_ec2_deployment_config.region)

        logger.info("Deploying service")
        deploy_template(
            deployment_stack_name,
            s3_bucket_name,
            project_path,
            aws_ec2_deployment_config.region,
        )