def test_canary_rollout():
    service_name = 'isvc-canary'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))))
    isvc = V1alpha2InferenceService(
        api_version=constants.KFSERVING_API_VERSION,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)

    # Define the canary endpoint spec, then roll out 10% of traffic to the canary version.
    canary_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers-2',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))))
    KFServing.rollout_canary(service_name, canary=canary_endpoint_spec, percent=10,
                             namespace=KFSERVING_TEST_NAMESPACE, watch=True,
                             timeout_seconds=120)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)

    # Delete the InferenceService
    KFServing.delete(service_name, namespace=KFSERVING_TEST_NAMESPACE)
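# A minimal follow-up sketch: once the canary above has been validated, newer
# kfserving SDK releases expose a promote() helper that makes the canary the
# new default endpoint and removes the canary. Its availability and exact
# signature depend on the SDK version, so treat this as an assumption.
def test_canary_promote():
    KFServing.promote('isvc-canary', namespace=KFSERVING_TEST_NAMESPACE,
                      watch=True, timeout_seconds=120)
    KFServing.wait_isvc_ready('isvc-canary', namespace=KFSERVING_TEST_NAMESPACE)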
def test_tensorflow_kfserving():
    service_name = 'isvc-tensorflow'
    # Group/version string for the InferenceService CRD, e.g. 'serving.kubeflow.org/v1alpha2'.
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
def EndpointSpec(framework, storage_uri, service_account):
    # Map a framework name onto the matching v1alpha2 predictor spec,
    # attaching the service account used to pull the model from storage.
    if framework == "tensorflow":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "pytorch":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            pytorch=V1alpha2PyTorchSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "sklearn":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            sklearn=V1alpha2SKLearnSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "xgboost":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            xgboost=V1alpha2XGBoostSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "onnx":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            onnx=V1alpha2ONNXSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "tensorrt":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorrt=V1alpha2TensorRTSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    else:
        raise ValueError("Error: No matching framework: " + framework)
def EndpointSpec(framework, storage_uri, service_account_name="k8s-sa",
                 transformer_custom_image=""):
    if framework == 'tensorflow':
        # The tensorflow case also attaches a custom transformer container
        # in front of the predictor.
        return V1alpha2EndpointSpec(
            predictor=V1alpha2PredictorSpec(
                service_account_name=service_account_name,
                tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri)),
            transformer=V1alpha2TransformerSpec(
                min_replicas=1,
                custom=V1alpha2CustomSpec(container=client.V1Container(
                    image=transformer_custom_image,
                    name="kfserving-container"))))
    elif framework == 'pytorch':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            pytorch=V1alpha2PyTorchSpec(storage_uri=storage_uri)))
    elif framework == 'sklearn':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            sklearn=V1alpha2SKLearnSpec(storage_uri=storage_uri)))
    elif framework == 'xgboost':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            xgboost=V1alpha2XGBoostSpec(storage_uri=storage_uri)))
    elif framework == 'onnx':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            onnx=V1alpha2ONNXSpec(storage_uri=storage_uri)))
    elif framework == 'tensorrt':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorrt=V1alpha2TensorRTSpec(storage_uri=storage_uri)))
    else:
        raise ValueError("Error: No matching framework: " + framework)
def test_tensorflow_kfserving():
    service_name = 'isvc-tensorflow'
    # Group/version string for the InferenceService CRD.
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
                resources=V1ResourceRequirements(
                    requests={'cpu': '1', 'memory': '2Gi'},
                    limits={'cpu': '1', 'memory': '2Gi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)

    probs = predict(service_name, './data/flower_input.json')
    assert (np.argmax(probs[0].get('scores')) == 0)

    # Delete the InferenceService
    KFServing.delete(service_name, namespace=KFSERVING_TEST_NAMESPACE)
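# The tests above assume a module-level predict() helper. Below is a minimal
# sketch of such a helper: it posts the JSON payload to the TensorFlow Serving
# v1 REST endpoint through the cluster ingress. get_cluster_ip() and the URL
# layout are assumptions modeled on the kfserving e2e tests, not a pinned API.
import json
import requests
from urllib.parse import urlparse

def predict(service_name, input_json):
    isvc = KFServing.get(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    # Route through the ingress gateway, passing the service host as a header.
    host = urlparse(isvc['status']['url']).netloc
    url = 'http://{}/v1/models/{}:predict'.format(get_cluster_ip(), service_name)
    with open(input_json) as f:
        data = json.load(f)
    response = requests.post(url, json.dumps(data), headers={'Host': host})
    return json.loads(response.content.decode('utf-8'))['predictions']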
def generate_predictor_spec(self, framework, storage_uri=None, container=None):
    '''Generate a predictor spec according to the framework and
    storage_uri, or a custom container.
    '''
    if framework == 'tensorflow':
        predictor = V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri))
    elif framework == 'onnx':
        predictor = V1alpha2PredictorSpec(
            onnx=V1alpha2ONNXSpec(storage_uri=storage_uri))
    elif framework == 'pytorch':
        predictor = V1alpha2PredictorSpec(
            pytorch=V1alpha2PyTorchSpec(storage_uri=storage_uri))
    elif framework == 'sklearn':
        predictor = V1alpha2PredictorSpec(
            sklearn=V1alpha2SKLearnSpec(storage_uri=storage_uri))
    elif framework == 'triton':
        predictor = V1alpha2PredictorSpec(
            triton=V1alpha2TritonSpec(storage_uri=storage_uri))
    elif framework == 'xgboost':
        predictor = V1alpha2PredictorSpec(
            xgboost=V1alpha2XGBoostSpec(storage_uri=storage_uri))
    elif framework == 'custom':
        predictor = V1alpha2PredictorSpec(
            custom=V1alpha2CustomSpec(container=container))
    else:
        raise RuntimeError("Unsupported framework {}".format(framework))
    return predictor
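# A minimal usage sketch for generate_predictor_spec(); the `builder` object
# standing in for the surrounding class instance and the model bucket are
# hypothetical, only the method itself comes from the snippet above.
predictor = builder.generate_predictor_spec(
    'sklearn', storage_uri='gs://my-bucket/sklearn/model')  # hypothetical bucket
endpoint_spec = V1alpha2EndpointSpec(predictor=predictor)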
def create_inference_service(namespace: str, name: str, storage_uri: str,
                             runtime_version: str, service_account_name: str):
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            service_account_name=service_account_name,
            tensorflow=V1alpha2TensorflowSpec(
                runtime_version=runtime_version,
                storage_uri=storage_uri,
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=name, namespace=namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing = KFServingClient()
    KFServing.create(isvc)
    KFServing.get(name, namespace=namespace, watch=True, timeout_seconds=300)
def run(self):
    parser = argparse.ArgumentParser()
    parser.add_argument('--namespace', required=False, default='kubeflow')
    # e.g. pvc://${PVCNAME}/dir
    parser.add_argument('--storage_uri', required=False, default='/mnt/export')
    parser.add_argument('--name', required=False, default='kfserving-sample')
    args = parser.parse_args()

    namespace = args.namespace
    serving_name = args.name
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION

    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri=args.storage_uri,
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=serving_name, namespace=namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing = KFServingClient()
    KFServing.create(isvc)
    print('Waiting 5 seconds for the InferenceService to be created...')
    time.sleep(5)
    KFServing.get(serving_name, namespace=namespace, watch=True,
                  timeout_seconds=300)
def generate_kfservice():
    default_model_spec = V1alpha2ModelSpec(
        tensorflow=V1alpha2TensorflowSpec(
            model_uri='gs://kfserving-samples/models/tensorflow/flowers'))

    kfsvc = V1alpha2KFService(
        api_version='serving.kubeflow.org/v1alpha1',
        kind='KFService',
        metadata=client.V1ObjectMeta(name='flower-sample'),
        spec=V1alpha2KFServiceSpec(default=default_model_spec))
    return kfsvc
def generate_kfservice():
    tf_spec = V1alpha2TensorflowSpec(
        storage_uri='gs://kfserving-samples/models/tensorflow/flowers')
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(tensorflow=tf_spec))

    kfsvc = V1alpha2KFService(
        api_version='serving.kubeflow.org/v1alpha2',
        kind='KFService',
        metadata=client.V1ObjectMeta(name='flower-sample'),
        spec=V1alpha2KFServiceSpec(default=default_endpoint_spec))
    return kfsvc
def EndpointSpec(framework, storage_uri):
    if framework == 'tensorflow':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri)))
    elif framework == 'pytorch':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            pytorch=V1alpha2PyTorchSpec(storage_uri=storage_uri)))
    elif framework == 'sklearn':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            sklearn=V1alpha2SKLearnSpec(storage_uri=storage_uri)))
    elif framework == 'xgboost':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            xgboost=V1alpha2XGBoostSpec(storage_uri=storage_uri)))
    elif framework == 'onnx':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            onnx=V1alpha2ONNXSpec(storage_uri=storage_uri)))
    elif framework == 'tensorrt':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorrt=V1alpha2TensorRTSpec(storage_uri=storage_uri)))
    else:
        raise ValueError('Error: No matching framework: ' + framework)
from kfp.components import InputPath

def deploy_model(namespace: str, trained_model_path: InputPath(str)):
    # Imports live inside the function so it can be packaged as a
    # lightweight Kubeflow Pipelines component.
    from kubernetes import client
    from kfserving import KFServingClient
    from kfserving import constants
    from kfserving import V1alpha2EndpointSpec
    from kfserving import V1alpha2PredictorSpec
    from kfserving import V1alpha2TensorflowSpec
    from kfserving import V1alpha2InferenceServiceSpec
    from kfserving import V1alpha2InferenceService
    from kubernetes.client import V1ResourceRequirements

    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    inference_service_name = 'inference112cbk'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri=trained_model_path,
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=inference_service_name,
                                     namespace=namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing = KFServingClient()
    KFServing.create(isvc)
    print('Inference service ' + inference_service_name + ' created ...')
    KFServing.get(inference_service_name, namespace=namespace,
                  watch=True, timeout_seconds=120)
    print('Model deployed')
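# A hedged sketch of turning deploy_model() into a pipeline step with the
# KFP v1 SDK; the base image and package list are assumptions, not part of
# the snippet above.
import kfp.components as comp

deploy_model_op = comp.create_component_from_func(
    deploy_model,
    base_image='python:3.7',
    packages_to_install=['kfserving', 'kubernetes'],
)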
def deploy_model(namespace, trained_model_path):
    logging.basicConfig(level=logging.INFO)
    logging.info('Starting deploy model step ..')
    logging.info('Input data ..')
    logging.info('namespace: {}'.format(namespace))
    logging.info('trained_model_path: {}'.format(trained_model_path))

    logging.info('STEP: DEPLOY MODEL (1/2) Generating definition..')
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    now = datetime.datetime.utcnow().strftime('%Y%m%d%H%M%S')
    inference_service_name = 'simple-model' + now
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri=trained_model_path,
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}))))
    # velascoluis: sidecar is disabled by https://github.com/knative/serving/issues/6829
    # Note: make sure the trained model path starts with file:// or gs://
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=inference_service_name,
            annotations={
                'sidecar.istio.io/inject': 'false',
                'autoscaling.knative.dev/target': '1'
            },
            namespace=namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing = KFServingClient()
    logging.info('STEP: DEPLOY MODEL (2/2) Creating inference service..')
    KFServing.create(isvc)
    logging.info('Inference service ' + inference_service_name + ' created ...')
    KFServing.get(inference_service_name, namespace=namespace,
                  watch=True, timeout_seconds=120)
    logging.info('Deploy model step finished')
def EndpointSpec(framework, storage_uri, service_account, min_replicas,
                 max_replicas):
    # Build the predictor shell first; replica bounds are only set when they
    # form a valid range, otherwise they are left to the controller defaults.
    endpointSpec = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
        service_account_name=service_account,
        min_replicas=(min_replicas if min_replicas >= 0 else None),
        max_replicas=(max_replicas
                      if max_replicas > 0 and max_replicas >= min_replicas
                      else None)))
    if framework == "tensorflow":
        endpointSpec.predictor.tensorflow = V1alpha2TensorflowSpec(
            storage_uri=storage_uri)
        return endpointSpec
    elif framework == "pytorch":
        endpointSpec.predictor.pytorch = V1alpha2PyTorchSpec(
            storage_uri=storage_uri)
        return endpointSpec
    elif framework == "sklearn":
        endpointSpec.predictor.sklearn = V1alpha2SKLearnSpec(
            storage_uri=storage_uri)
        return endpointSpec
    elif framework == "xgboost":
        endpointSpec.predictor.xgboost = V1alpha2XGBoostSpec(
            storage_uri=storage_uri)
        return endpointSpec
    elif framework == "onnx":
        endpointSpec.predictor.onnx = V1alpha2ONNXSpec(storage_uri=storage_uri)
        return endpointSpec
    elif framework == "tensorrt":
        endpointSpec.predictor.tensorrt = V1alpha2TensorRTSpec(
            storage_uri=storage_uri)
        return endpointSpec
    else:
        raise ValueError("Error: No matching framework: " + framework)
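# A minimal usage sketch for the EndpointSpec factory above; the service
# account name and model bucket are hypothetical placeholders.
default_endpoint_spec = EndpointSpec(
    framework='sklearn',
    storage_uri='gs://my-bucket/sklearn/model',  # hypothetical model location
    service_account='sa',
    min_replicas=1,
    max_replicas=3)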
from kubernetes import client
from kfserving import KFServingClient
from kfserving import constants
from kfserving import utils
from kfserving import V1alpha2EndpointSpec
from kfserving import V1alpha2PredictorSpec
from kfserving import V1alpha2TensorflowSpec
from kfserving import V1alpha2InferenceServiceSpec
from kfserving import V1alpha2InferenceService
from kubernetes.client import V1ResourceRequirements

namespace = utils.get_default_target_namespace()
api_version = constants.KFSERVING_GROUP + "/" + constants.KFSERVING_VERSION

default_endpoint_spec = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
    tensorflow=V1alpha2TensorflowSpec(
        storage_uri="s3://anonymous-model-result/result/saved_model",
        resources=V1ResourceRequirements(
            requests={"cpu": "100m", "memory": "1Gi"},
            limits={"cpu": "100m", "memory": "1Gi"}))))
isvc = V1alpha2InferenceService(
    api_version=api_version,
    kind=constants.KFSERVING_KIND,
    metadata=client.V1ObjectMeta(name="mnist-kfserving", namespace=namespace),
    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

KFServing = KFServingClient()
KFServing.set_credentials(
    storage_type="S3",
    namespace='anonymous',
    # ... remaining S3 options elided in the source
    # (e.g. credentials_file, s3_endpoint, s3_region)
)