def test_sklearn_runtime_kserve():
    """E2E: deploy a sklearn model via the generic ModelSpec runtime path,
    predict over the v1 protocol, and verify the expected classes.

    Cleanup of the InferenceService runs in a ``finally`` so a failed
    prediction/assertion does not leak the resource into later runs.
    """
    service_name = "isvc-sklearn-runtime"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="sklearn"),
            storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
        ),
    )
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
        res = predict(service_name, "./data/iris_input.json")
        assert res["predictions"] == [1, 1]
    finally:
        # Always remove the InferenceService, even when the test fails,
        # so repeated runs do not collide with a leftover resource.
        kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_lightgbm_kserve():
    """E2E: deploy a LightGBM model, predict over v1, and verify the top
    class probability dominates.

    The InferenceService is deleted in a ``finally`` so a failed
    prediction/assertion does not leak the resource.
    """
    service_name = "isvc-lightgbm"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        lightgbm=V1beta1LightGBMSpec(
            storage_uri="gs://kfserving-examples/models/lightgbm/iris",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
        ),
    )
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
        res = predict(service_name, "./data/iris_input_v3.json")
        # First output is a probability vector; the leading class must dominate.
        assert res["predictions"][0][0] > 0.5
    finally:
        # Cleanup must run even on failure so reruns start from a clean slate.
        kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_sklearn_v2_kserve():
    """E2E: deploy a sklearn model with the v2 (Open Inference) protocol and
    verify the decoded outputs.

    The InferenceService is deleted in a ``finally`` so a failed
    prediction/assertion does not leak the resource.
    """
    service_name = "isvc-sklearn-v2"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            storage_uri="gs://seldon-models/sklearn/mms/lr_model",
            protocol_version="v2",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "512Mi"},
            ),
        ),
    )
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
        res = predict(service_name, "./data/iris_input_v2.json",
                      protocol_version="v2")
        assert res["outputs"][0]["data"] == [1, 1]
    finally:
        # Cleanup must run even on failure so reruns start from a clean slate.
        kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_pmml_runtime_kserve():
    """E2E: deploy a PMML model via the generic ModelSpec runtime path and
    verify the full structured prediction payload.

    The InferenceService is deleted in a ``finally`` so a failed
    prediction/assertion does not leak the resource.
    """
    service_name = 'isvc-pmml-runtime'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="pmml"),
            storage_uri='gs://kfserving-examples/models/pmml',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '256Mi'},
                limits={'cpu': '100m', 'memory': '256Mi'},
            ),
        ),
    )
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
        res = predict(service_name, './data/pmml_input.json')
        assert res["predictions"] == [{
            'Species': 'setosa',
            'Probability_setosa': 1.0,
            'Probability_versicolor': 0.0,
            'Probability_virginica': 0.0,
            'Node_Id': '2',
        }]
    finally:
        # Cleanup must run even on failure so reruns start from a clean slate.
        kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_torchserve_grpc():
    """E2E: deploy a TorchServe MNIST model exposing a gRPC (h2c) port and
    verify a prediction made through the gRPC stub.

    The InferenceService is deleted in a ``finally`` so a failed gRPC call
    or assertion does not leak the resource.
    """
    service_name = "mnist-grpc"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri="gs://kfserving-examples/models/torchserve/image_classifier/v1",
            # h2c port enables HTTP/2 cleartext for gRPC traffic.
            ports=[
                V1ContainerPort(container_port=7070, name="h2c", protocol="TCP")
            ],
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "1Gi"},
                limits={"cpu": "1", "memory": "1Gi"},
            ),
        ),
    )
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

        with open("./data/torchserve_input.json", 'rb') as f:
            data = f.read()

        stub = grpc_stub(service_name, KSERVE_TEST_NAMESPACE)
        response = stub.Predictions(
            inference_pb2.PredictionsRequest(model_name='mnist',
                                             input={'data': data}))
        json_output = json.loads(response.prediction.decode('utf-8'))
        print(json_output)
        assert json_output["predictions"][0][0] == 2
    finally:
        # Cleanup must run even on failure so reruns start from a clean slate.
        kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_transformer():
    """E2E: deploy a TorchServe predictor plus an image-transformer container
    in RawDeployment mode and verify a transformed MNIST prediction.

    On readiness failure the underlying service object is dumped for
    debugging; the InferenceService is always deleted in a ``finally``.
    """
    service_name = 'raw-transformer'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri='gs://kfserving-examples/models/torchserve/image_classifier/v1',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '1Gi'},
                limits={'cpu': '1', 'memory': '1Gi'},
            ),
        ),
    )
    transformer = V1beta1TransformerSpec(
        min_replicas=1,
        containers=[V1Container(
            # Transformer image is tagged with the CI commit under test.
            # NOTE(review): os.environ.get returns None when PULL_BASE_SHA is
            # unset, which would raise a TypeError here — CI is expected to
            # always set it.
            image='809251082950.dkr.ecr.us-west-2.amazonaws.com/kserve/image-transformer:'
                  + os.environ.get("PULL_BASE_SHA"),
            name='kserve-container',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '1Gi'},
                limits={'cpu': '100m', 'memory': '1Gi'}),
            args=["--model_name", "mnist"],
            env=[V1EnvVar(
                name="STORAGE_URI",
                value="gs://kfserving-examples/models/torchserve/image_classifier/v1")])]
    )

    annotations = {
        'serving.kserve.io/deploymentMode': 'RawDeployment',
        'kubernetes.io/ingress.class': 'istio',
    }
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE,
            annotations=annotations),
        spec=V1beta1InferenceServiceSpec(predictor=predictor,
                                         transformer=transformer))

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        try:
            kserve_client.wait_isvc_ready(
                service_name, namespace=KSERVE_TEST_NAMESPACE)
        except RuntimeError as e:
            # Dump the backing service object to aid CI debugging, then fail.
            print(kserve_client.api_instance.get_namespaced_custom_object(
                "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE, "services",
                service_name + "-predictor-default"))
            raise e
        res = predict(service_name, "./data/transformer.json", model_name="mnist")
        assert res.get("predictions")[0] == 2
    finally:
        # Cleanup must run even on failure so reruns start from a clean slate.
        kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_paddle_runtime():
    """E2E: deploy a Paddle resnet50 model via the ModelSpec runtime path and
    verify the argmax of the prediction.

    On readiness failure the service's pods are logged for debugging; the
    InferenceService is always deleted in a ``finally``.
    """
    service_name = 'isvc-paddle-runtime'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="paddle"),
            storage_uri="https://zhouti-mcp-edge.cdn.bcebos.com/resnet50.tar.gz",
            resources=V1ResourceRequirements(
                requests={"cpu": "200m", "memory": "4Gi"},
                limits={"cpu": "200m", "memory": "4Gi"},
            ),
        ),
    )
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=V1ObjectMeta(name=service_name,
                              namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        try:
            # Large model image/archive: allow a longer readiness window.
            kserve_client.wait_isvc_ready(service_name,
                                          namespace=KSERVE_TEST_NAMESPACE,
                                          timeout_seconds=720)
        except RuntimeError as e:
            # Log every pod of the service to aid CI debugging, then fail.
            pods = kserve_client.core_api.list_namespaced_pod(
                KSERVE_TEST_NAMESPACE,
                label_selector='serving.kserve.io/inferenceservice={}'.format(
                    service_name))
            for pod in pods.items:
                logging.info(pod)
            raise e
        res = predict(service_name, './data/jay.json')
        assert np.argmax(res["predictions"][0]) == 17
    finally:
        # Cleanup must run even on failure so reruns start from a clean slate.
        kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_pytorch():
    """E2E: deploy a PyTorch CIFAR-10 model and verify the argmax of the
    prediction.

    On readiness failure the backing service object and pods are printed for
    debugging; the InferenceService is always deleted in a ``finally``.
    """
    service_name = 'isvc-pytorch'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
            model_class_name="Net",
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '2Gi'},
                limits={'cpu': '100m', 'memory': '2Gi'},
            ),
        ),
    )
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        try:
            kserve_client.wait_isvc_ready(service_name,
                                          namespace=KSERVE_TEST_NAMESPACE)
        except RuntimeError as e:
            # Dump the backing service object and pods for CI debugging.
            print(
                kserve_client.api_instance.get_namespaced_custom_object(
                    "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE,
                    "services", service_name + "-predictor-default"))
            pods = kserve_client.core_api.list_namespaced_pod(
                KSERVE_TEST_NAMESPACE,
                label_selector='serving.kserve.io/inferenceservice={}'.format(
                    service_name))
            for pod in pods.items:
                print(pod)
            raise e
        res = predict(service_name, './data/cifar_input.json')
        assert np.argmax(res["predictions"]) == 3
    finally:
        # Cleanup must run even on failure so reruns start from a clean slate.
        kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_xgboost_v2_runtime_kserve():
    """E2E: deploy an XGBoost model on the explicit ``kserve-mlserver``
    runtime with the v2 protocol and verify the outputs.

    The InferenceService is deleted in a ``finally`` so a failed
    prediction/assertion does not leak the resource.
    """
    service_name = "isvc-xgboost-v2-runtime"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="xgboost"),
            runtime="kserve-mlserver",
            storage_uri="gs://kfserving-samples/models/xgboost/iris",
            protocol_version="v2",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "1024Mi"},
            ),
        ),
    )
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
        res = predict(service_name, "./data/iris_input_v2.json",
                      protocol_version="v2")
        assert res["outputs"][0]["data"] == [1.0, 1.0]
    finally:
        # Cleanup must run even on failure so reruns start from a clean slate.
        kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_raw_deployment_kserve():
    """E2E: deploy a sklearn model in RawDeployment mode (plain Deployment
    instead of Knative) behind the istio ingress class and verify a
    v1-protocol prediction.

    The InferenceService is deleted in a ``finally`` so a failed
    prediction/assertion does not leak the resource.
    """
    service_name = "raw-sklearn"
    annotations = {
        'serving.kserve.io/deploymentMode': 'RawDeployment',
        'kubernetes.io/ingress.class': 'istio',
    }
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
        ),
    )
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name,
            namespace=KSERVE_TEST_NAMESPACE,
            annotations=annotations,
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
        res = predict(service_name, "./data/iris_input.json")
        assert res["predictions"] == [1, 1]
    finally:
        # Cleanup must run even on failure so reruns start from a clean slate.
        kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_lightgbm_v2_runtime_kserve():
    """E2E: deploy a LightGBM model on the explicit ``kserve-mlserver``
    runtime with the v2 protocol and verify the flattened probability
    outputs for both test rows.

    The InferenceService is deleted in a ``finally`` so a failed
    prediction/assertion does not leak the resource.
    """
    service_name = "isvc-lightgbm-v2-runtime"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="lightgbm"),
            runtime="kserve-mlserver",
            storage_uri="gs://kfserving-examples/models/lightgbm/v2/iris",
            protocol_version="v2",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "1", "memory": "1Gi"},
            ),
        ),
    )
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
        res = predict(service_name, "./data/iris_input_v2.json",
                      protocol_version="v2")
        # Two rows of per-class probabilities, flattened into one list.
        assert res["outputs"][0]["data"] == [
            8.796664107010673e-06,
            0.9992300031041593,
            0.0007612002317336916,
            4.974786820804187e-06,
            0.9999919650711493,
            3.0601420299625077e-06]
    finally:
        # Cleanup must run even on failure so reruns start from a clean slate.
        kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_torchserve_runtime_kserve():
    """E2E: deploy a TorchServe MNIST model via the ModelSpec runtime path
    (v1 protocol) and verify the prediction.

    The InferenceService is deleted in a ``finally`` so a failed
    prediction/assertion does not leak the resource.
    """
    service_name = "mnist-runtime"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="pytorch"),
            storage_uri="gs://kfserving-examples/models/torchserve/image_classifier/v1",
            protocol_version="v1",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "4Gi"},
                limits={"cpu": "1", "memory": "4Gi"},
            ),
        ),
    )
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
        res = predict(service_name, "./data/torchserve_input.json",
                      model_name="mnist")
        assert res.get("predictions")[0] == 2
    finally:
        # Cleanup must run even on failure so reruns start from a clean slate.
        kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_mms_sklearn_kserve(protocol_version: str, storage_uri: str):
    """E2E multi-model-serving test: one sklearn InferenceService hosting two
    TrainedModels, queried over the given protocol version.

    Fixes over the previous version: all created TrainedModels and the
    InferenceService are cleaned up in a ``finally`` even when a step fails,
    and only models that were actually created are deleted.
    """
    # Define an inference service acting as the multi-model host.
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            protocol_version=protocol_version,
            resources=client.V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "512Mi"},
                limits={"cpu": "100m", "memory": "1024Mi"},
            ),
        ),
    )
    service_name = f"isvc-sklearn-mms-{protocol_version}"
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    # Create an instance of inference service with isvc.
    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)

    model_names = [
        f"model1-sklearn-{protocol_version}",
        f"model2-sklearn-{protocol_version}",
    ]
    created_models = []  # only delete what was actually created
    try:
        kserve_client.wait_isvc_ready(service_name,
                                      namespace=KSERVE_TEST_NAMESPACE)
        cluster_ip = get_cluster_ip()

        for model_name in model_names:
            model_spec = V1alpha1ModelSpec(
                storage_uri=storage_uri,
                memory="128Mi",
                framework="sklearn",
            )
            model = V1alpha1TrainedModel(
                api_version=constants.KSERVE_V1ALPHA1,
                kind=constants.KSERVE_KIND_TRAINEDMODEL,
                metadata=client.V1ObjectMeta(name=model_name,
                                             namespace=KSERVE_TEST_NAMESPACE),
                spec=V1alpha1TrainedModelSpec(inference_service=service_name,
                                              model=model_spec),
            )
            # Create instances of trained models using model1 and model2.
            kserve_client.create_trained_model(model, KSERVE_TEST_NAMESPACE)
            created_models.append(model_name)
            kserve_client.wait_model_ready(
                service_name,
                model_name,
                isvc_namespace=KSERVE_TEST_NAMESPACE,
                isvc_version=constants.KSERVE_V1BETA1_VERSION,
                protocol_version=protocol_version,
                cluster_ip=cluster_ip,
            )

        input_json = ("./data/iris_input_v2.json"
                      if protocol_version == "v2"
                      else "./data/iris_input.json")
        responses = [
            predict(
                service_name,
                input_json,
                model_name=model_name,
                protocol_version=protocol_version,
            )
            for model_name in model_names
        ]
        if protocol_version == "v1":
            assert responses[0]["predictions"] == [1, 1]
            assert responses[1]["predictions"] == [1, 1]
        elif protocol_version == "v2":
            assert responses[0]["outputs"][0]["data"] == [1, 1]
            assert responses[1]["outputs"][0]["data"] == [1, 1]
    finally:
        # Clean up trained models and the inference service even on failure.
        for model_name in created_models:
            kserve_client.delete_trained_model(model_name, KSERVE_TEST_NAMESPACE)
        kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_triton_runtime():
    """E2E: deploy a TorchScript model on the ``kserve-tritonserver`` runtime
    with a grpc-v2 transformer in front, and verify the argmax of the
    transformed cifar10 prediction.

    On readiness failure the backing service object and deployments are
    printed for debugging; the InferenceService is always deleted in a
    ``finally``.
    """
    service_name = 'isvc-triton-runtime'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="pytorch"),
            runtime="kserve-tritonserver",
            storage_uri='gs://kfserving-examples/models/torchscript',
            # h2c port enables HTTP/2 cleartext for gRPC traffic.
            ports=[
                V1ContainerPort(name="h2c", protocol="TCP", container_port=9000)
            ]))
    transformer = V1beta1TransformerSpec(
        min_replicas=1,
        containers=[
            V1Container(
                # Transformer image tagged with the CI commit under test.
                # NOTE(review): os.environ.get returns None if PULL_BASE_SHA
                # is unset — CI is expected to always set it.
                image='809251082950.dkr.ecr.us-west-2.amazonaws.com/kserve/image-transformer:'
                      + os.environ.get("PULL_BASE_SHA"),
                name='kserve-container',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}),
                args=["--model_name", "cifar10", "--protocol", "grpc-v2"])
        ])
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor,
                                         transformer=transformer))

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        try:
            kserve_client.wait_isvc_ready(service_name,
                                          namespace=KSERVE_TEST_NAMESPACE)
        except RuntimeError as e:
            # Dump the backing service object and deployments for debugging.
            print(
                kserve_client.api_instance.get_namespaced_custom_object(
                    "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE,
                    "services", service_name + "-predictor-default"))
            deployments = kserve_client.app_api.list_namespaced_deployment(
                KSERVE_TEST_NAMESPACE,
                label_selector='serving.kserve.io/'
                               'inferenceservice={}'.format(service_name))
            for deployment in deployments.items:
                print(deployment)
            raise e
        res = predict(service_name, "./data/image.json", model_name='cifar10')
        assert np.argmax(res.get("predictions")[0]) == 5
    finally:
        # Cleanup must run even on failure so reruns start from a clean slate.
        kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)