def test_torchserve_grpc():
    service_name = "mnist-grpc"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri="gs://kfserving-examples/models/torchserve/image_classifier/v1",
            ports=[
                V1ContainerPort(container_port=7070, name="h2c", protocol="TCP")
            ],
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "1Gi"},
                limits={"cpu": "1", "memory": "1Gi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    with open("./data/torchserve_input.json", "rb") as f:
        data = f.read()
    input_data = {"data": data}

    stub = grpc_stub(service_name, KSERVE_TEST_NAMESPACE)
    response = stub.Predictions(
        inference_pb2.PredictionsRequest(model_name="mnist", input=input_data))

    prediction = response.prediction.decode("utf-8")
    json_output = json.loads(prediction)
    print(json_output)
    assert json_output["predictions"][0][0] == 2
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

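# A minimal sketch of the grpc_stub helper assumed by test_torchserve_grpc
# above. It presumes client stubs generated from TorchServe's inference.proto
# (inference_pb2 / inference_pb2_grpc) and an ingress gateway port-forwarded
# to localhost:8081; the port and hostname pattern are illustrative
# assumptions, not the shared e2e utility itself.
import grpc
import inference_pb2_grpc  # assumed: generated from TorchServe inference.proto


def grpc_stub(service_name, namespace):
    # Dial the (assumed) local ingress endpoint and set the authority so the
    # gateway routes the call to this InferenceService's virtual host.
    channel = grpc.insecure_channel(
        "localhost:8081",
        options=(("grpc.default_authority",
                  "{}.{}.example.com".format(service_name, namespace)),),
    )
    return inference_pb2_grpc.InferenceAPIsServiceStub(channel)
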
def test_transformer():
    service_name = "raw-transformer"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri="gs://kfserving-examples/models/torchserve/image_classifier/v1",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "1Gi"},
                limits={"cpu": "1", "memory": "1Gi"},
            ),
        ),
    )
    transformer = V1beta1TransformerSpec(
        min_replicas=1,
        containers=[
            V1Container(
                image="809251082950.dkr.ecr.us-west-2.amazonaws.com/kserve/image-transformer:"
                + os.environ.get("PULL_BASE_SHA"),
                name="kserve-container",
                resources=V1ResourceRequirements(
                    requests={"cpu": "100m", "memory": "1Gi"},
                    limits={"cpu": "100m", "memory": "1Gi"},
                ),
                args=["--model_name", "mnist"],
                env=[
                    V1EnvVar(
                        name="STORAGE_URI",
                        value="gs://kfserving-examples/models/torchserve/image_classifier/v1",
                    )
                ],
            )
        ],
    )

    annotations = {
        "serving.kserve.io/deploymentMode": "RawDeployment",
        "kubernetes.io/ingress.class": "istio",
    }
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name,
            namespace=KSERVE_TEST_NAMESPACE,
            annotations=annotations),
        spec=V1beta1InferenceServiceSpec(
            predictor=predictor, transformer=transformer),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(
            service_name, namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError as e:
        print(kserve_client.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE, "services",
            service_name + "-predictor-default"))
        raise e

    res = predict(service_name, "./data/transformer.json", model_name="mnist")
    assert res.get("predictions")[0] == 2
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

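# A minimal sketch of the predict helper shared by the HTTP tests in this
# file. It assumes requests go through an ingress gateway port-forwarded to
# localhost:8080 with the InferenceService hostname as the Host header; the
# real helper resolves the URL (and the v1 vs. v2 path) from the
# InferenceService status, so the host pattern, port, and protocol_version
# parameter here are illustrative assumptions.
import requests


def predict(service_name, input_json, model_name=None, protocol_version="v1"):
    model_name = model_name or service_name
    with open(input_json) as f:
        payload = json.load(f)
    # v2 uses the open inference protocol path; v1 uses the :predict verb.
    if protocol_version == "v2":
        path = "v2/models/{}/infer".format(model_name)
    else:
        path = "v1/models/{}:predict".format(model_name)
    headers = {
        "Host": "{}.{}.example.com".format(service_name, KSERVE_TEST_NAMESPACE)
    }
    resp = requests.post(
        "http://localhost:8080/" + path, json=payload, headers=headers)
    resp.raise_for_status()
    return resp.json()
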
def test_batcher():
    service_name = "isvc-pytorch-batcher"
    predictor = V1beta1PredictorSpec(
        batcher=V1beta1Batcher(
            max_batch_size=32,
            max_latency=5000,
        ),
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri="gs://kfserving-examples/models/torchserve/image_classifier/v1",
            resources=V1ResourceRequirements(
                requests={"cpu": "1", "memory": "4Gi"},
                limits={"cpu": "1", "memory": "4Gi"},
            ),
        ),
    )
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(
            service_name, namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError as e:
        print(kserve_client.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE, "services",
            service_name + "-predictor-default"))
        pods = kserve_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector="serving.kserve.io/inferenceservice={}".format(
                service_name))
        for pod in pods.items:
            print(pod)
        raise e

    # Fire four concurrent requests; with batching enabled they should be
    # grouped into one batch and all come back with the same batchId.
    with futures.ThreadPoolExecutor(max_workers=4) as executor:
        future_res = [
            executor.submit(
                lambda: predict(service_name,
                                "./data/torchserve_batch_input.json"))
            for _ in range(4)
        ]
    results = [f.result()["batchId"] for f in future_res]
    assert all(x == results[0] for x in results)
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

def test_pytorch():
    service_name = "isvc-pytorch"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri="gs://kfserving-samples/models/pytorch/cifar10",
            model_class_name="Net",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "2Gi"},
                limits={"cpu": "100m", "memory": "2Gi"},
            ),
        ),
    )
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(
            service_name, namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError as e:
        print(kserve_client.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE, "services",
            service_name + "-predictor-default"))
        pods = kserve_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector="serving.kserve.io/inferenceservice={}".format(
                service_name))
        for pod in pods.items:
            print(pod)
        raise e

    res = predict(service_name, "./data/cifar_input.json")
    assert np.argmax(res["predictions"]) == 3
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

def test_torchserve_v2_kserve():
    service_name = "mnist-v2"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri="gs://kfserving-examples/models/torchserve/image_classifier/v2",
            protocol_version="v2",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "1Gi"},
                limits={"cpu": "1", "memory": "1Gi"},
            ),
        ),
    )
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name, "./data/torchserve_input_v2.json",
                  model_name="mnist")
    assert res.get("outputs")[0]["data"] == [1]
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)