def test_lightgbm_kfserving():
    service_name = "isvc-lightgbm"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        lightgbm=V1beta1LightGBMSpec(
            storage_uri="gs://kfserving-examples/models/lightgbm",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    res = predict(service_name, "./data/iris_input_v3.json")
    assert res["predictions"][0][0] > 0.5
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def test_triton():
    service_name = 'isvc-triton'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        triton=V1beta1TritonSpec(
            storage_uri='gs://kfserving-samples/models/tensorrt'
        )
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(KFServing.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
            "services", service_name + "-predictor-default"))
        deployments = KFServing.app_api.list_namespaced_deployment(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(service_name))
        for deployment in deployments.items:
            print(deployment)
        raise e
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def test_torchserve_kfserving():
    service_name = "mnist"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri="gs://kfserving-examples/models/torchserve/image_classifier",
            protocol_version="v1",
            resources=V1ResourceRequirements(
                requests={"cpu": "1", "memory": "4Gi"},
                limits={"cpu": "1", "memory": "4Gi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=api_v1beta1_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    res = predict(service_name, "./data/torchserve_input.json")
    assert res.get("predictions")[0] == 2
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def test_transformer():
    service_name = 'isvc-transformer'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
            model_class_name="Net",
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '256Mi'},
                limits={'cpu': '100m', 'memory': '256Mi'})),
    )
    transformer = V1beta1TransformerSpec(
        min_replicas=1,
        containers=[
            V1Container(
                image='809251082950.dkr.ecr.us-west-2.amazonaws.com/kfserving/image-transformer:latest',
                name='kfserving-container',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))
        ])

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor,
                                         transformer=transformer))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(KFServing.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
            "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(service_name))
        for pod in pods.items:
            print(pod)
        raise e

    res = predict(service_name, './data/transformer.json')
    assert np.argmax(res["predictions"]) == 3
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def test_pmml_kfserving():
    service_name = 'isvc-pmml'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pmml=V1beta1PMMLSpec(
            storage_uri='gs://kfserving-examples/models/pmml',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '256Mi'},
                limits={'cpu': '100m', 'memory': '256Mi'})))

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    res = predict(service_name, './data/pmml_input.json')
    assert res["predictions"] == [{
        'Species': 'setosa',
        'Probability_setosa': 1.0,
        'Probability_versicolor': 0.0,
        'Probability_virginica': 0.0,
        'Node_Id': '2'
    }]
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def test_xgboost_v2_kfserving():
    service_name = "isvc-xgboost-v2"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        xgboost=V1beta1XGBoostSpec(
            storage_uri="gs://kfserving-samples/models/xgboost/iris",
            protocol_version="v2",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KFSERVING_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    res = predict(service_name, "./data/iris_input_v2.json", protocol_version="v2")
    assert res["outputs"][0]["data"] == [1.0, 1.0]
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def test_tensorflow_kfserving():
    service_name = 'isvc-tensorflow'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        tensorflow=V1beta1TFServingSpec(
            storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
            resources=V1ResourceRequirements(
                requests={'cpu': '1', 'memory': '2Gi'},
                limits={'cpu': '1', 'memory': '2Gi'})))

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    res = predict(service_name, './data/flower_input.json')
    assert np.argmax(res["predictions"][0].get('scores')) == 0

    # Delete the InferenceService
    KFServing.delete(service_name, namespace=KFSERVING_TEST_NAMESPACE)

def test_tabular_explainer():
    service_name = 'isvc-explainer-tabular'
    predictor = V1beta1PredictorSpec(
        sklearn=V1beta1SKLearnSpec(
            storage_uri='gs://seldon-models/sklearn/income/model',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '1Gi'},
                limits={'cpu': '100m', 'memory': '1Gi'})))
    explainer = V1beta1ExplainerSpec(
        min_replicas=1,
        alibi=V1beta1AlibiExplainerSpec(
            name='kfserving-container',
            type='AnchorTabular',
            storage_uri='gs://seldon-models/sklearn/income/explainer-py37-0.6.0',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '1Gi'},
                limits={'cpu': '100m', 'memory': '1Gi'})))

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor,
                                         explainer=explainer))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE,
                                  timeout_seconds=720)
    except RuntimeError as e:
        logging.info(KFServing.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
            "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/income_input.json')
    assert res["predictions"] == [0]
    precision = explain(service_name, './data/income_input.json')
    assert precision > 0.9
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def create_inference_service(metadata, predictor_spec):
    """Build and return a V1beta1InferenceService object."""
    return V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=metadata,
        spec=V1beta1InferenceServiceSpec(predictor=predictor_spec),
    )

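# Illustrative usage sketch for create_inference_service. This is an assumption
# added for clarity, not an existing test (the non-test_ name keeps pytest from
# collecting it); the sklearn iris URI is simply reused from other tests in this
# suite as an example value.
def example_create_inference_service_usage():
    metadata = client.V1ObjectMeta(name="isvc-helper-example",
                                   namespace=KFSERVING_TEST_NAMESPACE)
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            storage_uri="gs://kfserving-samples/models/sklearn/iris"))
    # Build the InferenceService via the shared helper, then submit it.
    isvc = create_inference_service(metadata, predictor)
    KFServing.create(isvc)
    KFServing.wait_isvc_ready("isvc-helper-example",
                              namespace=KFSERVING_TEST_NAMESPACE)
    KFServing.delete("isvc-helper-example", KFSERVING_TEST_NAMESPACE)
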
def test_tabular_explainer():
    service_name = 'art-explainer'
    isvc = V1beta1InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(
            predictor=V1beta1PredictorSpec(containers=[
                V1Container(
                    name="predictor",
                    # Update the image below to the aipipeline org.
                    image='aipipeline/art-server:mnist-predictor',
                    command=[
                        "python", "-m", "sklearnserver", "--model_name",
                        "art-explainer", "--model_dir",
                        "file://sklearnserver/sklearnserver/example_model"
                    ])
            ]),
            explainer=V1beta1ExplainerSpec(
                min_replicas=1,
                art=V1beta1ARTExplainerSpec(
                    type='SquareAttack',
                    name='explainer',
                    config={"nb_classes": "10"}))))

    KFServing.create(isvc, version=kfserving_version)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE,
                                  timeout_seconds=720)
    except RuntimeError as e:
        logging.info(KFServing.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
            "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/mnist_input_bw_flat.json')
    assert res["predictions"] == [3]
    adv_prediction = explain_art(service_name, './data/mnist_input_bw.json')
    assert adv_prediction != 3
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE, version=kfserving_version)

def test_batcher():
    service_name = 'isvc-pytorch-batcher'
    predictor = V1beta1PredictorSpec(
        batcher=V1beta1Batcher(
            max_batch_size=32,
            max_latency=5000,
        ),
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
            model_class_name='Net',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '2Gi'},
                limits={'cpu': '100m', 'memory': '2Gi'}
            )
        )
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KFSERVING_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(KFServing.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
            "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(service_name))
        for pod in pods.items:
            print(pod)
        raise e

    # Send four concurrent requests; the batcher should group them into the
    # same batch, so every response carries the same batchId.
    with futures.ThreadPoolExecutor(max_workers=4) as executor:
        future_res = [
            executor.submit(lambda: predict(service_name, './data/cifar_input.json'))
            for _ in range(4)
        ]
    results = [f.result()["batchId"] for f in future_res]
    assert all(x == results[0] for x in results)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def test_paddle():
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        paddle=V1beta1PaddleServerSpec(
            storage_uri="https://zhouti-mcp-edge.cdn.bcebos.com/resnet50.tar.gz",
            resources=V1ResourceRequirements(
                requests={"cpu": "200m", "memory": "4Gi"},
                limits={"cpu": "200m", "memory": "4Gi"},
            )))

    service_name = 'isvc-paddle'
    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=V1ObjectMeta(name=service_name,
                              namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE,
                                  timeout_seconds=720)
    except RuntimeError as e:
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/jay.json')
    assert np.argmax(res["predictions"][0]) == 17
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def test_raw_deployment_kfserving():
    service_name = "raw-sklearn"
    annotations = dict()
    annotations['serving.kubeflow.org/raw'] = 'true'
    annotations['kubernetes.io/ingress.class'] = 'istio'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            storage_uri="gs://kfserving-samples/models/sklearn/iris",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name,
            namespace=KFSERVING_TEST_NAMESPACE,
            annotations=annotations,
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    res = predict(service_name, "./data/iris_input.json")
    assert res["predictions"] == [1, 1]
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def test_sklearn_v2_kfserving():
    service_name = "isvc-sklearn-v2"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            storage_uri="gs://seldon-models/sklearn/iris",
            protocol_version="v2",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=api_v1beta1_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    KFServing.create(isvc, version=constants.KFSERVING_V1BETA1_VERSION)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    res = predict(service_name, "./data/iris_input_v2.json", protocol_version="v2")
    assert res["outputs"][0]["data"] == [1, 2]
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def test_kfserving_logger():
    # Deploy a message-dumper InferenceService that records the CloudEvents
    # emitted by the predictor's logger.
    msg_dumper = 'message-dumper'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        containers=[
            V1Container(
                name="kfserving-container",
                image='gcr.io/knative-releases/knative.dev/eventing-contrib/cmd/event_display')
        ]
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=msg_dumper, namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(msg_dumper, namespace=KFSERVING_TEST_NAMESPACE)

    # Deploy an sklearn predictor whose logger forwards all inference events
    # to the message dumper.
    service_name = 'isvc-logger'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        logger=V1beta1LoggerSpec(
            mode="all",
            url="http://message-dumper." + KFSERVING_TEST_NAMESPACE + ".svc.cluster.local"
        ),
        sklearn=V1beta1SKLearnSpec(
            storage_uri='gs://kfserving-samples/models/sklearn/iris',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '256Mi'},
                limits={'cpu': '100m', 'memory': '256Mi'}
            )
        )
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError:
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(service_name))
        for pod in pods.items:
            print(pod)

    res = predict(service_name, './data/iris_input.json')
    assert res["predictions"] == [1, 1]

    # Check the message dumper logs for both the request and response events.
    pods = KFServing.core_api.list_namespaced_pod(
        KFSERVING_TEST_NAMESPACE,
        label_selector='serving.kubeflow.org/inferenceservice={}'.format(msg_dumper))
    time.sleep(5)
    log = ''
    for pod in pods.items:
        log += KFServing.core_api.read_namespaced_pod_log(
            name=pod.metadata.name,
            namespace=pod.metadata.namespace,
            container="kfserving-container")
    print(log)
    assert "org.kubeflow.serving.inference.request" in log
    assert "org.kubeflow.serving.inference.response" in log
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
    KFServing.delete(msg_dumper, KFSERVING_TEST_NAMESPACE)

def test_tabular_explainer():
    service_name = 'aix-explainer'
    predictor = V1beta1PredictorSpec(containers=[
        V1Container(
            name="predictor",
            image='aipipeline/rf-predictor:0.4.0',
            command=["python", "-m", "rfserver", "--model_name", "aix-explainer"],
            resources=V1ResourceRequirements(
                requests={'cpu': '500m', 'memory': '1Gi'},
                limits={'cpu': '500m', 'memory': '1Gi'}))
    ])
    explainer = V1beta1ExplainerSpec(
        min_replicas=1,
        aix=V1beta1AIXExplainerSpec(
            name='explainer',
            type='LimeImages',
            resources=V1ResourceRequirements(
                requests={'cpu': '500m', 'memory': '1Gi'},
                limits={'cpu': '500m', 'memory': '1Gi'})))

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor,
                                         explainer=explainer))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE,
                                  timeout_seconds=720)
    except RuntimeError as e:
        logging.info(KFServing.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
            "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/mnist_input.json')
    assert res["predictions"] == [[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
    mask = explain_aix(service_name, './data/mnist_input.json')
    percent_in_mask = np.count_nonzero(mask) / np.size(np.array(mask))
    assert percent_in_mask > 0.6
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def test_transformer():
    service_name = 'raw'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri='gs://kfserving-examples/models/torchserve/image_classifier',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '2Gi'},
                limits={'cpu': '100m', 'memory': '2Gi'})),
    )
    transformer = V1beta1TransformerSpec(
        min_replicas=1,
        containers=[
            V1Container(
                image='kfserving/torchserve-image-transformer:latest',
                name='kfserving-container',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '2Gi'},
                    limits={'cpu': '100m', 'memory': '2Gi'}),
                env=[
                    V1EnvVar(
                        name="STORAGE_URI",
                        value="gs://kfserving-examples/models/torchserve/image_classifier"
                    )
                ])
        ])
    annotations = dict()
    annotations['serving.kubeflow.org/raw'] = 'true'
    annotations['kubernetes.io/ingress.class'] = 'istio'

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name,
            namespace=KFSERVING_TEST_NAMESPACE,
            annotations=annotations),
        spec=V1beta1InferenceServiceSpec(predictor=predictor,
                                         transformer=transformer))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        raise e

    time.sleep(30)
    isvc = KFServing.get(
        service_name,
        namespace=KFSERVING_TEST_NAMESPACE,
    )
    cluster_ip = get_cluster_ip()
    logging.info("clusterip = %s", cluster_ip)
    host = isvc["status"]["url"]
    host = host[host.rfind('/') + 1:]
    url = 'http://{}/v1/models/mnist:predict'.format(cluster_ip)
    logging.info("url = %s ", url)
    headers = {"Host": host}
    data_str = '{"instances": [{"data": "iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAAAAABXZoBIAAAAw0lE\
QVR4nGNgGFggVVj4/y8Q2GOR83n+58/fP0DwcSqmpNN7oOTJw6f+/H2pjUU2JCSEk0EWqN0cl828e/FIxvz9/9cCh1\
zS5z9/G9mwyzl/+PNnKQ45nyNAr9ThMHQ/UG4tDofuB4bQIhz6fIBenMWJQ+7Vn7+zeLCbKXv6z59NOPQVgsIcW\
4QA9YFi6wNQLrKwsBebW/68DJ388Nun5XFocrqvIFH59+XhBAxThTfeB0r+vP/QHbuDCgr2JmOXoSsAAKK7b\
U3vISS4AAAAAElFTkSuQmCC", "target": 0}]}'
    res = requests.post(url, data_str, headers=headers)
    logging.info("res.text = %s", res.text)
    preds = json.loads(res.content.decode("utf-8"))
    assert preds["predictions"] == [2]
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

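# Hedged sketch (an assumption, not taken from the original suite): the
# parametrized test below expects protocol_version and storage_uris to be
# supplied externally, e.g. via a pytest parametrization along these lines.
# The example URIs simply reuse the sklearn iris models referenced elsewhere
# in this file.
#
# @pytest.mark.parametrize(
#     "protocol_version,storage_uris",
#     [
#         ("v1", ["gs://kfserving-samples/models/sklearn/iris",
#                 "gs://kfserving-samples/models/sklearn/iris"]),
#         ("v2", ["gs://seldon-models/sklearn/iris",
#                 "gs://seldon-models/sklearn/iris"]),
#     ],
# )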
def test_mms_sklearn_kfserving(protocol_version: str, storage_uris: List[str]):
    # Define an inference service
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            protocol_version=protocol_version,
            resources=client.V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
        ),
    )

    service_name = f"isvc-sklearn-mms-{protocol_version}"
    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    # Create an instance of inference service with isvc
    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)

    cluster_ip = get_cluster_ip()
    model_names = [
        f"model1-sklearn-{protocol_version}",
        f"model2-sklearn-{protocol_version}",
    ]
    for model_name, storage_uri in zip(model_names, storage_uris):
        model_spec = V1alpha1ModelSpec(
            storage_uri=storage_uri,
            memory="128Mi",
            framework="sklearn",
        )
        model = V1alpha1TrainedModel(
            api_version=constants.KFSERVING_V1ALPHA1,
            kind=constants.KFSERVING_KIND_TRAINEDMODEL,
            metadata=client.V1ObjectMeta(
                name=model_name, namespace=KFSERVING_TEST_NAMESPACE),
            spec=V1alpha1TrainedModelSpec(
                inference_service=service_name, model=model_spec),
        )

        # Create instances of trained models using model1 and model2
        KFServing.create_trained_model(model, KFSERVING_TEST_NAMESPACE)
        KFServing.wait_model_ready(
            service_name,
            model_name,
            isvc_namespace=KFSERVING_TEST_NAMESPACE,
            isvc_version=constants.KFSERVING_V1BETA1_VERSION,
            protocol_version=protocol_version,
            cluster_ip=cluster_ip,
        )

    input_json = "./data/iris_input.json"
    if protocol_version == "v2":
        input_json = "./data/iris_input_v2.json"

    responses = [
        predict(
            service_name,
            input_json,
            model_name=model_name,
            protocol_version=protocol_version,
        ) for model_name in model_names
    ]

    if protocol_version == "v1":
        assert responses[0]["predictions"] == [1, 1]
        assert responses[1]["predictions"] == [1, 1]
    elif protocol_version == "v2":
        assert responses[0]["outputs"][0]["data"] == [1, 2]
        assert responses[1]["outputs"][0]["data"] == [1, 2]

    # Clean up inference service and trained models
    for model_name in model_names:
        KFServing.delete_trained_model(model_name, KFSERVING_TEST_NAMESPACE)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)