def test_transformer():
    """Exercise an InferenceService that chains a transformer and a PyTorch predictor.

    The test creates the ``isvc-transformer`` service in
    ``KFSERVING_TEST_NAMESPACE``, waits until it reports ready, sends
    ``./data/transformer.json`` through ``predict`` and asserts that the
    arg-max of the returned ``predictions`` is ``3``.

    If the service does not become ready, the backing Knative service and the
    pods labelled with this InferenceService are printed before the
    ``RuntimeError`` is re-raised.  The InferenceService is deleted whether
    the test passes or fails.
    """
    service_name = 'isvc-transformer'

    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
            model_class_name="Net",
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '256Mi'},
                limits={'cpu': '100m', 'memory': '256Mi'},
            ),
        ),
    )

    transformer = V1beta1TransformerSpec(
        min_replicas=1,
        containers=[
            V1Container(
                image='809251082950.dkr.ecr.us-west-2.amazonaws.com/kfserving/image-transformer:latest',
                name='kfserving-container',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'},
                ),
            )
        ],
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name,
            namespace=KFSERVING_TEST_NAMESPACE,
        ),
        spec=V1beta1InferenceServiceSpec(
            predictor=predictor,
            transformer=transformer,
        ),
    )

    # Create outside the try block: if creation itself fails there is nothing
    # to clean up, and a failing delete would only obscure the real error.
    KFServing.create(isvc)
    try:
        try:
            KFServing.wait_isvc_ready(service_name,
                                      namespace=KFSERVING_TEST_NAMESPACE)
        except RuntimeError:
            # Dump the Knative service and the pods so the failure can be
            # diagnosed from the test log alone.
            print(
                KFServing.api_instance.get_namespaced_custom_object(
                    "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
                    "services", service_name + "-predictor-default"))
            pods = KFServing.core_api.list_namespaced_pod(
                KFSERVING_TEST_NAMESPACE,
                label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                    service_name))
            for pod in pods.items:
                print(pod)
            raise

        res = predict(service_name, './data/transformer.json')
        assert np.argmax(res["predictions"]) == 3
    finally:
        # Always remove the service so a failed run does not leak resources
        # into the shared test namespace.
        KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
def test_batcher():
    """Check that concurrent requests to a batcher-enabled predictor share a batch.

    The test creates the ``isvc-pytorch-batcher`` InferenceService with a
    ``V1beta1Batcher`` (``max_batch_size=32``, ``max_latency=5000``) in
    ``KFSERVING_TEST_NAMESPACE`` and waits until it reports ready.  It then
    issues four ``predict`` calls for ``./data/cifar_input.json`` from a
    four-worker thread pool and asserts that every response carries the same
    ``batchId``.

    If the service does not become ready, the backing Knative service and the
    pods labelled with this InferenceService are printed before the
    ``RuntimeError`` is re-raised.  The InferenceService is deleted whether
    the test passes or fails.
    """
    service_name = 'isvc-pytorch-batcher'

    predictor = V1beta1PredictorSpec(
        batcher=V1beta1Batcher(
            max_batch_size=32,
            max_latency=5000,
        ),
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
            model_class_name='Net',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '2Gi'},
                limits={'cpu': '100m', 'memory': '2Gi'},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name,
            namespace=KFSERVING_TEST_NAMESPACE,
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    # Create outside the try block: if creation itself fails there is nothing
    # to clean up, and a failing delete would only obscure the real error.
    KFServing.create(isvc)
    try:
        try:
            KFServing.wait_isvc_ready(service_name,
                                      namespace=KFSERVING_TEST_NAMESPACE)
        except RuntimeError:
            # Dump the Knative service and the pods so the failure can be
            # diagnosed from the test log alone.
            print(
                KFServing.api_instance.get_namespaced_custom_object(
                    "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
                    "services", service_name + "-predictor-default"))
            pods = KFServing.core_api.list_namespaced_pod(
                KFSERVING_TEST_NAMESPACE,
                label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                    service_name))
            for pod in pods.items:
                print(pod)
            raise

        # Fire the requests concurrently; leaving the ``with`` block waits for
        # all submitted calls to finish.
        with futures.ThreadPoolExecutor(max_workers=4) as executor:
            future_res = [
                executor.submit(predict, service_name, './data/cifar_input.json')
                for _ in range(4)
            ]

        results = [f.result()["batchId"] for f in future_res]
        assert all(x == results[0] for x in results)
    finally:
        # Always remove the service so a failed run does not leak resources
        # into the shared test namespace.
        KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
def test_transformer():
    """Exercise a raw-deployment InferenceService with a TorchServe image transformer.

    The test creates the ``raw`` InferenceService in
    ``KFSERVING_TEST_NAMESPACE`` with the ``serving.kubeflow.org/raw`` and
    ``kubernetes.io/ingress.class`` annotations set, waits until it reports
    ready, and then posts a single base64-encoded image to
    ``/v1/models/mnist:predict`` on the address returned by
    ``get_cluster_ip()``.  The ``Host`` header is taken from the last path
    segment of the service's ``status.url``.  The response must contain
    ``predictions == [2]``.

    The InferenceService is deleted whether the test passes or fails.
    """
    service_name = 'raw'
    model_uri = 'gs://kfserving-examples/models/torchserve/image_classifier'

    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri=model_uri,
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '2Gi'},
                limits={'cpu': '100m', 'memory': '2Gi'},
            ),
        ),
    )

    transformer = V1beta1TransformerSpec(
        min_replicas=1,
        containers=[
            V1Container(
                image='kfserving/torchserve-image-transformer:latest',
                name='kfserving-container',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '2Gi'},
                    limits={'cpu': '100m', 'memory': '2Gi'},
                ),
                env=[V1EnvVar(name="STORAGE_URI", value=model_uri)],
            )
        ],
    )

    annotations = {
        'serving.kubeflow.org/raw': 'true',
        'kubernetes.io/ingress.class': 'istio',
    }

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name,
            namespace=KFSERVING_TEST_NAMESPACE,
            annotations=annotations,
        ),
        spec=V1beta1InferenceServiceSpec(
            predictor=predictor,
            transformer=transformer,
        ),
    )

    # Request body.  Built from adjacent string literals so the base64 image
    # data contains no embedded whitespace or line-continuation characters.
    data_str = (
        '{"instances": [{"data": "iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAAAAABXZoBIAAAAw0lE'
        'QVR4nGNgGFggVVj4/y8Q2GOR83n+58/fP0DwcSqmpNN7oOTJw6f+/H2pjUU2JCSEk0EWqN0cl828e/FIxvz9/9cCh1'
        'zS5z9/G9mwyzl/+PNnKQ45nyNAr9ThMHQ/UG4tDofuB4bQIhz6fIBenMWJQ+7Vn7+zeLCbKXv6z59NOPQVgsIcW'
        '4QA9YFi6wNQLrKwsBebW/68DJ388Nun5XFocrqvIFH59+XhBAxThTfeB0r+vP/QHbuDCgr2JmOXoSsAAKK7b'
        'U3vISS4AAAAAElFTkSuQmCC", "target": 0}]}'
    )

    # Create outside the try block: if creation itself fails there is nothing
    # to clean up, and a failing delete would only obscure the real error.
    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE)

        # NOTE(review): fixed pause after readiness is reported -- presumably
        # to let ingress routing settle; confirm whether it is still needed.
        time.sleep(30)

        isvc = KFServing.get(
            service_name,
            namespace=KFSERVING_TEST_NAMESPACE,
        )

        cluster_ip = get_cluster_ip()
        logging.info("clusterip = %s", cluster_ip)

        # Keep only the text after the last '/' of the status URL; it is sent
        # as the Host header while the request itself goes to the cluster IP.
        host = isvc["status"]["url"]
        host = host[host.rfind('/') + 1:]
        url = 'http://{}/v1/models/mnist:predict'.format(cluster_ip)
        logging.info("url = %s ", url)
        headers = {"Host": host}

        # A timeout keeps an unresponsive endpoint from hanging the test run.
        res = requests.post(url, data_str, headers=headers, timeout=60)
        logging.info("res.text = %s", res.text)

        preds = json.loads(res.content.decode("utf-8"))
        assert preds["predictions"] == [2]
    finally:
        # Always remove the service so a failed run does not leak resources
        # into the shared test namespace.
        KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)