def run(self): parser = argparse.ArgumentParser() parser.add_argument('--namespace', required=False, default='kubeflow') # pvc://${PVCNAME}/dir parser.add_argument('--storage_uri', required=False, default='/mnt/export') parser.add_argument('--name', required=False, default='kfserving-sample') args = parser.parse_args() namespace = args.namespace serving_name = args.name api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( tensorflow=V1alpha2TensorflowSpec( storage_uri=args.storage_uri, resources=V1ResourceRequirements( requests={'cpu':'100m','memory':'1Gi'}, limits={'cpu':'100m', 'memory':'1Gi'})))) isvc = V1alpha2InferenceService(api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta( name=serving_name, namespace=namespace), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) KFServing = KFServingClient() KFServing.create(isvc) print('Waiting 5 seconds for the InferenceService to be created') time.sleep(5) KFServing.get(serving_name, namespace=namespace, watch=True, timeout_seconds=300)
def test_triton(): service_name = 'isvc-triton' default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( min_replicas=1, triton=V1alpha2TritonSpec( storage_uri='gs://kfserving-samples/models/tensorrt'))) isvc = V1alpha2InferenceService( api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta(name=service_name, namespace=KFSERVING_TEST_NAMESPACE), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) KFServing.create(isvc) try: KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE) except RuntimeError as e: print( KFServing.api_instance.get_namespaced_custom_object( "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE, "services", service_name + "-predictor-default")) deployments = KFServing.app_api.list_namespaced_deployment( KFSERVING_TEST_NAMESPACE, label_selector='serving.kubeflow.org/inferenceservice={}'.format( service_name)) for deployment in deployments.items: print(deployment) raise e KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
def test_xgboost_kfserving(): service_name = "isvc-xgboost" default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( min_replicas=1, xgboost=V1alpha2XGBoostSpec( storage_uri="gs://kfserving-samples/models/xgboost/iris", resources=V1ResourceRequirements( requests={ "cpu": "100m", "memory": "256Mi" }, limits={ "cpu": "100m", "memory": "256Mi" }, ), ), )) isvc = V1alpha2InferenceService( api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta(name=service_name, namespace=KFSERVING_TEST_NAMESPACE), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec), ) KFServing.create(isvc) KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE) res = predict(service_name, "./data/iris_input.json") assert res["predictions"] == [1, 1] KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
def test_tensorflow_kfserving(): service_name = 'isvc-tensorflow' default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( min_replicas=1, tensorflow=V1alpha2TensorflowSpec( storage_uri='gs://kfserving-samples/models/tensorflow/flowers', resources=V1ResourceRequirements(requests={ 'cpu': '1', 'memory': '2Gi' }, limits={ 'cpu': '1', 'memory': '2Gi' })))) isvc = V1alpha2InferenceService( api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta(name=service_name, namespace=KFSERVING_TEST_NAMESPACE), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) KFServing.create(isvc) KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE) probs = predict(service_name, './data/flower_input.json') assert (np.argmax(probs[0].get('scores')) == 0) # Delete the InferenceService KFServing.delete(service_name, namespace=KFSERVING_TEST_NAMESPACE)
def test_canary_rollout(): service_name = 'isvc-canary' default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( min_replicas=1, tensorflow=V1alpha2TensorflowSpec( storage_uri='gs://kfserving-samples/models/tensorflow/flowers', resources=V1ResourceRequirements( requests={'cpu': '100m', 'memory': '256Mi'}, limits={'cpu': '100m', 'memory': '256Mi'})))) isvc = V1alpha2InferenceService(api_version=constants.KFSERVING_API_VERSION, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta( name=service_name, namespace=KFSERVING_TEST_NAMESPACE), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) KFServing.create(isvc) KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE) # define canary endpoint spec, and then rollout 10% traffic to the canary version canary_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( tensorflow=V1alpha2TensorflowSpec( storage_uri='gs://kfserving-samples/models/tensorflow/flowers-2', resources=V1ResourceRequirements( requests={'cpu':'100m','memory':'256Mi'}, limits={'cpu':'100m', 'memory':'256Mi'})))) KFServing.rollout_canary(service_name, canary=canary_endpoint_spec, percent=10, namespace=KFSERVING_TEST_NAMESPACE, watch=True, timeout_seconds=120) KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE) # Delete the InferenceService KFServing.delete(service_name, namespace=KFSERVING_TEST_NAMESPACE)
def test_transformer(): service_name = 'isvc-transformer' default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( min_replicas=1, pytorch=V1alpha2PyTorchSpec( storage_uri='gs://kfserving-samples/models/pytorch/cifar10', model_class_name="Net", resources=V1ResourceRequirements( requests={'cpu': '100m', 'memory': '256Mi'}, limits={'cpu': '100m', 'memory': '256Mi'}))), transformer=V1alpha2TransformerSpec( min_replicas=1, custom=V1alpha2CustomSpec( container=V1Container( image='gcr.io/kubeflow-ci/kfserving/image-transformer:latest', name='kfserving-container', resources=V1ResourceRequirements( requests={'cpu': '100m', 'memory': '256Mi'}, limits={'cpu': '100m', 'memory': '256Mi'}))))) isvc = V1alpha2InferenceService(api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta( name=service_name, namespace=KFSERVING_TEST_NAMESPACE), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) KFServing.create(isvc) KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE) probs = predict(service_name, './data/transformer.json') assert(np.argmax(probs) == 3) KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
def create_inference_service(namespace: str, name: str, storage_url: str, runtime_version: str, service_account_name: str): api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( min_replicas=1, service_account_name=service_account_name, triton=V1alpha2TritonSpec(runtime_version=runtime_version, storage_uri=storage_url, resources=V1ResourceRequirements( requests={ 'cpu': '100m', 'memory': '1Gi' }, limits={ 'cpu': '100m', 'memory': '1Gi' })))) isvc = V1alpha2InferenceService( api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta(name=name, namespace=namespace), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec), ) KFServing = KFServingClient() KFServing.create(isvc) KFServing.get(name, namespace=namespace, watch=True, timeout_seconds=300)
def test_sklearn_kfserving(): service_name = 'isvc-sklearn' default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( min_replicas=1, sklearn=V1alpha2SKLearnSpec( storage_uri='gs://kfserving-samples/models/sklearn/iris', resources=V1ResourceRequirements(requests={ 'cpu': '100m', 'memory': '256Mi' }, limits={ 'cpu': '100m', 'memory': '256Mi' })))) isvc = V1alpha2InferenceService( api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta(name=service_name, namespace=KFSERVING_TEST_NAMESPACE), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) KFServing.create(isvc) KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE) probs = predict(service_name, './data/iris_input.json') assert (probs == [1, 1]) KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
def test_pytorch(): service_name = 'isvc-pytorch' default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( min_replicas=1, parallelism=1, pytorch=V1alpha2PyTorchSpec( storage_uri='gs://kfserving-samples/models/pytorch/cifar10', model_class_name="Net", resources=V1ResourceRequirements( requests={'cpu': '100m', 'memory': '2Gi', 'nvidia.com/gpu': '1'}, limits={'cpu': '100m', 'memory': '2Gi', 'nvidia.com/gpu': '1'})))) isvc = V1alpha2InferenceService(api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta( name=service_name, namespace=KFSERVING_TEST_NAMESPACE, annotations={'serving.kubeflow.org/gke-accelerator': 'nvidia-tesla-k80'}), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) KFServing.create(isvc) try: KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE) except RuntimeError as e: print(KFServing.api_instance.get_namespaced_custom_object("serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE, "services", service_name + "-predictor")) pods = KFServing.core_api.list_namespaced_pod(KFSERVING_TEST_NAMESPACE, label_selector='serving.kubeflow.org/inferenceservice={}'. format(service_name)) for pod in pods.items: print(pod) raise e res = predict(service_name, './data/cifar_input.json') assert(np.argmax(res["predictions"]) == 3) KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
def test_pmml_kfserving(): service_name = 'isvc-pmml' default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( min_replicas=1, pmml=V1alpha2PMMLSpec( storage_uri='gs://kfserving-examples/models/pmml', resources=V1ResourceRequirements(requests={ 'cpu': '100m', 'memory': '256Mi' }, limits={ 'cpu': '100m', 'memory': '256Mi' })))) isvc = V1alpha2InferenceService( api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta(name=service_name, namespace=KFSERVING_TEST_NAMESPACE), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) KFServing.create(isvc) KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE) res = predict(service_name, './data/pmml_input.json') assert (res["predictions"] == [[1.0, 0.0, 0.0, "2"]]) KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
def test_tensorflow_kfserving(): service_name = 'isvc-tensorflow' default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( min_replicas=1, tensorflow=V1alpha2TensorflowSpec( storage_uri='gs://kfserving-samples/models/tensorflow/flowers', resources=V1ResourceRequirements(requests={ 'cpu': '100m', 'memory': '256Mi' }, limits={ 'cpu': '100m', 'memory': '256Mi' })))) isvc = V1alpha2InferenceService( api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta(name=service_name, namespace=KFSERVING_TEST_NAMESPACE), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) KFServing.create(isvc) KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
def test_tabular_explainer(): service_name = 'isvc-explainer-tabular' default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec(sklearn=V1alpha2SKLearnSpec( storage_uri='gs://seldon-models/sklearn/income/model', resources=V1ResourceRequirements(requests={ 'cpu': '100m', 'memory': '1Gi' }, limits={ 'cpu': '100m', 'memory': '1Gi' }))), explainer=V1alpha2ExplainerSpec( min_replicas=1, alibi=V1alpha2AlibiExplainerSpec( type='AnchorTabular', storage_uri= 'gs://seldon-models/sklearn/income/explainer-py36-0.5.2', resources=V1ResourceRequirements(requests={ 'cpu': '100m', 'memory': '1Gi' }, limits={ 'cpu': '100m', 'memory': '1Gi' })))) isvc = V1alpha2InferenceService( api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta(name=service_name, namespace=KFSERVING_TEST_NAMESPACE), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) KFServing.create(isvc) try: KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE, timeout_seconds=720) except RuntimeError as e: logging.info( KFServing.api_instance.get_namespaced_custom_object( "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE, "services", service_name + "-predictor-default")) pods = KFServing.core_api.list_namespaced_pod( KFSERVING_TEST_NAMESPACE, label_selector='serving.kubeflow.org/inferenceservice={}'.format( service_name)) for pod in pods.items: logging.info(pod) raise e res = predict(service_name, './data/income_input.json') assert (res["predictions"] == [0]) precision = explain(service_name, './data/income_input.json') assert (precision > 0.9) KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
def test_transformer(): service_name = 'isvc-transformer' default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( min_replicas=1, pytorch=V1alpha2PyTorchSpec( storage_uri='gs://kfserving-samples/models/pytorch/cifar10', model_class_name="Net", resources=V1ResourceRequirements(requests={ 'cpu': '100m', 'memory': '256Mi' }, limits={ 'cpu': '100m', 'memory': '256Mi' }))), transformer=V1alpha2TransformerSpec( min_replicas=1, custom=V1alpha2CustomSpec(container=V1Container( image= '809251082950.dkr.ecr.us-west-2.amazonaws.com/kfserving/image-transformer:latest', name='kfserving-container', resources=V1ResourceRequirements(requests={ 'cpu': '100m', 'memory': '256Mi' }, limits={ 'cpu': '100m', 'memory': '256Mi' }))))) isvc = V1alpha2InferenceService( api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta(name=service_name, namespace=KFSERVING_TEST_NAMESPACE), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) KFServing.create(isvc) try: KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE) except RuntimeError as e: print( KFServing.api_instance.get_namespaced_custom_object( "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE, "services", service_name + "-predictor-default")) pods = KFServing.core_api.list_namespaced_pod( KFSERVING_TEST_NAMESPACE, label_selector='serving.kubeflow.org/inferenceservice={}'.format( service_name)) for pod in pods.items: print(pod) raise e res = predict(service_name, './data/transformer.json') assert (np.argmax(res["predictions"]) == 3) KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
def generate_inferenceservice(): tf_spec = V1alpha2TensorflowSpec( storage_uri='gs://kfserving-samples/models/tensorflow/flowers') default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec(tensorflow=tf_spec)) isvc = V1alpha2InferenceService( api_version='serving.kubeflow.org/v1alpha2', kind='InferenceService', metadata=client.V1ObjectMeta(name='flower-sample'), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) return isvc
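# Usage sketch (an assumption, not from the original source): how the object returned by
# generate_inferenceservice() above might be submitted and watched with the kfserving SDK
# client, following the create/get pattern used by the other snippets in this collection.
# The namespace 'kubeflow' and the helper name deploy_generated_isvc are illustrative.
from kfserving import KFServingClient

def deploy_generated_isvc(namespace='kubeflow'):
    isvc = generate_inferenceservice()
    kfs_client = KFServingClient()
    # Create the InferenceService in the target namespace.
    kfs_client.create(isvc, namespace=namespace)
    # Block until the InferenceService reports Ready or the timeout elapses.
    kfs_client.get('flower-sample', namespace=namespace, watch=True, timeout_seconds=300)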
def InferenceService(metadata, default_model_spec, canary_model_spec=None, canary_model_traffic=None): return V1alpha2InferenceService( api_version=constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION, kind=constants.KFSERVING_KIND, metadata=metadata, spec=V1alpha2InferenceServiceSpec( default=default_model_spec, canary=canary_model_spec, canary_traffic_percent=canary_model_traffic))
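# Usage sketch (an assumption, not from the original source): constructing the arguments for
# the InferenceService() helper above. The metadata name, namespace, and storage URI are
# illustrative; the endpoint and predictor classes mirror those used throughout this collection.
from kubernetes import client
from kfserving import (V1alpha2EndpointSpec, V1alpha2PredictorSpec,
                       V1alpha2TensorflowSpec)

metadata = client.V1ObjectMeta(name='flowers-sample', namespace='kubeflow')
default_spec = V1alpha2EndpointSpec(
    predictor=V1alpha2PredictorSpec(
        tensorflow=V1alpha2TensorflowSpec(
            storage_uri='gs://kfserving-samples/models/tensorflow/flowers')))
# Default-only deployment; the canary spec and traffic split stay at their None defaults.
isvc = InferenceService(metadata, default_spec)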
def generate_isvc(self): """ generate InferenceService """ api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION default_predictor, canary_predictor = None, None if self.framework == 'custom': default_predictor = self.generate_predictor_spec( self.framework, container=self.custom_default_container) else: default_predictor = self.generate_predictor_spec( self.framework, storage_uri=self.default_storage_uri) if self.framework != 'custom' and self.canary_storage_uri is not None: canary_predictor = self.generate_predictor_spec( self.framework, storage_uri=self.canary_storage_uri) if self.framework == 'custom' and self.custom_canary_container is not None: canary_predictor = self.generate_predictor_spec( self.framework, container=self.custom_canary_container) if canary_predictor: isvc_spec = V1alpha2InferenceServiceSpec( default=V1alpha2EndpointSpec(predictor=default_predictor), canary=V1alpha2EndpointSpec(predictor=canary_predictor), canary_traffic_percent=self.canary_traffic_percent) else: isvc_spec = V1alpha2InferenceServiceSpec( default=V1alpha2EndpointSpec(predictor=default_predictor), canary_traffic_percent=self.canary_traffic_percent) return V1alpha2InferenceService( api_version=api_version, kind=constants.KFSERVING_KIND, metadata=k8s_client.V1ObjectMeta( name=self.isvc_name, generate_name=constants.KFSERVING_DEFAULT_NAME, namespace=self.namespace), spec=isvc_spec)
def test_batcher(): service_name = 'isvc-pytorch-batcher' default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( batcher=V1alpha2Batcher( max_batch_size=32, max_latency=5000, timeout=60), min_replicas=1, pytorch=V1alpha2PyTorchSpec( storage_uri='gs://kfserving-samples/models/pytorch/cifar10', model_class_name='Net', resources=V1ResourceRequirements(requests={ 'cpu': '1000m', 'memory': '2Gi' }, limits={ 'cpu': '1000m', 'memory': '2Gi' })))) isvc = V1alpha2InferenceService( api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta(name=service_name, namespace=KFSERVING_TEST_NAMESPACE), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) KFServing.create(isvc) try: KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE) except RuntimeError as e: print( KFServing.api_instance.get_namespaced_custom_object( "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE, "services", service_name + "-predictor-default")) pods = KFServing.core_api.list_namespaced_pod( KFSERVING_TEST_NAMESPACE, label_selector='serving.kubeflow.org/inferenceservice={}'.format( service_name)) for pod in pods.items: print(pod) raise e with futures.ThreadPoolExecutor(max_workers=4) as executor: future_res = [ executor.submit( lambda: predict(service_name, './data/cifar_input.json')) for _ in range(4) ] results = [f.result()["batchId"] for f in future_res] assert (all(x == results[0] for x in results) == True) KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
def test_lightgbm_kfserving(): service_name = "isvc-lightgbm" default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( min_replicas=1, lightgbm=V1alpha2LightGBMSpec( storage_uri="gs://kfserving-examples/models/lightgbm", resources=V1ResourceRequirements( requests={ "cpu": "100m", "memory": "256Mi" }, limits={ "cpu": "100m", "memory": "256Mi" }, ), ), )) isvc = V1alpha2InferenceService( api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta(name=service_name, namespace=KFSERVING_TEST_NAMESPACE), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec), ) KFServing.create(isvc) try: KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE, version=constants.KFSERVING_VERSION) except RuntimeError as e: KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE, version=constants.KFSERVING_V1BETA1_VERSION) try: res = predict(service_name, "./data/iris_input_v3.json", version=constants.KFSERVING_VERSION) except KeyError: res = predict(service_name, "./data/iris_input_v3.json", version=constants.KFSERVING_V1BETA1_VERSION) assert res["predictions"][0][0] > 0.5 KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
def test_tabular_explainer(): service_name = 'aix-explainer' default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( custom=V1alpha2CustomSpec( container=V1Container( name="predictor", image='aipipeline/rf-predictor:0.4.0', command=["python", "-m", "rfserver", "--model_name", "aix-explainer"], resources=V1ResourceRequirements( requests={'cpu': '500m', 'memory': '1Gi'}, limits={'cpu': '500m', 'memory': '1Gi'}) ))), explainer=V1alpha2ExplainerSpec( min_replicas=1, aix=V1alpha2AIXExplainerSpec( type='LimeImages', resources=V1ResourceRequirements( requests={'cpu': '500m', 'memory': '1Gi'}, limits={'cpu': '500m', 'memory': '1Gi'})))) isvc = V1alpha2InferenceService(api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta( name=service_name, namespace=KFSERVING_TEST_NAMESPACE), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) KFServing.create(isvc) try: KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE, timeout_seconds=720) except RuntimeError as e: logging.info(KFServing.api_instance.get_namespaced_custom_object("serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE, "services", service_name + "-predictor-default")) pods = KFServing.core_api.list_namespaced_pod(KFSERVING_TEST_NAMESPACE, label_selector='serving.kubeflow.org/inferenceservice={}'.format(service_name)) for pod in pods.items: logging.info(pod) raise e res = predict(service_name, './data/mnist_input.json') assert(res["predictions"] == [[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]) mask = explain_aix(service_name, './data/mnist_input.json') percent_in_mask = np.count_nonzero(mask) / np.size(np.array(mask)) assert(percent_in_mask > 0.6) KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
def deploy_model(namespace,trained_model_path): logging.basicConfig(level=logging.INFO) logging.info('Starting the model deployment step...') logging.info('Input parameters:') logging.info('namespace:{}'.format(namespace)) logging.info('trained_model_path:{}'.format(trained_model_path)) logging.info('STEP: DEPLOY MODEL (1/2) Generating definition..') api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION now = datetime.datetime.utcnow().strftime("%Y%m%d%H%M%S") inference_service_name = 'simple-model'+now default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( tensorflow=V1alpha2TensorflowSpec( storage_uri=trained_model_path, resources=V1ResourceRequirements( requests={'cpu': '100m', 'memory': '1Gi'}, limits={'cpu': '100m', 'memory': '1Gi'})))) isvc = V1alpha2InferenceService(api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta( name=inference_service_name, annotations= { 'sidecar.istio.io/inject': 'false', 'autoscaling.knative.dev/target': '1' }, namespace=namespace), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) #velascoluis: sidecar is disabled by https://github.com/knative/serving/issues/6829 #Note: make sure trained model path starts with file:// or gs:// KFServing = KFServingClient() logging.info('STEP: DEPLOY MODEL (2/2) Creating inference service..') KFServing.create(isvc) logging.info('Inference service ' + inference_service_name + " created ...") KFServing.get(inference_service_name, namespace=namespace, watch=True, timeout_seconds=120) logging.info('Model deployment step finished')
def deploy_model(namespace: str, trained_model_path: InputPath(str)): from kubernetes import client from kfserving import KFServingClient from kfserving import constants from kfserving import V1alpha2EndpointSpec from kfserving import V1alpha2PredictorSpec from kfserving import V1alpha2TensorflowSpec from kfserving import V1alpha2InferenceServiceSpec from kfserving import V1alpha2InferenceService from kubernetes.client import V1ResourceRequirements api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION inference_service_name = 'inference112cbk' default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec(tensorflow=V1alpha2TensorflowSpec( storage_uri=trained_model_path, resources=V1ResourceRequirements(requests={ 'cpu': '100m', 'memory': '1Gi' }, limits={ 'cpu': '100m', 'memory': '1Gi' })))) isvc = V1alpha2InferenceService( api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta(name=inference_service_name, namespace=namespace), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) KFServing = KFServingClient() KFServing.create(isvc) print('Inference service ' + inference_service_name + " created ...") KFServing.get(inference_service_name, namespace=namespace, watch=True, timeout_seconds=120) print('Model deployed')
def test_batcher(): service_name = 'isvc-pytorch-batcher' default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( batcher=V1alpha2Batcher( max_batch_size=32, max_latency=5000, timeout=60 ), min_replicas=1, pytorch=V1alpha2PyTorchSpec( storage_uri='gs://kfserving-samples/models/pytorch/cifar10', model_class_name='Net', resources=V1ResourceRequirements( requests={'cpu': '2000m', 'memory': '2Gi'}, limits={'cpu': '2000m', 'memory': '2Gi'})))) isvc = V1alpha2InferenceService(api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta( name=service_name, namespace=KFSERVING_TEST_NAMESPACE ), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) KFServing.create(isvc) try: KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE) except RuntimeError as e: print(KFServing.api_instance.get_namespaced_custom_object("serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE, "services", service_name + "-predictor-default")) pods = KFServing.core_api.list_namespaced_pod(KFSERVING_TEST_NAMESPACE, label_selector='serving.kubeflow.org/inferenceservice={}'. format(service_name)) for pod in pods.items: print(pod) raise e time.sleep(30) probs = predict(service_name, './data/cifar_input.json') assert(np.argmax(probs) == 3) KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
def deploy_model(namespace, model_file_name, gcp_bucket): api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION now = datetime.datetime.utcnow().strftime("%Y%m%d%H%M%S") inference_service_name = 'xgboost-r' + now default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( min_replicas=1, xgboost=V1alpha2XGBoostSpec( #storage_uri='gs://'+gcp_bucket+'/rmodel/'+model_file_name, storage_uri='gs://' + gcp_bucket + '/rmodel', resources=V1ResourceRequirements(requests={ 'cpu': '100m', 'memory': '1Gi' }, limits={ 'cpu': '100m', 'memory': '1Gi' })))) isvc = V1alpha2InferenceService( api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta( name=inference_service_name, namespace=namespace, annotations={'sidecar.istio.io/inject': 'false'}), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) #@velascoluis - annotation The sidecar.istio.io/inject: "false", otherwise the ingress does not work KFServing = KFServingClient() KFServing.create(isvc) KFServing.get(inference_service_name, namespace=namespace, watch=True, timeout_seconds=120)
def test_kfserving_logger(): msg_dumper = 'message-dumper' default_endpoint_spec = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec( min_replicas=1, custom=V1alpha2CustomSpec(container=V1Container( name="kfserving-container", image= 'gcr.io/knative-releases/knative.dev/eventing-contrib/cmd/event_display', )))) isvc = V1alpha2InferenceService( api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta(name=msg_dumper, namespace=KFSERVING_TEST_NAMESPACE), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) KFServing.create(isvc) KFServing.wait_isvc_ready(msg_dumper, namespace=KFSERVING_TEST_NAMESPACE) service_name = 'isvc-logger' default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( min_replicas=1, logger=V1alpha2Logger( mode="all", url="http://message-dumper-predictor-default." + KFSERVING_TEST_NAMESPACE), sklearn=V1alpha2SKLearnSpec( storage_uri='gs://kfserving-samples/models/sklearn/iris', resources=V1ResourceRequirements(requests={ 'cpu': '100m', 'memory': '256Mi' }, limits={ 'cpu': '100m', 'memory': '256Mi' })))) isvc = V1alpha2InferenceService( api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta(name=service_name, namespace=KFSERVING_TEST_NAMESPACE), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) KFServing.create(isvc) KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE) res = predict(service_name, './data/iris_input.json') assert (res["predictions"] == [1, 1]) pods = KFServing.core_api.list_namespaced_pod( KFSERVING_TEST_NAMESPACE, label_selector='serving.kubeflow.org/inferenceservice={}'.format( msg_dumper)) for pod in pods.items: log = KFServing.core_api.read_namespaced_pod_log( name=pod.metadata.name, namespace=pod.metadata.namespace, container="kfserving-container") print(log) assert ("org.kubeflow.serving.inference.request" in log) assert ("org.kubeflow.serving.inference.response" in log) KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE) KFServing.delete(msg_dumper, KFSERVING_TEST_NAMESPACE)
default_endpoint_spec = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec( tensorflow=V1alpha2TensorflowSpec( storage_uri="s3://anonymous-model-result/result/saved_model", resources=V1ResourceRequirements(requests={ "cpu": "100m", "memory": "1Gi" }, limits={ "cpu": "100m", "memory": "1Gi" })))) isvc = V1alpha2InferenceService( api_version=api_version, kind=constants.KFSERVING_KIND, metadata=client.V1ObjectMeta(name="mnist-kfserving", namespace=namespace), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) KFServing = KFServingClient() KFServing.set_credentials( storage_type="S3", namespace='anonymous', credentials_file='credentials', s3_profile="default", s3_endpoint="minio-service.kubeflow.svc.cluster.local:9000", s3_region="us-west-1", s3_use_https="0", s3_verify_ssl="0") KFServing.create(isvc)
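# Follow-up sketch (an assumption, not from the original source): after KFServing.create(isvc)
# above, the service can be watched until it becomes Ready and its Ready condition inspected,
# mirroring the create/get and status-condition patterns used by other snippets in this collection.
KFServing.get("mnist-kfserving", namespace=namespace, watch=True, timeout_seconds=300)
status = KFServing.get("mnist-kfserving", namespace=namespace)
for condition in status["status"].get("conditions", []):
    if condition["type"] == "Ready":
        # Prints 'True' once the InferenceService is serving traffic.
        print("Ready:", condition["status"])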
def deploy(self, kfservice_id): mykfservice = db.session.query(KfService).filter_by( id=kfservice_id).first() namespace = conf.get('KFSERVING_NAMESPACE') crd_info = conf.get('CRD_INFO')['inferenceservice'] # build the container spec from the service record def make_container(service, mykfservice): from myapp.utils.py.py_k8s import K8s k8s = K8s() # not deploying here, so no cluster configuration is needed container = k8s.make_container( name=mykfservice.name + "-" + service.name, command=["sh", "-c", service.command] if service.command else None, args=None, volume_mount=None, image_pull_policy=conf.get('IMAGE_PULL_POLICY', 'Always'), image=service.images, working_dir=service.working_dir if service.working_dir else None, env=service.env, resource_memory=service.resource_memory, resource_cpu=service.resource_cpu, resource_gpu=service.resource_gpu, username=service.created_by.username) return container api_version = crd_info['group'] + '/' + crd_info['version'] default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( min_replicas=mykfservice.default_service.min_replicas, max_replicas=mykfservice.default_service.max_replicas, custom=V1alpha2CustomSpec(container=make_container( mykfservice.default_service, mykfservice)))) if mykfservice.default_service else None canary_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec( min_replicas=mykfservice.canary_service.min_replicas, max_replicas=mykfservice.canary_service.max_replicas, custom=V1alpha2CustomSpec(container=make_container( mykfservice.canary_service, mykfservice)))) if mykfservice.canary_service else None metadata = kubernetes.client.V1ObjectMeta( name=mykfservice.name, labels={ "app": mykfservice.name, "rtx-user": mykfservice.created_by.username }, namespace=namespace) isvc = V1alpha2InferenceService( api_version=api_version, kind=crd_info['kind'], metadata=metadata, spec=V1alpha2InferenceServiceSpec( default=default_endpoint_spec, canary=canary_endpoint_spec, canary_traffic_percent=mykfservice.canary_traffic_percent)) KFServing = KFServingClient() try: KFServing.delete(mykfservice.name, namespace=namespace, version=crd_info['version']) except Exception as e: print(e) KFServing.create(isvc, namespace=namespace, version=crd_info['version']) flash(category='warning', message='Deployment started; it should be ready in about a minute') return redirect('/kfservice_modelview/list/')
def main(): api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION default_endpoint_spec = V1alpha2EndpointSpec( predictor=V1alpha2PredictorSpec(custom=V1alpha2CustomSpec( container=V1Container( name="kfserving-container", image=FLAGS.image, env=[{ "name": "STORAGE_URI", "value": "%s" % FLAGS.storage_uri }], resources=V1ResourceRequirements( limits={"nvidia.com/gpu": FLAGS.gpus_to_inference}), command=["python"], args=[ "model.py", "--model-name", "%s" % FLAGS.inference_name, "--out_dir", "%s" % FLAGS.model_path, "--classes_file", "%s" % FLAGS.classes_file, ])))) isvc = V1alpha2InferenceService( api_version=api_version, kind=constants.KFSERVING_KIND, metadata=k8s_client.V1ObjectMeta(name=FLAGS.inference_name, namespace=FLAGS.namespace), spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) # Create inference service KFServing = KFServingClient() KFServing.create(isvc) time.sleep(2) # Check inference service KFServing.get(FLAGS.inference_name, namespace=FLAGS.namespace, watch=True, timeout_seconds=180) model_status = KFServing.get(FLAGS.inference_name, namespace=FLAGS.namespace) for condition in model_status["status"]["conditions"]: if condition['type'] == 'Ready': if condition['status'] == 'True': print('Model is ready') break else: print( 'Model deployment timed out, please check the InferenceService events for more details.' ) exit(1) try: print( model_status["status"]["url"] + " is the Knative domain header. $ISTIO_INGRESS_ENDPOINT is defined in the commands below" ) print("Sample test commands: ") print( "# Note: If the Istio Ingress gateway is not served with a LoadBalancer, use $CLUSTER_NODE_IP:31380 as the ISTIO_INGRESS_ENDPOINT" ) print( "ISTIO_INGRESS_ENDPOINT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.status.loadBalancer.ingress[0].ip}')" ) # model_status['status']['url'] is like http://flowers-sample.kubeflow.example.com/v1/models/flowers-sample url = re.compile(r"https?://") host, path = url.sub("", model_status["status"]["url"]).split("/", 1) print('curl -X GET -H "Host: ' + host + '" http://$ISTIO_INGRESS_ENDPOINT/' + path) except Exception: print("Model is not ready, check the logs for the Knative URL status.") exit(1)