def test_canary_rollout():
    service_name = 'isvc-canary'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))))
    isvc = V1alpha2InferenceService(
        api_version=constants.KFSERVING_API_VERSION,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)

    # Define the canary endpoint spec, then roll out 10% of traffic to the canary version
    canary_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers-2',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))))
    KFServing.rollout_canary(service_name,
                             canary=canary_endpoint_spec,
                             percent=10,
                             namespace=KFSERVING_TEST_NAMESPACE,
                             watch=True,
                             timeout_seconds=120)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)

    # Delete the InferenceService
    KFServing.delete(service_name, namespace=KFSERVING_TEST_NAMESPACE)

def run(self):
    parser = argparse.ArgumentParser()
    parser.add_argument('--namespace', required=False, default='kubeflow')
    # pvc://${PVCNAME}/dir
    parser.add_argument('--storage_uri', required=False, default='/mnt/export')
    parser.add_argument('--name', required=False, default='kfserving-sample')
    args = parser.parse_args()
    namespace = args.namespace
    serving_name = args.name

    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri=args.storage_uri,
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=serving_name, namespace=namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing = KFServingClient()
    KFServing.create(isvc)
    print('Waiting 5 seconds for the InferenceService to be created')
    time.sleep(5)
    KFServing.get(serving_name, namespace=namespace, watch=True, timeout_seconds=300)

def test_tensorflow_kfserving():
    service_name = 'isvc-tensorflow'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
                resources=V1ResourceRequirements(
                    requests={'cpu': '1', 'memory': '2Gi'},
                    limits={'cpu': '1', 'memory': '2Gi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    probs = predict(service_name, './data/flower_input.json')
    assert np.argmax(probs[0].get('scores')) == 0

    # Delete the InferenceService
    KFServing.delete(service_name, namespace=KFSERVING_TEST_NAMESPACE)

def test_xgboost_kfserving():
    service_name = "isvc-xgboost"
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            xgboost=V1alpha2XGBoostSpec(
                storage_uri="gs://kfserving-samples/models/xgboost/iris",
                resources=V1ResourceRequirements(
                    requests={"cpu": "100m", "memory": "256Mi"},
                    limits={"cpu": "100m", "memory": "256Mi"}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    res = predict(service_name, "./data/iris_input.json")
    assert res["predictions"] == [1, 1]
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def test_pmml_kfserving():
    service_name = 'isvc-pmml'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            pmml=V1alpha2PMMLSpec(
                storage_uri='gs://kfserving-examples/models/pmml',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    res = predict(service_name, './data/pmml_input.json')
    assert res["predictions"] == [[1.0, 0.0, 0.0, "2"]]
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def test_tensorflow_kfserving():
    service_name = 'isvc-tensorflow'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)

def create_inference_service(namespace: str, name: str, storage_url: str,
                             runtime_version: str, service_account_name: str):
    api_version = os.path.join(constants.KFSERVING_GROUP, constants.KFSERVING_VERSION)
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            service_account_name=service_account_name,
            triton=V1alpha2TritonSpec(
                runtime_version=runtime_version,
                storage_uri=storage_url,
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=name, namespace=namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing = KFServingClient()
    KFServing.create(isvc)
    KFServing.get(name, namespace=namespace, watch=True, timeout_seconds=300)

def test_transformer():
    service_name = 'isvc-transformer'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            pytorch=V1alpha2PyTorchSpec(
                storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
                model_class_name="Net",
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))),
        transformer=V1alpha2TransformerSpec(
            min_replicas=1,
            custom=V1alpha2CustomSpec(
                container=V1Container(
                    image='gcr.io/kubeflow-ci/kfserving/image-transformer:latest',
                    name='kfserving-container',
                    resources=V1ResourceRequirements(
                        requests={'cpu': '100m', 'memory': '256Mi'},
                        limits={'cpu': '100m', 'memory': '256Mi'})))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    wait_for_isvc_ready(service_name)
    probs = predict(service_name, './data/transformer.json')
    assert np.argmax(probs) == 3
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def test_triton():
    service_name = 'isvc-triton'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            triton=V1alpha2TritonSpec(
                storage_uri='gs://kfserving-samples/models/tensorrt')))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(KFServing.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE, "services",
            service_name + "-predictor-default"))
        deployments = KFServing.app_api.list_namespaced_deployment(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for deployment in deployments.items:
            print(deployment)
        raise e
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def test_pytorch():
    service_name = 'isvc-pytorch'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            parallelism=1,
            pytorch=V1alpha2PyTorchSpec(
                storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
                model_class_name="Net",
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '2Gi', 'nvidia.com/gpu': '1'},
                    limits={'cpu': '100m', 'memory': '2Gi', 'nvidia.com/gpu': '1'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name,
            namespace=KFSERVING_TEST_NAMESPACE,
            annotations={'serving.kubeflow.org/gke-accelerator': 'nvidia-tesla-k80'}),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(KFServing.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE, "services",
            service_name + "-predictor"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            print(pod)
        raise e

    res = predict(service_name, './data/cifar_input.json')
    assert np.argmax(res["predictions"]) == 3
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def customEndpointSpec(custom_model_spec, service_account):
    env = (
        [client.V1EnvVar(name=i["name"], value=i["value"])
         for i in custom_model_spec["env"]]
        if custom_model_spec.get("env", "")
        else None
    )
    ports = (
        [client.V1ContainerPort(container_port=int(custom_model_spec.get("port", "")))]
        if custom_model_spec.get("port", "")
        else None
    )
    containerSpec = client.V1Container(
        name=custom_model_spec.get("name", "custom-container"),
        image=custom_model_spec["image"],
        env=env,
        ports=ports,
        command=custom_model_spec.get("command", None),
        args=custom_model_spec.get("args", None),
        image_pull_policy=custom_model_spec.get("image_pull_policy", None),
        working_dir=custom_model_spec.get("working_dir", None),
    )
    return V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            custom=V1alpha2CustomSpec(container=containerSpec),
            service_account_name=service_account,
        )
    )

def test_sklearn_kfserving():
    service_name = 'isvc-sklearn'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            sklearn=V1alpha2SKLearnSpec(
                storage_uri='gs://kfserving-samples/models/sklearn/iris',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    probs = predict(service_name, './data/iris_input.json')
    assert probs == [1, 1]
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def customEndpointSpec(custom_model_spec, service_account, min_replicas, max_replicas):
    env = ([
        client.V1EnvVar(name=i["name"], value=i["value"])
        for i in custom_model_spec["env"]
    ] if custom_model_spec.get("env", "") else None)
    ports = ([
        client.V1ContainerPort(
            container_port=int(custom_model_spec.get("port", "")), protocol="TCP")
    ] if custom_model_spec.get("port", "") else None)
    resources = (client.V1ResourceRequirements(
        requests=(custom_model_spec["resources"]["requests"]
                  if custom_model_spec.get('resources', {}).get('requests') else None),
        limits=(custom_model_spec["resources"]["limits"]
                if custom_model_spec.get('resources', {}).get('limits') else None),
    ) if custom_model_spec.get("resources", {}) else None)
    containerSpec = client.V1Container(
        name=custom_model_spec.get("name", "custom-container"),
        image=custom_model_spec["image"],
        env=env,
        ports=ports,
        command=custom_model_spec.get("command", None),
        args=custom_model_spec.get("args", None),
        image_pull_policy=custom_model_spec.get("image_pull_policy", None),
        working_dir=custom_model_spec.get("working_dir", None),
        resources=resources)
    return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
        custom=V1alpha2CustomSpec(container=containerSpec),
        service_account_name=service_account,
        min_replicas=(min_replicas if min_replicas >= 0 else None),
        max_replicas=(max_replicas
                      if max_replicas > 0 and max_replicas >= min_replicas else None)))

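# Usage sketch (not from the original source): exercises the customEndpointSpec helper
# above. The dict shape is inferred from how the helper reads custom_model_spec; the
# image, env values, and service account below are hypothetical placeholders.
example_custom_model_spec = {
    "name": "my-custom-model",
    "image": "docker.io/example/model-server:latest",
    "port": 8080,
    "env": [{"name": "MODEL_NAME", "value": "example"}],
    "resources": {
        "requests": {"cpu": "100m", "memory": "256Mi"},
        "limits": {"cpu": "1", "memory": "1Gi"},
    },
}
example_endpoint_spec = customEndpointSpec(
    example_custom_model_spec, service_account="default",
    min_replicas=1, max_replicas=2)
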
def test_tabular_explainer():
    service_name = 'isvc-explainer-tabular'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            sklearn=V1alpha2SKLearnSpec(
                storage_uri='gs://seldon-models/sklearn/income/model',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}))),
        explainer=V1alpha2ExplainerSpec(
            min_replicas=1,
            alibi=V1alpha2AlibiExplainerSpec(
                type='AnchorTabular',
                storage_uri='gs://seldon-models/sklearn/income/explainer-py36-0.5.2',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE,
                                  timeout_seconds=720)
    except RuntimeError as e:
        logging.info(KFServing.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE, "services",
            service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/income_input.json')
    assert res["predictions"] == [0]
    precision = explain(service_name, './data/income_input.json')
    assert precision > 0.9
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def test_transformer():
    service_name = 'isvc-transformer'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            pytorch=V1alpha2PyTorchSpec(
                storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
                model_class_name="Net",
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))),
        transformer=V1alpha2TransformerSpec(
            min_replicas=1,
            custom=V1alpha2CustomSpec(container=V1Container(
                image='809251082950.dkr.ecr.us-west-2.amazonaws.com/kfserving/image-transformer:latest',
                name='kfserving-container',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'})))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(KFServing.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE, "services",
            service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            print(pod)
        raise e

    res = predict(service_name, './data/transformer.json')
    assert np.argmax(res["predictions"]) == 3
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def generate_kfservice():
    tf_spec = V1alpha2TensorflowSpec(
        storage_uri='gs://kfserving-samples/models/tensorflow/flowers')
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(tensorflow=tf_spec))
    kfsvc = V1alpha2KFService(
        api_version='serving.kubeflow.org/v1alpha2',
        kind='KFService',
        metadata=client.V1ObjectMeta(name='flower-sample'),
        spec=V1alpha2KFServiceSpec(default=default_endpoint_spec))
    return kfsvc

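# Follow-up sketch (not in the original source): generate_kfservice() only builds the
# KFService object. As an illustration, sanitize_for_serialization from the kubernetes
# client renders it as a plain dict for inspection; the call below is an assumption about
# how a caller might use the helper, not part of it.
kfsvc = generate_kfservice()
print(client.ApiClient().sanitize_for_serialization(kfsvc))
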
def EndpointSpec(framework, storage_uri, service_account):
    if framework == "tensorflow":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "pytorch":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            pytorch=V1alpha2PyTorchSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "sklearn":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            sklearn=V1alpha2SKLearnSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "xgboost":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            xgboost=V1alpha2XGBoostSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "onnx":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            onnx=V1alpha2ONNXSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "tensorrt":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorrt=V1alpha2TensorRTSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    else:
        raise ValueError("Error: No matching framework: " + framework)

def EndpointSpec(framework, storage_uri, service_account_name="k8s-sa",
                 transformer_custom_image=""):
    if framework == 'tensorflow':
        return V1alpha2EndpointSpec(
            predictor=V1alpha2PredictorSpec(
                service_account_name=service_account_name,
                tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri)),
            transformer=V1alpha2TransformerSpec(
                min_replicas=1,
                custom=V1alpha2CustomSpec(container=client.V1Container(
                    image=transformer_custom_image,
                    name="kfserving-container"))))
    elif framework == 'pytorch':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            pytorch=V1alpha2PyTorchSpec(storage_uri=storage_uri)))
    elif framework == 'sklearn':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            sklearn=V1alpha2SKLearnSpec(storage_uri=storage_uri)))
    elif framework == 'xgboost':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            xgboost=V1alpha2XGBoostSpec(storage_uri=storage_uri)))
    elif framework == 'onnx':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            onnx=V1alpha2ONNXSpec(storage_uri=storage_uri)))
    elif framework == 'tensorrt':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorrt=V1alpha2TensorRTSpec(storage_uri=storage_uri)))
    else:
        raise ValueError("Error: No matching framework: " + framework)

def generate_isvc(self):
    """ generate InferenceService """
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    default_predictor, canary_predictor = None, None
    if self.framework == 'custom':
        default_predictor = self.generate_predictor_spec(
            self.framework, container=self.custom_default_container)
    else:
        default_predictor = self.generate_predictor_spec(
            self.framework, storage_uri=self.default_storage_uri)

    if self.framework != 'custom' and self.canary_storage_uri is not None:
        canary_predictor = self.generate_predictor_spec(
            self.framework, storage_uri=self.canary_storage_uri)
    if self.framework == 'custom' and self.custom_canary_container is not None:
        canary_predictor = self.generate_predictor_spec(
            self.framework, container=self.custom_canary_container)

    if canary_predictor:
        isvc_spec = V1alpha2InferenceServiceSpec(
            default=V1alpha2EndpointSpec(predictor=default_predictor),
            canary=V1alpha2EndpointSpec(predictor=canary_predictor),
            canary_traffic_percent=self.canary_traffic_percent)
    else:
        isvc_spec = V1alpha2InferenceServiceSpec(
            default=V1alpha2EndpointSpec(predictor=default_predictor),
            canary_traffic_percent=self.canary_traffic_percent)

    return V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=k8s_client.V1ObjectMeta(
            name=self.isvc_name,
            generate_name=constants.KFSERVING_DEFAULT_NAME,
            namespace=self.namespace),
        spec=isvc_spec)

def test_batcher():
    service_name = 'isvc-pytorch-batcher'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            batcher=V1alpha2Batcher(
                max_batch_size=32,
                max_latency=5000,
                timeout=60),
            min_replicas=1,
            pytorch=V1alpha2PyTorchSpec(
                storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
                model_class_name='Net',
                resources=V1ResourceRequirements(
                    requests={'cpu': '1000m', 'memory': '2Gi'},
                    limits={'cpu': '1000m', 'memory': '2Gi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(KFServing.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE, "services",
            service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            print(pod)
        raise e

    with futures.ThreadPoolExecutor(max_workers=4) as executor:
        future_res = [
            executor.submit(lambda: predict(service_name, './data/cifar_input.json'))
            for _ in range(4)
        ]
    results = [f.result()["batchId"] for f in future_res]
    assert all(x == results[0] for x in results)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def test_lightgbm_kfserving():
    service_name = "isvc-lightgbm"
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            lightgbm=V1alpha2LightGBMSpec(
                storage_uri="gs://kfserving-examples/models/lightgbm",
                resources=V1ResourceRequirements(
                    requests={"cpu": "100m", "memory": "256Mi"},
                    limits={"cpu": "100m", "memory": "256Mi"}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE,
                                  version=constants.KFSERVING_VERSION)
    except RuntimeError:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE,
                                  version=constants.KFSERVING_V1BETA1_VERSION)
    try:
        res = predict(service_name, "./data/iris_input_v3.json",
                      version=constants.KFSERVING_VERSION)
    except KeyError:
        res = predict(service_name, "./data/iris_input_v3.json",
                      version=constants.KFSERVING_V1BETA1_VERSION)
    assert res["predictions"][0][0] > 0.5
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def test_tabular_explainer():
    service_name = 'aix-explainer'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            custom=V1alpha2CustomSpec(
                container=V1Container(
                    name="predictor",
                    image='aipipeline/rf-predictor:0.4.0',
                    command=["python", "-m", "rfserver",
                             "--model_name", "aix-explainer"],
                    resources=V1ResourceRequirements(
                        requests={'cpu': '500m', 'memory': '1Gi'},
                        limits={'cpu': '500m', 'memory': '1Gi'})))),
        explainer=V1alpha2ExplainerSpec(
            min_replicas=1,
            aix=V1alpha2AIXExplainerSpec(
                type='LimeImages',
                resources=V1ResourceRequirements(
                    requests={'cpu': '500m', 'memory': '1Gi'},
                    limits={'cpu': '500m', 'memory': '1Gi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE,
                                  timeout_seconds=720)
    except RuntimeError as e:
        logging.info(KFServing.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE, "services",
            service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/mnist_input.json')
    assert res["predictions"] == [[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
    mask = explain_aix(service_name, './data/mnist_input.json')
    percent_in_mask = np.count_nonzero(mask) / np.size(np.array(mask))
    assert percent_in_mask > 0.6
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def roll(self, kfservice_id):
    mykfservice = db.session.query(KfService).filter_by(id=kfservice_id).first()
    namespace = conf.get('KFSERVING_NAMESPACE')
    crd_info = conf.get('CRD_INFO')['inferenceservice']

    # Build a container from the service definition.
    def make_container(service, mykfservice):
        from myapp.utils.py.py_k8s import K8s
        k8s = K8s()  # Nothing is deployed here, so no cluster configuration is needed.
        container = k8s.make_container(
            name=mykfservice.name + "-" + service.name,
            command=["sh", "-c", service.command] if service.command else None,
            args=None,
            volume_mount=None,
            image_pull_policy=conf.get('IMAGE_PULL_POLICY', 'Always'),
            image=service.images,
            working_dir=service.working_dir if service.working_dir else None,
            env=service.env,
            resource_memory=service.resource_memory,
            resource_cpu=service.resource_cpu,
            resource_gpu=service.resource_gpu,
            username=service.created_by.username,
            ports=service.ports)
        return container

    canary_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=mykfservice.canary_service.min_replicas,
            max_replicas=mykfservice.canary_service.max_replicas,
            custom=V1alpha2CustomSpec(container=make_container(
                mykfservice.canary_service,
                mykfservice)))) if mykfservice.canary_service else None

    KFServing = KFServingClient()
    KFServing.rollout_canary(mykfservice.name,
                             canary=canary_endpoint_spec,
                             percent=mykfservice.canary_traffic_percent,
                             namespace=namespace,
                             timeout_seconds=120,
                             version=crd_info['version'])

    flash(category='warning',
          message='Canary rollout configured; refresh to see the current traffic split')
    return redirect('/kfservice_modelview/list/')

def customEndpointSpec(custom_model_spec):
    env = [
        client.V1EnvVar(name=i['name'], value=i['value'])
        for i in custom_model_spec['env']
    ] if custom_model_spec.get('env', '') else None
    ports = [
        client.V1ContainerPort(container_port=int(custom_model_spec.get('port', '')))
    ] if custom_model_spec.get('port', '') else None
    containerSpec = client.V1Container(
        name=custom_model_spec.get('name', 'custom-container'),
        image=custom_model_spec['image'],
        env=env,
        ports=ports,
        command=custom_model_spec.get('command', None),
        args=custom_model_spec.get('args', None),
        image_pull_policy=custom_model_spec.get('image_pull_policy', None),
        working_dir=custom_model_spec.get('working_dir', None))
    return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
        custom=V1alpha2CustomSpec(container=containerSpec)))

def deploy_model(namespace, trained_model_path):
    logging.basicConfig(level=logging.INFO)
    logging.info('Starting deploy model step ..')
    logging.info('Input data ..')
    logging.info('namespace:{}'.format(namespace))
    logging.info('trained_model_path:{}'.format(trained_model_path))

    logging.info('STEP: DEPLOY MODEL (1/2) Generating definition..')
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    now = datetime.datetime.utcnow().strftime("%Y%m%d%H%M%S")
    inference_service_name = 'simple-model' + now
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri=trained_model_path,
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=inference_service_name,
            annotations={
                'sidecar.istio.io/inject': 'false',
                'autoscaling.knative.dev/target': '1'
            },
            namespace=namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))
    # velascoluis: sidecar is disabled by https://github.com/knative/serving/issues/6829
    # Note: make sure trained model path starts with file:// or gs://

    KFServing = KFServingClient()
    logging.info('STEP: DEPLOY MODEL (2/2) Creating inference service..')
    KFServing.create(isvc)
    logging.info('Inference service ' + inference_service_name + " created ...")
    KFServing.get(inference_service_name, namespace=namespace, watch=True,
                  timeout_seconds=120)
    logging.info('Deploy model step finished')

def deploy_model(namespace: str, trained_model_path: InputPath(str)):
    from kubernetes import client
    from kfserving import KFServingClient
    from kfserving import constants
    from kfserving import V1alpha2EndpointSpec
    from kfserving import V1alpha2PredictorSpec
    from kfserving import V1alpha2TensorflowSpec
    from kfserving import V1alpha2InferenceServiceSpec
    from kfserving import V1alpha2InferenceService
    from kubernetes.client import V1ResourceRequirements

    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    inference_service_name = 'inference112cbk'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri=trained_model_path,
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=inference_service_name,
                                     namespace=namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing = KFServingClient()
    KFServing.create(isvc)
    print('Inference service ' + inference_service_name + " created ...")
    KFServing.get(inference_service_name, namespace=namespace, watch=True,
                  timeout_seconds=120)
    print('Model deployed')

def test_batcher():
    service_name = 'isvc-pytorch-batcher'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            batcher=V1alpha2Batcher(
                max_batch_size=32,
                max_latency=5000,
                timeout=60),
            min_replicas=1,
            pytorch=V1alpha2PyTorchSpec(
                storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
                model_class_name='Net',
                resources=V1ResourceRequirements(
                    requests={'cpu': '2000m', 'memory': '2Gi'},
                    limits={'cpu': '2000m', 'memory': '2Gi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(KFServing.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE, "services",
            service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            print(pod)
        raise e

    time.sleep(30)
    probs = predict(service_name, './data/cifar_input.json')
    assert np.argmax(probs) == 3
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)

def EndpointSpec(framework, storage_uri, service_account, min_replicas, max_replicas):
    endpointSpec = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
        service_account_name=service_account,
        min_replicas=(min_replicas if min_replicas >= 0 else None),
        max_replicas=(max_replicas
                      if max_replicas > 0 and max_replicas >= min_replicas else None)))
    if framework == "tensorflow":
        endpointSpec.predictor.tensorflow = V1alpha2TensorflowSpec(
            storage_uri=storage_uri)
        return endpointSpec
    elif framework == "pytorch":
        endpointSpec.predictor.pytorch = V1alpha2PyTorchSpec(
            storage_uri=storage_uri)
        return endpointSpec
    elif framework == "sklearn":
        endpointSpec.predictor.sklearn = V1alpha2SKLearnSpec(
            storage_uri=storage_uri)
        return endpointSpec
    elif framework == "xgboost":
        endpointSpec.predictor.xgboost = V1alpha2XGBoostSpec(
            storage_uri=storage_uri)
        return endpointSpec
    elif framework == "onnx":
        endpointSpec.predictor.onnx = V1alpha2ONNXSpec(storage_uri=storage_uri)
        return endpointSpec
    elif framework == "tensorrt":
        endpointSpec.predictor.tensorrt = V1alpha2TensorRTSpec(
            storage_uri=storage_uri)
        return endpointSpec
    else:
        raise ValueError("Error: No matching framework: " + framework)

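# Usage sketch (not in the original source): wraps the EndpointSpec returned by the
# factory above into an InferenceService, mirroring the create pattern used elsewhere in
# this file. The service name, namespace, service account, and storage URI below are
# hypothetical placeholders.
example_endpoint_spec = EndpointSpec(
    framework="sklearn",
    storage_uri="gs://example-bucket/models/sklearn/iris",
    service_account="default",
    min_replicas=1,
    max_replicas=2)
example_isvc = V1alpha2InferenceService(
    api_version=constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION,
    kind=constants.KFSERVING_KIND,
    metadata=client.V1ObjectMeta(name='sklearn-example', namespace='kubeflow'),
    spec=V1alpha2InferenceServiceSpec(default=example_endpoint_spec))
KFServingClient().create(example_isvc)
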
def deploy_model(namespace, model_file_name, gcp_bucket):
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    now = datetime.datetime.utcnow().strftime("%Y%m%d%H%M%S")
    inference_service_name = 'xgboost-r' + now
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            xgboost=V1alpha2XGBoostSpec(
                # storage_uri='gs://'+gcp_bucket+'/rmodel/'+model_file_name,
                storage_uri='gs://' + gcp_bucket + '/rmodel',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=inference_service_name,
            namespace=namespace,
            annotations={'sidecar.istio.io/inject': 'false'}),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))
    # @velascoluis - the sidecar.istio.io/inject: "false" annotation is required,
    # otherwise the ingress does not work

    KFServing = KFServingClient()
    KFServing.create(isvc)
    KFServing.get(inference_service_name, namespace=namespace, watch=True,
                  timeout_seconds=120)

def run(self):
    logger.info("Retrieving kfserving client")
    client = KFServingClient()

    logger.info("Specifying canary")
    canary = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
        min_replicas=1,
        custom=V1alpha2CustomSpec(container=V1Container(
            name=Serve.SERVICE_NAME,
            image="{}:{}".format(Pipeline.DEPLOY_IMAGE, self.args.tag),
            image_pull_policy="Always",
        ))))

    logger.info("Rolling out canary deployment")
    client.rollout_canary(Serve.SERVICE_NAME,
                          canary=canary,
                          percent=50,
                          namespace=Rollout.NAMESPACE,
                          watch=True)

    logger.info("Promoting canary deployment")
    client.promote(Serve.SERVICE_NAME, namespace=Rollout.NAMESPACE, watch=True)