def test_xgboost_kfserving():
    """Deploy an XGBoost InferenceService, check predictions on the iris
    sample, then clean the service up."""
    service_name = "isvc-xgboost"

    # Request/limit the same small footprint so the pod schedules anywhere.
    iris_resources = V1ResourceRequirements(
        requests={"cpu": "100m", "memory": "256Mi"},
        limits={"cpu": "100m", "memory": "256Mi"},
    )
    predictor = V1alpha2PredictorSpec(
        min_replicas=1,
        xgboost=V1alpha2XGBoostSpec(
            storage_uri="gs://kfserving-samples/models/xgboost/iris",
            resources=iris_resources,
        ),
    )
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(
            default=V1alpha2EndpointSpec(predictor=predictor)),
    )

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    res = predict(service_name, "./data/iris_input.json")
    assert res["predictions"] == [1, 1]
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
def test_sklearn_kfserving():
    """Deploy a scikit-learn InferenceService, check predictions on the iris
    sample, then clean the service up.

    NOTE(review): the original body was a copy/paste of the XGBoost test — it
    deployed an XGBoost model under the name 'isvc-xgboost', so the sklearn
    predictor was never exercised.  Fixed to deploy the sklearn sample model.
    """
    service_name = 'isvc-sklearn'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            sklearn=V1alpha2SKLearnSpec(
                storage_uri='gs://kfserving-samples/models/sklearn/iris',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    probs = predict(service_name, './data/iris_input.json')
    assert (probs == [1, 1])
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
def EndpointSpec(framework, storage_uri, service_account):
    """Build a V1alpha2EndpointSpec for the given model framework.

    Args:
        framework: one of "tensorflow", "pytorch", "sklearn", "xgboost",
            "onnx", "tensorrt".
        storage_uri: model artifact location passed to the predictor spec.
        service_account: service account name attached to the predictor.

    Returns:
        V1alpha2EndpointSpec with the matching framework predictor.

    Raises:
        RuntimeError: if `framework` is not one of the supported values.
    """
    # BUG FIX: the original `raise ("Error: ...")` raised a plain string,
    # which produces "TypeError: exceptions must derive from BaseException"
    # instead of the intended message.  Raise RuntimeError instead.
    if framework == "tensorflow":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "pytorch":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            pytorch=V1alpha2PyTorchSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "sklearn":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            sklearn=V1alpha2SKLearnSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "xgboost":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            xgboost=V1alpha2XGBoostSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "onnx":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            onnx=V1alpha2ONNXSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "tensorrt":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorrt=V1alpha2TensorRTSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    else:
        raise RuntimeError("Error: No matching framework: " + framework)
def generate_predictor_spec(self, framework, storage_uri=None, container=None):
    """Generate predictor spec according to framework and default_storage_uri
    or custom container.

    Args:
        framework: one of 'tensorflow', 'onnx', 'pytorch', 'sklearn',
            'triton', 'xgboost', 'custom'.
        storage_uri: model artifact location for the framework predictors.
        container: V1Container used only when framework == 'custom'.

    Returns:
        V1alpha2PredictorSpec for the requested framework.

    Raises:
        RuntimeError: if `framework` is not supported.
    """
    # BUG FIX: the original dispatched on `self.framework` even though the
    # caller passes `framework` explicitly (the error message already used
    # the parameter).  Dispatch on the parameter so the argument is honored.
    if framework == 'tensorflow':
        predictor = V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri))
    elif framework == 'onnx':
        predictor = V1alpha2PredictorSpec(onnx=V1alpha2ONNXSpec(
            storage_uri=storage_uri))
    elif framework == 'pytorch':
        predictor = V1alpha2PredictorSpec(pytorch=V1alpha2PyTorchSpec(
            storage_uri=storage_uri))
    elif framework == 'sklearn':
        predictor = V1alpha2PredictorSpec(sklearn=V1alpha2SKLearnSpec(
            storage_uri=storage_uri))
    elif framework == 'triton':
        predictor = V1alpha2PredictorSpec(triton=V1alpha2TritonSpec(
            storage_uri=storage_uri))
    elif framework == 'xgboost':
        predictor = V1alpha2PredictorSpec(xgboost=V1alpha2XGBoostSpec(
            storage_uri=storage_uri))
    elif framework == 'custom':
        predictor = V1alpha2PredictorSpec(custom=V1alpha2CustomSpec(
            container=container))
    else:
        raise RuntimeError("Unsupported framework {}".format(framework))
    return predictor
def EndpointSpec(framework, storage_uri, service_account_name="k8s-sa",
                 transformer_custom_image=""):
    """Build a V1alpha2EndpointSpec for the given model framework.

    For 'tensorflow' the endpoint additionally gets a custom transformer
    container and the given service account; the other frameworks get a
    bare predictor.

    Args:
        framework: one of 'tensorflow', 'pytorch', 'sklearn', 'xgboost',
            'onnx', 'tensorrt'.
        storage_uri: model artifact location passed to the predictor spec.
        service_account_name: service account for the tensorflow predictor.
        transformer_custom_image: image for the tensorflow transformer.

    Returns:
        V1alpha2EndpointSpec with the matching framework predictor.

    Raises:
        RuntimeError: if `framework` is not one of the supported values.
    """
    # BUG FIX: the original `raise ("Error: ...")` raised a plain string,
    # which is a TypeError at runtime; raise RuntimeError instead.
    if framework == 'tensorflow':
        return V1alpha2EndpointSpec(
            predictor=V1alpha2PredictorSpec(
                service_account_name=service_account_name,
                tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri)),
            transformer=V1alpha2TransformerSpec(
                min_replicas=1,
                custom=V1alpha2CustomSpec(container=client.V1Container(
                    image=transformer_custom_image,
                    name="kfserving-container"))))
    elif framework == 'pytorch':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            pytorch=V1alpha2PyTorchSpec(storage_uri=storage_uri)))
    elif framework == 'sklearn':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            sklearn=V1alpha2SKLearnSpec(storage_uri=storage_uri)))
    elif framework == 'xgboost':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            xgboost=V1alpha2XGBoostSpec(storage_uri=storage_uri)))
    elif framework == 'onnx':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            onnx=V1alpha2ONNXSpec(storage_uri=storage_uri)))
    elif framework == 'tensorrt':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorrt=V1alpha2TensorRTSpec(storage_uri=storage_uri)))
    else:
        raise RuntimeError("Error: No matching framework: " + framework)
def EndpointSpec(framework, storage_uri):
    """Build a V1alpha2EndpointSpec with a bare predictor for the framework.

    Args:
        framework: one of 'tensorflow', 'pytorch', 'sklearn', 'xgboost',
            'onnx', 'tensorrt'.
        storage_uri: model artifact location passed to the predictor spec.

    Returns:
        V1alpha2EndpointSpec with the matching framework predictor.

    Raises:
        RuntimeError: if `framework` is not one of the supported values.
    """
    # BUG FIX: the original `raise ("Error: ...")` raised a plain string,
    # which is a TypeError at runtime; raise RuntimeError instead.
    if framework == 'tensorflow':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri)))
    elif framework == 'pytorch':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            pytorch=V1alpha2PyTorchSpec(storage_uri=storage_uri)))
    elif framework == 'sklearn':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            sklearn=V1alpha2SKLearnSpec(storage_uri=storage_uri)))
    elif framework == 'xgboost':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            xgboost=V1alpha2XGBoostSpec(storage_uri=storage_uri)))
    elif framework == 'onnx':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            onnx=V1alpha2ONNXSpec(storage_uri=storage_uri)))
    elif framework == 'tensorrt':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorrt=V1alpha2TensorRTSpec(storage_uri=storage_uri)))
    else:
        raise RuntimeError("Error: No matching framework: " + framework)
def EndpointSpec(framework, storage_uri, service_account, min_replicas,
                 max_replicas):
    """Build a V1alpha2EndpointSpec with replica bounds for the framework.

    Args:
        framework: one of "tensorflow", "pytorch", "sklearn", "xgboost",
            "onnx", "tensorrt".
        storage_uri: model artifact location passed to the framework spec.
        service_account: service account name attached to the predictor.
        min_replicas: used only when >= 0, otherwise left unset.
        max_replicas: used only when > 0 and >= min_replicas, otherwise
            left unset.

    Returns:
        V1alpha2EndpointSpec whose predictor has the matching framework spec.

    Raises:
        RuntimeError: if `framework` is not one of the supported values.
    """
    # BUG FIX: the original `raise ("Error: ...")` raised a plain string
    # (TypeError at runtime).  Also validate the framework *before* building
    # any spec objects so bad input fails fast.
    supported = ("tensorflow", "pytorch", "sklearn", "xgboost", "onnx",
                 "tensorrt")
    if framework not in supported:
        raise RuntimeError("Error: No matching framework: " + framework)

    endpointSpec = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
        service_account_name=service_account,
        min_replicas=(min_replicas if min_replicas >= 0 else None),
        max_replicas=(max_replicas if max_replicas > 0
                      and max_replicas >= min_replicas else None)))

    if framework == "tensorflow":
        endpointSpec.predictor.tensorflow = V1alpha2TensorflowSpec(
            storage_uri=storage_uri)
    elif framework == "pytorch":
        endpointSpec.predictor.pytorch = V1alpha2PyTorchSpec(
            storage_uri=storage_uri)
    elif framework == "sklearn":
        endpointSpec.predictor.sklearn = V1alpha2SKLearnSpec(
            storage_uri=storage_uri)
    elif framework == "xgboost":
        endpointSpec.predictor.xgboost = V1alpha2XGBoostSpec(
            storage_uri=storage_uri)
    elif framework == "onnx":
        endpointSpec.predictor.onnx = V1alpha2ONNXSpec(storage_uri=storage_uri)
    else:  # framework == "tensorrt", guaranteed by the check above
        endpointSpec.predictor.tensorrt = V1alpha2TensorRTSpec(
            storage_uri=storage_uri)
    return endpointSpec
def deploy_model(namespace, model_file_name, gcp_bucket):
    """Create an XGBoost InferenceService serving the model stored under
    gs://<gcp_bucket>/rmodel and wait (up to 120s) for it to become ready.

    NOTE: `model_file_name` is currently unused — the storage_uri points at
    the 'rmodel' folder rather than a specific file.
    """
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    # Timestamp suffix keeps service names unique across repeated deploys.
    stamp = datetime.datetime.utcnow().strftime("%Y%m%d%H%M%S")
    inference_service_name = 'xgboost-r' + stamp

    predictor = V1alpha2PredictorSpec(
        min_replicas=1,
        xgboost=V1alpha2XGBoostSpec(
            storage_uri='gs://' + gcp_bucket + '/rmodel',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '1Gi'},
                limits={'cpu': '100m', 'memory': '1Gi'})))

    # @velascoluis - the sidecar.istio.io/inject: "false" annotation is
    # required, otherwise the ingress does not work.
    metadata = client.V1ObjectMeta(
        name=inference_service_name,
        namespace=namespace,
        annotations={'sidecar.istio.io/inject': 'false'})

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=metadata,
        spec=V1alpha2InferenceServiceSpec(
            default=V1alpha2EndpointSpec(predictor=predictor)))

    KFServing = KFServingClient()
    KFServing.create(isvc)
    KFServing.get(inference_service_name, namespace=namespace, watch=True,
                  timeout_seconds=120)