def generate_predictor_spec(self, framework, storage_uri=None, container=None):
    '''Generate predictor spec according to framework and
    default_storage_uri or custom container.
    '''
    if self.framework == 'tensorflow':
        predictor = V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri))
    elif self.framework == 'onnx':
        predictor = V1alpha2PredictorSpec(
            onnx=V1alpha2ONNXSpec(storage_uri=storage_uri))
    elif self.framework == 'pytorch':
        predictor = V1alpha2PredictorSpec(
            pytorch=V1alpha2PyTorchSpec(storage_uri=storage_uri))
    elif self.framework == 'sklearn':
        predictor = V1alpha2PredictorSpec(
            sklearn=V1alpha2SKLearnSpec(storage_uri=storage_uri))
    elif self.framework == 'triton':
        predictor = V1alpha2PredictorSpec(
            triton=V1alpha2TritonSpec(storage_uri=storage_uri))
    elif self.framework == 'xgboost':
        predictor = V1alpha2PredictorSpec(
            xgboost=V1alpha2XGBoostSpec(storage_uri=storage_uri))
    elif self.framework == 'custom':
        predictor = V1alpha2PredictorSpec(
            custom=V1alpha2CustomSpec(container=container))
    else:
        raise RuntimeError("Unsupported framework {}".format(framework))
    return predictor
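# A minimal usage sketch, not part of the original snippet: `deployer` is a
# hypothetical object that defines generate_predictor_spec above and has its
# `framework` attribute set. The returned predictor spec can be wrapped into an
# endpoint spec exactly as the other helpers in this collection do.
predictor = deployer.generate_predictor_spec(
    'sklearn', storage_uri='gs://kfserving-samples/models/sklearn/iris')
endpoint_spec = V1alpha2EndpointSpec(predictor=predictor)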
def customEndpointSpec(custom_model_spec, service_account, min_replicas, max_replicas):
    env = ([
        client.V1EnvVar(name=i["name"], value=i["value"])
        for i in custom_model_spec["env"]
    ] if custom_model_spec.get("env", "") else None)
    ports = ([
        client.V1ContainerPort(
            container_port=int(custom_model_spec.get("port", "")),
            protocol="TCP")
    ] if custom_model_spec.get("port", "") else None)
    resources = (client.V1ResourceRequirements(
        requests=(custom_model_spec["resources"]["requests"]
                  if custom_model_spec.get('resources', {}).get('requests')
                  else None),
        limits=(custom_model_spec["resources"]["limits"]
                if custom_model_spec.get('resources', {}).get('limits')
                else None),
    ) if custom_model_spec.get("resources", {}) else None)
    containerSpec = client.V1Container(
        name=custom_model_spec.get("name", "custom-container"),
        image=custom_model_spec["image"],
        env=env,
        ports=ports,
        command=custom_model_spec.get("command", None),
        args=custom_model_spec.get("args", None),
        image_pull_policy=custom_model_spec.get("image_pull_policy", None),
        working_dir=custom_model_spec.get("working_dir", None),
        resources=resources)
    return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
        custom=V1alpha2CustomSpec(container=containerSpec),
        service_account_name=service_account,
        min_replicas=(min_replicas if min_replicas >= 0 else None),
        max_replicas=(max_replicas
                      if max_replicas > 0 and max_replicas >= min_replicas
                      else None)))
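# A hedged illustration of the dict customEndpointSpec above expects; the image
# and environment values here are hypothetical, not from the original source.
example_custom_model_spec = {
    "name": "custom-model",
    "image": "example.com/my-model:latest",  # hypothetical image
    "port": "8080",
    "env": [{"name": "STORAGE_URI",
             "value": "gs://kfserving-samples/models/sklearn/iris"}],
    "resources": {"requests": {"cpu": "100m", "memory": "256Mi"},
                  "limits": {"cpu": "100m", "memory": "256Mi"}},
}
endpoint_spec = customEndpointSpec(
    example_custom_model_spec, service_account="k8s-sa",
    min_replicas=1, max_replicas=1)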
def test_transformer():
    service_name = 'isvc-transformer'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            pytorch=V1alpha2PyTorchSpec(
                storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
                model_class_name="Net",
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))),
        transformer=V1alpha2TransformerSpec(
            min_replicas=1,
            custom=V1alpha2CustomSpec(
                container=V1Container(
                    image='gcr.io/kubeflow-ci/kfserving/image-transformer:latest',
                    name='kfserving-container',
                    resources=V1ResourceRequirements(
                        requests={'cpu': '100m', 'memory': '256Mi'},
                        limits={'cpu': '100m', 'memory': '256Mi'})))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    wait_for_isvc_ready(service_name)
    probs = predict(service_name, './data/transformer.json')
    assert np.argmax(probs) == 3
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
def EndpointSpec(framework, storage_uri, service_account_name="k8s-sa",
                 transformer_custom_image=""):
    if framework == 'tensorflow':
        return V1alpha2EndpointSpec(
            predictor=V1alpha2PredictorSpec(
                service_account_name=service_account_name,
                tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri)),
            transformer=V1alpha2TransformerSpec(
                min_replicas=1,
                custom=V1alpha2CustomSpec(container=client.V1Container(
                    image=transformer_custom_image,
                    name="kfserving-container"))))
    elif framework == 'pytorch':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            pytorch=V1alpha2PyTorchSpec(storage_uri=storage_uri)))
    elif framework == 'sklearn':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            sklearn=V1alpha2SKLearnSpec(storage_uri=storage_uri)))
    elif framework == 'xgboost':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            xgboost=V1alpha2XGBoostSpec(storage_uri=storage_uri)))
    elif framework == 'onnx':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            onnx=V1alpha2ONNXSpec(storage_uri=storage_uri)))
    elif framework == 'tensorrt':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorrt=V1alpha2TensorRTSpec(storage_uri=storage_uri)))
    else:
        # Raising a bare string is invalid in Python 3; raise a proper exception.
        raise RuntimeError("Error: No matching framework: " + framework)
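# A minimal usage sketch, assuming the client objects shown in the surrounding
# snippets (KFServingClient, constants, kubernetes client); the service name and
# namespace below are hypothetical placeholders.
endpoint_spec = EndpointSpec(
    'sklearn', storage_uri='gs://kfserving-samples/models/sklearn/iris')
isvc = V1alpha2InferenceService(
    api_version=constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION,
    kind=constants.KFSERVING_KIND,
    metadata=client.V1ObjectMeta(name='sklearn-sample', namespace='default'),
    spec=V1alpha2InferenceServiceSpec(default=endpoint_spec))
KFServingClient().create(isvc)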
def customEndpointSpec(custom_model_spec, service_account):
    env = (
        [
            client.V1EnvVar(name=i["name"], value=i["value"])
            for i in custom_model_spec["env"]
        ]
        if custom_model_spec.get("env", "")
        else None
    )
    ports = (
        [client.V1ContainerPort(
            container_port=int(custom_model_spec.get("port", "")))]
        if custom_model_spec.get("port", "")
        else None
    )
    containerSpec = client.V1Container(
        name=custom_model_spec.get("name", "custom-container"),
        image=custom_model_spec["image"],
        env=env,
        ports=ports,
        command=custom_model_spec.get("command", None),
        args=custom_model_spec.get("args", None),
        image_pull_policy=custom_model_spec.get("image_pull_policy", None),
        working_dir=custom_model_spec.get("working_dir", None),
    )
    return V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            custom=V1alpha2CustomSpec(container=containerSpec),
            service_account_name=service_account,
        )
    )
def test_transformer():
    service_name = 'isvc-transformer'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            pytorch=V1alpha2PyTorchSpec(
                storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
                model_class_name="Net",
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))),
        transformer=V1alpha2TransformerSpec(
            min_replicas=1,
            custom=V1alpha2CustomSpec(container=V1Container(
                image='809251082950.dkr.ecr.us-west-2.amazonaws.com/kfserving/image-transformer:latest',
                name='kfserving-container',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'})))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(KFServing.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE, "services",
            service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            print(pod)
        raise e

    res = predict(service_name, './data/transformer.json')
    assert np.argmax(res["predictions"]) == 3
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
def test_tabular_explainer():
    service_name = 'aix-explainer'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            custom=V1alpha2CustomSpec(
                container=V1Container(
                    name="predictor",
                    image='aipipeline/rf-predictor:0.4.0',
                    command=["python", "-m", "rfserver",
                             "--model_name", "aix-explainer"],
                    resources=V1ResourceRequirements(
                        requests={'cpu': '500m', 'memory': '1Gi'},
                        limits={'cpu': '500m', 'memory': '1Gi'})))),
        explainer=V1alpha2ExplainerSpec(
            min_replicas=1,
            aix=V1alpha2AIXExplainerSpec(
                type='LimeImages',
                resources=V1ResourceRequirements(
                    requests={'cpu': '500m', 'memory': '1Gi'},
                    limits={'cpu': '500m', 'memory': '1Gi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE,
                                  timeout_seconds=720)
    except RuntimeError as e:
        logging.info(KFServing.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE, "services",
            service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/mnist_input.json')
    assert res["predictions"] == [[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]

    mask = explain_aix(service_name, './data/mnist_input.json')
    percent_in_mask = np.count_nonzero(mask) / np.size(np.array(mask))
    assert percent_in_mask > 0.6
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
def roll(self, kfservice_id):
    mykfservice = db.session.query(KfService).filter_by(
        id=kfservice_id).first()

    namespace = conf.get('KFSERVING_NAMESPACE')
    crd_info = conf.get('CRD_INFO')['inferenceservice']

    # Build a container spec from the service definition.
    def make_container(service, mykfservice):
        from myapp.utils.py.py_k8s import K8s
        k8s = K8s()  # Not deploying here, so no cluster configuration is needed.
        container = k8s.make_container(
            name=mykfservice.name + "-" + service.name,
            command=["sh", "-c", service.command] if service.command else None,
            args=None,
            volume_mount=None,
            image_pull_policy=conf.get('IMAGE_PULL_POLICY', 'Always'),
            image=service.images,
            working_dir=service.working_dir if service.working_dir else None,
            env=service.env,
            resource_memory=service.resource_memory,
            resource_cpu=service.resource_cpu,
            resource_gpu=service.resource_gpu,
            username=service.created_by.username,
            ports=service.ports)
        return container

    canary_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=mykfservice.canary_service.min_replicas,
            max_replicas=mykfservice.canary_service.max_replicas,
            custom=V1alpha2CustomSpec(container=make_container(
                mykfservice.canary_service,
                mykfservice)))) if mykfservice.canary_service else None

    KFServing = KFServingClient()
    KFServing.rollout_canary(
        mykfservice.name,
        canary=canary_endpoint_spec,
        percent=mykfservice.canary_traffic_percent,
        namespace=namespace,
        timeout_seconds=120,
        version=crd_info['version'])

    flash(category='warning',
          message='Canary rollout configured; refresh to see the current traffic split.')
    return redirect('/kfservice_modelview/list/')
def customEndpointSpec(custom_model_spec):
    env = [
        client.V1EnvVar(name=i['name'], value=i['value'])
        for i in custom_model_spec['env']
    ] if custom_model_spec.get('env', '') else None
    ports = [
        client.V1ContainerPort(
            container_port=int(custom_model_spec.get('port', '')))
    ] if custom_model_spec.get('port', '') else None
    containerSpec = client.V1Container(
        name=custom_model_spec.get('name', 'custom-container'),
        image=custom_model_spec['image'],
        env=env,
        ports=ports,
        command=custom_model_spec.get('command', None),
        args=custom_model_spec.get('args', None),
        image_pull_policy=custom_model_spec.get('image_pull_policy', None),
        working_dir=custom_model_spec.get('working_dir', None))
    # V1alpha2EndpointSpec has no `custom` field; the custom container belongs
    # under the predictor spec, as in the other helpers in this collection.
    return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
        custom=V1alpha2CustomSpec(container=containerSpec)))
def run(self):
    logger.info("Retrieving kfserving client")
    client = KFServingClient()

    logger.info("Specifying canary")
    canary = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
        min_replicas=1,
        custom=V1alpha2CustomSpec(container=V1Container(
            name=Serve.SERVICE_NAME,
            image="{}:{}".format(Pipeline.DEPLOY_IMAGE, self.args.tag),
            image_pull_policy="Always",
        ))))

    logger.info("Rolling out canary deployment")
    client.rollout_canary(Serve.SERVICE_NAME,
                          canary=canary,
                          percent=50,
                          namespace=Rollout.NAMESPACE,
                          watch=True)

    logger.info("Promoting canary deployment")
    client.promote(Serve.SERVICE_NAME,
                   namespace=Rollout.NAMESPACE,
                   watch=True)
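# A hedged follow-up sketch, not part of the original run(): after promotion,
# the client's get() can watch the service until it reports Ready, mirroring
# the deployment script further below. Names reuse the constants from run().
KFServingClient().get(Serve.SERVICE_NAME,
                      namespace=Rollout.NAMESPACE,
                      watch=True,
                      timeout_seconds=180)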
def test_kfserving_logger():
    msg_dumper = 'message-dumper'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            custom=V1alpha2CustomSpec(container=V1Container(
                name="kfserving-container",
                image='gcr.io/knative-releases/knative.dev/eventing-contrib/cmd/event_display',
            ))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=msg_dumper,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))
    KFServing.create(isvc)
    KFServing.wait_isvc_ready(msg_dumper, namespace=KFSERVING_TEST_NAMESPACE)

    service_name = 'isvc-logger'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            logger=V1alpha2Logger(
                mode="all",
                url="http://message-dumper-predictor-default." + KFSERVING_TEST_NAMESPACE),
            sklearn=V1alpha2SKLearnSpec(
                storage_uri='gs://kfserving-samples/models/sklearn/iris',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))
    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)

    res = predict(service_name, './data/iris_input.json')
    assert res["predictions"] == [1, 1]

    pods = KFServing.core_api.list_namespaced_pod(
        KFSERVING_TEST_NAMESPACE,
        label_selector='serving.kubeflow.org/inferenceservice={}'.format(
            msg_dumper))
    for pod in pods.items:
        log = KFServing.core_api.read_namespaced_pod_log(
            name=pod.metadata.name,
            namespace=pod.metadata.namespace,
            container="kfserving-container")
        print(log)
        assert "org.kubeflow.serving.inference.request" in log
        assert "org.kubeflow.serving.inference.response" in log

    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
    KFServing.delete(msg_dumper, KFSERVING_TEST_NAMESPACE)
def deploy(self, kfservice_id):
    mykfservice = db.session.query(KfService).filter_by(
        id=kfservice_id).first()

    namespace = conf.get('KFSERVING_NAMESPACE')
    crd_info = conf.get('CRD_INFO')['inferenceservice']

    # Build a container spec from the service definition.
    def make_container(service, mykfservice):
        from myapp.utils.py.py_k8s import K8s
        k8s = K8s()  # Not deploying here, so no cluster configuration is needed.
        container = k8s.make_container(
            name=mykfservice.name + "-" + service.name,
            command=["sh", "-c", service.command] if service.command else None,
            args=None,
            volume_mount=None,
            image_pull_policy=conf.get('IMAGE_PULL_POLICY', 'Always'),
            image=service.images,
            working_dir=service.working_dir if service.working_dir else None,
            env=service.env,
            resource_memory=service.resource_memory,
            resource_cpu=service.resource_cpu,
            resource_gpu=service.resource_gpu,
            username=service.created_by.username)
        return container

    api_version = crd_info['group'] + '/' + crd_info['version']

    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=mykfservice.default_service.min_replicas,
            max_replicas=mykfservice.default_service.max_replicas,
            custom=V1alpha2CustomSpec(container=make_container(
                mykfservice.default_service,
                mykfservice)))) if mykfservice.default_service else None

    canary_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=mykfservice.canary_service.min_replicas,
            max_replicas=mykfservice.canary_service.max_replicas,
            custom=V1alpha2CustomSpec(container=make_container(
                mykfservice.canary_service,
                mykfservice)))) if mykfservice.canary_service else None

    metadata = kubernetes.client.V1ObjectMeta(
        name=mykfservice.name,
        labels={
            "app": mykfservice.name,
            "rtx-user": mykfservice.created_by.username
        },
        namespace=namespace)

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=crd_info['kind'],
        metadata=metadata,
        spec=V1alpha2InferenceServiceSpec(
            default=default_endpoint_spec,
            canary=canary_endpoint_spec,
            canary_traffic_percent=mykfservice.canary_traffic_percent))

    KFServing = KFServingClient()
    try:
        KFServing.delete(mykfservice.name, namespace=namespace,
                         version=crd_info['version'])
    except Exception as e:
        print(e)

    KFServing.create(isvc, namespace=namespace, version=crd_info['version'])

    flash(category='warning',
          message='Deployment started; it should finish in about a minute.')
    return redirect('/kfservice_modelview/list/')
from kubernetes import client

from kfserving import KFServingClient
from kfserving import constants
from kfserving import utils
from kfserving import V1alpha2EndpointSpec
from kfserving import V1alpha2PredictorSpec
from kfserving import V1alpha2TensorflowSpec
from kfserving import V1alpha2InferenceServiceSpec
from kfserving import V1alpha2InferenceService
from kubernetes.client import V1ResourceRequirements

namespace = utils.get_default_target_namespace()
api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION

# A TensorFlow predictor takes a V1alpha2TensorflowSpec (not a custom spec)
# pointing at the model's storage URI.
default_endpoint_spec = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
    tensorflow=V1alpha2TensorflowSpec(
        storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
        resources=V1ResourceRequirements(
            requests={'cpu': '100m', 'memory': '1Gi'},
            limits={'cpu': '100m', 'memory': '1Gi'}))))

isvc = V1alpha2InferenceService(
    api_version=api_version,
    kind=constants.KFSERVING_KIND,
    metadata=client.V1ObjectMeta(name='flower-sample', namespace=namespace),
    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

KFServing = KFServingClient()
KFServing.create(isvc)
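# A hedged follow-up, not in the original snippet: watch the InferenceService
# until it reports Ready, then delete it when finished, mirroring the client
# calls used in the other snippets here.
KFServing.get('flower-sample', namespace=namespace, watch=True,
              timeout_seconds=180)
KFServing.delete('flower-sample', namespace=namespace)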
def main():
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(custom=V1alpha2CustomSpec(
            container=V1Container(
                name="kfserving-container",
                image=FLAGS.image,
                env=[{
                    "name": "STORAGE_URI",
                    "value": "%s" % FLAGS.storage_uri
                }],
                resources=V1ResourceRequirements(
                    limits={"nvidia.com/gpu": FLAGS.gpus_to_inference}),
                command=["python"],
                args=[
                    "model.py",
                    "--model-name", "%s" % FLAGS.inference_name,
                    "--out_dir", "%s" % FLAGS.model_path,
                    "--classes_file", "%s" % FLAGS.classes_file,
                ]))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=k8s_client.V1ObjectMeta(name=FLAGS.inference_name,
                                         namespace=FLAGS.namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    # Create the inference service.
    KFServing = KFServingClient()
    KFServing.create(isvc)
    time.sleep(2)

    # Wait for the inference service to become ready.
    KFServing.get(FLAGS.inference_name, namespace=FLAGS.namespace,
                  watch=True, timeout_seconds=180)

    model_status = KFServing.get(FLAGS.inference_name, namespace=FLAGS.namespace)
    for condition in model_status["status"]["conditions"]:
        if condition['type'] == 'Ready':
            if condition['status'] == 'True':
                print('Model is ready')
                break
            else:
                print('Model timed out; check the InferenceService events '
                      'for more details.')
                exit(1)

    try:
        print(model_status["status"]["url"] +
              " is the Knative domain header. $ISTIO_INGRESS_ENDPOINT is "
              "defined in the commands below.")
        print("Sample test commands: ")
        print("# Note: If the Istio ingress gateway is not served with a "
              "LoadBalancer, use $CLUSTER_NODE_IP:31380 as the "
              "ISTIO_INGRESS_ENDPOINT")
        print("ISTIO_INGRESS_ENDPOINT=$(kubectl -n istio-system get service "
              "istio-ingressgateway -o jsonpath="
              "'{.status.loadBalancer.ingress[0].ip}')")
        # model_status['status']['url'] looks like
        # http://flowers-sample.kubeflow.example.com/v1/models/flowers-sample
        url = re.compile(r"https?://")
        host, path = url.sub("", model_status["status"]["url"]).split("/", 1)
        print('curl -X GET -H "Host: ' + host +
              '" http://$ISTIO_INGRESS_ENDPOINT/' + path)
    except Exception:
        print("Model is not ready, check the logs for the Knative URL status.")
        exit(1)