from kubernetes import client
from kfserving import KFServingClient

# Note: ModelSpec, customModelSpec and kfserving_deployment are helper functions
# defined elsewhere in this component.


def deploy_model(action, model_name, default_model_uri, canary_model_uri,
                 canary_model_traffic, namespace, framework,
                 default_custom_model_spec, canary_custom_model_spec,
                 autoscaling_target=0):
    if int(autoscaling_target) != 0:
        annotations = {"autoscaling.knative.dev/target": str(autoscaling_target)}
    else:
        annotations = None
    metadata = client.V1ObjectMeta(
        name=model_name, namespace=namespace, annotations=annotations)

    if framework != 'custom':
        default_model_spec = ModelSpec(framework, default_model_uri)
    else:
        default_model_spec = customModelSpec(default_custom_model_spec)

    # Create a canary deployment if a canary model URI or custom spec is provided.
    if framework != 'custom' and canary_model_uri:
        canary_model_spec = ModelSpec(framework, canary_model_uri)
        kfsvc = kfserving_deployment(metadata, default_model_spec,
                                     canary_model_spec, canary_model_traffic)
    elif framework == 'custom' and canary_custom_model_spec:
        canary_model_spec = customModelSpec(canary_custom_model_spec)
        kfsvc = kfserving_deployment(metadata, default_model_spec,
                                     canary_model_spec, canary_model_traffic)
    else:
        kfsvc = kfserving_deployment(metadata, default_model_spec)

    KFServing = KFServingClient()

    if action == 'create':
        KFServing.create(kfsvc)
    elif action == 'update':
        KFServing.patch(model_name, kfsvc)
    elif action == 'delete':
        KFServing.delete(model_name, namespace=namespace)
    else:
        raise ValueError("Error: No matching action: " + action)

    model_status = KFServing.get(model_name, namespace=namespace)
    return model_status
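# A minimal usage sketch for deploy_model above. The model name, namespace and traffic
# split are illustrative placeholders (not from the original); the storage URI reuses
# the flowers sample model referenced later in this document.
if __name__ == '__main__':
    status = deploy_model(
        action='create',
        model_name='flowers-sample',
        default_model_uri='gs://kfserving-samples/models/tensorflow/flowers',
        canary_model_uri='',
        canary_model_traffic=0,
        namespace='kubeflow',
        framework='tensorflow',
        default_custom_model_spec={},
        canary_custom_model_spec={})
    print(status)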
import datetime
import logging

from kubernetes import client
from kubernetes.client import V1ResourceRequirements
from kfserving import KFServingClient
from kfserving import constants
from kfserving import V1alpha2EndpointSpec
from kfserving import V1alpha2PredictorSpec
from kfserving import V1alpha2TensorflowSpec
from kfserving import V1alpha2InferenceService
from kfserving import V1alpha2InferenceServiceSpec


def deploy_model(namespace, trained_model_path):
    logging.basicConfig(level=logging.INFO)
    logging.info('Starting deploy model step ..')
    logging.info('Input data ..')
    logging.info('namespace: {}'.format(namespace))
    logging.info('trained_model_path: {}'.format(trained_model_path))

    logging.info('STEP: DEPLOY MODEL (1/2) Generating definition..')
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    now = datetime.datetime.utcnow().strftime("%Y%m%d%H%M%S")
    inference_service_name = 'simple-model' + now
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri=trained_model_path,
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=inference_service_name,
            annotations={
                'sidecar.istio.io/inject': 'false',
                'autoscaling.knative.dev/target': '1'
            },
            namespace=namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))
    # velascoluis: the sidecar is disabled because of https://github.com/knative/serving/issues/6829
    # Note: make sure the trained model path starts with file:// or gs://

    KFServing = KFServingClient()
    logging.info('STEP: DEPLOY MODEL (2/2) Creating inference service..')
    KFServing.create(isvc)
    logging.info('Inference service ' + inference_service_name + ' created ...')
    KFServing.get(inference_service_name, namespace=namespace,
                  watch=True, timeout_seconds=120)
    logging.info('Deploy model step finished')
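# A minimal usage sketch for the step above. The namespace and model path are
# illustrative placeholders; as noted, the path must start with file:// or gs://.
if __name__ == '__main__':
    deploy_model(namespace='kubeflow',
                 trained_model_path='gs://your-bucket/models/simple-model/export')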
from kubernetes import client
from kubernetes.client import V1ResourceRequirements
from kfserving import KFServingClient
from kfserving import constants
from kfserving import V1alpha2EndpointSpec
from kfserving import V1alpha2PredictorSpec
from kfserving import V1alpha2TensorflowSpec
from kfserving import V1alpha2InferenceService
from kfserving import V1alpha2InferenceServiceSpec

# `namespace` is assumed to be defined by the surrounding code (e.g. 'kubeflow').
api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
default_endpoint_spec = V1alpha2EndpointSpec(
    predictor=V1alpha2PredictorSpec(
        tensorflow=V1alpha2TensorflowSpec(
            storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '1Gi'},
                limits={'cpu': '100m', 'memory': '1Gi'}))))

isvc = V1alpha2InferenceService(
    api_version=api_version,
    kind=constants.KFSERVING_KIND,
    metadata=client.V1ObjectMeta(name='flower-sample', namespace=namespace),
    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

KFServing = KFServingClient()
KFServing.create(isvc)
KFServing.get('flower-sample', namespace=namespace, watch=True, timeout_seconds=120)
# KFServing.delete('flower-sample', namespace=namespace)
import re
import time

from kubernetes import client as k8s_client
from kubernetes.client import V1Container
from kubernetes.client import V1ResourceRequirements
from kfserving import KFServingClient
from kfserving import constants
from kfserving import V1alpha2CustomSpec
from kfserving import V1alpha2EndpointSpec
from kfserving import V1alpha2PredictorSpec
from kfserving import V1alpha2InferenceService
from kfserving import V1alpha2InferenceServiceSpec


def main():
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(custom=V1alpha2CustomSpec(
            container=V1Container(
                name="kfserving-container",
                image=FLAGS.image,
                env=[{
                    "name": "STORAGE_URI",
                    "value": "%s" % FLAGS.storage_uri
                }],
                resources=V1ResourceRequirements(
                    limits={"nvidia.com/gpu": FLAGS.gpus_to_inference}),
                command=["python"],
                args=[
                    "model.py",
                    "--model-name", "%s" % FLAGS.inference_name,
                    "--out_dir", "%s" % FLAGS.model_path,
                    "--classes_file", "%s" % FLAGS.classes_file,
                ]))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=k8s_client.V1ObjectMeta(name=FLAGS.inference_name,
                                         namespace=FLAGS.namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    # Create the inference service.
    KFServing = KFServingClient()
    KFServing.create(isvc)
    time.sleep(2)

    # Wait for the inference service to become ready.
    KFServing.get(FLAGS.inference_name, namespace=FLAGS.namespace,
                  watch=True, timeout_seconds=180)

    model_status = KFServing.get(FLAGS.inference_name, namespace=FLAGS.namespace)
    for condition in model_status["status"]["conditions"]:
        if condition['type'] == 'Ready':
            if condition['status'] == 'True':
                print('Model is ready')
                break
            else:
                print('Model deployment timed out, please check the '
                      'inferenceservice events for more details.')
                exit(1)

    try:
        print(model_status["status"]["url"] +
              " is the Knative domain header. $ISTIO_INGRESS_ENDPOINT is defined "
              "in the commands below.")
        print("Sample test commands: ")
        print("# Note: If the Istio ingress gateway is not served with a LoadBalancer, "
              "use $CLUSTER_NODE_IP:31380 as the ISTIO_INGRESS_ENDPOINT")
        print("ISTIO_INGRESS_ENDPOINT=$(kubectl -n istio-system get service "
              "istio-ingressgateway -o jsonpath='{.status.loadBalancer.ingress[0].ip}')")
        # model_status['status']['url'] looks like
        # http://flowers-sample.kubeflow.example.com/v1/models/flowers-sample
        url = re.compile(r"https?://")
        host, path = url.sub("", model_status["status"]["url"]).split("/", 1)
        print('curl -X GET -H "Host: ' + host +
              '" http://$ISTIO_INGRESS_ENDPOINT/' + path)
    except Exception:
        print("Model is not ready, check the logs for the Knative URL status.")
        exit(1)
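# The script above reads its configuration from a global FLAGS object. A minimal
# argparse-based sketch follows; the flag names mirror the attributes used in main(),
# but the help text and defaults are assumptions rather than the original definitions.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--image", type=str, help="Serving container image")
    parser.add_argument("--storage_uri", type=str, help="Model storage URI, e.g. gs:// or pvc://")
    parser.add_argument("--gpus_to_inference", type=int, default=1, help="Number of GPUs requested for inference")
    parser.add_argument("--inference_name", type=str, help="Name of the InferenceService")
    parser.add_argument("--model_path", type=str, help="Model output directory passed to model.py")
    parser.add_argument("--classes_file", type=str, help="Classes file passed to model.py")
    parser.add_argument("--namespace", type=str, default="kubeflow", help="Namespace to deploy into")
    FLAGS, _ = parser.parse_known_args()
    main()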