def predict_str(service_name, input_json, protocol_version="v1",
                version=constants.KSERVE_V1BETA1_VERSION, model_name=None):
    """Send a prediction request to a deployed InferenceService.

    Looks up the InferenceService to obtain its Host header, posts
    ``input_json`` to the cluster-local predict endpoint (v1 or v2
    protocol), and returns the decoded JSON response.

    :param service_name: name of the InferenceService in the test namespace
    :param input_json: request payload (already-serialized JSON string)
    :param protocol_version: "v1" (default) or "v2" inference protocol
    :param version: KServe CRD API version used for the lookup
    :param model_name: model to target; defaults to ``service_name``
    :return: parsed JSON prediction response (dict)
    """
    ks_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    isvc = ks_client.get(
        service_name,
        namespace=KSERVE_TEST_NAMESPACE,
        version=version,
    )
    # temporary sleep until this is fixed https://github.com/kserve/kserve/issues/604
    time.sleep(10)
    cluster_ip = get_cluster_ip()
    # Route through the ingress IP while presenting the service's own host.
    host = urlparse(isvc["status"]["url"]).netloc
    headers = {"Host": host}
    if model_name is None:
        model_name = service_name
    if protocol_version == "v2":
        url = f"http://{cluster_ip}/v2/models/{model_name}/infer"
    else:
        url = f"http://{cluster_ip}/v1/models/{model_name}:predict"
    logging.info("Sending Header = %s", headers)
    logging.info("Sending url = %s", url)
    logging.info("Sending request data: %s", input_json)
    response = requests.post(url, input_json, headers=headers)
    logging.info("Got response code %s, content %s", response.status_code,
                 response.content)
    return json.loads(response.content.decode("utf-8"))
def explain_response(service_name, input_json):
    """Post an explain request to an InferenceService and return its JSON reply.

    Reads the request payload from the file at ``input_json``, posts it to
    the service's ``:explain`` endpoint via the cluster ingress, and parses
    the JSON response. On failure (request error or unparsable response),
    dumps the explainer's Knative service object and the pod logs for
    debugging, then re-raises the original exception.

    :param service_name: name of the InferenceService in the test namespace
    :param input_json: path to a file containing the JSON request body
    :return: parsed JSON explanation response (dict)
    :raises RuntimeError, json.decoder.JSONDecodeError: re-raised after
        logging diagnostics
    """
    ks_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    isvc = ks_client.get(
        service_name,
        namespace=KSERVE_TEST_NAMESPACE,
        version=constants.KSERVE_V1BETA1_VERSION,
    )
    # temporary sleep until this is fixed https://github.com/kserve/kserve/issues/604
    time.sleep(10)
    cluster_ip = get_cluster_ip()
    # Target the ingress IP but keep the service's own Host header.
    host = urlparse(isvc["status"]["url"]).netloc
    url = "http://{}/v1/models/{}:explain".format(cluster_ip, service_name)
    headers = {"Host": host}
    with open(input_json) as json_file:
        payload = json.load(json_file)
    logging.info("Sending request data: %s", json.dumps(payload))
    try:
        response = requests.post(url, json.dumps(payload), headers=headers)
        logging.info(
            "Got response code %s, content %s",
            response.status_code,
            response.content,
        )
        explanation = json.loads(response.content.decode("utf-8"))
    except (RuntimeError, json.decoder.JSONDecodeError) as err:
        # Collect diagnostics before surfacing the failure.
        logging.info("Explain error -------")
        logging.info(
            ks_client.api_instance.get_namespaced_custom_object(
                "serving.knative.dev",
                "v1",
                KSERVE_TEST_NAMESPACE,
                "services",
                service_name + "-explainer",
            ))
        pod_list = ks_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector="serving.kserve.io/inferenceservice={}".format(
                service_name),
        )
        for pod in pod_list.items:
            logging.info(pod)
            logging.info(
                "%s\t%s\t%s" %
                (pod.metadata.name, pod.status.phase, pod.status.pod_ip))
            pod_log = ks_client.core_api.read_namespaced_pod_log(
                pod.metadata.name,
                KSERVE_TEST_NAMESPACE,
                container="kserve-container",
            )
            logging.info(pod_log)
        raise err
    return explanation
def perform_action(action, model_name, model_uri, canary_traffic_percent,
                   namespace, framework, custom_model_spec, service_account,
                   inferenceservice_yaml, request_timeout, autoscaling_target=0,
                   enable_istio_sidecar=True, watch_timeout=300,
                   min_replicas=0, max_replicas=0):
    """
    Perform the specified action. If the action is not 'delete' and
    `inferenceService_yaml` was provided, the dict representation of the YAML
    will be sent directly to the Kubernetes API. Otherwise, a
    V1beta1InferenceService object will be built using the provided input and
    then sent for creation/update.

    :param action: one of "create", "update", "apply", or "delete"
    :param model_name: InferenceService name; may be overridden by, or taken
        from, `inferenceservice_yaml` when that is provided
    :param autoscaling_target: Knative autoscaling target (0 disables the
        annotation)
    :param enable_istio_sidecar: when False, adds the annotation that disables
        Istio sidecar injection
    :param watch_timeout: seconds to watch for readiness on create/update
    :raises RuntimeError: if `action` does not match any supported action
    :return InferenceService JSON output
    """
    kserve_client = KServeClient()
    if inferenceservice_yaml:
        # Overwrite name and namespace if exists
        if namespace:
            inferenceservice_yaml['metadata']['namespace'] = namespace
        if model_name:
            inferenceservice_yaml['metadata']['name'] = model_name
        else:
            model_name = inferenceservice_yaml['metadata']['name']
        isvc = inferenceservice_yaml
    elif action != 'delete':
        # Create annotations
        annotations = {}
        if int(autoscaling_target) != 0:
            annotations["autoscaling.knative.dev/target"] = str(
                autoscaling_target)
        if not enable_istio_sidecar:
            annotations["sidecar.istio.io/inject"] = 'false'
        if not annotations:
            annotations = None
        metadata = client.V1ObjectMeta(
            name=model_name, namespace=namespace, annotations=annotations)
        # If a custom model container spec was provided, build the V1Container
        # object using it.
        containers = []
        if custom_model_spec:
            containers = [create_custom_container_spec(custom_model_spec)]
        # Build the V1beta1PredictorSpec.
        predictor_spec = create_predictor_spec(
            framework, model_uri, canary_traffic_percent, service_account,
            min_replicas, max_replicas, containers, request_timeout)
        isvc = create_inference_service(metadata, predictor_spec)
    if action == "create":
        submit_api_request(kserve_client, 'create', model_name, isvc,
                           namespace, watch=True,
                           timeout_seconds=watch_timeout)
    elif action == "update":
        submit_api_request(kserve_client, 'update', model_name, isvc,
                           namespace, watch=True,
                           timeout_seconds=watch_timeout)
    elif action == "apply":
        # "apply" is create-or-update: try create first, fall back to update
        # if the resource already exists (or create fails for another reason).
        try:
            submit_api_request(kserve_client, 'create', model_name, isvc,
                               namespace, watch=True,
                               timeout_seconds=watch_timeout)
        except Exception:
            submit_api_request(kserve_client, 'update', model_name, isvc,
                               namespace, watch=True,
                               timeout_seconds=watch_timeout)
    elif action == "delete":
        kserve_client.delete(model_name, namespace=namespace)
    else:
        # BUG FIX: the original code did `raise ("Error: ...")`, which raises
        # a plain string — itself a TypeError in Python 3 ("exceptions must
        # derive from BaseException") that masks the real problem. Raise a
        # proper exception with the same message instead.
        raise RuntimeError("Error: No matching action: " + action)
    # NOTE(review): after a "delete" this lookup presumably fails or returns
    # a terminating object — behavior kept as-is; confirm against callers.
    model_status = kserve_client.get(model_name, namespace=namespace)
    return model_status