Example no. 1
def generate_predictor_spec(self,
                            framework,
                            storage_uri=None,
                            container=None):
    '''Generate a predictor spec for the given framework, using
       storage_uri or a custom container.
    '''
    if framework == 'tensorflow':
        predictor = V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri))
    elif framework == 'onnx':
        predictor = V1alpha2PredictorSpec(
            onnx=V1alpha2ONNXSpec(storage_uri=storage_uri))
    elif framework == 'pytorch':
        predictor = V1alpha2PredictorSpec(
            pytorch=V1alpha2PyTorchSpec(storage_uri=storage_uri))
    elif framework == 'sklearn':
        predictor = V1alpha2PredictorSpec(
            sklearn=V1alpha2SKLearnSpec(storage_uri=storage_uri))
    elif framework == 'triton':
        predictor = V1alpha2PredictorSpec(
            triton=V1alpha2TritonSpec(storage_uri=storage_uri))
    elif framework == 'xgboost':
        predictor = V1alpha2PredictorSpec(
            xgboost=V1alpha2XGBoostSpec(storage_uri=storage_uri))
    elif framework == 'custom':
        predictor = V1alpha2PredictorSpec(
            custom=V1alpha2CustomSpec(container=container))
    else:
        raise RuntimeError("Unsupported framework {}".format(framework))
    return predictor
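Example no. 2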
def customEndpointSpec(custom_model_spec, service_account, min_replicas,
                       max_replicas):
    env = ([
        client.V1EnvVar(name=i["name"], value=i["value"])
        for i in custom_model_spec["env"]
    ] if custom_model_spec.get("env", "") else None)
    ports = ([
        client.V1ContainerPort(container_port=int(
            custom_model_spec.get("port", "")),
                               protocol="TCP")
    ] if custom_model_spec.get("port", "") else None)
    resources = (client.V1ResourceRequirements(
        requests=(
            custom_model_spec["resources"]["requests"] if
            custom_model_spec.get('resources', {}).get('requests') else None),
        limits=(
            custom_model_spec["resources"]["limits"] if custom_model_spec.get(
                'resources', {}).get('limits') else None),
    ) if custom_model_spec.get("resources", {}) else None)
    containerSpec = client.V1Container(
        name=custom_model_spec.get("name", "custom-container"),
        image=custom_model_spec["image"],
        env=env,
        ports=ports,
        command=custom_model_spec.get("command", None),
        args=custom_model_spec.get("args", None),
        image_pull_policy=custom_model_spec.get("image_pull_policy", None),
        working_dir=custom_model_spec.get("working_dir", None),
        resources=resources)
    return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
        custom=V1alpha2CustomSpec(container=containerSpec),
        service_account_name=service_account,
        min_replicas=(min_replicas if min_replicas >= 0 else None),
        max_replicas=(max_replicas if max_replicas > 0
                      and max_replicas >= min_replicas else None)))
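For reference, a minimal sketch of the custom_model_spec dictionary this helper expects; every value below is a hypothetical placeholder, and only "image" is strictly required (the other keys are read with .get() and may be omitted):

# Hypothetical input for customEndpointSpec(); adjust name, image and resources as needed.
custom_model_spec = {
    "name": "my-custom-model",
    "image": "registry.example.com/my-model:latest",
    "port": 8080,
    "env": [{"name": "MODEL_PATH", "value": "/mnt/models"}],
    "command": ["python"],
    "args": ["server.py"],
    "resources": {
        "requests": {"cpu": "100m", "memory": "256Mi"},
        "limits": {"cpu": "1", "memory": "1Gi"},
    },
}
endpoint_spec = customEndpointSpec(custom_model_spec, "k8s-sa", 1, 3)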
Example no. 3
def test_transformer():
    service_name = 'isvc-transformer'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            pytorch=V1alpha2PyTorchSpec(
                storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
                model_class_name="Net",
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))),
        transformer=V1alpha2TransformerSpec(
            min_replicas=1,
            custom=V1alpha2CustomSpec(
                container=V1Container(
                  image='gcr.io/kubeflow-ci/kfserving/image-transformer:latest',
                  name='kfserving-container',
                  resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'})))))

    isvc = V1alpha2InferenceService(api_version=api_version,
                                    kind=constants.KFSERVING_KIND,
                                    metadata=client.V1ObjectMeta(
                                        name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
                                    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    wait_for_isvc_ready(service_name)
    probs = predict(service_name, './data/transformer.json')
    assert(np.argmax(probs) == 3)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example no. 4
def EndpointSpec(framework,
                 storage_uri,
                 service_account_name="k8s-sa",
                 transformer_custom_image=""):
    if framework == 'tensorflow':
        return V1alpha2EndpointSpec(
            predictor=V1alpha2PredictorSpec(
                service_account_name=service_account_name,
                tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri)),
            transformer=V1alpha2TransformerSpec(
                min_replicas=1,
                custom=V1alpha2CustomSpec(container=client.V1Container(
                    image=transformer_custom_image,
                    name="kfserving-container"))))
    elif framework == 'pytorch':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            pytorch=V1alpha2PyTorchSpec(storage_uri=storage_uri)))
    elif framework == 'sklearn':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            sklearn=V1alpha2SKLearnSpec(storage_uri=storage_uri)))
    elif framework == 'xgboost':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            xgboost=V1alpha2XGBoostSpec(storage_uri=storage_uri)))
    elif framework == 'onnx':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            onnx=V1alpha2ONNXSpec(storage_uri=storage_uri)))
    elif framework == 'tensorrt':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorrt=V1alpha2TensorRTSpec(storage_uri=storage_uri)))
    else:
        raise ValueError("Error: No matching framework: " + framework)
Example no. 5
def customEndpointSpec(custom_model_spec, service_account):
    env = (
        [
            client.V1EnvVar(name=i["name"], value=i["value"])
            for i in custom_model_spec["env"]
        ]
        if custom_model_spec.get("env", "")
        else None
    )
    ports = (
        [client.V1ContainerPort(container_port=int(custom_model_spec.get("port", "")))]
        if custom_model_spec.get("port", "")
        else None
    )
    containerSpec = client.V1Container(
        name=custom_model_spec.get("name", "custom-container"),
        image=custom_model_spec["image"],
        env=env,
        ports=ports,
        command=custom_model_spec.get("command", None),
        args=custom_model_spec.get("args", None),
        image_pull_policy=custom_model_spec.get("image_pull_policy", None),
        working_dir=custom_model_spec.get("working_dir", None),
    )
    return V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            custom=V1alpha2CustomSpec(container=containerSpec),
            service_account_name=service_account,
        )
    )
Example no. 6
def test_transformer():
    service_name = 'isvc-transformer'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            pytorch=V1alpha2PyTorchSpec(
                storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
                model_class_name="Net",
                resources=V1ResourceRequirements(requests={
                    'cpu': '100m',
                    'memory': '256Mi'
                },
                                                 limits={
                                                     'cpu': '100m',
                                                     'memory': '256Mi'
                                                 }))),
        transformer=V1alpha2TransformerSpec(
            min_replicas=1,
            custom=V1alpha2CustomSpec(container=V1Container(
                image=
                '809251082950.dkr.ecr.us-west-2.amazonaws.com/kfserving/image-transformer:latest',
                name='kfserving-container',
                resources=V1ResourceRequirements(requests={
                    'cpu': '100m',
                    'memory': '256Mi'
                },
                                                 limits={
                                                     'cpu': '100m',
                                                     'memory': '256Mi'
                                                 })))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(
            KFServing.api_instance.get_namespaced_custom_object(
                "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
                "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            print(pod)
        raise e
    res = predict(service_name, './data/transformer.json')
    assert (np.argmax(res["predictions"]) == 3)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
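Example no. 7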
def test_tabular_explainer():
    service_name = 'aix-explainer'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            custom=V1alpha2CustomSpec(
                container=V1Container(
                    name="predictor",
                    image='aipipeline/rf-predictor:0.4.0',
                    command=["python", "-m", "rfserver", "--model_name", "aix-explainer"],
                    resources=V1ResourceRequirements(
                        requests={'cpu': '500m', 'memory': '1Gi'},
                        limits={'cpu': '500m', 'memory': '1Gi'})
                ))),
        explainer=V1alpha2ExplainerSpec(
            min_replicas=1,
            aix=V1alpha2AIXExplainerSpec(
                type='LimeImages',
                resources=V1ResourceRequirements(
                    requests={'cpu': '500m', 'memory': '1Gi'},
                    limits={'cpu': '500m', 'memory': '1Gi'}))))

    isvc = V1alpha2InferenceService(api_version=api_version,
                                    kind=constants.KFSERVING_KIND,
                                    metadata=client.V1ObjectMeta(
                                        name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
                                    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE, timeout_seconds=720)
    except RuntimeError as e:
        logging.info(KFServing.api_instance.get_namespaced_custom_object("serving.knative.dev", "v1",
                     KFSERVING_TEST_NAMESPACE, "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(KFSERVING_TEST_NAMESPACE,
                                                      label_selector='serving.kubeflow.org/inferenceservice={}'.format(service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/mnist_input.json')
    assert(res["predictions"] == [[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]])

    mask = explain_aix(service_name, './data/mnist_input.json')
    percent_in_mask = np.count_nonzero(mask) / np.size(np.array(mask))
    assert(percent_in_mask > 0.6)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example no. 8
    def roll(self, kfservice_id):
        mykfservice = db.session.query(KfService).filter_by(
            id=kfservice_id).first()
        namespace = conf.get('KFSERVING_NAMESPACE')
        crd_info = conf.get('CRD_INFO')['inferenceservice']

        # Build the container from the service definition
        def make_container(service, mykfservice):
            from myapp.utils.py.py_k8s import K8s
            k8s = K8s()  # Not deploying here, so no cluster configuration is needed
            container = k8s.make_container(
                name=mykfservice.name + "-" + service.name,
                command=["sh", "-c", service.command]
                if service.command else None,
                args=None,
                volume_mount=None,
                image_pull_policy=conf.get('IMAGE_PULL_POLICY', 'Always'),
                image=service.images,
                working_dir=service.working_dir
                if service.working_dir else None,
                env=service.env,
                resource_memory=service.resource_memory,
                resource_cpu=service.resource_cpu,
                resource_gpu=service.resource_gpu,
                username=service.created_by.username,
                ports=service.ports)
            return container

        canary_endpoint_spec = V1alpha2EndpointSpec(
            predictor=V1alpha2PredictorSpec(
                min_replicas=mykfservice.canary_service.min_replicas,
                max_replicas=mykfservice.canary_service.max_replicas,
                custom=V1alpha2CustomSpec(container=make_container(
                    mykfservice.canary_service,
                    mykfservice)))) if mykfservice.canary_service else None

        KFServing = KFServingClient()
        KFServing.rollout_canary(mykfservice.name,
                                 canary=canary_endpoint_spec,
                                 percent=mykfservice.canary_traffic_percent,
                                 namespace=namespace,
                                 timeout_seconds=120,
                                 version=crd_info['version'])

        flash(category='warning', message='Canary rollout configured; refresh to see the current traffic split')
        return redirect('/kfservice_modelview/list/')
Example no. 9
def customEndpointSpec(custom_model_spec):
    env = [
        client.V1EnvVar(name=i['name'], value=i['value'])
        for i in custom_model_spec['env']
    ] if custom_model_spec.get('env', '') else None
    ports = [
        client.V1ContainerPort(
            container_port=int(custom_model_spec.get('port', '')))
    ] if custom_model_spec.get('port', '') else None
    containerSpec = client.V1Container(
        name=custom_model_spec.get('name', 'custom-container'),
        image=custom_model_spec['image'],
        env=env,
        ports=ports,
        command=custom_model_spec.get('command', None),
        args=custom_model_spec.get('args', None),
        image_pull_policy=custom_model_spec.get('image_pull_policy', None),
        working_dir=custom_model_spec.get('working_dir', None))
    return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
        custom=V1alpha2CustomSpec(container=containerSpec)))
Example no. 10
    def run(self):
        logger.info("Retrieving kfserving client")
        client = KFServingClient()

        logger.info("Specifying canary")
        canary = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            custom=V1alpha2CustomSpec(container=V1Container(
                name=Serve.SERVICE_NAME,
                image="{}:{}".format(Pipeline.DEPLOY_IMAGE, self.args.tag),
                image_pull_policy="Always",
            ))))

        logger.info("Rolling out canary deployment")
        client.rollout_canary(Serve.SERVICE_NAME,
                              canary=canary,
                              percent=50,
                              namespace=Rollout.NAMESPACE,
                              watch=True)

        logger.info("Promoting canary deployment")
        client.promote(Serve.SERVICE_NAME,
                       namespace=Rollout.NAMESPACE,
                       watch=True)
Example no. 11
def test_kfserving_logger():
    msg_dumper = 'message-dumper'
    default_endpoint_spec = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
        min_replicas=1,
        custom=V1alpha2CustomSpec(container=V1Container(
            name="kfserving-container",
            image=
            'gcr.io/knative-releases/knative.dev/eventing-contrib/cmd/event_display',
        ))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=msg_dumper,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(msg_dumper, namespace=KFSERVING_TEST_NAMESPACE)

    service_name = 'isvc-logger'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            logger=V1alpha2Logger(
                mode="all",
                url="http://message-dumper-predictor-default." +
                KFSERVING_TEST_NAMESPACE),
            sklearn=V1alpha2SKLearnSpec(
                storage_uri='gs://kfserving-samples/models/sklearn/iris',
                resources=V1ResourceRequirements(requests={
                    'cpu': '100m',
                    'memory': '256Mi'
                },
                                                 limits={
                                                     'cpu': '100m',
                                                     'memory': '256Mi'
                                                 }))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    res = predict(service_name, './data/iris_input.json')
    assert (res["predictions"] == [1, 1])
    pods = KFServing.core_api.list_namespaced_pod(
        KFSERVING_TEST_NAMESPACE,
        label_selector='serving.kubeflow.org/inferenceservice={}'.format(
            msg_dumper))
    for pod in pods.items:
        log = KFServing.core_api.read_namespaced_pod_log(
            name=pod.metadata.name,
            namespace=pod.metadata.namespace,
            container="kfserving-container")
        print(log)
        assert ("org.kubeflow.serving.inference.request" in log)
        assert ("org.kubeflow.serving.inference.response" in log)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
    KFServing.delete(msg_dumper, KFSERVING_TEST_NAMESPACE)
Example no. 12
    def deploy(self, kfservice_id):
        mykfservice = db.session.query(KfService).filter_by(
            id=kfservice_id).first()

        namespace = conf.get('KFSERVING_NAMESPACE')
        crd_info = conf.get('CRD_INFO')['inferenceservice']

        # Build the container from the service definition
        def make_container(service, mykfservice):
            from myapp.utils.py.py_k8s import K8s
            k8s = K8s()  # Not deploying here, so no cluster configuration is needed
            container = k8s.make_container(
                name=mykfservice.name + "-" + service.name,
                command=["sh", "-c", service.command]
                if service.command else None,
                args=None,
                volume_mount=None,
                image_pull_policy=conf.get('IMAGE_PULL_POLICY', 'Always'),
                image=service.images,
                working_dir=service.working_dir
                if service.working_dir else None,
                env=service.env,
                resource_memory=service.resource_memory,
                resource_cpu=service.resource_cpu,
                resource_gpu=service.resource_gpu,
                username=service.created_by.username)
            return container

        api_version = crd_info['group'] + '/' + crd_info['version']
        default_endpoint_spec = V1alpha2EndpointSpec(
            predictor=V1alpha2PredictorSpec(
                min_replicas=mykfservice.default_service.min_replicas,
                max_replicas=mykfservice.default_service.max_replicas,
                custom=V1alpha2CustomSpec(container=make_container(
                    mykfservice.default_service,
                    mykfservice)))) if mykfservice.default_service else None

        canary_endpoint_spec = V1alpha2EndpointSpec(
            predictor=V1alpha2PredictorSpec(
                min_replicas=mykfservice.canary_service.min_replicas,
                max_replicas=mykfservice.canary_service.max_replicas,
                custom=V1alpha2CustomSpec(container=make_container(
                    mykfservice.canary_service,
                    mykfservice)))) if mykfservice.canary_service else None

        metadata = kubernetes.client.V1ObjectMeta(
            name=mykfservice.name,
            labels={
                "app": mykfservice.name,
                "rtx-user": mykfservice.created_by.username
            },
            namespace=namespace)

        isvc = V1alpha2InferenceService(
            api_version=api_version,
            kind=crd_info['kind'],
            metadata=metadata,
            spec=V1alpha2InferenceServiceSpec(
                default=default_endpoint_spec,
                canary=canary_endpoint_spec,
                canary_traffic_percent=mykfservice.canary_traffic_percent))

        KFServing = KFServingClient()
        try:
            KFServing.delete(mykfservice.name,
                             namespace=namespace,
                             version=crd_info['version'])
        except Exception as e:
            print(e)

        KFServing.create(isvc,
                         namespace=namespace,
                         version=crd_info['version'])

        flash(category='warning', message='Deployment started; it should finish in about a minute')
        return redirect('/kfservice_modelview/list/')
Example no. 13
from kubernetes import client
from kfserving import KFServingClient
from kfserving import constants
from kfserving import utils
from kfserving import V1alpha2EndpointSpec
from kfserving import V1alpha2PredictorSpec
from kfserving import V1alpha2TensorflowSpec
from kfserving import V1alpha2InferenceServiceSpec
from kfserving import V1alpha2InferenceService
from kubernetes.client import V1ResourceRequirements

namespace = utils.get_default_target_namespace()

api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION

default_endpoint_spec = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
    tensorflow=V1alpha2TensorflowSpec(
        storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
        resources=V1ResourceRequirements(requests={
            'cpu': '100m',
            'memory': '1Gi'
        },
                                         limits={
                                             'cpu': '100m',
                                             'memory': '1Gi'
                                         }))))

isvc = V1alpha2InferenceService(
    api_version=api_version,
    kind=constants.KFSERVING_KIND,
    metadata=client.V1ObjectMeta(name='flower-sample', namespace=namespace),
    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

KFServing = KFServingClient()

KFServing.create(isvc)
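
If desired, readiness can be watched after creation; a minimal sketch, reusing the KFServing client and namespace defined above (the timeout is arbitrary):

# Block until the flower-sample InferenceService reports status, up to 120 seconds.
KFServing.get('flower-sample', namespace=namespace, watch=True, timeout_seconds=120)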
Example no. 14
def main():

    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(custom=V1alpha2CustomSpec(
            container=V1Container(
                name="kfserving-container",
                image=FLAGS.image,
                env=[{
                    "name": "STORAGE_URI",
                    "value": "%s" % FLAGS.storage_uri
                }],
                resources=V1ResourceRequirements(
                    limits={"nvidia.com/gpu": FLAGS.gpus_to_inference}),
                command=["python"],
                args=[
                    "model.py",
                    "--model-name",
                    "%s" % FLAGS.inference_name,
                    "--out_dir",
                    "%s" % FLAGS.model_path,
                    "--classes_file",
                    "%s" % FLAGS.classes_file,
                ]))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=k8s_client.V1ObjectMeta(name=FLAGS.inference_name,
                                         namespace=FLAGS.namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))
    # Create inference service
    KFServing = KFServingClient()
    KFServing.create(isvc)
    time.sleep(2)

    # Check inference service
    KFServing.get(FLAGS.inference_name,
                  namespace=FLAGS.namespace,
                  watch=True,
                  timeout_seconds=180)

    model_status = KFServing.get(FLAGS.inference_name,
                                 namespace=FLAGS.namespace)

    for condition in model_status["status"]["conditions"]:
        if condition['type'] == 'Ready':
            if condition['status'] == 'True':
                print('Model is ready')
                break
            else:
                print(
                    'Model is timed out, please check the inferenceservice events for more details.'
                )
                exit(1)
    try:
        print(
            model_status["status"]["url"] +
            " is the knative domain header. $ISTIO_INGRESS_ENDPOINT are defined in the below commands"
        )
        print("Sample test commands: ")
        print(
            "# Note: If Istio Ingress gateway is not served with LoadBalancer, use $CLUSTER_NODE_IP:31380 as the ISTIO_INGRESS_ENDPOINT"
        )
        print(
            "ISTIO_INGRESS_ENDPOINT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.status.loadBalancer.ingress[0].ip}')"
        )
        # model_status['status']['url'] is like http://flowers-sample.kubeflow.example.com/v1/models/flowers-sample
        url = re.compile(r"https?://")
        host, path = url.sub("", model_status["status"]["url"]).split("/", 1)
        print('curl -X GET -H "Host: ' + host +
              '" http://$ISTIO_INGRESS_ENDPOINT/' + path)
    except Exception:
        print("Model is not ready, check the logs for the Knative URL status.")
        exit(1)