Example #1
def test_lightgbm_kfserving():
    service_name = "isvc-lightgbm"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        lightgbm=V1beta1LightGBMSpec(
            storage_uri="gs://kfserving-examples/models/lightgbm",
            resources=V1ResourceRequirements(
                requests={
                    "cpu": "100m",
                    "memory": "256Mi"
                },
                limits={
                    "cpu": "100m",
                    "memory": "256Mi"
                },
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)

    res = predict(service_name, "./data/iris_input_v3.json")
    assert res["predictions"][0][0] > 0.5
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example #2
def test_triton():
    service_name = 'isvc-triton'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        triton=V1beta1TritonSpec(
            storage_uri='gs://kfserving-samples/models/tensorrt'
        )
    )

    isvc = V1beta1InferenceService(api_version=constants.KFSERVING_V1BETA1,
                                   kind=constants.KFSERVING_KIND,
                                   metadata=client.V1ObjectMeta(
                                       name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
                                   spec=V1beta1InferenceServiceSpec(predictor=predictor))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(KFServing.api_instance.get_namespaced_custom_object("serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
                                                                  "services", service_name + "-predictor-default"))
        deployments = KFServing.app_api.list_namespaced_deployment(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for deployment in deployments.items:
            print(deployment)
        raise e
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example #3
def test_torchserve_kfserving():
    service_name = "mnist"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri=
            "gs://kfserving-examples/models/torchserve/image_classifier",
            protocol_version="v1",
            resources=V1ResourceRequirements(
                requests={
                    "cpu": "1",
                    "memory": "4Gi"
                },
                limits={
                    "cpu": "1",
                    "memory": "4Gi"
                },
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=api_v1beta1_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)

    res = predict(service_name, "./data/torchserve_input.json")
    assert (res.get("predictions")[0] == 2)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example #4
def test_transformer():
    service_name = 'isvc-transformer'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
            model_class_name="Net",
            resources=V1ResourceRequirements(requests={
                'cpu': '100m',
                'memory': '256Mi'
            },
                                             limits={
                                                 'cpu': '100m',
                                                 'memory': '256Mi'
                                             })),
    )
    transformer = V1beta1TransformerSpec(
        min_replicas=1,
        containers=[
            V1Container(
                image=
                '809251082950.dkr.ecr.us-west-2.amazonaws.com/kfserving/image-transformer:latest',
                name='kfserving-container',
                resources=V1ResourceRequirements(requests={
                    'cpu': '100m',
                    'memory': '256Mi'
                },
                                                 limits={
                                                     'cpu': '100m',
                                                     'memory': '256Mi'
                                                 }))
        ])

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor,
                                         transformer=transformer))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(
            KFServing.api_instance.get_namespaced_custom_object(
                "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
                "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            print(pod)
        raise e
    res = predict(service_name, './data/transformer.json')
    assert (np.argmax(res["predictions"]) == 3)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
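
The image-transformer container above is a standalone pre/post-processing service that KFServing routes requests through before they reach the predictor. As a minimal sketch of what such an image might contain, assuming the kfserving 0.x SDK (kfserving.KFModel and kfserving.KFServer are SDK classes; the ImageTransformer name and the pixel scaling in preprocess are illustrative, not the upstream implementation):

import kfserving
from typing import Dict

class ImageTransformer(kfserving.KFModel):
    def __init__(self, name: str, predictor_host: str):
        super().__init__(name)
        self.predictor_host = predictor_host

    def preprocess(self, inputs: Dict) -> Dict:
        # Hypothetical preprocessing: scale raw pixel values to [0, 1]
        # before the request is forwarded to the predictor.
        instances = [[pixel / 255.0 for pixel in instance]
                     for instance in inputs['instances']]
        return {'instances': instances}

    def postprocess(self, inputs: Dict) -> Dict:
        # Pass predictor output through unchanged.
        return inputs

if __name__ == "__main__":
    transformer = ImageTransformer("cifar10", predictor_host="predictor-host")
    kfserving.KFServer().start(models=[transformer])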
Example #5
def test_pmml_kfserving():
    service_name = 'isvc-pmml'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pmml=V1beta1PMMLSpec(
            storage_uri='gs://kfserving-examples/models/pmml',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '256Mi'},
                limits={'cpu': '100m', 'memory': '256Mi'},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    res = predict(service_name, './data/pmml_input.json')
    assert (res["predictions"] == [{
        'Species': 'setosa',
        'Probability_setosa': 1.0,
        'Probability_versicolor': 0.0,
        'Probability_virginica': 0.0,
        'Node_Id': '2'
    }])
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example #6
def test_xgboost_v2_kfserving():
    service_name = "isvc-xgboost-v2"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        xgboost=V1beta1XGBoostSpec(
            storage_uri="gs://kfserving-samples/models/xgboost/iris",
            protocol_version="v2",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KFSERVING_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)

    res = predict(service_name, "./data/iris_input_v2.json", protocol_version="v2")
    assert res["outputs"][0]["data"] == [1.0, 1.0]

    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
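
The assertion above reads res["outputs"] because protocol_version="v2" switches the service to the KFServing V2 inference protocol rather than the v1 "instances" format. A minimal sketch of a request body like iris_input_v2.json (the "inputs"/"name"/"shape"/"datatype"/"data" envelope is fixed by the protocol; the tensor name and feature values here are illustrative):

import json

# Illustrative V2-protocol request; the two rows correspond to the two
# values asserted from res["outputs"][0]["data"].
v2_request = {
    "inputs": [{
        "name": "input-0",
        "shape": [2, 4],
        "datatype": "FP32",
        "data": [[6.8, 2.8, 4.8, 1.4], [6.0, 3.4, 4.5, 1.6]],
    }]
}
print(json.dumps(v2_request))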
Example #7
def test_tensorflow_kfserving():
    service_name = 'isvc-tensorflow'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        tensorflow=V1beta1TFServingSpec(
            storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
            resources=V1ResourceRequirements(requests={
                'cpu': '1',
                'memory': '2Gi'
            },
                                             limits={
                                                 'cpu': '1',
                                                 'memory': '2Gi'
                                             })))

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    res = predict(service_name, './data/flower_input.json')
    assert (np.argmax(res["predictions"][0].get('scores')) == 0)

    # Delete the InferenceService
    KFServing.delete(service_name, namespace=KFSERVING_TEST_NAMESPACE)
Example #8
def test_tabular_explainer():
    service_name = 'isvc-explainer-tabular'
    predictor = V1beta1PredictorSpec(sklearn=V1beta1SKLearnSpec(
        storage_uri='gs://seldon-models/sklearn/income/model',
        resources=V1ResourceRequirements(requests={
            'cpu': '100m',
            'memory': '1Gi'
        },
                                         limits={
                                             'cpu': '100m',
                                             'memory': '1Gi'
                                         })))
    explainer = V1beta1ExplainerSpec(
        min_replicas=1,
        alibi=V1beta1AlibiExplainerSpec(
            name='kfserving-container',
            type='AnchorTabular',
            storage_uri=
            'gs://seldon-models/sklearn/income/explainer-py37-0.6.0',
            resources=V1ResourceRequirements(requests={
                'cpu': '100m',
                'memory': '1Gi'
            },
                                             limits={
                                                 'cpu': '100m',
                                                 'memory': '1Gi'
                                             })))

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor,
                                         explainer=explainer))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE,
                                  timeout_seconds=720)
    except RuntimeError as e:
        logging.info(
            KFServing.api_instance.get_namespaced_custom_object(
                "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
                "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/income_input.json')
    assert (res["predictions"] == [0])
    precision = explain(service_name, './data/income_input.json')
    assert (precision > 0.9)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example #9
def create_inference_service(metadata, predictor_spec):
    """
    Build and return V1beta1InferenceService object.
    """
    return V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=metadata,
        spec=V1beta1InferenceServiceSpec(predictor=predictor_spec),
    )
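
A minimal usage sketch for this helper, reusing names from the surrounding examples (the service name and sklearn spec below are illustrative):

metadata = client.V1ObjectMeta(name="isvc-sklearn",
                               namespace=KFSERVING_TEST_NAMESPACE)
predictor_spec = V1beta1PredictorSpec(
    min_replicas=1,
    sklearn=V1beta1SKLearnSpec(
        storage_uri="gs://kfserving-samples/models/sklearn/iris"))

isvc = create_inference_service(metadata, predictor_spec)
KFServing.create(isvc)
KFServing.wait_isvc_ready("isvc-sklearn", namespace=KFSERVING_TEST_NAMESPACE)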
Example #10
def test_tabular_explainer():
    service_name = 'art-explainer'
    isvc = V1beta1InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(
            predictor=V1beta1PredictorSpec(containers=[
                V1Container(
                    name="predictor",
                    # Update the image below to the aipipeline org.
                    image='aipipeline/art-server:mnist-predictor',
                    command=[
                        "python", "-m", "sklearnserver", "--model_name",
                        "art-explainer", "--model_dir",
                        "file://sklearnserver/sklearnserver/example_model"
                    ])
            ]),
            explainer=V1beta1ExplainerSpec(min_replicas=1,
                                           art=V1beta1ARTExplainerSpec(
                                               type='SquareAttack',
                                               name='explainer',
                                               config={"nb_classes": "10"}))))

    KFServing.create(isvc, version=kfserving_version)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE,
                                  timeout_seconds=720)
    except RuntimeError as e:
        logging.info(
            KFServing.api_instance.get_namespaced_custom_object(
                "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
                "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/mnist_input_bw_flat.json')
    assert (res["predictions"] == [3])

    adv_prediction = explain_art(service_name, './data/mnist_input_bw.json')
    assert (adv_prediction != 3)
    KFServing.delete(service_name,
                     KFSERVING_TEST_NAMESPACE,
                     version=kfserving_version)
Example #11
def test_batcher():
    service_name = 'isvc-pytorch-batcher'
    predictor = V1beta1PredictorSpec(
        batcher=V1beta1Batcher(
            max_batch_size=32,
            max_latency=5000,
        ),
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
            model_class_name='Net',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '2Gi'},
                limits={'cpu': '100m', 'memory': '2Gi'}
            )
        )
    )

    isvc = V1beta1InferenceService(api_version=constants.KFSERVING_V1BETA1,
                                   kind=constants.KFSERVING_KIND,
                                   metadata=client.V1ObjectMeta(
                                       name=service_name,
                                       namespace=KFSERVING_TEST_NAMESPACE
                                   ),
                                   spec=V1beta1InferenceServiceSpec(predictor=predictor))
    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(KFServing.api_instance.get_namespaced_custom_object("serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
                                                                  "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(KFSERVING_TEST_NAMESPACE,
                                                      label_selector='serving.kubeflow.org/inferenceservice={}'.
                                                      format(service_name))
        for pod in pods.items:
            print(pod)
        raise e
    # Fire four concurrent requests; the batcher sitting in front of the
    # predictor should collapse them into a single batch.
    with futures.ThreadPoolExecutor(max_workers=4) as executor:
        future_res = [
            executor.submit(lambda: predict(service_name, './data/cifar_input.json')) for _ in range(4)
        ]
    results = [
        f.result()["batchId"] for f in future_res
    ]
    # All responses should carry the same batchId if batching kicked in.
    assert (all(x == results[0] for x in results))
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example #12
def test_paddle():
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        paddle=V1beta1PaddleServerSpec(
            storage_uri=
            "https://zhouti-mcp-edge.cdn.bcebos.com/resnet50.tar.gz",
            resources=V1ResourceRequirements(
                requests={
                    "cpu": "200m",
                    "memory": "4Gi"
                },
                limits={
                    "cpu": "200m",
                    "memory": "4Gi"
                },
            )))

    service_name = 'isvc-paddle'
    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=V1ObjectMeta(name=service_name,
                              namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE,
                                  timeout_seconds=720)
    except RuntimeError as e:
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/jay.json')
    assert np.argmax(res["predictions"][0]) == 17

    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example #13
def test_raw_deployment_kfserving():
    service_name = "raw-sklearn"
    annotations = dict()
    annotations['serving.kubeflow.org/raw'] = 'true'
    annotations['kubernetes.io/ingress.class'] = 'istio'

    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            storage_uri="gs://kfserving-samples/models/sklearn/iris",
            resources=V1ResourceRequirements(
                requests={
                    "cpu": "100m",
                    "memory": "256Mi"
                },
                limits={
                    "cpu": "100m",
                    "memory": "256Mi"
                },
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name,
            namespace=KFSERVING_TEST_NAMESPACE,
            annotations=annotations,
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    res = predict(service_name, "./data/iris_input.json")
    assert res["predictions"] == [1, 1]
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
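
For comparison with the v2 examples, the v1 protocol simply wraps feature rows in an "instances" list. A sketch of a body like iris_input.json, with illustrative values (two rows, matching the two predictions asserted above):

import json

# Illustrative v1-protocol request; only the "instances" envelope is
# fixed by the protocol.
v1_request = {"instances": [[6.8, 2.8, 4.8, 1.4], [6.0, 3.4, 4.5, 1.6]]}
print(json.dumps(v1_request))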
Example #14
def test_sklearn_v2_kfserving():
    service_name = "isvc-sklearn-v2"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            storage_uri="gs://seldon-models/sklearn/iris",
            protocol_version="v2",
            resources=V1ResourceRequirements(
                requests={
                    "cpu": "100m",
                    "memory": "256Mi"
                },
                limits={
                    "cpu": "100m",
                    "memory": "256Mi"
                },
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=api_v1beta1_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    KFServing.create(isvc, version=constants.KFSERVING_V1BETA1_VERSION)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)

    res = predict(service_name,
                  "./data/iris_input_v2.json",
                  protocol_version="v2")
    assert res["outputs"][0]["data"] == [1, 2]

    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example #15
def test_kfserving_logger():
    msg_dumper = 'message-dumper'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        containers=[V1Container(name="kfserving-container",
                                image='gcr.io/knative-releases/knative.dev/eventing-contrib/cmd/event_display')]
    )

    isvc = V1beta1InferenceService(api_version=constants.KFSERVING_V1BETA1,
                                   kind=constants.KFSERVING_KIND,
                                   metadata=client.V1ObjectMeta(
                                       name=msg_dumper, namespace=KFSERVING_TEST_NAMESPACE),
                                   spec=V1beta1InferenceServiceSpec(predictor=predictor))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(msg_dumper, namespace=KFSERVING_TEST_NAMESPACE)

    service_name = 'isvc-logger'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        logger=V1beta1LoggerSpec(
            mode="all",
            url="http://message-dumper."+KFSERVING_TEST_NAMESPACE+".svc.cluster.local"
        ),
        sklearn=V1beta1SKLearnSpec(
            storage_uri='gs://kfserving-samples/models/sklearn/iris',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '256Mi'},
                limits={'cpu': '100m', 'memory': '256Mi'}
            )
        )
    )

    isvc = V1beta1InferenceService(api_version=constants.KFSERVING_V1BETA1,
                                   kind=constants.KFSERVING_KIND,
                                   metadata=client.V1ObjectMeta(
                                       name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
                                   spec=V1beta1InferenceServiceSpec(predictor=predictor))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError:
        pods = KFServing.core_api.list_namespaced_pod(KFSERVING_TEST_NAMESPACE,
                                                      label_selector='serving.kubeflow.org/inferenceservice={}'.
                                                      format(service_name))
        for pod in pods.items:
            print(pod)
        raise

    res = predict(service_name, './data/iris_input.json')
    assert(res["predictions"] == [1, 1])
    pods = KFServing.core_api.list_namespaced_pod(KFSERVING_TEST_NAMESPACE,
                                                  label_selector='serving.kubeflow.org/inferenceservice={}'.
                                                  format(msg_dumper))
    time.sleep(5)
    log = ''
    for pod in pods.items:
        log += KFServing.core_api.read_namespaced_pod_log(name=pod.metadata.name,
                                                          namespace=pod.metadata.namespace,
                                                          container="kfserving-container")
        print(log)
    assert("org.kubeflow.serving.inference.request" in log)
    assert("org.kubeflow.serving.inference.response" in log)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
    KFServing.delete(msg_dumper, KFSERVING_TEST_NAMESPACE)
Example #16
def test_tabular_explainer():
    service_name = 'aix-explainer'
    predictor = V1beta1PredictorSpec(containers=[
        V1Container(name="predictor",
                    image='aipipeline/rf-predictor:0.4.0',
                    command=[
                        "python", "-m", "rfserver", "--model_name",
                        "aix-explainer"
                    ],
                    resources=V1ResourceRequirements(requests={
                        'cpu': '500m',
                        'memory': '1Gi'
                    },
                                                     limits={
                                                         'cpu': '500m',
                                                         'memory': '1Gi'
                                                     }))
    ])
    explainer = V1beta1ExplainerSpec(min_replicas=1,
                                     aix=V1beta1AIXExplainerSpec(
                                         name='explainer',
                                         type='LimeImages',
                                         resources=V1ResourceRequirements(
                                             requests={
                                                 'cpu': '500m',
                                                 'memory': '1Gi'
                                             },
                                             limits={
                                                 'cpu': '500m',
                                                 'memory': '1Gi'
                                             })))

    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor,
                                         explainer=explainer))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE,
                                  timeout_seconds=720)
    except RuntimeError as e:
        logging.info(
            KFServing.api_instance.get_namespaced_custom_object(
                "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
                "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/mnist_input.json')
    assert (res["predictions"] == [[
        0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    ]])

    mask = explain_aix(service_name, './data/mnist_input.json')
    percent_in_mask = np.count_nonzero(mask) / np.size(np.array(mask))
    assert (percent_in_mask > 0.6)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example #17
def test_transformer():
    service_name = 'raw'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri=
            'gs://kfserving-examples/models/torchserve/image_classifier',
            resources=V1ResourceRequirements(requests={
                'cpu': '100m',
                'memory': '2Gi'
            },
                                             limits={
                                                 'cpu': '100m',
                                                 'memory': '2Gi'
                                             })),
    )
    transformer = V1beta1TransformerSpec(
        min_replicas=1,
        containers=[
            V1Container(
                image='kfserving/torchserve-image-transformer:latest',
                name='kfserving-container',
                resources=V1ResourceRequirements(requests={
                    'cpu': '100m',
                    'memory': '2Gi'
                },
                                                 limits={
                                                     'cpu': '100m',
                                                     'memory': '2Gi'
                                                 }),
                env=[
                    V1EnvVar(
                        name="STORAGE_URI",
                        value=
                        "gs://kfserving-examples/models/torchserve/image_classifier"
                    )
                ])
        ])

    annotations = dict()
    annotations['serving.kubeflow.org/raw'] = 'true'
    annotations['kubernetes.io/ingress.class'] = 'istio'
    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE,
                                     annotations=annotations),
        spec=V1beta1InferenceServiceSpec(predictor=predictor,
                                         transformer=transformer))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name,
                              namespace=KFSERVING_TEST_NAMESPACE)

    time.sleep(30)

    isvc = KFServing.get(
        service_name,
        namespace=KFSERVING_TEST_NAMESPACE,
    )

    cluster_ip = get_cluster_ip()
    logging.info("clusterip = %s", cluster_ip)

    host = isvc["status"]["url"]
    host = host[host.rfind('/') + 1:]
    url = 'http://{}/v1/models/mnist:predict'.format(cluster_ip)
    logging.info("url = %s ", url)
    headers = {"Host": host}
    data_str = '{"instances": [{"data": "iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAAAAABXZoBIAAAAw0lE\
    QVR4nGNgGFggVVj4/y8Q2GOR83n+58/fP0DwcSqmpNN7oOTJw6f+/H2pjUU2JCSEk0EWqN0cl828e/FIxvz9/9cCh1\
        zS5z9/G9mwyzl/+PNnKQ45nyNAr9ThMHQ/UG4tDofuB4bQIhz6fIBenMWJQ+7Vn7+zeLCbKXv6z59NOPQVgsIcW\
            4QA9YFi6wNQLrKwsBebW/68DJ388Nun5XFocrqvIFH59+XhBAxThTfeB0r+vP/QHbuDCgr2JmOXoSsAAKK7b\
                U3vISS4AAAAAElFTkSuQmCC", "target": 0}]}'

    res = requests.post(url, data_str, headers=headers)
    logging.info("res.text = %s", res.text)
    preds = json.loads(res.content.decode("utf-8"))
    assert (preds["predictions"] == [2])

    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example #18
def test_mms_sklearn_kfserving(protocol_version: str, storage_uris: List[str]):
    # Define an inference service
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            protocol_version=protocol_version,
            resources=client.V1ResourceRequirements(
                requests={
                    "cpu": "100m",
                    "memory": "256Mi"
                },
                limits={
                    "cpu": "100m",
                    "memory": "256Mi"
                },
            ),
        ),
    )

    service_name = f"isvc-sklearn-mms-{protocol_version}"
    isvc = V1beta1InferenceService(
        api_version=constants.KFSERVING_V1BETA1,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    # Create an instance of inference service with isvc
    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)

    cluster_ip = get_cluster_ip()

    model_names = [
        f"model1-sklearn-{protocol_version}",
        f"model2-sklearn-{protocol_version}",
    ]

    for model_name, storage_uri in zip(model_names, storage_uris):
        model_spec = V1alpha1ModelSpec(
            storage_uri=storage_uri,
            memory="128Mi",
            framework="sklearn",
        )

        model = V1alpha1TrainedModel(
            api_version=constants.KFSERVING_V1ALPHA1,
            kind=constants.KFSERVING_KIND_TRAINEDMODEL,
            metadata=client.V1ObjectMeta(name=model_name,
                                         namespace=KFSERVING_TEST_NAMESPACE),
            spec=V1alpha1TrainedModelSpec(inference_service=service_name,
                                          model=model_spec),
        )

        # Create instances of trained models using model1 and model2
        KFServing.create_trained_model(model, KFSERVING_TEST_NAMESPACE)

        KFServing.wait_model_ready(
            service_name,
            model_name,
            isvc_namespace=KFSERVING_TEST_NAMESPACE,
            isvc_version=constants.KFSERVING_V1BETA1_VERSION,
            protocol_version=protocol_version,
            cluster_ip=cluster_ip,
        )

    input_json = "./data/iris_input.json"
    if protocol_version == "v2":
        input_json = "./data/iris_input_v2.json"

    responses = [
        predict(
            service_name,
            input_json,
            model_name=model_name,
            protocol_version=protocol_version,
        ) for model_name in model_names
    ]

    if protocol_version == "v1":
        assert responses[0]["predictions"] == [1, 1]
        assert responses[1]["predictions"] == [1, 1]
    elif protocol_version == "v2":
        assert responses[0]["outputs"][0]["data"] == [1, 2]
        assert responses[1]["outputs"][0]["data"] == [1, 2]

    # Clean up inference service and trained models
    for model_name in model_names:
        KFServing.delete_trained_model(model_name, KFSERVING_TEST_NAMESPACE)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
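
Unlike the other tests, this one takes protocol_version and storage_uris as arguments, so it is presumably driven by a pytest parametrization along these lines (a sketch; the storage URIs are placeholders, not the ones used upstream):

import pytest
from typing import List

@pytest.mark.parametrize("protocol_version,storage_uris", [
    ("v1", ["gs://<bucket>/sklearn/model1", "gs://<bucket>/sklearn/model2"]),
    ("v2", ["gs://<bucket>/sklearn/model1", "gs://<bucket>/sklearn/model2"]),
])
def test_mms_sklearn_kfserving(protocol_version: str, storage_uris: List[str]):
    ...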