def InferenceService(
    metadata, default_model_spec, canary_model_spec=None, canary_model_traffic=None
):
    return V1alpha2InferenceService(
        api_version=constants.KFSERVING_GROUP + "/" + constants.KFSERVING_VERSION,
        kind=constants.KFSERVING_KIND,
        metadata=metadata,
        spec=V1alpha2InferenceServiceSpec(
            default=default_model_spec,
            canary=canary_model_spec,
            canary_traffic_percent=canary_model_traffic,
        ),
    )
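A minimal usage sketch for this helper (hedged: the service name, namespace, and
storage URI below are illustrative, assuming the kfserving v1alpha2 SDK and the
kubernetes client are installed):

from kubernetes import client
from kfserving import (V1alpha2EndpointSpec, V1alpha2PredictorSpec,
                       V1alpha2TensorflowSpec)

# Hypothetical endpoint spec: a TensorFlow model served from a GCS bucket.
default_spec = V1alpha2EndpointSpec(
    predictor=V1alpha2PredictorSpec(tensorflow=V1alpha2TensorflowSpec(
        storage_uri='gs://kfserving-samples/models/tensorflow/flowers')))

isvc = InferenceService(
    metadata=client.V1ObjectMeta(name='flowers-sample', namespace='default'),
    default_model_spec=default_spec)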
Example 2
def test_batcher():
    service_name = 'isvc-pytorch-batcher'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            batcher=V1alpha2Batcher(
                # Fold concurrent requests into batches of up to 32, flushing
                # a partial batch after at most 5000 ms.
                max_batch_size=32, max_latency=5000, timeout=60),
            min_replicas=1,
            pytorch=V1alpha2PyTorchSpec(
                storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
                model_class_name='Net',
                resources=V1ResourceRequirements(
                    requests={'cpu': '1000m', 'memory': '2Gi'},
                    limits={'cpu': '1000m', 'memory': '2Gi'}))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))
    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(
            KFServing.api_instance.get_namespaced_custom_object(
                "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
                "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            print(pod)
        raise e
    with futures.ThreadPoolExecutor(max_workers=4) as executor:
        future_res = [
            executor.submit(
                lambda: predict(service_name, './data/cifar_input.json'))
            for _ in range(4)
        ]
    results = [f.result()["batchId"] for f in future_res]
    # All four concurrent requests should be folded into the same batch, so
    # every response carries the same batchId.
    assert all(x == results[0] for x in results)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example 3
def test_transformer():
    service_name = 'isvc-transformer'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            pytorch=V1alpha2PyTorchSpec(
                storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
                model_class_name="Net",
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))),
        transformer=V1alpha2TransformerSpec(
            min_replicas=1,
            custom=V1alpha2CustomSpec(container=V1Container(
                image='gcr.io/kubeflow-ci/kfserving/image-transformer:latest',
                name='kfserving-container',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'})))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(
            KFServing.api_instance.get_namespaced_custom_object(
                "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
                "services", service_name + "-predictor-default"))
        raise e
    probs = predict(service_name, './data/transformer.json')
    assert (np.argmax(probs) == 3)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example 4
def test_canary_rollout():
    service_name = 'isvc-canary'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))))

    isvc = V1alpha2InferenceService(
        api_version=constants.KFSERVING_API_VERSION,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)

    # define canary endpoint spec, and then rollout 10% traffic to the canary version
    canary_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(tensorflow=V1alpha2TensorflowSpec(
            storage_uri='gs://kfserving-samples/models/tensorflow/flowers-2',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '256Mi'},
                limits={'cpu': '100m', 'memory': '256Mi'}))))

    KFServing.rollout_canary(service_name,
                             canary=canary_endpoint_spec,
                             percent=10,
                             namespace=KFSERVING_TEST_NAMESPACE,
                             watch=True,
                             timeout_seconds=120)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)

    # Delete the InferenceService
    KFServing.delete(service_name, namespace=KFSERVING_TEST_NAMESPACE)
Example 5
def test_pytorch():
    service_name = 'isvc-pytorch'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            pytorch=V1alpha2PyTorchSpec(
                storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
                model_class_name="Net",
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '2Gi',
                              'nvidia.com/gpu': '1'},
                    limits={'cpu': '100m', 'memory': '2Gi',
                            'nvidia.com/gpu': '1'}))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name,
            namespace=KFSERVING_TEST_NAMESPACE,
            annotations={
                'serving.kubeflow.org/gke-accelerator': 'nvidia-tesla-k80'
            }),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(
            KFServing.api_instance.get_namespaced_custom_object(
                "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
                "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            print(pod)
        raise e
    probs = predict(service_name, './data/cifar_input.json')
    assert (np.argmax(probs) == 3)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example 6
def test_lightgbm_kfserving():
    service_name = "isvc-lightgbm"
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            lightgbm=V1alpha2LightGBMSpec(
                storage_uri="gs://kfserving-examples/models/lightgbm",
                resources=V1ResourceRequirements(
                    requests={
                        "cpu": "100m",
                        "memory": "256Mi"
                    },
                    limits={
                        "cpu": "100m",
                        "memory": "256Mi"
                    },
                ),
            ),
        ))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec),
    )

    KFServing.create(isvc)
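    # The cluster may run either the v1alpha2 or the v1beta1 control plane, so
    # fall back to the v1beta1 API when the v1alpha2 readiness check fails; the
    # prediction call below applies the same fallback.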
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE,
                                  version=constants.KFSERVING_VERSION)
    except RuntimeError:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE,
                                  version=constants.KFSERVING_V1BETA1_VERSION)
    try:
        res = predict(service_name,
                      "./data/iris_input_v3.json",
                      version=constants.KFSERVING_VERSION)
    except KeyError:
        res = predict(service_name,
                      "./data/iris_input_v3.json",
                      version=constants.KFSERVING_V1BETA1_VERSION)
    assert res["predictions"][0][0] > 0.5
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example 7
def test_tensorflow_kfserving():
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))))

    isvc = V1alpha2InferenceService(api_version=api_version,
                                    kind=constants.KFSERVING_KIND,
                                    metadata=client.V1ObjectMeta(
                                        name='isvc-tensorflow-test', namespace='kfserving-ci-e2e-test'),
                                    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready('isvc-tensorflow-test',
                              namespace='kfserving-ci-e2e-test')
Example 8
def test_tabular_explainer():
    service_name = 'aix-explainer'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            custom=V1alpha2CustomSpec(
                container=V1Container(
                    name="predictor",
                    image='aipipeline/rf-predictor:0.4.0',
                    command=["python", "-m", "rfserver", "--model_name", "aix-explainer"],
                    resources=V1ResourceRequirements(
                        requests={'cpu': '500m', 'memory': '1Gi'},
                        limits={'cpu': '500m', 'memory': '1Gi'})
                ))),
        explainer=V1alpha2ExplainerSpec(
            min_replicas=1,
            aix=V1alpha2AIXExplainerSpec(
                type='LimeImages',
                resources=V1ResourceRequirements(
                    requests={'cpu': '500m', 'memory': '1Gi'},
                    limits={'cpu': '500m', 'memory': '1Gi'}))))

    isvc = V1alpha2InferenceService(api_version=api_version,
                                    kind=constants.KFSERVING_KIND,
                                    metadata=client.V1ObjectMeta(
                                        name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
                                    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE, timeout_seconds=720)
    except RuntimeError as e:
        logging.info(KFServing.api_instance.get_namespaced_custom_object("serving.knative.dev", "v1",
                     KFSERVING_TEST_NAMESPACE, "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(KFSERVING_TEST_NAMESPACE,
                                                      label_selector='serving.kubeflow.org/inferenceservice={}'.format(service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/mnist_input.json')
    assert(res["predictions"] == [[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]])

    mask = explain_aix(service_name, './data/mnist_input.json')
    percent_in_mask = np.count_nonzero(mask) / np.size(np.array(mask))
    assert(percent_in_mask > 0.6)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example 9
def test_transformer():
    service_name = 'isvc-transformer'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            pytorch=V1alpha2PyTorchSpec(
                storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
                model_class_name="Net",
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))),
        transformer=V1alpha2TransformerSpec(
            min_replicas=1,
            custom=V1alpha2CustomSpec(
                container=V1Container(
                  image='809251082950.dkr.ecr.us-west-2.amazonaws.com/kfserving/image-transformer:latest',
                  name='kfserving-container',
                  resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'})))))

    isvc = V1alpha2InferenceService(api_version=api_version,
                                    kind=constants.KFSERVING_KIND,
                                    metadata=client.V1ObjectMeta(
                                        name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
                                    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(KFServing.api_instance.get_namespaced_custom_object("serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
                                                                  "services", service_name + "-predictor"))
        pods = KFServing.core_api.list_namespaced_pod(KFSERVING_TEST_NAMESPACE,
                                                      label_selector='serving.kubeflow.org/inferenceservice={}'
                                                      .format(service_name))
        for pod in pods.items:
            print(pod)
        raise e
    res = predict(service_name, './data/transformer.json')
    assert(np.argmax(res["predictions"]) == 3)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example 10
    def run(self):
        parser = argparse.ArgumentParser()
        parser.add_argument('--namespace', required=False, default='kubeflow')
        # pvc://${PVCNAME}/dir
        parser.add_argument('--storage_uri',
                            required=False,
                            default='/mnt/export')
        parser.add_argument('--name',
                            required=False,
                            default='kfserving-sample')
        args = parser.parse_args()
        namespace = args.namespace
        serving_name = args.name

        api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
        default_endpoint_spec = V1alpha2EndpointSpec(
            predictor=V1alpha2PredictorSpec(tensorflow=V1alpha2TensorflowSpec(
                storage_uri=args.storage_uri,
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}))))
        isvc = V1alpha2InferenceService(
            api_version=api_version,
            kind=constants.KFSERVING_KIND,
            metadata=client.V1ObjectMeta(name=serving_name,
                                         namespace=namespace),
            spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

        KFServing = KFServingClient()
        KFServing.create(isvc)
        print('waiting 30 seconds for the InferenceService to be created')
        time.sleep(30)

        KFServing.get(serving_name,
                      namespace=namespace,
                      watch=True,
                      timeout_seconds=300)
Example 11
def test_tabular_explainer():
    service_name = 'isvc-explainer-tabular'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            sklearn=V1alpha2SKLearnSpec(
                storage_uri='gs://seldon-models/sklearn/income/model',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}))),
        explainer=V1alpha2ExplainerSpec(
            min_replicas=1,
            alibi=V1alpha2AlibiExplainerSpec(
                type='AnchorTabular',
                storage_uri='gs://seldon-models/sklearn/income/alibi/0.4.0',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}))))
    
    isvc = V1alpha2InferenceService(api_version=api_version,
                                    kind=constants.KFSERVING_KIND,
                                    metadata=client.V1ObjectMeta(
                                      name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
                                    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE, timeout_seconds=300)
    except RuntimeError as e:
        logging.info(KFServing.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
            "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e
    
    res = predict(service_name, './data/income_input.json')
    assert(res["predictions"] == [0])
    precision = explain(service_name, './data/income_input.json')
    assert(precision > 0.9)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example 12
def deploy_model(namespace: str, trained_model_path: InputPath(str)):
    from kubernetes import client
    from kfserving import KFServingClient
    from kfserving import constants
    from kfserving import V1alpha2EndpointSpec
    from kfserving import V1alpha2PredictorSpec
    from kfserving import V1alpha2TensorflowSpec
    from kfserving import V1alpha2InferenceServiceSpec
    from kfserving import V1alpha2InferenceService
    from kubernetes.client import V1ResourceRequirements

    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    inference_service_name = 'inference112cbk'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(tensorflow=V1alpha2TensorflowSpec(
            storage_uri=trained_model_path,
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '1Gi'},
                limits={'cpu': '100m', 'memory': '1Gi'}))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=inference_service_name,
                                     namespace=namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing = KFServingClient()
    KFServing.create(isvc)
    print('Inference service ' + inference_service_name + " created ...")
    KFServing.get(inference_service_name,
                  namespace=namespace,
                  watch=True,
                  timeout_seconds=120)
    print('Model deployed')
Example 13
def deploy_model(namespace, trained_model_path):
    logging.basicConfig(level=logging.INFO)
    logging.info('Starting deploy model step ..')
    logging.info('Input data ..')
    logging.info('namespace:{}'.format(namespace))
    logging.info('trained_model_path:{}'.format(trained_model_path))

    logging.info('STEP: DEPLOY MODEL (1/2) Generating definition..')
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    now = datetime.datetime.utcnow().strftime("%Y%m%d%H%M%S")
    inference_service_name = 'simple-model'+now
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri=trained_model_path,
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=inference_service_name,
            annotations={
                'sidecar.istio.io/inject': 'false',
                'autoscaling.knative.dev/target': '1'
            },
            namespace=namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    # velascoluis: the Istio sidecar is disabled to work around
    # https://github.com/knative/serving/issues/6829
    # Note: make sure the trained model path starts with file:// or gs://
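    # A hedged sketch (not in the original): fail fast when the model path
    # lacks a scheme the storage initializer can fetch, per the note above.
    if not str(trained_model_path).startswith(('file://', 'gs://')):
        raise ValueError(
            'trained_model_path must start with file:// or gs://, got {}'.format(
                trained_model_path))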

    KFServing = KFServingClient()
    logging.info('STEP: DEPLOY MODEL (2/2) Creating inference service..')
    KFServing.create(isvc)
    logging.info('Inference service ' + inference_service_name + " created ...")
    KFServing.get(inference_service_name, namespace=namespace, watch=True, timeout_seconds=120)
    logging.info('Deploy model step finished')
Example 14
def test_xgboost_kfserving():
    service_name = 'isvc-xgboost'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            xgboost=V1alpha2XGBoostSpec(
                storage_uri='gs://kfserving-samples/models/xgboost/iris',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))))

    isvc = V1alpha2InferenceService(api_version=api_version,
                                    kind=constants.KFSERVING_KIND,
                                    metadata=client.V1ObjectMeta(
                                        name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
                                    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    probs = predict(service_name, './data/iris_input.json')
    assert(probs == [1, 1])
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example 15
    def generate_isvc(self):
        """ generate InferenceService """

        api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
        default_predictor, canary_predictor = None, None

        if self.framework == 'custom':
            default_predictor = self.generate_predictor_spec(
                self.framework, container=self.custom_default_container)
        else:
            default_predictor = self.generate_predictor_spec(
                self.framework, storage_uri=self.default_storage_uri)

        if self.framework != 'custom' and self.canary_storage_uri is not None:
            canary_predictor = self.generate_predictor_spec(
                self.framework, storage_uri=self.canary_storage_uri)
        if self.framework == 'custom' and self.custom_canary_container is not None:
            canary_predictor = self.generate_predictor_spec(
                self.framework, container=self.custom_canary_container)

        if canary_predictor:
            isvc_spec = V1alpha2InferenceServiceSpec(
                default=V1alpha2EndpointSpec(predictor=default_predictor),
                canary=V1alpha2EndpointSpec(predictor=canary_predictor),
                canary_traffic_percent=self.canary_traffic_percent)
        else:
            isvc_spec = V1alpha2InferenceServiceSpec(
                default=V1alpha2EndpointSpec(predictor=default_predictor),
                canary_traffic_percent=self.canary_traffic_percent)

        return V1alpha2InferenceService(
            api_version=api_version,
            kind=constants.KFSERVING_KIND,
            metadata=k8s_client.V1ObjectMeta(
                name=self.isvc_name,
                generate_name=constants.KFSERVING_DEFAULT_NAME,
                namespace=self.namespace),
            spec=isvc_spec)
Example 16
def deploy_model(namespace, model_file_name, gcp_bucket):

    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    now = datetime.datetime.utcnow().strftime("%Y%m%d%H%M%S")
    inference_service_name = 'xgboost-r' + now
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            xgboost=V1alpha2XGBoostSpec(
                #storage_uri='gs://'+gcp_bucket+'/rmodel/'+model_file_name,
                storage_uri='gs://' + gcp_bucket + '/rmodel',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=inference_service_name,
            namespace=namespace,
            annotations={'sidecar.istio.io/inject': 'false'}),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    # @velascoluis: the sidecar.istio.io/inject: "false" annotation is
    # required, otherwise the ingress does not work

    KFServing = KFServingClient()
    KFServing.create(isvc)
    KFServing.get(inference_service_name,
                  namespace=namespace,
                  watch=True,
                  timeout_seconds=120)
Example 17
def test_tensorflow_kfserving():
    service_name = 'isvc-tensorflow'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
                resources=V1ResourceRequirements(
                    requests={'cpu': '1', 'memory': '2Gi'},
                    limits={'cpu': '1', 'memory': '2Gi'}))))

    isvc = V1alpha2InferenceService(api_version=api_version,
                                    kind=constants.KFSERVING_KIND,
                                    metadata=client.V1ObjectMeta(
                                        name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
                                    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    res = predict(service_name, './data/flower_input.json')
    assert(np.argmax(res["predictions"][0].get('scores')) == 0)

    # Delete the InferenceService
    KFServing.delete(service_name, namespace=KFSERVING_TEST_NAMESPACE)
Example 18
def test_kfserving_logger():
    msg_dumper = 'message-dumper'
    default_endpoint_spec = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
        min_replicas=1,
        custom=V1alpha2CustomSpec(container=V1Container(
            name="kfserving-container",
            image='gcr.io/knative-releases/knative.dev/eventing-contrib/cmd/event_display',
        ))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=msg_dumper,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(msg_dumper, namespace=KFSERVING_TEST_NAMESPACE)

    service_name = 'isvc-logger'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            logger=V1alpha2Logger(
                mode="all",
                url="http://message-dumper-predictor-default." +
                KFSERVING_TEST_NAMESPACE),
            sklearn=V1alpha2SKLearnSpec(
                storage_uri='gs://kfserving-samples/models/sklearn/iris',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    res = predict(service_name, './data/iris_input.json')
    assert (res["predictions"] == [1, 1])
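    # The logger forwards each inference request/response pair to the
    # message-dumper service as CloudEvents, so both event types should
    # appear in the dumper's container log.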
    pods = KFServing.core_api.list_namespaced_pod(
        KFSERVING_TEST_NAMESPACE,
        label_selector='serving.kubeflow.org/inferenceservice={}'.format(
            msg_dumper))
    for pod in pods.items:
        log = KFServing.core_api.read_namespaced_pod_log(
            name=pod.metadata.name,
            namespace=pod.metadata.namespace,
            container="kfserving-container")
        print(log)
        assert ("org.kubeflow.serving.inference.request" in log)
        assert ("org.kubeflow.serving.inference.response" in log)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
    KFServing.delete(msg_dumper, KFSERVING_TEST_NAMESPACE)
Example 19
api_version = constants.KFSERVING_GROUP + "/" + constants.KFSERVING_VERSION
default_endpoint_spec = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
    tensorflow=V1alpha2TensorflowSpec(
        storage_uri="s3://anonymous-model-result/result/saved_model",
        resources=V1ResourceRequirements(
            requests={"cpu": "100m", "memory": "1Gi"},
            limits={"cpu": "100m", "memory": "1Gi"}))))

isvc = V1alpha2InferenceService(
    api_version=api_version,
    kind=constants.KFSERVING_KIND,
    metadata=client.V1ObjectMeta(name="mnist-kfserving", namespace=namespace),
    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

KFServing = KFServingClient()
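# Register S3 (MinIO) credentials with the client so the storage initializer
# can pull the model from the in-cluster MinIO service over plain HTTP.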
KFServing.set_credentials(
    storage_type="S3",
    namespace='anonymous',
    credentials_file='credentials',
    s3_profile="default",
    s3_endpoint="minio-service.kubeflow.svc.cluster.local:9000",
    s3_region="us-west-1",
    s3_use_https="0",
    s3_verify_ssl="0")

KFServing.create(isvc)
Example 20
    def deploy(self, kfservice_id):
        mykfservice = db.session.query(KfService).filter_by(
            id=kfservice_id).first()

        namespace = conf.get('KFSERVING_NAMESPACE')
        crd_info = conf.get('CRD_INFO')['inferenceservice']

        # Build the serving container from the service definition
        def make_container(service, mykfservice):
            from myapp.utils.py.py_k8s import K8s
            k8s = K8s()  # Not deploying here, so no cluster configuration is needed
            container = k8s.make_container(
                name=mykfservice.name + "-" + service.name,
                command=["sh", "-c", service.command]
                if service.command else None,
                args=None,
                volume_mount=None,
                image_pull_policy=conf.get('IMAGE_PULL_POLICY', 'Always'),
                image=service.images,
                working_dir=service.working_dir
                if service.working_dir else None,
                env=service.env,
                resource_memory=service.resource_memory,
                resource_cpu=service.resource_cpu,
                resource_gpu=service.resource_gpu,
                username=service.created_by.username)
            return container

        api_version = crd_info['group'] + '/' + crd_info['version']
        default_endpoint_spec = V1alpha2EndpointSpec(
            predictor=V1alpha2PredictorSpec(
                min_replicas=mykfservice.default_service.min_replicas,
                max_replicas=mykfservice.default_service.max_replicas,
                custom=V1alpha2CustomSpec(container=make_container(
                    mykfservice.default_service,
                    mykfservice)))) if mykfservice.default_service else None

        canary_endpoint_spec = V1alpha2EndpointSpec(
            predictor=V1alpha2PredictorSpec(
                min_replicas=mykfservice.canary_service.min_replicas,
                max_replicas=mykfservice.canary_service.max_replicas,
                custom=V1alpha2CustomSpec(container=make_container(
                    mykfservice.canary_service,
                    mykfservice)))) if mykfservice.canary_service else None

        metadata = kubernetes.client.V1ObjectMeta(
            name=mykfservice.name,
            labels={
                "app": mykfservice.name,
                "rtx-user": mykfservice.created_by.username
            },
            namespace=namespace)

        isvc = V1alpha2InferenceService(
            api_version=api_version,
            kind=crd_info['kind'],
            metadata=metadata,
            spec=V1alpha2InferenceServiceSpec(
                default=default_endpoint_spec,
                canary=canary_endpoint_spec,
                canary_traffic_percent=mykfservice.canary_traffic_percent))

        KFServing = KFServingClient()
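        # Delete any existing InferenceService with the same name so the
        # create below acts as a redeploy; the delete error is printed and
        # ignored when the service does not exist yet.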
        try:
            KFServing.delete(mykfservice.name,
                             namespace=namespace,
                             version=crd_info['version'])
        except Exception as e:
            print(e)

        KFServing.create(isvc,
                         namespace=namespace,
                         version=crd_info['version'])

        flash(category='warning', message='Deployment started; it should complete in about one minute')
        return redirect('/kfservice_modelview/list/')
Example 21
def main():

    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(custom=V1alpha2CustomSpec(
            container=V1Container(
                name="kfserving-container",
                image=FLAGS.image,
                env=[{
                    "name": "STORAGE_URI",
                    "value": "%s" % FLAGS.storage_uri
                }],
                resources=V1ResourceRequirements(
                    limits={"nvidia.com/gpu": FLAGS.gpus_to_inference}),
                command=["python"],
                args=[
                    "model.py",
                    "--model-name",
                    "%s" % FLAGS.inference_name,
                    "--out_dir",
                    "%s" % FLAGS.model_path,
                    "--classes_file",
                    "%s" % FLAGS.classes_file,
                ]))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=k8s_client.V1ObjectMeta(name=FLAGS.inference_name,
                                         namespace=FLAGS.namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))
    # Create inference service
    KFServing = KFServingClient()
    KFServing.create(isvc)
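    # Brief pause so the newly created object is visible before watching it.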
    time.sleep(2)

    # Check inference service
    KFServing.get(FLAGS.inference_name,
                  namespace=FLAGS.namespace,
                  watch=True,
                  timeout_seconds=180)

    model_status = KFServing.get(FLAGS.inference_name,
                                 namespace=FLAGS.namespace)

    for condition in model_status["status"]["conditions"]:
        if condition['type'] == 'Ready':
            if condition['status'] == 'True':
                print('Model is ready')
                break
            else:
                print(
                    'Model deployment timed out; check the InferenceService '
                    'events for more details.')
                exit(1)
    try:
        print(
            model_status["status"]["url"] +
            " is the Knative domain header. $ISTIO_INGRESS_ENDPOINT is defined in the commands below."
        )
        print("Sample test commands: ")
        print(
            "# Note: If Istio Ingress gateway is not served with LoadBalancer, use $CLUSTER_NODE_IP:31380 as the ISTIO_INGRESS_ENDPOINT"
        )
        print(
            "ISTIO_INGRESS_ENDPOINT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.status.loadBalancer.ingress[0].ip}')"
        )
        # model_status['status']['url'] is like http://flowers-sample.kubeflow.example.com/v1/models/flowers-sample
        url = re.compile(r"https?://")
        host, path = url.sub("", model_status["status"]["url"]).split("/", 1)
        print('curl -X GET -H "Host: ' + host +
              '" http://$ISTIO_INGRESS_ENDPOINT/' + path)
    except Exception:
        print("Model is not ready; check the logs for the Knative URL status.")
        exit(1)