Example 1
def test_canary_rollout():
    service_name = 'isvc-canary'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))))

    isvc = V1alpha2InferenceService(api_version=constants.KFSERVING_API_VERSION,
                                    kind=constants.KFSERVING_KIND,
                                    metadata=client.V1ObjectMeta(
                                        name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
                                    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)

    # define canary endpoint spec, and then rollout 10% traffic to the canary version
    canary_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers-2',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))))

    KFServing.rollout_canary(service_name, canary=canary_endpoint_spec, percent=10,
                             namespace=KFSERVING_TEST_NAMESPACE, watch=True, timeout_seconds=120)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)

    # Delete the InferenceService
    KFServing.delete(service_name, namespace=KFSERVING_TEST_NAMESPACE)
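
These test snippets rely on module-level setup that is not shown. A minimal sketch of what they appear to assume follows; the import layout and the namespace value are assumptions, not taken from the original test suite.

# Sketch of the shared setup the test snippets appear to assume (an assumption,
# not the original harness): SDK imports, the API version string, a test
# namespace, and a single KFServing client reused across tests.
from kubernetes import client
from kubernetes.client import V1Container, V1ResourceRequirements
from kfserving import (KFServingClient, constants, V1alpha2EndpointSpec,
                       V1alpha2PredictorSpec, V1alpha2TensorflowSpec,
                       V1alpha2InferenceServiceSpec, V1alpha2InferenceService)

api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
KFSERVING_TEST_NAMESPACE = 'kfserving-ci-e2e-test'  # assumed namespace name
KFServing = KFServingClient()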
Example 2
 def run(self):
     parser = argparse.ArgumentParser()
     parser.add_argument('--namespace', required=False, default='kubeflow')
     # pvc://${PVCNAME}/dir
     parser.add_argument('--storage_uri', required=False, default='/mnt/export')
     parser.add_argument('--name', required=False, default='kfserving-sample')        
     args = parser.parse_args()
     namespace = args.namespace
     serving_name = args.name
     
     api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
     default_endpoint_spec = V1alpha2EndpointSpec(
                               predictor=V1alpha2PredictorSpec(
                                 tensorflow=V1alpha2TensorflowSpec(
                                   storage_uri=args.storage_uri,
                                   resources=V1ResourceRequirements(
                                       requests={'cpu':'100m','memory':'1Gi'},
                                       limits={'cpu':'100m', 'memory':'1Gi'}))))
     isvc = V1alpha2InferenceService(api_version=api_version,
                               kind=constants.KFSERVING_KIND,
                               metadata=client.V1ObjectMeta(
                                   name=serving_name, namespace=namespace),
                               spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))        
     
     KFServing = KFServingClient()
     KFServing.create(isvc)
     print('Waiting 5 seconds for the InferenceService to be created')
     time.sleep(5)
     
     KFServing.get(serving_name, namespace=namespace, watch=True, timeout_seconds=300)
Example 3
def test_tensorflow_kfserving():
    service_name = 'isvc-tensorflow'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
                resources=V1ResourceRequirements(requests={
                    'cpu': '1',
                    'memory': '2Gi'
                },
                                                 limits={
                                                     'cpu': '1',
                                                     'memory': '2Gi'
                                                 }))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    probs = predict(service_name, './data/flower_input.json')
    assert (np.argmax(probs[0].get('scores')) == 0)

    # Delete the InferenceService
    KFServing.delete(service_name, namespace=KFSERVING_TEST_NAMESPACE)
Example 4
def test_xgboost_kfserving():
    service_name = "isvc-xgboost"
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            xgboost=V1alpha2XGBoostSpec(
                storage_uri="gs://kfserving-samples/models/xgboost/iris",
                resources=V1ResourceRequirements(
                    requests={
                        "cpu": "100m",
                        "memory": "256Mi"
                    },
                    limits={
                        "cpu": "100m",
                        "memory": "256Mi"
                    },
                ),
            ),
        ))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec),
    )

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    res = predict(service_name, "./data/iris_input.json")
    assert res["predictions"] == [1, 1]
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example 5
def test_pmml_kfserving():
    service_name = 'isvc-pmml'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            pmml=V1alpha2PMMLSpec(
                storage_uri='gs://kfserving-examples/models/pmml',
                resources=V1ResourceRequirements(requests={
                    'cpu': '100m',
                    'memory': '256Mi'
                },
                                                 limits={
                                                     'cpu': '100m',
                                                     'memory': '256Mi'
                                                 }))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    res = predict(service_name, './data/pmml_input.json')
    assert (res["predictions"] == [[1.0, 0.0, 0.0, "2"]])
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example 6
def test_tensorflow_kfserving():
    service_name = 'isvc-tensorflow'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
                resources=V1ResourceRequirements(requests={
                    'cpu': '100m',
                    'memory': '256Mi'
                },
                                                 limits={
                                                     'cpu': '100m',
                                                     'memory': '256Mi'
                                                 }))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
Example 7
def create_inference_service(namespace: str, name: str, storage_url: str,
                             runtime_version: str, service_account_name: str):
    api_version = os.path.join(constants.KFSERVING_GROUP,
                               constants.KFSERVING_VERSION)

    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            service_account_name=service_account_name,
            triton=V1alpha2TritonSpec(runtime_version=runtime_version,
                                      storage_uri=storage_url,
                                      resources=V1ResourceRequirements(
                                          requests={
                                               'cpu': '100m',
                                               'memory': '1Gi'
                                          },
                                          limits={
                                              'cpu': '100m',
                                              'memory': '1Gi'
                                          }))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=name, namespace=namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec),
    )

    KFServing = KFServingClient()
    KFServing.create(isvc)
    KFServing.get(name, namespace=namespace, watch=True, timeout_seconds=300)
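
A call to the helper above might look like the following; every argument value here is illustrative, not taken from the original source.

# Hypothetical invocation of create_inference_service; all values below are
# placeholders chosen for illustration.
create_inference_service(
    namespace='kubeflow',
    name='triton-sample',
    storage_url='gs://my-bucket/models/triton',  # hypothetical model location
    runtime_version='20.08-py3',                 # example Triton image tag
    service_account_name='sa-with-storage-access')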
Example 8
def test_transformer():
    service_name = 'isvc-transformer'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            pytorch=V1alpha2PyTorchSpec(
                storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
                model_class_name="Net",
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))),
        transformer=V1alpha2TransformerSpec(
            min_replicas=1,
            custom=V1alpha2CustomSpec(
                container=V1Container(
                  image='gcr.io/kubeflow-ci/kfserving/image-transformer:latest',
                  name='kfserving-container',
                  resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'})))))

    isvc = V1alpha2InferenceService(api_version=api_version,
                                    kind=constants.KFSERVING_KIND,
                                    metadata=client.V1ObjectMeta(
                                        name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
                                    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    wait_for_isvc_ready(service_name)
    probs = predict(service_name, './data/transformer.json')
    assert(np.argmax(probs) == 3)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example 9
def test_triton():
    service_name = 'isvc-triton'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            triton=V1alpha2TritonSpec(
                storage_uri='gs://kfserving-samples/models/tensorrt')))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(
            KFServing.api_instance.get_namespaced_custom_object(
                "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
                "services", service_name + "-predictor-default"))
        deployments = KFServing.app_api.list_namespaced_deployment(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for deployment in deployments.items:
            print(deployment)
        raise e
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example 10
def test_pytorch():
    service_name = 'isvc-pytorch'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            parallelism=1,
            pytorch=V1alpha2PyTorchSpec(
                storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
                model_class_name="Net",
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '2Gi', 'nvidia.com/gpu': '1'},
                    limits={'cpu': '100m', 'memory': '2Gi', 'nvidia.com/gpu': '1'}))))

    isvc = V1alpha2InferenceService(api_version=api_version,
                                    kind=constants.KFSERVING_KIND,
                                    metadata=client.V1ObjectMeta(
                                        name=service_name, namespace=KFSERVING_TEST_NAMESPACE,
                                        annotations={'serving.kubeflow.org/gke-accelerator': 'nvidia-tesla-k80'}),
                                    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(KFServing.api_instance.get_namespaced_custom_object("serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
                                                                  "services", service_name + "-predictor"))
        pods = KFServing.core_api.list_namespaced_pod(KFSERVING_TEST_NAMESPACE,
                                                      label_selector='serving.kubeflow.org/inferenceservice={}'.
                                                      format(service_name))
        for pod in pods.items:
            print(pod)
        raise e
    res = predict(service_name, './data/cifar_input.json')
    assert(np.argmax(res["predictions"]) == 3)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example 11
def customEndpointSpec(custom_model_spec, service_account):
    env = (
        [
            client.V1EnvVar(name=i["name"], value=i["value"])
            for i in custom_model_spec["env"]
        ]
        if custom_model_spec.get("env", "")
        else None
    )
    ports = (
        [client.V1ContainerPort(container_port=int(custom_model_spec.get("port", "")))]
        if custom_model_spec.get("port", "")
        else None
    )
    containerSpec = client.V1Container(
        name=custom_model_spec.get("name", "custom-container"),
        image=custom_model_spec["image"],
        env=env,
        ports=ports,
        command=custom_model_spec.get("command", None),
        args=custom_model_spec.get("args", None),
        image_pull_policy=custom_model_spec.get("image_pull_policy", None),
        working_dir=custom_model_spec.get("working_dir", None),
    )
    return V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            custom=V1alpha2CustomSpec(container=containerSpec),
            service_account_name=service_account,
        )
    )
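
A sketch of how customEndpointSpec above could be driven; the dictionary keys mirror the fields the function reads, and every value is hypothetical.

# Hypothetical custom_model_spec for the helper above; only keys the function
# actually reads are set, and every value is illustrative.
sample_spec = {
    'name': 'my-custom-predictor',
    'image': 'example.com/my-model:latest',  # hypothetical image
    'port': '8080',
    'env': [{'name': 'MODEL_PATH', 'value': '/models/my-model'}],
    'args': ['--workers', '1'],
}
endpoint_spec = customEndpointSpec(sample_spec, service_account='default')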
Example 12
def test_sklearn_kfserving():
    service_name = 'isvc-xgboost'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            xgboost=V1alpha2XGBoostSpec(
                storage_uri='gs://kfserving-samples/models/xgboost/iris',
                resources=V1ResourceRequirements(requests={
                    'cpu': '100m',
                    'memory': '256Mi'
                },
                                                 limits={
                                                     'cpu': '100m',
                                                     'memory': '256Mi'
                                                 }))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    probs = predict(service_name, './data/iris_input.json')
    assert (probs == [1, 1])
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example 13
def customEndpointSpec(custom_model_spec, service_account, min_replicas,
                       max_replicas):
    env = ([
        client.V1EnvVar(name=i["name"], value=i["value"])
        for i in custom_model_spec["env"]
    ] if custom_model_spec.get("env", "") else None)
    ports = ([
        client.V1ContainerPort(container_port=int(
            custom_model_spec.get("port", "")),
                               protocol="TCP")
    ] if custom_model_spec.get("port", "") else None)
    resources = (client.V1ResourceRequirements(
        requests=(
            custom_model_spec["resources"]["requests"] if
            custom_model_spec.get('resources', {}).get('requests') else None),
        limits=(
            custom_model_spec["resources"]["limits"] if custom_model_spec.get(
                'resources', {}).get('limits') else None),
    ) if custom_model_spec.get("resources", {}) else None)
    containerSpec = client.V1Container(
        name=custom_model_spec.get("name", "custom-container"),
        image=custom_model_spec["image"],
        env=env,
        ports=ports,
        command=custom_model_spec.get("command", None),
        args=custom_model_spec.get("args", None),
        image_pull_policy=custom_model_spec.get("image_pull_policy", None),
        working_dir=custom_model_spec.get("working_dir", None),
        resources=resources)
    return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
        custom=V1alpha2CustomSpec(container=containerSpec),
        service_account_name=service_account,
        min_replicas=(min_replicas if min_replicas >= 0 else None),
        max_replicas=(max_replicas if max_replicas > 0
                      and max_replicas >= min_replicas else None)))
Example 14
def test_tabular_explainer():
    service_name = 'isvc-explainer-tabular'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(sklearn=V1alpha2SKLearnSpec(
            storage_uri='gs://seldon-models/sklearn/income/model',
            resources=V1ResourceRequirements(requests={
                'cpu': '100m',
                'memory': '1Gi'
            },
                                             limits={
                                                 'cpu': '100m',
                                                 'memory': '1Gi'
                                             }))),
        explainer=V1alpha2ExplainerSpec(
            min_replicas=1,
            alibi=V1alpha2AlibiExplainerSpec(
                type='AnchorTabular',
                storage_uri=
                'gs://seldon-models/sklearn/income/explainer-py36-0.5.2',
                resources=V1ResourceRequirements(requests={
                    'cpu': '100m',
                    'memory': '1Gi'
                },
                                                 limits={
                                                     'cpu': '100m',
                                                     'memory': '1Gi'
                                                 }))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE,
                                  timeout_seconds=720)
    except RuntimeError as e:
        logging.info(
            KFServing.api_instance.get_namespaced_custom_object(
                "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
                "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/income_input.json')
    assert (res["predictions"] == [0])
    precision = explain(service_name, './data/income_input.json')
    assert (precision > 0.9)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example 15
def test_transformer():
    service_name = 'isvc-transformer'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            pytorch=V1alpha2PyTorchSpec(
                storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
                model_class_name="Net",
                resources=V1ResourceRequirements(requests={
                    'cpu': '100m',
                    'memory': '256Mi'
                },
                                                 limits={
                                                     'cpu': '100m',
                                                     'memory': '256Mi'
                                                 }))),
        transformer=V1alpha2TransformerSpec(
            min_replicas=1,
            custom=V1alpha2CustomSpec(container=V1Container(
                image=
                '809251082950.dkr.ecr.us-west-2.amazonaws.com/kfserving/image-transformer:latest',
                name='kfserving-container',
                resources=V1ResourceRequirements(requests={
                    'cpu': '100m',
                    'memory': '256Mi'
                },
                                                 limits={
                                                     'cpu': '100m',
                                                     'memory': '256Mi'
                                                 })))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(
            KFServing.api_instance.get_namespaced_custom_object(
                "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
                "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            print(pod)
        raise e
    res = predict(service_name, './data/transformer.json')
    assert (np.argmax(res["predictions"]) == 3)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example 16
def generate_kfservice():
    tf_spec = V1alpha2TensorflowSpec(
        storage_uri='gs://kfserving-samples/models/tensorflow/flowers')
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(tensorflow=tf_spec))

    kfsvc = V1alpha2KFService(
        api_version='serving.kubeflow.org/v1alpha2',
        kind='KFService',
        metadata=client.V1ObjectMeta(name='flower-sample'),
        spec=V1alpha2KFServiceSpec(default=default_endpoint_spec))
    return kfsvc
Example 17
def EndpointSpec(framework, storage_uri, service_account):
    if framework == "tensorflow":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "pytorch":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            pytorch=V1alpha2PyTorchSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "sklearn":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            sklearn=V1alpha2SKLearnSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "xgboost":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            xgboost=V1alpha2XGBoostSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "onnx":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            onnx=V1alpha2ONNXSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "tensorrt":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorrt=V1alpha2TensorRTSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    else:
        raise ("Error: No matching framework: " + framework)
Example 18
def EndpointSpec(framework,
                 storage_uri,
                 service_account_name="k8s-sa",
                 transformer_custom_image=""):
    if framework == 'tensorflow':
        return V1alpha2EndpointSpec(
            predictor=V1alpha2PredictorSpec(
                service_account_name=service_account_name,
                tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri)),
            transformer=V1alpha2TransformerSpec(
                min_replicas=1,
                custom=V1alpha2CustomSpec(container=client.V1Container(
                    image=transformer_custom_image,
                    name="kfserving-container"))))
    elif framework == 'pytorch':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            pytorch=V1alpha2PyTorchSpec(storage_uri=storage_uri)))
    elif framework == 'sklearn':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            sklearn=V1alpha2SKLearnSpec(storage_uri=storage_uri)))
    elif framework == 'xgboost':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            xgboost=V1alpha2XGBoostSpec(storage_uri=storage_uri)))
    elif framework == 'onnx':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            onnx=V1alpha2ONNXSpec(storage_uri=storage_uri)))
    elif framework == 'tensorrt':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorrt=V1alpha2TensorRTSpec(storage_uri=storage_uri)))
    else:
        raise ("Error: No matching framework: " + framework)
Example 19
    def generate_isvc(self):
        """ generate InferenceService """

        api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
        default_predictor, canary_predictor = None, None

        if self.framework == 'custom':
            default_predictor = self.generate_predictor_spec(
                self.framework, container=self.custom_default_container)
        else:
            default_predictor = self.generate_predictor_spec(
                self.framework, storage_uri=self.default_storage_uri)

        if self.framework != 'custom' and self.canary_storage_uri is not None:
            canary_predictor = self.generate_predictor_spec(
                self.framework, storage_uri=self.canary_storage_uri)
        if self.framework == 'custom' and self.custom_canary_container is not None:
            canary_predictor = self.generate_predictor_spec(
                self.framework, container=self.custom_canary_container)

        if canary_predictor:
            isvc_spec = V1alpha2InferenceServiceSpec(
                default=V1alpha2EndpointSpec(predictor=default_predictor),
                canary=V1alpha2EndpointSpec(predictor=canary_predictor),
                canary_traffic_percent=self.canary_traffic_percent)
        else:
            isvc_spec = V1alpha2InferenceServiceSpec(
                default=V1alpha2EndpointSpec(predictor=default_predictor),
                canary_traffic_percent=self.canary_traffic_percent)

        return V1alpha2InferenceService(
            api_version=api_version,
            kind=constants.KFSERVING_KIND,
            metadata=k8s_client.V1ObjectMeta(
                name=self.isvc_name,
                generate_name=constants.KFSERVING_DEFAULT_NAME,
                namespace=self.namespace),
            spec=isvc_spec)
Example 20
def test_batcher():
    service_name = 'isvc-pytorch-batcher'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            batcher=V1alpha2Batcher(
                max_batch_size=32, max_latency=5000, timeout=60),
            min_replicas=1,
            pytorch=V1alpha2PyTorchSpec(
                storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
                model_class_name='Net',
                resources=V1ResourceRequirements(requests={
                    'cpu': '1000m',
                    'memory': '2Gi'
                },
                                                 limits={
                                                     'cpu': '1000m',
                                                     'memory': '2Gi'
                                                 }))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))
    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(
            KFServing.api_instance.get_namespaced_custom_object(
                "serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
                "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(
            KFSERVING_TEST_NAMESPACE,
            label_selector='serving.kubeflow.org/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            print(pod)
        raise e
    with futures.ThreadPoolExecutor(max_workers=4) as executor:
        future_res = [
            executor.submit(
                lambda: predict(service_name, './data/cifar_input.json'))
            for _ in range(4)
        ]
    results = [f.result()["batchId"] for f in future_res]
    assert all(x == results[0] for x in results)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example 21
def test_lightgbm_kfserving():
    service_name = "isvc-lightgbm"
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            lightgbm=V1alpha2LightGBMSpec(
                storage_uri="gs://kfserving-examples/models/lightgbm",
                resources=V1ResourceRequirements(
                    requests={
                        "cpu": "100m",
                        "memory": "256Mi"
                    },
                    limits={
                        "cpu": "100m",
                        "memory": "256Mi"
                    },
                ),
            ),
        ))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec),
    )

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE,
                                  version=constants.KFSERVING_VERSION)
    except RuntimeError as e:
        KFServing.wait_isvc_ready(service_name,
                                  namespace=KFSERVING_TEST_NAMESPACE,
                                  version=constants.KFSERVING_V1BETA1_VERSION)
    try:
        res = predict(service_name,
                      "./data/iris_input_v3.json",
                      version=constants.KFSERVING_VERSION)
    except KeyError:
        res = predict(service_name,
                      "./data/iris_input_v3.json",
                      version=constants.KFSERVING_V1BETA1_VERSION)
    assert res["predictions"][0][0] > 0.5
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example 22
def test_tabular_explainer():
    service_name = 'aix-explainer'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            custom=V1alpha2CustomSpec(
                container=V1Container(
                    name="predictor",
                    image='aipipeline/rf-predictor:0.4.0',
                    command=["python", "-m", "rfserver", "--model_name", "aix-explainer"],
                    resources=V1ResourceRequirements(
                        requests={'cpu': '500m', 'memory': '1Gi'},
                        limits={'cpu': '500m', 'memory': '1Gi'})
                ))),
        explainer=V1alpha2ExplainerSpec(
            min_replicas=1,
            aix=V1alpha2AIXExplainerSpec(
                type='LimeImages',
                resources=V1ResourceRequirements(
                    requests={'cpu': '500m', 'memory': '1Gi'},
                    limits={'cpu': '500m', 'memory': '1Gi'}))))

    isvc = V1alpha2InferenceService(api_version=api_version,
                                    kind=constants.KFSERVING_KIND,
                                    metadata=client.V1ObjectMeta(
                                        name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
                                    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE, timeout_seconds=720)
    except RuntimeError as e:
        logging.info(KFServing.api_instance.get_namespaced_custom_object("serving.knative.dev", "v1",
                     KFSERVING_TEST_NAMESPACE, "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(KFSERVING_TEST_NAMESPACE,
                                                      label_selector='serving.kubeflow.org/inferenceservice={}'.format(service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/mnist_input.json')
    assert(res["predictions"] == [[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]])

    mask = explain_aix(service_name, './data/mnist_input.json')
    percent_in_mask = np.count_nonzero(mask) / np.size(np.array(mask))
    assert(percent_in_mask > 0.6)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example 23
    def roll(self, kfservice_id):
        mykfservice = db.session.query(KfService).filter_by(
            id=kfservice_id).first()
        namespace = conf.get('KFSERVING_NAMESPACE')
        crd_info = conf.get('CRD_INFO')['inferenceservice']

        # Build the serving container from the service definition
        def make_container(service, mykfservice):
            from myapp.utils.py.py_k8s import K8s
            k8s = K8s()  # not deploying here, so no cluster configuration is needed
            container = k8s.make_container(
                name=mykfservice.name + "-" + service.name,
                command=["sh", "-c", service.command]
                if service.command else None,
                args=None,
                volume_mount=None,
                image_pull_policy=conf.get('IMAGE_PULL_POLICY', 'Always'),
                image=service.images,
                working_dir=service.working_dir
                if service.working_dir else None,
                env=service.env,
                resource_memory=service.resource_memory,
                resource_cpu=service.resource_cpu,
                resource_gpu=service.resource_gpu,
                username=service.created_by.username,
                ports=service.ports)
            return container

        canary_endpoint_spec = V1alpha2EndpointSpec(
            predictor=V1alpha2PredictorSpec(
                min_replicas=mykfservice.canary_service.min_replicas,
                max_replicas=mykfservice.canary_service.max_replicas,
                custom=V1alpha2CustomSpec(container=make_container(
                    mykfservice.canary_service,
                    mykfservice)))) if mykfservice.canary_service else None

        KFServing = KFServingClient()
        KFServing.rollout_canary(mykfservice.name,
                                 canary=canary_endpoint_spec,
                                 percent=mykfservice.canary_traffic_percent,
                                 namespace=namespace,
                                 timeout_seconds=120,
                                 version=crd_info['version'])

        flash(category='warning', message='Canary rollout configured; refresh to see the current traffic split')
        return redirect('/kfservice_modelview/list/')
Example 24
def customEndpointSpec(custom_model_spec):
    env = [
        client.V1EnvVar(name=i['name'], value=i['value'])
        for i in custom_model_spec['env']
    ] if custom_model_spec.get('env', '') else None
    ports = [
        client.V1ContainerPort(
            container_port=int(custom_model_spec.get('port', '')))
    ] if custom_model_spec.get('port', '') else None
    containerSpec = client.V1Container(
        name=custom_model_spec.get('name', 'custom-container'),
        image=custom_model_spec['image'],
        env=env,
        ports=ports,
        command=custom_model_spec.get('command', None),
        args=custom_model_spec.get('args', None),
        image_pull_policy=custom_model_spec.get('image_pull_policy', None),
        working_dir=custom_model_spec.get('working_dir', None))
    return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
        custom=V1alpha2CustomSpec(container=containerSpec)))
Example 25
def deploy_model(namespace, trained_model_path):
    logging.basicConfig(level=logging.INFO)
    logging.info('Starting deploy model step ..')
    logging.info('Input data ..')
    logging.info('namespace:{}'.format(namespace))
    logging.info('trained_model_path:{}'.format(trained_model_path))

    logging.info('STEP: DEPLOY MODEL (1/2) Generating definition..')
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    now = datetime.datetime.utcnow().strftime("%Y%m%d%H%M%S")
    inference_service_name = 'simple-model'+now
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
        tensorflow=V1alpha2TensorflowSpec(
        storage_uri=trained_model_path,
        resources=V1ResourceRequirements(
        requests={'cpu': '100m', 'memory': '1Gi'},
        limits={'cpu': '100m', 'memory': '1Gi'}))))

    isvc = V1alpha2InferenceService(api_version=api_version,
                                    kind=constants.KFSERVING_KIND,
                                    metadata=client.V1ObjectMeta(
                                    name=inference_service_name,
                                    annotations=
                                            {
                                                'sidecar.istio.io/inject': 'false',
                                                'autoscaling.knative.dev/target': '1'
                                            },
                                    namespace=namespace),
                                    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    # velascoluis: the sidecar is disabled because of https://github.com/knative/serving/issues/6829
    # Note: make sure the trained model path starts with file:// or gs://

    KFServing = KFServingClient()
    logging.info('STEP: DEPLOY MODEL (2/2) Creating inference service..')
    KFServing.create(isvc)
    logging.info('Inference service ' + inference_service_name + " created ...")
    KFServing.get(inference_service_name, namespace=namespace, watch=True, timeout_seconds=120)
    logging.info('Deploy model step finished')
Example 26
def deploy_model(namespace: str, trained_model_path: InputPath(str)):
    from kubernetes import client
    from kfserving import KFServingClient
    from kfserving import constants
    from kfserving import V1alpha2EndpointSpec
    from kfserving import V1alpha2PredictorSpec
    from kfserving import V1alpha2TensorflowSpec
    from kfserving import V1alpha2InferenceServiceSpec
    from kfserving import V1alpha2InferenceService
    from kubernetes.client import V1ResourceRequirements

    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    inference_service_name = 'inference112cbk'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(tensorflow=V1alpha2TensorflowSpec(
            storage_uri=trained_model_path,
            resources=V1ResourceRequirements(requests={
                'cpu': '100m',
                'memory': '1Gi'
            },
                                             limits={
                                                 'cpu': '100m',
                                                 'memory': '1Gi'
                                             }))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=inference_service_name,
                                     namespace=namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing = KFServingClient()
    KFServing.create(isvc)
    print('Inference service ' + inference_service_name + " created ...")
    KFServing.get(inference_service_name,
                  namespace=namespace,
                  watch=True,
                  timeout_seconds=120)
    print('Model deployed')
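
The InputPath annotation in the signature (from kfp.components) suggests this deploy_model is meant to run as a Kubeflow Pipelines lightweight component. A sketch of how it could be wrapped follows; the base image and the package list are assumptions.

# Sketch: wrapping deploy_model as a KFP lightweight component. The base image
# and the package pin are assumptions, not from the original pipeline code.
from kfp.components import InputPath, create_component_from_func

deploy_model_op = create_component_from_func(
    deploy_model,
    base_image='python:3.7',
    packages_to_install=['kfserving'])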
Example 27
def test_batcher():
    service_name = 'isvc-pytorch-batcher'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            batcher=V1alpha2Batcher(
                max_batch_size=32,
                max_latency=5000,
                timeout=60
            ),
            min_replicas=1,
            pytorch=V1alpha2PyTorchSpec(
                storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
                model_class_name='Net',
                resources=V1ResourceRequirements(
                    requests={'cpu': '2000m', 'memory': '2Gi'},
                    limits={'cpu': '2000m', 'memory': '2Gi'}))))

    isvc = V1alpha2InferenceService(api_version=api_version,
                                    kind=constants.KFSERVING_KIND,
                                    metadata=client.V1ObjectMeta(
                                        name=service_name,
                                        namespace=KFSERVING_TEST_NAMESPACE
                                    ),
                                    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))
    KFServing.create(isvc)
    try:
        KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    except RuntimeError as e:
        print(KFServing.api_instance.get_namespaced_custom_object("serving.knative.dev", "v1", KFSERVING_TEST_NAMESPACE,
                                                                  "services", service_name + "-predictor-default"))
        pods = KFServing.core_api.list_namespaced_pod(KFSERVING_TEST_NAMESPACE,
                                                      label_selector='serving.kubeflow.org/inferenceservice={}'.
                                                      format(service_name))
        for pod in pods.items:
            print(pod)
        raise e
    time.sleep(30)
    probs = predict(service_name, './data/cifar_input.json')
    assert(np.argmax(probs) == 3)
    KFServing.delete(service_name, KFSERVING_TEST_NAMESPACE)
Example 28
def EndpointSpec(framework, storage_uri, service_account, min_replicas,
                 max_replicas):

    endpointSpec = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
        service_account_name=service_account,
        min_replicas=(min_replicas if min_replicas >= 0 else None),
        max_replicas=(max_replicas if max_replicas > 0
                      and max_replicas >= min_replicas else None)))
    if framework == "tensorflow":
        endpointSpec.predictor.tensorflow = V1alpha2TensorflowSpec(
            storage_uri=storage_uri)
        return endpointSpec

    elif framework == "pytorch":
        endpointSpec.predictor.pytorch = V1alpha2PyTorchSpec(
            storage_uri=storage_uri)
        return endpointSpec

    elif framework == "sklearn":
        endpointSpec.predictor.sklearn = V1alpha2SKLearnSpec(
            storage_uri=storage_uri)
        return endpointSpec

    elif framework == "xgboost":
        endpointSpec.predictor.xgboost = V1alpha2XGBoostSpec(
            storage_uri=storage_uri)
        return endpointSpec

    elif framework == "onnx":
        endpointSpec.predictor.onnx = V1alpha2ONNXSpec(storage_uri=storage_uri)
        return endpointSpec

    elif framework == "tensorrt":
        endpointSpec.predictor.tensorrt = V1alpha2TensorRTSpec(
            storage_uri=storage_uri)
        return endpointSpec

    else:
        raise ("Error: No matching framework: " + framework)
Example 29
def deploy_model(namespace, model_file_name, gcp_bucket):

    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    now = datetime.datetime.utcnow().strftime("%Y%m%d%H%M%S")
    inference_service_name = 'xgboost-r' + now
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            xgboost=V1alpha2XGBoostSpec(
                #storage_uri='gs://'+gcp_bucket+'/rmodel/'+model_file_name,
                storage_uri='gs://' + gcp_bucket + '/rmodel',
                resources=V1ResourceRequirements(requests={
                    'cpu': '100m',
                    'memory': '1Gi'
                },
                                                 limits={
                                                     'cpu': '100m',
                                                     'memory': '1Gi'
                                                 }))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=inference_service_name,
            namespace=namespace,
            annotations={'sidecar.istio.io/inject': 'false'}),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    # @velascoluis - the sidecar.istio.io/inject: "false" annotation is required, otherwise the ingress does not work

    KFServing = KFServingClient()
    KFServing.create(isvc)
    KFServing.get(inference_service_name,
                  namespace=namespace,
                  watch=True,
                  timeout_seconds=120)
Example 30
    def run(self):
        logger.info("Retrieving kfserving client")
        client = KFServingClient()

        logger.info("Specifying canary")
        canary = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            custom=V1alpha2CustomSpec(container=V1Container(
                name=Serve.SERVICE_NAME,
                image="{}:{}".format(Pipeline.DEPLOY_IMAGE, self.args.tag),
                image_pull_policy="Always",
            ))))

        logger.info("Rolling out canary deployment")
        client.rollout_canary(Serve.SERVICE_NAME,
                              canary=canary,
                              percent=50,
                              namespace=Rollout.NAMESPACE,
                              watch=True)

        logger.info("Promoting canary deployment")
        client.promote(Serve.SERVICE_NAME,
                       namespace=Rollout.NAMESPACE,
                       watch=True)