Example #1
def test_canary_rollout():
    service_name = 'isvc-canary'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))))

    isvc = V1alpha2InferenceService(api_version=constants.KFSERVING_API_VERSION,
                                    kind=constants.KFSERVING_KIND,
                                    metadata=client.V1ObjectMeta(
                                        name=service_name, namespace=KFSERVING_TEST_NAMESPACE),
                                    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)

    # Define the canary endpoint spec, then roll out 10% of the traffic to the canary version
    canary_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers-2',
                resources=V1ResourceRequirements(
                    requests={'cpu':'100m','memory':'256Mi'},
                    limits={'cpu':'100m', 'memory':'256Mi'}))))

    KFServing.rollout_canary(service_name, canary=canary_endpoint_spec, percent=10,
                             namespace=KFSERVING_TEST_NAMESPACE, watch=True, timeout_seconds=120)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)

    # Delete the InferenceService
    KFServing.delete(service_name, namespace=KFSERVING_TEST_NAMESPACE)
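A hedged follow-up to this test, assuming rollout_canary also accepts a traffic-only update for an existing canary: before deleting the service, the canary's share could be raised once the new revision looks healthy (the 50% figure is illustrative, not part of the original test).

# Hypothetical extra step: shift more traffic to the already-deployed canary
# once it is serving correctly, then proceed with cleanup as above.
KFServing.rollout_canary(service_name, percent=50,
                         namespace=KFSERVING_TEST_NAMESPACE,
                         watch=True, timeout_seconds=120)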
Example #2
def test_tensorflow_kfserving():
    service_name = 'isvc-tensorflow'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
                resources=V1ResourceRequirements(requests={
                    'cpu': '100m',
                    'memory': '256Mi'
                },
                                                 limits={
                                                     'cpu': '100m',
                                                     'memory': '256Mi'
                                                 }))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
Example #3
def EndpointSpec(framework, storage_uri, service_account):
    if framework == "tensorflow":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "pytorch":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            pytorch=V1alpha2PyTorchSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "sklearn":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            sklearn=V1alpha2SKLearnSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "xgboost":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            xgboost=V1alpha2XGBoostSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "onnx":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            onnx=V1alpha2ONNXSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    elif framework == "tensorrt":
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorrt=V1alpha2TensorRTSpec(storage_uri=storage_uri),
            service_account_name=service_account,
        ))
    else:
        raise ("Error: No matching framework: " + framework)
Example #4
def EndpointSpec(framework,
                 storage_uri,
                 service_account_name="k8s-sa",
                 transformer_custom_image=""):
    if framework == 'tensorflow':
        return V1alpha2EndpointSpec(
            predictor=V1alpha2PredictorSpec(
                service_account_name=service_account_name,
                tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri)),
            transformer=V1alpha2TransformerSpec(
                min_replicas=1,
                custom=V1alpha2CustomSpec(container=client.V1Container(
                    image=transformer_custom_image,
                    name="kfserving-container"))))
    elif framework == 'pytorch':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            pytorch=V1alpha2PyTorchSpec(storage_uri=storage_uri)))
    elif framework == 'sklearn':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            sklearn=V1alpha2SKLearnSpec(storage_uri=storage_uri)))
    elif framework == 'xgboost':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            xgboost=V1alpha2XGBoostSpec(storage_uri=storage_uri)))
    elif framework == 'onnx':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            onnx=V1alpha2ONNXSpec(storage_uri=storage_uri)))
    elif framework == 'tensorrt':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorrt=V1alpha2TensorRTSpec(storage_uri=storage_uri)))
    else:
        raise ("Error: No matching framework: " + framework)
Example #5
def test_tensorflow_kfserving():
    service_name = 'isvc-tensorflow'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
                resources=V1ResourceRequirements(requests={
                    'cpu': '1',
                    'memory': '2Gi'
                },
                                                 limits={
                                                     'cpu': '1',
                                                     'memory': '2Gi'
                                                 }))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KFSERVING_TEST_NAMESPACE),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing.create(isvc)
    KFServing.wait_isvc_ready(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    probs = predict(service_name, './data/flower_input.json')
    assert (np.argmax(probs[0].get('scores')) == 0)

    # Delete the InferenceService
    KFServing.delete(service_name, namespace=KFSERVING_TEST_NAMESPACE)
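The predict helper used above is not included in this snippet. Below is a rough sketch of what such a helper could look like, assuming the InferenceService URL exposed in its status is reachable from the test runner and the TensorFlow Serving v1 HTTP protocol; everything here is an assumption, not the original helper.

import json
import requests

def predict(service_name, input_json_path):
    # Look up the InferenceService and take its externally visible URL (assumed reachable).
    isvc = KFServing.get(service_name, namespace=KFSERVING_TEST_NAMESPACE)
    url = '{}/v1/models/{}:predict'.format(isvc['status']['url'], service_name)
    with open(input_json_path) as f:
        payload = json.load(f)
    response = requests.post(url, json=payload)
    response.raise_for_status()
    return response.json()['predictions']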
Example #6
 def generate_predictor_spec(self,
                             framework,
                             storage_uri=None,
                             container=None):
     '''Generate predictor spec according to framework and
        default_storage_uri or custom container.
     '''
     if framework == 'tensorflow':
         predictor = V1alpha2PredictorSpec(
             tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri))
     elif framework == 'onnx':
         predictor = V1alpha2PredictorSpec(onnx=V1alpha2ONNXSpec(
             storage_uri=storage_uri))
     elif framework == 'pytorch':
         predictor = V1alpha2PredictorSpec(pytorch=V1alpha2PyTorchSpec(
             storage_uri=storage_uri))
     elif framework == 'sklearn':
         predictor = V1alpha2PredictorSpec(sklearn=V1alpha2SKLearnSpec(
             storage_uri=storage_uri))
     elif framework == 'triton':
         predictor = V1alpha2PredictorSpec(triton=V1alpha2TritonSpec(
             storage_uri=storage_uri))
     elif framework == 'xgboost':
         predictor = V1alpha2PredictorSpec(xgboost=V1alpha2XGBoostSpec(
             storage_uri=storage_uri))
     elif framework == 'custom':
         predictor = V1alpha2PredictorSpec(custom=V1alpha2CustomSpec(
             container=container))
     else:
         raise RuntimeError("Unsupported framework {}".format(framework))
     return predictor
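A hedged sketch of how the returned predictor might be wrapped into an endpoint spec; `builder` stands in for whatever object exposes generate_predictor_spec, and the storage path is a placeholder.

predictor = builder.generate_predictor_spec(
    framework='sklearn',
    storage_uri='gs://my-bucket/models/sklearn')  # placeholder path
endpoint_spec = V1alpha2EndpointSpec(predictor=predictor)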
Example #7
def create_inference_service(namespace: str, name: str, storage_uri: str,
                             runtime_version: str, service_account_name: str):
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            service_account_name=service_account_name,
            tensorflow=V1alpha2TensorflowSpec(runtime_version=runtime_version,
                                              storage_uri=storage_uri,
                                              resources=V1ResourceRequirements(
                                                  requests={
                                                      'cpu': '100m',
                                                      'memory': '1Gi'
                                                  },
                                                  limits={
                                                      'cpu': '100m',
                                                      'memory': '1Gi'
                                                  }))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=name, namespace=namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))
    KFServing = KFServingClient()
    KFServing.create(isvc)
    KFServing.get(name, namespace=namespace, watch=True, timeout_seconds=300)
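An illustrative invocation of this helper; every argument value below is a placeholder.

create_inference_service(
    namespace='kubeflow',
    name='flowers-sample',
    storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
    runtime_version='1.14.0',
    service_account_name='kfserving-sa')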
Example #8
 def run(self):
     parser = argparse.ArgumentParser()
     parser.add_argument('--namespace', required=False, default='kubeflow')
     # pvc://${PVCNAME}/dir
     parser.add_argument('--storage_uri', required=False, default='/mnt/export')
     parser.add_argument('--name', required=False, default='kfserving-sample')        
     args = parser.parse_args()
     namespace = args.namespace
     serving_name =  args.name
     
     api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
     default_endpoint_spec = V1alpha2EndpointSpec(
                               predictor=V1alpha2PredictorSpec(
                                 tensorflow=V1alpha2TensorflowSpec(
                                   storage_uri=args.storage_uri,
                                   resources=V1ResourceRequirements(
                                       requests={'cpu':'100m','memory':'1Gi'},
                                       limits={'cpu':'100m', 'memory':'1Gi'}))))
     isvc = V1alpha2InferenceService(api_version=api_version,
                               kind=constants.KFSERVING_KIND,
                               metadata=client.V1ObjectMeta(
                                   name=serving_name, namespace=namespace),
                               spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))        
     
     KFServing = KFServingClient()
     KFServing.create(isvc)
     print('Waiting 5 seconds for the InferenceService to be created')
     time.sleep(5)
     
     KFServing.get(serving_name, namespace=namespace, watch=True, timeout_seconds=300)
Example #9
def generate_kfservice():
    default_model_spec = V1alpha2ModelSpec(tensorflow=V1alpha2TensorflowSpec(
        model_uri='gs://kfserving-samples/models/tensorflow/flowers'))

    kfsvc = V1alpha2KFService(
        api_version='serving.kubeflow.org/v1alpha1',
        kind='KFService',
        metadata=client.V1ObjectMeta(name='flower-sample'),
        spec=V1alpha2KFServiceSpec(default=default_model_spec))
    return kfsvc
Example #10
def generate_kfservice():
    tf_spec = V1alpha2TensorflowSpec(
        storage_uri='gs://kfserving-samples/models/tensorflow/flowers')
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(tensorflow=tf_spec))

    kfsvc = V1alpha2KFService(
        api_version='serving.kubeflow.org/v1alpha2',
        kind='KFService',
        metadata=client.V1ObjectMeta(name='flower-sample'),
        spec=V1alpha2KFServiceSpec(default=default_endpoint_spec))
    return kfsvc
Example #11
def EndpointSpec(framework, storage_uri):
    if framework == 'tensorflow':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri)))
    elif framework == 'pytorch':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            pytorch=V1alpha2PyTorchSpec(storage_uri=storage_uri)))
    elif framework == 'sklearn':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            sklearn=V1alpha2SKLearnSpec(storage_uri=storage_uri)))
    elif framework == 'xgboost':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            xgboost=V1alpha2XGBoostSpec(storage_uri=storage_uri)))
    elif framework == 'onnx':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            onnx=V1alpha2ONNXSpec(storage_uri=storage_uri)))
    elif framework == 'tensorrt':
        return V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            tensorrt=V1alpha2TensorRTSpec(storage_uri=storage_uri)))
    else:
        raise ("Error: No matching framework: " + framework)
Example #12
def deploy_model(namespace: str, trained_model_path: InputPath(str)):
    from kubernetes import client
    from kfserving import KFServingClient
    from kfserving import constants
    from kfserving import V1alpha2EndpointSpec
    from kfserving import V1alpha2PredictorSpec
    from kfserving import V1alpha2TensorflowSpec
    from kfserving import V1alpha2InferenceServiceSpec
    from kfserving import V1alpha2InferenceService
    from kubernetes.client import V1ResourceRequirements

    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    inference_service_name = 'inference112cbk'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(tensorflow=V1alpha2TensorflowSpec(
            storage_uri=trained_model_path,
            resources=V1ResourceRequirements(requests={
                'cpu': '100m',
                'memory': '1Gi'
            },
                                             limits={
                                                 'cpu': '100m',
                                                 'memory': '1Gi'
                                             }))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=inference_service_name,
                                     namespace=namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing = KFServingClient()
    KFServing.create(isvc)
    print('Inference service ' + inference_service_name + " created ...")
    KFServing.get(inference_service_name,
                  namespace=namespace,
                  watch=True,
                  timeout_seconds=120)
    print('Model deployed')
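An illustrative direct call to this function; in a pipeline it would normally run as a Kubeflow Pipelines component instead, and the model path below is a placeholder.

deploy_model(namespace='kubeflow',
             trained_model_path='gs://my-bucket/export/saved_model')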
Example #13
def deploy_model(namespace, trained_model_path):
    logging.basicConfig(level=logging.INFO)
    logging.info('Starting deploy model step ..')
    logging.info('Input data ..')
    logging.info('namespace:{}'.format(namespace))
    logging.info('trained_model_path:{}'.format(trained_model_path))

    logging.info('STEP: DEPLOY MODEL (1/2) Generating definition..')
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    now = datetime.datetime.utcnow().strftime("%Y%m%d%H%M%S")
    inference_service_name = 'simple-model'+now
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri=trained_model_path,
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=inference_service_name,
            annotations={
                'sidecar.istio.io/inject': 'false',
                'autoscaling.knative.dev/target': '1'
            },
            namespace=namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

# velascoluis: the Istio sidecar is disabled because of https://github.com/knative/serving/issues/6829
# Note: make sure the trained model path starts with file:// or gs://

    KFServing = KFServingClient()
    logging.info('STEP: DEPLOY MODEL (2/2) Creating inference service..')
    KFServing.create(isvc)
    logging.info('Inference service ' + inference_service_name + " created ...")
    KFServing.get(inference_service_name, namespace=namespace, watch=True, timeout_seconds=120)
    logging.info('Deploy model step finished')
Example #14
def EndpointSpec(framework, storage_uri, service_account, min_replicas,
                 max_replicas):

    endpointSpec = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
        service_account_name=service_account,
        min_replicas=(min_replicas if min_replicas >= 0 else None),
        max_replicas=(max_replicas if max_replicas > 0
                      and max_replicas >= min_replicas else None)))
    if framework == "tensorflow":
        endpointSpec.predictor.tensorflow = V1alpha2TensorflowSpec(
            storage_uri=storage_uri)
        return endpointSpec

    elif framework == "pytorch":
        endpointSpec.predictor.pytorch = V1alpha2PyTorchSpec(
            storage_uri=storage_uri)
        return endpointSpec

    elif framework == "sklearn":
        endpointSpec.predictor.sklearn = V1alpha2SKLearnSpec(
            storage_uri=storage_uri)
        return endpointSpec

    elif framework == "xgboost":
        endpointSpec.predictor.xgboost = V1alpha2XGBoostSpec(
            storage_uri=storage_uri)
        return endpointSpec

    elif framework == "onnx":
        endpointSpec.predictor.onnx = V1alpha2ONNXSpec(storage_uri=storage_uri)
        return endpointSpec

    elif framework == "tensorrt":
        endpointSpec.predictor.tensorrt = V1alpha2TensorRTSpec(
            storage_uri=storage_uri)
        return endpointSpec

    else:
        raise ("Error: No matching framework: " + framework)
Example #15
from kfserving import V1alpha2EndpointSpec
from kfserving import V1alpha2PredictorSpec
from kfserving import V1alpha2TensorflowSpec
from kfserving import V1alpha2InferenceServiceSpec
from kfserving import V1alpha2InferenceService
from kubernetes.client import V1ResourceRequirements

namespace = utils.get_default_target_namespace()

api_version = constants.KFSERVING_GROUP + "/" + constants.KFSERVING_VERSION
default_endpoint_spec = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
    tensorflow=V1alpha2TensorflowSpec(
        storage_uri="s3://anonymous-model-result/result/saved_model",
        resources=V1ResourceRequirements(requests={
            "cpu": "100m",
            "memory": "1Gi"
        },
                                         limits={
                                             "cpu": "100m",
                                             "memory": "1Gi"
                                         }))))

isvc = V1alpha2InferenceService(
    api_version=api_version,
    kind=constants.KFSERVING_KIND,
    metadata=client.V1ObjectMeta(name="mnist-kfserving", namespace=namespace),
    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

KFServing = KFServingClient()
KFServing.set_credentials(
    storage_type="S3",
    namespace='anonymous',