Example #1
    def delete_isvc(self, name, namespace):
        """Delete the provided InferenceService in the specified namespace.

        :param name: The name of the InferenceService.
        :param namespace: The namespace the InferenceService is deployed in.
        :returns: object: The deleted InferenceService.

        """
        KFServing = KFServingClient(
            config_file=self.config_file,
            context=self.context,
            client_configuration=self.client_configuration,
            persist_config=self.persist_config)
        return KFServing.delete(name, namespace=namespace)
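A minimal usage sketch for the method above; the wrapper class name IsvcClient and its constructor arguments are assumptions, inferred from the self.* attributes the method reads:

# Hypothetical wrapper holding kubeconfig settings; only delete_isvc is shown above.
wrapper = IsvcClient(config_file='~/.kube/config', context=None,
                     client_configuration=None, persist_config=True)
deleted = wrapper.delete_isvc('flower-sample', namespace='kubeflow')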
Example #2
def test_set_credentials_gcp():
    '''Test GCP credentials creation.'''
    KFServing = KFServingClient()
    sa_name = constants.DEFAULT_SA_NAME
    KFServing.set_credentials(
        storage_type='gcs',
        namespace=KFSERVING_TEST_NAMESPACE,
        credentials_file='./credentials/gcp_credentials.json',
        sa_name=sa_name)
    created_sa = get_created_sa(sa_name)
    created_secret_name = created_sa.secrets[0].name
    created_secret = get_created_secret(created_secret_name)
    assert created_secret.data[
        constants.GCS_CREDS_FILE_DEFAULT_NAME] == gcp_testing_creds
Example #3
    def roll(self, kfservice_id):
        mykfservice = db.session.query(KfService).filter_by(
            id=kfservice_id).first()
        namespace = conf.get('KFSERVING_NAMESPACE')
        crd_info = conf.get('CRD_INFO')['inferenceservice']

        # Build the container from the service definition
        def make_container(service, mykfservice):
            from myapp.utils.py.py_k8s import K8s
            k8s = K8s()  # Not deploying, so no cluster configuration is needed
            container = k8s.make_container(
                name=mykfservice.name + "-" + service.name,
                command=["sh", "-c", service.command]
                if service.command else None,
                args=None,
                volume_mount=None,
                image_pull_policy=conf.get('IMAGE_PULL_POLICY', 'Always'),
                image=service.images,
                working_dir=service.working_dir
                if service.working_dir else None,
                env=service.env,
                resource_memory=service.resource_memory,
                resource_cpu=service.resource_cpu,
                resource_gpu=service.resource_gpu,
                username=service.created_by.username,
                ports=service.ports)
            return container

        canary_endpoint_spec = V1alpha2EndpointSpec(
            predictor=V1alpha2PredictorSpec(
                min_replicas=mykfservice.canary_service.min_replicas,
                max_replicas=mykfservice.canary_service.max_replicas,
                custom=V1alpha2CustomSpec(container=make_container(
                    mykfservice.canary_service,
                    mykfservice)))) if mykfservice.canary_service else None

        KFServing = KFServingClient()
        KFServing.rollout_canary(mykfservice.name,
                                 canary=canary_endpoint_spec,
                                 percent=mykfservice.canary_traffic_percent,
                                 namespace=namespace,
                                 timeout_seconds=120,
                                 version=crd_info['version'])

        flash(category='warning', message='Canary rollout configured; refresh to see the current traffic split.')
        return redirect('/kfservice_modelview/list/')
Example #4
def test_azure_credentials():
    '''Test Azure credentials creation.'''
    KFServing = KFServingClient()
    sa_name = constants.DEFAULT_SA_NAME
    KFServing.set_credentials(
        storage_type='Azure',
        namespace=KFSERVING_TEST_NAMESPACE,
        credentials_file='./credentials/azure_credentials.json',
        sa_name=sa_name)
    created_sa = get_created_sa(sa_name)
    created_secret_name = created_sa.secrets[0].name
    created_secret = get_created_secret(created_secret_name)
    assert created_secret.data['AZ_CLIENT_ID'] == 'dXNlcgo='
    assert created_secret.data['AZ_CLIENT_SECRET'] == 'cGFzc3dvcmQ='
    assert created_secret.data[
        'AZ_SUBSCRIPTION_ID'] == 'MzMzMzMzMzMtMzMzMy0zMzMzLTMzMzMtMzMzMzMz'
    assert created_secret.data['AZ_TENANT_ID'] == 'MTIzNAo='
Example #5
def test_azure_credentials():
    '''Test Azure credentials creation.'''
    KFServing = KFServingClient()
    sa_name = constants.DEFAULT_SA_NAME
    KFServing.set_credentials(storage_type='Azure',
                              namespace='kubeflow',
                              credentials_file='./azure_credentials.json',
                              sa_name=sa_name)
    created_sa = get_created_sa(sa_name)
    created_secret_name = created_sa.secrets[0].name
    created_secret = get_created_secret(created_secret_name)
    assert created_secret.data[
        'AZ_CLIENT_ID'] == 'YTJhYjExYWYtMDFhYS00NzU5LTgzNDUtNzgwMzI4N2RiZD'
    assert created_secret.data['AZ_CLIENT_SECRET'] == 'password'
    assert created_secret.data[
        'AZ_SUBSCRIPTION_ID'] == 'MzMzMzMzMzMtMzMzMy0zMzMzLTMzMzMtMzMzMzMz'
    assert created_secret.data[
        'AZ_TENANT_ID'] == 'QUJDREVGR0gtMTIzNC0xMjM0LTEyMzQtQUJDREVGR0hJSk'
Example #6
def deploy_model(action,
                 model_name,
                 default_model_uri,
                 canary_model_uri,
                 canary_model_traffic,
                 namespace,
                 framework,
                 default_custom_model_spec,
                 canary_custom_model_spec,
                 autoscaling_target=0):
    if int(autoscaling_target) != 0:
        annotations = {
            "autoscaling.knative.dev/target": str(autoscaling_target)
        }
    else:
        annotations = None
    metadata = client.V1ObjectMeta(name=model_name,
                                   namespace=namespace,
                                   annotations=annotations)
    if framework != 'custom':
        default_model_spec = ModelSpec(framework, default_model_uri)
    else:
        default_model_spec = customModelSpec(default_custom_model_spec)
    # Create Canary deployment if canary model uri is provided.
    if framework != 'custom' and canary_model_uri:
        canary_model_spec = ModelSpec(framework, canary_model_uri)
        kfsvc = kfserving_deployment(metadata, default_model_spec,
                                     canary_model_spec, canary_model_traffic)
    elif framework == 'custom' and canary_custom_model_spec:
        canary_model_spec = customModelSpec(canary_custom_model_spec)
        kfsvc = kfserving_deployment(metadata, default_model_spec,
                                     canary_model_spec, canary_model_traffic)
    else:
        kfsvc = kfserving_deployment(metadata, default_model_spec)

    KFServing = KFServingClient()

    if action == 'create':
        KFServing.create(kfsvc)
    elif action == 'update':
        KFServing.patch(model_name, kfsvc)
    elif action == 'delete':
        KFServing.delete(model_name, namespace=namespace)
    else:
        raise ("Error: No matching action: " + action)

    model_status = KFServing.get(model_name, namespace=namespace)
    return model_status
Example #7
    def create_isvc(self, namespace, isvc):
        """Create the provided InferenceService in the specified namespace.

        :param namespace: The namespace in which to create the InferenceService.
        :param isvc: The InferenceService body.
        :returns: object: Created InferenceService.

        """
        KFServing = KFServingClient()
        try:
            created_isvc = KFServing.create(isvc, namespace=namespace)
            isvc_name = created_isvc['metadata']['name']
            isvc_namespace = created_isvc['metadata']['namespace']
            KFServing.get(isvc_name, isvc_namespace, watch=True)
            return created_isvc
        except client.rest.ApiException:
            raise RuntimeError("Failed to create InferenceService. Perhaps the CRD "
                               "InferenceService version {} is not installed? "\
                                   .format(constants.KFSERVING_VERSION))
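For reference, a sketch of a minimal body that could be passed to create_isvc, following the V1alpha2 construction pattern used in the other examples on this page; the wrapper instance name is an assumption:

from kubernetes import client
from kfserving import constants
from kfserving import V1alpha2EndpointSpec, V1alpha2PredictorSpec
from kfserving import V1alpha2TensorflowSpec
from kfserving import V1alpha2InferenceServiceSpec, V1alpha2InferenceService

api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
isvc_body = V1alpha2InferenceService(
    api_version=api_version,
    kind=constants.KFSERVING_KIND,
    metadata=client.V1ObjectMeta(name='flower-sample', namespace='kubeflow'),
    spec=V1alpha2InferenceServiceSpec(default=V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(tensorflow=V1alpha2TensorflowSpec(
            storage_uri='gs://kfserving-samples/models/tensorflow/flowers')))))
# wrapper is an instance of the class defining create_isvc above
created = wrapper.create_isvc('kubeflow', isvc_body)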
Example #8
def test_set_credentials_s3():
    """Test S3 credentials creating."""
    kfserving = KFServingClient()
    credentials_file = './credentials/aws_credentials'

    # Test creating service account case.
    sa_name = constants.DEFAULT_SA_NAME
    if check_sa_exists(sa_name):
        delete_sa(sa_name)

    kfserving.set_credentials(storage_type='s3',
                              namespace=KFSERVING_TEST_NAMESPACE,
                              credentials_file=credentials_file,
                              s3_profile='default',
                              s3_endpoint='s3.us-west-2.amazonaws.com',
                              s3_region='us-west-2',
                              s3_use_https='1',
                              s3_verify_ssl='0')

    sa_body = get_created_sa(sa_name)
    created_secret_name = sa_body.secrets[0].name
    created_secret = get_created_secret(created_secret_name)

    config = configparser.ConfigParser()
    config.read([expanduser(credentials_file)])
    s3_access_key_id = config.get('default', 'aws_access_key_id')
    s3_secret_access_key = config.get('default', 'aws_secret_access_key')

    assert created_secret.data[
        constants.S3_ACCESS_KEY_ID_DEFAULT_NAME] == s3_access_key_id
    assert created_secret.data[
        constants.S3_SECRET_ACCESS_KEY_DEFAULT_NAME] == s3_secret_access_key
    assert created_secret.metadata.annotations[
        constants.KFSERVING_GROUP +
        '/s3-endpoint'] == 's3.us-west-2.amazonaws.com'
    assert created_secret.metadata.annotations[constants.KFSERVING_GROUP +
                                               '/s3-region'] == 'us-west-2'
    assert created_secret.metadata.annotations[constants.KFSERVING_GROUP +
                                               '/s3-usehttps'] == '1'
    assert created_secret.metadata.annotations[constants.KFSERVING_GROUP +
                                               '/s3-verifyssl'] == '0'
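The credentials_file parsed above is a standard AWS-style INI file; a minimal sketch of what ./credentials/aws_credentials could contain (placeholder values only):

[default]
aws_access_key_id = AKIAIOSFODNN7EXAMPLE
aws_secret_access_key = wJalrXUtnFXEMI/K7MDENG/bPxRfiCYEXAMPLEKEY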
Example #9
def create_inference_service(namespace: str, name: str, storage_url: str,
                             runtime_version: str, service_account_name: str):
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION

    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            service_account_name=service_account_name,
            triton=V1alpha2TritonSpec(runtime_version=runtime_version,
                                      storage_uri=storage_url,
                                      resources=V1ResourceRequirements(
                                          requests={
                                              'cpu': '100m',
                                              'memory': '1Gi'
                                          },
                                          limits={
                                              'cpu': '100m',
                                              'memory': '1Gi'
                                          }))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=name, namespace=namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec),
    )

    KFServing = KFServingClient()
    KFServing.create(isvc)
    KFServing.get(name, namespace=namespace, watch=True, timeout_seconds=300)
Example #10
    def run(self):
        parser = argparse.ArgumentParser()
        parser.add_argument('--namespace', required=False, default='kubeflow')
        # pvc://${PVCNAME}/dir
        parser.add_argument('--storage_uri', required=False, default='/mnt/export')
        parser.add_argument('--name', required=False, default='kfserving-sample')
        args = parser.parse_args()
        namespace = args.namespace
        serving_name = args.name

        api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
        default_endpoint_spec = V1alpha2EndpointSpec(
            predictor=V1alpha2PredictorSpec(
                tensorflow=V1alpha2TensorflowSpec(
                    storage_uri=args.storage_uri,
                    resources=V1ResourceRequirements(
                        requests={'cpu': '100m', 'memory': '1Gi'},
                        limits={'cpu': '100m', 'memory': '1Gi'}))))
        isvc = V1alpha2InferenceService(
            api_version=api_version,
            kind=constants.KFSERVING_KIND,
            metadata=client.V1ObjectMeta(
                name=serving_name, namespace=namespace),
            spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

        KFServing = KFServingClient()
        KFServing.create(isvc)
        print('Waiting 5 seconds for the InferenceService to be created')
        time.sleep(5)

        KFServing.get(serving_name, namespace=namespace, watch=True, timeout_seconds=300)
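Invoked from the command line, a script wrapping this run method might be called like this (the script name is assumed):

python deploy_isvc.py --namespace kubeflow --storage_uri pvc://my-pvc/export --name kfserving-sample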
Example #11
def deploy_model(namespace, trained_model_path):
    logging.basicConfig(level=logging.INFO)
    logging.info('Starting deploy model step ..')
    logging.info('Input data ..')
    logging.info('namespace:{}'.format(namespace))
    logging.info('trained_model_path:{}'.format(trained_model_path))

    logging.info('STEP: DEPLOY MODEL (1/2) Generating definition..')
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    now = datetime.datetime.utcnow().strftime("%Y%m%d%H%M%S")
    inference_service_name = 'simple-model'+now
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
        tensorflow=V1alpha2TensorflowSpec(
        storage_uri=trained_model_path,
        resources=V1ResourceRequirements(
        requests={'cpu': '100m', 'memory': '1Gi'},
        limits={'cpu': '100m', 'memory': '1Gi'}))))

    isvc = V1alpha2InferenceService(api_version=api_version,
                                    kind=constants.KFSERVING_KIND,
                                    metadata=client.V1ObjectMeta(
                                    name=inference_service_name,
                                    annotations=
                                            {
                                                'sidecar.istio.io/inject': 'false',
                                                'autoscaling.knative.dev/target': '1'
                                            },
                                    namespace=namespace),
                                    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

# velascoluis: sidecar is disabled by https://github.com/knative/serving/issues/6829
# Note: make sure the trained model path starts with file:// or gs://

    KFServing = KFServingClient()
    logging.info('STEP: DEPLOY MODEL (2/2) Creating inference service..')
    KFServing.create(isvc)
    logging.info('Inference service ' + inference_service_name + " created ...")
    KFServing.get(inference_service_name, namespace=namespace, watch=True, timeout_seconds=120)
    logging.info('Deploy model step finished')
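A sketch of invoking this step directly; the bucket path is illustrative and, per the note above, must start with file:// or gs://:

deploy_model(namespace='kubeflow',
             trained_model_path='gs://my-bucket/export')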
Example #12
def deploy_model(namespace: str, trained_model_path: InputPath(str)):
    from kubernetes import client
    from kfserving import KFServingClient
    from kfserving import constants
    from kfserving import V1alpha2EndpointSpec
    from kfserving import V1alpha2PredictorSpec
    from kfserving import V1alpha2TensorflowSpec
    from kfserving import V1alpha2InferenceServiceSpec
    from kfserving import V1alpha2InferenceService
    from kubernetes.client import V1ResourceRequirements

    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    inference_service_name = 'inference112cbk'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(tensorflow=V1alpha2TensorflowSpec(
            storage_uri=trained_model_path,
            resources=V1ResourceRequirements(requests={
                'cpu': '100m',
                'memory': '1Gi'
            },
                                             limits={
                                                 'cpu': '100m',
                                                 'memory': '1Gi'
                                             }))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=inference_service_name,
                                     namespace=namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing = KFServingClient()
    KFServing.create(isvc)
    print('Inference service ' + inference_service_name + " created ...")
    KFServing.get(inference_service_name,
                  namespace=namespace,
                  watch=True,
                  timeout_seconds=120)
    print('Model deployed')
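Because this function takes an InputPath parameter, it is presumably meant to run as a Kubeflow Pipelines lightweight component; a sketch of wrapping it, assuming KFP v1 and that kfserving can be installed at runtime:

import kfp.components as comp

# Builds a pipeline op from the function above; base image and packages are assumptions.
deploy_model_op = comp.create_component_from_func(
    deploy_model,
    base_image='python:3.7',
    packages_to_install=['kfserving'])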
Example #13
def deploy_model(namespace, model_file_name, gcp_bucket):

    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    now = datetime.datetime.utcnow().strftime("%Y%m%d%H%M%S")
    inference_service_name = 'xgboost-r' + now
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            xgboost=V1alpha2XGBoostSpec(
                #storage_uri='gs://'+gcp_bucket+'/rmodel/'+model_file_name,
                storage_uri='gs://' + gcp_bucket + '/rmodel',
                resources=V1ResourceRequirements(requests={
                    'cpu': '100m',
                    'memory': '1Gi'
                },
                                                 limits={
                                                     'cpu': '100m',
                                                     'memory': '1Gi'
                                                 }))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=inference_service_name,
            namespace=namespace,
            annotations={'sidecar.istio.io/inject': 'false'}),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    # @velascoluis: the sidecar.istio.io/inject: "false" annotation is required, otherwise the ingress does not work

    KFServing = KFServingClient()
    KFServing.create(isvc)
    KFServing.get(inference_service_name,
                  namespace=namespace,
                  watch=True,
                  timeout_seconds=120)
Example #14
    def run(self):
        logger.info("Retrieving kfserving client")
        client = KFServingClient()

        logger.info("Specifying canary")
        canary = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            custom=V1alpha2CustomSpec(container=V1Container(
                name=Serve.SERVICE_NAME,
                image="{}:{}".format(Pipeline.DEPLOY_IMAGE, self.args.tag),
                image_pull_policy="Always",
            ))))

        logger.info("Rolling out canary deployment")
        client.rollout_canary(Serve.SERVICE_NAME,
                              canary=canary,
                              percent=50,
                              namespace=Rollout.NAMESPACE,
                              watch=True)

        logger.info("Promoting canary deployment")
        client.promote(Serve.SERVICE_NAME,
                       namespace=Rollout.NAMESPACE,
                       watch=True)
Example #15
api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION

default_endpoint_spec = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
    tensorflow=V1alpha2TensorflowSpec(
        storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
        resources=V1ResourceRequirements(requests={
            'cpu': '100m',
            'memory': '1Gi'
        },
                                         limits={
                                             'cpu': '100m',
                                             'memory': '1Gi'
                                         }))))

isvc = V1alpha2InferenceService(
    api_version=api_version,
    kind=constants.KFSERVING_KIND,
    metadata=client.V1ObjectMeta(name='flower-sample', namespace=namespace),
    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

KFServing = KFServingClient()

KFServing.create(isvc)

KFServing.get('flower-sample',
              namespace=namespace,
              watch=True,
              timeout_seconds=120)

# KFServing.delete('flower-sample', namespace=namespace)
Example #16
from kfserving import KFServingClient
from kfserving import constants
from kfserving import V1alpha2EndpointSpec
from kfserving import V1alpha2PredictorSpec
from kfserving import V1alpha2TransformerSpec
from kfserving import V1alpha2PyTorchSpec
from kfserving import V1alpha2CustomSpec
from kfserving import V1alpha2InferenceServiceSpec
from kfserving import V1alpha2InferenceService
from kubernetes.client import V1ResourceRequirements
from kubernetes.client import V1Container
from ..common.utils import predict
from ..common.utils import KFSERVING_TEST_NAMESPACE

api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
KFServing = KFServingClient(config_file="~/.kube/config")


def test_transformer():
    service_name = 'isvc-transformer'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            pytorch=V1alpha2PyTorchSpec(
                storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
                model_class_name="Net",
                resources=V1ResourceRequirements(requests={
                    'cpu': '100m',
                    'memory': '256Mi'
                },
                                                 limits={
Example #17
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from unittest.mock import patch

from kubernetes import client

from kfserving import V1alpha2EndpointSpec
from kfserving import V1alpha2PredictorSpec
from kfserving import V1alpha2TensorflowSpec
from kfserving import V1alpha2KFServiceSpec
from kfserving import V1alpha2KFService
from kfserving import KFServingClient

KFServing = KFServingClient()

mocked_unit_result = \
'''
{
    "api_version": "serving.kubeflow.org/v1alpha2",
    "kind": "KFService",
    "metadata": {
        "name": "flower-sample",
        "namespace": "kubeflow"
    },
    "spec": {
        "default": {
            "predictor": {
                "tensorflow": {
                    "storage_uri": "gs://kfserving-samples/models/tensorflow/flowers"
Example #18
default_endpoint_spec = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
    tensorflow=V1alpha2TensorflowSpec(
        storage_uri="s3://anonymous-model-result/result/saved_model",
        resources=V1ResourceRequirements(requests={
            "cpu": "100m",
            "memory": "1Gi"
        },
                                         limits={
                                             "cpu": "100m",
                                             "memory": "1Gi"
                                         }))))

isvc = V1alpha2InferenceService(
    api_version=api_version,
    kind=constants.KFSERVING_KIND,
    metadata=client.V1ObjectMeta(name="mnist-kfserving", namespace=namespace),
    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

KFServing = KFServingClient()
KFServing.set_credentials(
    storage_type="S3",
    namespace='anonymous',
    credentials_file='credentials',
    s3_profile="default",
    s3_endpoint="minio-service.kubeflow.svc.cluster.local:9000",
    s3_region="us-west-1",
    s3_use_https="0",
    s3_verify_ssl="0")

KFServing.create(isvc)
Example #19
def perform_action(action, model_name, model_uri, canary_traffic_percent, namespace,
                   framework, custom_model_spec, service_account, inferenceservice_yaml,
                   request_timeout, autoscaling_target=0, enable_istio_sidecar=True,
                   watch_timeout=300, min_replicas=0, max_replicas=0):
    """
    Perform the specified action. If the action is not 'delete' and `inferenceservice_yaml`
    was provided, the dict representation of the YAML will be sent directly to the
    Kubernetes API. Otherwise, a V1beta1InferenceService object will be built using the
    provided input and then sent for creation/update.
    :return: InferenceService JSON output
    """
    kfs_client = KFServingClient()

    if inferenceservice_yaml:
        # Overwrite name and namespace if provided
        if namespace:
            inferenceservice_yaml['metadata']['namespace'] = namespace

        if model_name:
            inferenceservice_yaml['metadata']['name'] = model_name
        else:
            model_name = inferenceservice_yaml['metadata']['name']

        kfsvc = inferenceservice_yaml

    elif action != 'delete':
        # Create annotations
        annotations = {}
        if int(autoscaling_target) != 0:
            annotations["autoscaling.knative.dev/target"] = str(autoscaling_target)
        if not enable_istio_sidecar:
            annotations["sidecar.istio.io/inject"] = 'false'
        if not annotations:
            annotations = None
        metadata = client.V1ObjectMeta(
            name=model_name, namespace=namespace, annotations=annotations
        )

        # If a custom model container spec was provided, build the V1Container
        # object using it.
        containers = []
        if custom_model_spec:
            containers = [create_custom_container_spec(custom_model_spec)]

        # Build the V1beta1PredictorSpec.
        predictor_spec = create_predictor_spec(
            framework, model_uri, canary_traffic_percent, service_account,
            min_replicas, max_replicas, containers, request_timeout
        )

        kfsvc = create_inference_service(metadata, predictor_spec)

    if action == "create":
        submit_api_request(kfs_client, 'create', model_name, kfsvc, namespace,
                           watch=True, timeout_seconds=watch_timeout)
    elif action == "update":
        submit_api_request(kfs_client, 'update', model_name, kfsvc, namespace,
                           watch=True, timeout_seconds=watch_timeout)
    elif action == "apply":
        try:
            submit_api_request(kfs_client, 'create', model_name, kfsvc, namespace,
                               watch=True, timeout_seconds=watch_timeout)
        except Exception:
            submit_api_request(kfs_client, 'update', model_name, kfsvc, namespace,
                               watch=True, timeout_seconds=watch_timeout)
    elif action == "delete":
        kfs_client.delete(model_name, namespace=namespace)
    else:
        raise ("Error: No matching action: " + action)

    model_status = kfs_client.get(model_name, namespace=namespace)
    return model_status
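A hedged example of calling perform_action for a plain TensorFlow model; all argument values are illustrative:

status = perform_action(
    action='apply',
    model_name='flower-sample',
    model_uri='gs://kfserving-samples/models/tensorflow/flowers',
    canary_traffic_percent=100,
    namespace='kubeflow',
    framework='tensorflow',
    custom_model_spec={},
    service_account=None,
    inferenceservice_yaml=None,
    request_timeout=60)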
Example #20
def deploy_model(action,
                 model_name,
                 default_model_uri,
                 canary_model_uri,
                 canary_model_traffic,
                 namespace,
                 framework,
                 default_custom_model_spec,
                 canary_custom_model_spec,
                 service_account,
                 autoscaling_target=0,
                 enable_istio_sidecar=True,
                 inferenceservice_yaml=None,
                 watch_timeout=120,
                 min_replicas=0,
                 max_replicas=0):
    KFServing = KFServingClient()

    if inferenceservice_yaml:
        # Overwrite name and namespace if provided
        if namespace:
            inferenceservice_yaml['metadata']['namespace'] = namespace
        if model_name:
            inferenceservice_yaml['metadata']['name'] = model_name
        kfsvc = inferenceservice_yaml
    else:
        # Create annotation
        annotations = {}
        if int(autoscaling_target) != 0:
            annotations["autoscaling.knative.dev/target"] = str(
                autoscaling_target)
        if not enable_istio_sidecar:
            annotations["sidecar.istio.io/inject"] = 'false'
        if not annotations:
            annotations = None
        metadata = client.V1ObjectMeta(name=model_name,
                                       namespace=namespace,
                                       annotations=annotations)

        # Create Default deployment if default model uri is provided.
        if framework != "custom" and default_model_uri:
            default_model_spec = EndpointSpec(framework, default_model_uri,
                                              service_account, min_replicas,
                                              max_replicas)
        elif framework == "custom" and default_custom_model_spec:
            default_model_spec = customEndpointSpec(default_custom_model_spec,
                                                    service_account,
                                                    min_replicas, max_replicas)

        # Create Canary deployment if canary model uri is provided.
        if framework != "custom" and canary_model_uri:
            canary_model_spec = EndpointSpec(framework, canary_model_uri,
                                             service_account, min_replicas,
                                             max_replicas)
            kfsvc = InferenceService(metadata, default_model_spec,
                                     canary_model_spec, canary_model_traffic)
        elif framework == "custom" and canary_custom_model_spec:
            canary_model_spec = customEndpointSpec(canary_custom_model_spec,
                                                   service_account,
                                                   min_replicas, max_replicas)
            kfsvc = InferenceService(metadata, default_model_spec,
                                     canary_model_spec, canary_model_traffic)
        else:
            kfsvc = InferenceService(metadata, default_model_spec)

    def create(kfsvc, model_name, namespace):
        KFServing.create(kfsvc, namespace=namespace)
        time.sleep(1)
        KFServing.get(model_name,
                      namespace=namespace,
                      watch=True,
                      timeout_seconds=watch_timeout)

    def update(kfsvc, model_name, namespace):
        KFServing.patch(model_name, kfsvc, namespace=namespace)
        time.sleep(1)
        KFServing.get(model_name,
                      namespace=namespace,
                      watch=True,
                      timeout_seconds=watch_timeout)

    if action == "create":
        create(kfsvc, model_name, namespace)
    elif action == "update":
        update(kfsvc, model_name, namespace)
    elif action == "apply":
        try:
            create(kfsvc, model_name, namespace)
        except Exception:
            update(kfsvc, model_name, namespace)
    elif action == "rollout":
        if inferenceservice_yaml:
            raise ("Rollout is not supported for inferenceservice yaml")
        KFServing.rollout_canary(
            model_name,
            canary=canary_model_spec,
            percent=canary_model_traffic,
            namespace=namespace,
            watch=True,
            timeout_seconds=watch_timeout,
        )
    elif action == "promote":
        KFServing.promote(model_name,
                          namespace=namespace,
                          watch=True,
                          timeout_seconds=watch_timeout)
    elif action == "delete":
        KFServing.delete(model_name, namespace=namespace)
    else:
        raise ("Error: No matching action: " + action)

    model_status = KFServing.get(model_name, namespace=namespace)
    return model_status
Example #21
def get_kfserving_client():
    client = KFServingClient()
    return client
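For example, fetching an InferenceService and printing its URL with the returned client:

kfs = get_kfserving_client()
isvc = kfs.get('flower-sample', namespace='kubeflow')
print(isvc['status'].get('url'))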
Example #22
def deploy_model(action,
                 model_name,
                 default_model_uri,
                 canary_model_uri,
                 canary_model_traffic,
                 namespace,
                 framework,
                 default_custom_model_spec,
                 canary_custom_model_spec,
                 autoscaling_target=0):
    if int(autoscaling_target) != 0:
        annotations = {
            "autoscaling.knative.dev/target": str(autoscaling_target)
        }
    else:
        annotations = None
    metadata = client.V1ObjectMeta(name=model_name,
                                   namespace=namespace,
                                   annotations=annotations)

    # Create Default deployment if default model uri is provided.
    if framework != 'custom' and default_model_uri:
        default_model_spec = EndpointSpec(framework, default_model_uri)
    elif framework == 'custom' and default_custom_model_spec:
        default_model_spec = customEndpointSpec(default_custom_model_spec)

    # Create Canary deployment if canary model uri is provided.
    if framework != 'custom' and canary_model_uri:
        canary_model_spec = EndpointSpec(framework, canary_model_uri)
        kfsvc = InferenceService(metadata, default_model_spec,
                                 canary_model_spec, canary_model_traffic)
    elif framework == 'custom' and canary_custom_model_spec:
        canary_model_spec = customEndpointSpec(canary_custom_model_spec)
        kfsvc = InferenceService(metadata, default_model_spec,
                                 canary_model_spec, canary_model_traffic)
    else:
        kfsvc = InferenceService(metadata, default_model_spec)

    KFServing = KFServingClient()

    if action == 'create':
        KFServing.create(kfsvc, watch=True, timeout_seconds=120)
    elif action == 'update':
        KFServing.patch(model_name, kfsvc)
    elif action == 'rollout':
        KFServing.rollout_canary(model_name,
                                 canary=canary_model_spec,
                                 percent=canary_model_traffic,
                                 namespace=namespace,
                                 watch=True,
                                 timeout_seconds=120)
    elif action == 'promote':
        KFServing.promote(model_name,
                          namespace=namespace,
                          watch=True,
                          timeout_seconds=120)
    elif action == 'delete':
        KFServing.delete(model_name, namespace=namespace)
    else:
        raise ("Error: No matching action: " + action)

    model_status = KFServing.get(model_name, namespace=namespace)
    return model_status
Example #23
        default_model_spec = ModelSpec(framework, default_model_uri)
    else:
        default_model_spec = customModelSpec(default_custom_model_spec)
    # Create Canary deployment if canary model uri is provided.
    if framework != 'custom' and canary_model_uri:
        canary_model_spec = ModelSpec(framework, canary_model_uri)
        kfsvc = kfserving_deployment(metadata, default_model_spec,
                                     canary_model_spec, canary_model_traffic)
    elif framework == 'custom' and canary_custom_model_spec:
        canary_model_spec = customModelSpec(canary_custom_model_spec)
        kfsvc = kfserving_deployment(metadata, default_model_spec,
                                     canary_model_spec, canary_model_traffic)
    else:
        kfsvc = kfserving_deployment(metadata, default_model_spec)

    KFServing = KFServingClient()

    if action == 'create':
        KFServing.create(kfsvc)
    elif action == 'update':
        KFServing.patch(model_name, kfsvc)
    elif action == 'delete':
        KFServing.delete(model_name, namespace=namespace)
    else:
        raise ("Error: No matching action: " + action)

    model_status = KFServing.get(model_name, namespace=namespace)
    print(model_status)

    if not os.path.exists(os.path.dirname(output_path)):
        os.makedirs(os.path.dirname(output_path))
Example #24
def deploy_model(
    action,
    model_name,
    default_model_uri,
    canary_model_uri,
    canary_model_traffic,
    namespace,
    framework,
    default_custom_model_spec,
    canary_custom_model_spec,
    service_account,
    autoscaling_target=0,
):
    if int(autoscaling_target) != 0:
        annotations = {"autoscaling.knative.dev/target": str(autoscaling_target)}
    else:
        annotations = None
    metadata = client.V1ObjectMeta(
        name=model_name, namespace=namespace, annotations=annotations
    )

    # Create Default deployment if default model uri is provided.
    if framework != "custom" and default_model_uri:
        default_model_spec = EndpointSpec(framework, default_model_uri, service_account)
    elif framework == "custom" and default_custom_model_spec:
        default_model_spec = customEndpointSpec(
            default_custom_model_spec, service_account
        )

    # Create Canary deployment if canary model uri is provided.
    if framework != "custom" and canary_model_uri:
        canary_model_spec = EndpointSpec(framework, canary_model_uri, service_account)
        kfsvc = InferenceService(
            metadata, default_model_spec, canary_model_spec, canary_model_traffic
        )
    elif framework == "custom" and canary_custom_model_spec:
        canary_model_spec = customEndpointSpec(
            canary_custom_model_spec, service_account
        )
        kfsvc = InferenceService(
            metadata, default_model_spec, canary_model_spec, canary_model_traffic
        )
    else:
        kfsvc = InferenceService(metadata, default_model_spec)

    KFServing = KFServingClient()

    def create(kfsvc, model_name, namespace):
        KFServing.create(kfsvc)
        time.sleep(1)
        KFServing.get(model_name, namespace=namespace, watch=True, timeout_seconds=120)

    def update(kfsvc, model_name, namespace):
        KFServing.patch(model_name, kfsvc)
        time.sleep(1)
        KFServing.get(model_name, namespace=namespace, watch=True, timeout_seconds=120)

    if action == "create":
        create(kfsvc, model_name, namespace)
    elif action == "update":
        update(kfsvc, model_name, namespace)
    elif action == "apply":
        try:
            create(kfsvc, model_name, namespace)
        except Exception:
            update(kfsvc, model_name, namespace)
    elif action == "rollout":
        KFServing.rollout_canary(
            model_name,
            canary=canary_model_spec,
            percent=canary_model_traffic,
            namespace=namespace,
            watch=True,
            timeout_seconds=120,
        )
    elif action == "promote":
        KFServing.promote(
            model_name, namespace=namespace, watch=True, timeout_seconds=120
        )
    elif action == "delete":
        KFServing.delete(model_name, namespace=namespace)
    else:
        raise ("Error: No matching action: " + action)

    model_status = KFServing.get(model_name, namespace=namespace)
    return model_status
Example #25
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
from kfserving import KFServingClient

KFServing = KFServingClient(load_kube_config=True)


def wait_for_kfservice_ready(name,
                             namespace='kfserving-ci-e2e-test',
                             timeout_seconds=600):
    for _ in range(round(timeout_seconds / 10)):
        time.sleep(10)
        status = 'Unknown'
        kfsvc_status = KFServing.get(name, namespace=namespace)
        for condition in kfsvc_status['status'].get('conditions', []):
            if condition.get('type', '') == 'Ready':
                status = condition.get('status', 'Unknown')
        if status == 'True':
            return
    raise RuntimeError("Timed out waiting for the KFService to become ready.")
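Example usage of the polling helper above:

wait_for_kfservice_ready('flower-sample',
                         namespace='kfserving-ci-e2e-test',
                         timeout_seconds=300)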
Example #26
    def deploy(self, kfservice_id):
        mykfservice = db.session.query(KfService).filter_by(
            id=kfservice_id).first()

        namespace = conf.get('KFSERVING_NAMESPACE')
        crd_info = conf.get('CRD_INFO')['inferenceservice']

        # Build the container from the service definition
        def make_container(service, mykfservice):
            from myapp.utils.py.py_k8s import K8s
            k8s = K8s()  # Not deploying, so no cluster configuration is needed
            container = k8s.make_container(
                name=mykfservice.name + "-" + service.name,
                command=["sh", "-c", service.command]
                if service.command else None,
                args=None,
                volume_mount=None,
                image_pull_policy=conf.get('IMAGE_PULL_POLICY', 'Always'),
                image=service.images,
                working_dir=service.working_dir
                if service.working_dir else None,
                env=service.env,
                resource_memory=service.resource_memory,
                resource_cpu=service.resource_cpu,
                resource_gpu=service.resource_gpu,
                username=service.created_by.username)
            return container

        api_version = crd_info['group'] + '/' + crd_info['version']
        default_endpoint_spec = V1alpha2EndpointSpec(
            predictor=V1alpha2PredictorSpec(
                min_replicas=mykfservice.default_service.min_replicas,
                max_replicas=mykfservice.default_service.max_replicas,
                custom=V1alpha2CustomSpec(container=make_container(
                    mykfservice.default_service,
                    mykfservice)))) if mykfservice.default_service else None

        canary_endpoint_spec = V1alpha2EndpointSpec(
            predictor=V1alpha2PredictorSpec(
                min_replicas=mykfservice.canary_service.min_replicas,
                max_replicas=mykfservice.canary_service.max_replicas,
                custom=V1alpha2CustomSpec(container=make_container(
                    mykfservice.canary_service,
                    mykfservice)))) if mykfservice.canary_service else None

        metadata = kubernetes.client.V1ObjectMeta(
            name=mykfservice.name,
            labels={
                "app": mykfservice.name,
                "rtx-user": mykfservice.created_by.username
            },
            namespace=namespace)

        isvc = V1alpha2InferenceService(
            api_version=api_version,
            kind=crd_info['kind'],
            metadata=metadata,
            spec=V1alpha2InferenceServiceSpec(
                default=default_endpoint_spec,
                canary=canary_endpoint_spec,
                canary_traffic_percent=mykfservice.canary_traffic_percent))

        KFServing = KFServingClient()
        try:
            KFServing.delete(mykfservice.name,
                             namespace=namespace,
                             version=crd_info['version'])
        except Exception as e:
            print(e)

        KFServing.create(isvc,
                         namespace=namespace,
                         version=crd_info['version'])

        flash(category='warning', message='Deployment started; it should finish in about a minute.')
        return redirect('/kfservice_modelview/list/')
Example #27
def main():

    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(custom=V1alpha2CustomSpec(
            container=V1Container(
                name="kfserving-container",
                image=FLAGS.image,
                env=[{
                    "name": "STORAGE_URI",
                    "value": "%s" % FLAGS.storage_uri
                }],
                resources=V1ResourceRequirements(
                    limits={"nvidia.com/gpu": FLAGS.gpus_to_inference}),
                command=["python"],
                args=[
                    "model.py",
                    "--model-name",
                    "%s" % FLAGS.inference_name,
                    "--out_dir",
                    "%s" % FLAGS.model_path,
                    "--classes_file",
                    "%s" % FLAGS.classes_file,
                ]))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=k8s_client.V1ObjectMeta(name=FLAGS.inference_name,
                                         namespace=FLAGS.namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))
    # Create inference service
    KFServing = KFServingClient()
    KFServing.create(isvc)
    time.sleep(2)

    # Check inference service
    KFServing.get(FLAGS.inference_name,
                  namespace=FLAGS.namespace,
                  watch=True,
                  timeout_seconds=180)

    model_status = KFServing.get(FLAGS.inference_name,
                                 namespace=FLAGS.namespace)

    for condition in model_status["status"]["conditions"]:
        if condition['type'] == 'Ready':
            if condition['status'] == 'True':
                print('Model is ready')
                break
            else:
                print(
                    'Model deployment timed out; please check the inferenceservice events for more details.'
                )
                exit(1)
    try:
        print(
            model_status["status"]["url"] +
            " is the Knative domain header; $ISTIO_INGRESS_ENDPOINT is defined in the commands below"
        )
        print("Sample test commands: ")
        print(
            "# Note: If Istio Ingress gateway is not served with LoadBalancer, use $CLUSTER_NODE_IP:31380 as the ISTIO_INGRESS_ENDPOINT"
        )
        print(
            "ISTIO_INGRESS_ENDPOINT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.status.loadBalancer.ingress[0].ip}')"
        )
        # model_status['status']['url'] is like http://flowers-sample.kubeflow.example.com/v1/models/flowers-sample
        url = re.compile(r"https?://")
        host, path = url.sub("", model_status["status"]["url"]).split("/", 1)
        print('curl -X GET -H "Host: ' + host +
              '" http://$ISTIO_INGRESS_ENDPOINT/' + path)
    except Exception:
        print("Model is not ready, check the logs for the Knative URL status.")
        exit(1)
Example #28
    V1alpha2XGBoostSpec,
    V1alpha2InferenceServiceSpec,
    V1alpha2InferenceService,
    V1beta1InferenceService,
    V1beta1InferenceServiceSpec,
    V1beta1PredictorSpec,
    V1beta1XGBoostSpec,
)
from kubernetes.client import V1ResourceRequirements

from ..common.utils import predict, KFSERVING_TEST_NAMESPACE

api_version = f"{constants.KFSERVING_GROUP}/{constants.KFSERVING_VERSION}"
api_v1beta1_version = (
    f"{constants.KFSERVING_GROUP}/{constants.KFSERVING_V1BETA1_VERSION}")
KFServing = KFServingClient(
    config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))


def test_xgboost_kfserving():
    service_name = "isvc-xgboost"
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            xgboost=V1alpha2XGBoostSpec(
                storage_uri="gs://kfserving-samples/models/xgboost/iris",
                resources=V1ResourceRequirements(
                    requests={
                        "cpu": "100m",
                        "memory": "256Mi"
                    },
                    limits={
Example #29
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from unittest.mock import patch

from kubernetes import client

from kfserving import V1alpha2EndpointSpec
from kfserving import V1alpha2PredictorSpec
from kfserving import V1alpha2TensorflowSpec
from kfserving import V1alpha2InferenceServiceSpec
from kfserving import V1alpha2InferenceService
from kfserving import KFServingClient

KFServing = KFServingClient(config_file='./kfserving/test/kubeconfig')

mocked_unit_result = \
    '''
{
    "api_version": "serving.kubeflow.org/v1alpha2",
    "kind": "InferenceService",
    "metadata": {
        "name": "flower-sample",
        "namespace": "kubeflow"
    },
    "spec": {
        "default": {
            "predictor": {
                "tensorflow": {
                    "storage_uri": "gs://kfserving-samples/models/tensorflow/flowers"