Code example #1
def create_inference_service(namespace: str, name: str, storage_url: str,
                             runtime_version: str, service_account_name: str):
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION

    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            service_account_name=service_account_name,
            triton=V1alpha2TritonSpec(runtime_version=runtime_version,
                                      storage_uri=storage_url,
                                      resources=V1ResourceRequirements(
                                          requests={
                                              'cpu': '100m',
                                              'memory': '1Gi'
                                          },
                                          limits={
                                              'cpu': '100m',
                                              'memory': '1Gi'
                                          }))))
    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=name, namespace=namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec),
    )

    KFServing = KFServingClient()
    KFServing.create(isvc)
    KFServing.get(name, namespace=namespace, watch=True, timeout_seconds=300)
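A minimal invocation sketch for the helper above; the namespace, model URI, Triton runtime tag, and service-account name are illustrative placeholders, not values taken from the source:

# Hypothetical example values -- substitute your own namespace, model URI,
# Triton runtime tag, and service account.
create_inference_service(namespace='kfserving-test',
                         name='triton-sample',
                         storage_url='gs://your-bucket/models/triton',
                         runtime_version='20.03-py3',
                         service_account_name='sa-with-storage-access')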
Code example #2
 def run(self):
     parser = argparse.ArgumentParser()
     parser.add_argument('--namespace', required=False, default='kubeflow')
     # pvc://${PVCNAME}/dir
     parser.add_argument('--storage_uri', required=False, default='/mnt/export')
     parser.add_argument('--name', required=False, default='kfserving-sample')        
     args = parser.parse_args()
     namespace = args.namespace
     serving_name = args.name
     
     api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
     default_endpoint_spec = V1alpha2EndpointSpec(
                               predictor=V1alpha2PredictorSpec(
                                 tensorflow=V1alpha2TensorflowSpec(
                                   storage_uri=args.storage_uri,
                                   resources=V1ResourceRequirements(
                                       requests={'cpu':'100m','memory':'1Gi'},
                                       limits={'cpu':'100m', 'memory':'1Gi'}))))
     isvc = V1alpha2InferenceService(api_version=api_version,
                               kind=constants.KFSERVING_KIND,
                               metadata=client.V1ObjectMeta(
                                   name=serving_name, namespace=namespace),
                               spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))        
     
     KFServing = KFServingClient()
     KFServing.create(isvc)
     print('Waiting 5 seconds for the InferenceService to be created')
     time.sleep(5)
     
     KFServing.get(serving_name, namespace=namespace, watch=True, timeout_seconds=300)
Code example #3
File: kfservingdeployer.py  Project: mlx-bot/katalog
def deploy_model(action, model_name, default_model_uri, canary_model_uri,
                 canary_model_traffic, namespace, framework,
                 default_custom_model_spec, canary_custom_model_spec,
                 autoscaling_target=0):
    if int(autoscaling_target) != 0:
        annotations = {"autoscaling.knative.dev/target": str(autoscaling_target)}
    else:
        annotations = None
    metadata = client.V1ObjectMeta(name=model_name, namespace=namespace, annotations=annotations)
    if framework != 'custom':
        default_model_spec = ModelSpec(framework, default_model_uri)
    else:
        default_model_spec = customModelSpec(default_custom_model_spec)
    # Create Canary deployment if canary model uri is provided.
    if framework != 'custom' and canary_model_uri:
        canary_model_spec = ModelSpec(framework, canary_model_uri)
        kfsvc = kfserving_deployment(metadata, default_model_spec, canary_model_spec, canary_model_traffic)
    elif framework == 'custom' and canary_custom_model_spec:
        canary_model_spec = customModelSpec(canary_custom_model_spec)
        kfsvc = kfserving_deployment(metadata, default_model_spec, canary_model_spec, canary_model_traffic)
    else:
        kfsvc = kfserving_deployment(metadata, default_model_spec)

    KFServing = KFServingClient()

    if action == 'create':
        KFServing.create(kfsvc)
    elif action == 'update':
        KFServing.patch(model_name, kfsvc)
    elif action == 'delete':
        KFServing.delete(model_name, namespace=namespace)
    else:
        raise("Error: No matching action: " + action)

    model_status = KFServing.get(model_name, namespace=namespace)
    return model_status
Code example #4
def deploy_model(namespace: str, trained_model_path: InputPath(str)):
    from kubernetes import client
    from kfserving import KFServingClient
    from kfserving import constants
    from kfserving import V1alpha2EndpointSpec
    from kfserving import V1alpha2PredictorSpec
    from kfserving import V1alpha2TensorflowSpec
    from kfserving import V1alpha2InferenceServiceSpec
    from kfserving import V1alpha2InferenceService
    from kubernetes.client import V1ResourceRequirements

    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    inference_service_name = 'inference112cbk'
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(tensorflow=V1alpha2TensorflowSpec(
            storage_uri=trained_model_path,
            resources=V1ResourceRequirements(requests={
                'cpu': '100m',
                'memory': '1Gi'
            },
                                             limits={
                                                 'cpu': '100m',
                                                 'memory': '1Gi'
                                             }))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(name=inference_service_name,
                                     namespace=namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    KFServing = KFServingClient()
    KFServing.create(isvc)
    print('Inference service ' + inference_service_name + " created ...")
    KFServing.get(inference_service_name,
                  namespace=namespace,
                  watch=True,
                  timeout_seconds=120)
    print('Model deployed')
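Since the function above takes an InputPath parameter, it is evidently intended to run as a Kubeflow Pipelines lightweight component. A possible wrapping, assuming the KFP v1 SDK (the base image and package list below are illustrative, not from the source):

import kfp.components as comp

# Build a pipeline component from the function; kfserving must be installable in the image.
deploy_model_op = comp.create_component_from_func(
    deploy_model,
    base_image='python:3.8',
    packages_to_install=['kfserving'])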
Code example #5
def deploy_model(namespace,trained_model_path):
    logging.basicConfig(level=logging.INFO)
    logging.info('Starting deploy model step ..')
    logging.info('Input data ..')
    logging.info('namespace:{}'.format(namespace))
    logging.info('trained_model_path:{}'.format(trained_model_path))

    logging.info('STEP: DEPLOY MODEL (1/2) Generating definition..')
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    now = datetime.datetime.utcnow().strftime("%Y%m%d%H%M%S")
    inference_service_name = 'simple-model' + now
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            tensorflow=V1alpha2TensorflowSpec(
                storage_uri=trained_model_path,
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}))))

    isvc = V1alpha2InferenceService(api_version=api_version,
                                    kind=constants.KFSERVING_KIND,
                                    metadata=client.V1ObjectMeta(
                                    name=inference_service_name,
                                    annotations=
                                            {
                                                'sidecar.istio.io/inject': 'false',
                                                'autoscaling.knative.dev/target': '1'
                                            },
                                    namespace=namespace),
                                    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    # velascoluis: the Istio sidecar is disabled because of https://github.com/knative/serving/issues/6829
    # Note: make sure the trained model path starts with file:// or gs://

    KFServing = KFServingClient()
    logging.info('STEP: DEPLOY MODEL (2/2) Creating inference service..')
    KFServing.create(isvc)
    logging.info('Inference service ' + inference_service_name + " created ...")
    KFServing.get(inference_service_name, namespace=namespace, watch=True, timeout_seconds=120)
    logging.info('Deploy model step finished')
Code example #6
def deploy_model(namespace, model_file_name, gcp_bucket):

    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    now = datetime.datetime.utcnow().strftime("%Y%m%d%H%M%S")
    inference_service_name = 'xgboost-r' + now
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=1,
            xgboost=V1alpha2XGBoostSpec(
                #storage_uri='gs://'+gcp_bucket+'/rmodel/'+model_file_name,
                storage_uri='gs://' + gcp_bucket + '/rmodel',
                resources=V1ResourceRequirements(requests={
                    'cpu': '100m',
                    'memory': '1Gi'
                },
                                                 limits={
                                                     'cpu': '100m',
                                                     'memory': '1Gi'
                                                 }))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=client.V1ObjectMeta(
            name=inference_service_name,
            namespace=namespace,
            annotations={'sidecar.istio.io/inject': 'false'}),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

    # @velascoluis - the annotation sidecar.istio.io/inject: "false" is required, otherwise the ingress does not work

    KFServing = KFServingClient()
    KFServing.create(isvc)
    KFServing.get(inference_service_name,
                  namespace=namespace,
                  watch=True,
                  timeout_seconds=120)
Code example #7
    def create_isvc(self, namespace, isvc):
        """Create the provided InferenceService in the specified namespace.

        :param namespace: The namespace in which to create the InferenceService
        :param isvc: The InferenceService body
        :returns: object: Created InferenceService.

        """
        KFServing = KFServingClient()
        try:
            created_isvc = KFServing.create(isvc, namespace=namespace)
            isvc_name = created_isvc['metadata']['name']
            isvc_namespace = created_isvc['metadata']['namespace']
            KFServing.get(isvc_name, isvc_namespace, watch=True)
            return created_isvc
        except client.rest.ApiException:
            raise RuntimeError("Failed to create InferenceService. Perhaps the CRD "
                               "InferenceService version {} is not installed? "\
                                   .format(constants.KFSERVING_VERSION))
Code example #8
def deploy_model(action,
                 model_name,
                 default_model_uri,
                 canary_model_uri,
                 canary_model_traffic,
                 namespace,
                 framework,
                 default_custom_model_spec,
                 canary_custom_model_spec,
                 autoscaling_target=0):
    if int(autoscaling_target) != 0:
        annotations = {
            "autoscaling.knative.dev/target": str(autoscaling_target)
        }
    else:
        annotations = None
    metadata = client.V1ObjectMeta(name=model_name,
                                   namespace=namespace,
                                   annotations=annotations)

    # Create Default deployment if default model uri is provided.
    if framework != 'custom' and default_model_uri:
        default_model_spec = EndpointSpec(framework, default_model_uri)
    elif framework == 'custom' and default_custom_model_spec:
        default_model_spec = customEndpointSpec(default_custom_model_spec)

    # Create Canary deployment if canary model uri is provided.
    if framework != 'custom' and canary_model_uri:
        canary_model_spec = EndpointSpec(framework, canary_model_uri)
        kfsvc = InferenceService(metadata, default_model_spec,
                                 canary_model_spec, canary_model_traffic)
    elif framework == 'custom' and canary_custom_model_spec:
        canary_model_spec = customEndpointSpec(canary_custom_model_spec)
        kfsvc = InferenceService(metadata, default_model_spec,
                                 canary_model_spec, canary_model_traffic)
    else:
        kfsvc = InferenceService(metadata, default_model_spec)

    KFServing = KFServingClient()

    if action == 'create':
        KFServing.create(kfsvc, watch=True, timeout_seconds=120)
    elif action == 'update':
        KFServing.patch(model_name, kfsvc)
    elif action == 'rollout':
        KFServing.rollout_canary(model_name,
                                 canary=canary_model_spec,
                                 percent=canary_model_traffic,
                                 namespace=namespace,
                                 watch=True,
                                 timeout_seconds=120)
    elif action == 'promote':
        KFServing.promote(model_name,
                          namespace=namespace,
                          watch=True,
                          timeout_seconds=120)
    elif action == 'delete':
        KFServing.delete(model_name, namespace=namespace)
    else:
        raise ("Error: No matching action: " + action)

    model_status = KFServing.get(model_name, namespace=namespace)
    return model_status
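As a usage sketch, the function above could drive a canary rollout; every value below (names, URIs, namespace, traffic split) is a hypothetical placeholder rather than something from the original project:

# Hypothetical values for illustration only.
status = deploy_model(action='rollout',
                      model_name='flowers-sample',
                      default_model_uri='gs://your-bucket/models/v1',
                      canary_model_uri='gs://your-bucket/models/v2',
                      canary_model_traffic=10,
                      namespace='kubeflow',
                      framework='tensorflow',
                      default_custom_model_spec={},
                      canary_custom_model_spec={},
                      autoscaling_target=5)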
Code example #9
    def deploy(self, kfservice_id):
        mykfservice = db.session.query(KfService).filter_by(
            id=kfservice_id).first()

        namespace = conf.get('KFSERVING_NAMESPACE')
        crd_info = conf.get('CRD_INFO')['inferenceservice']

        # Build a container from the service definition
        def make_container(service, mykfservice):
            from myapp.utils.py.py_k8s import K8s
            k8s = K8s()  # not deploying here, so no cluster configuration is needed
            container = k8s.make_container(
                name=mykfservice.name + "-" + service.name,
                command=["sh", "-c", service.command]
                if service.command else None,
                args=None,
                volume_mount=None,
                image_pull_policy=conf.get('IMAGE_PULL_POLICY', 'Always'),
                image=service.images,
                working_dir=service.working_dir
                if service.working_dir else None,
                env=service.env,
                resource_memory=service.resource_memory,
                resource_cpu=service.resource_cpu,
                resource_gpu=service.resource_gpu,
                username=service.created_by.username)
            return container

        api_version = crd_info['group'] + '/' + crd_info['version']
        default_endpoint_spec = V1alpha2EndpointSpec(
            predictor=V1alpha2PredictorSpec(
                min_replicas=mykfservice.default_service.min_replicas,
                max_replicas=mykfservice.default_service.max_replicas,
                custom=V1alpha2CustomSpec(container=make_container(
                    mykfservice.default_service,
                    mykfservice)))) if mykfservice.default_service else None

        canary_endpoint_spec = V1alpha2EndpointSpec(
            predictor=V1alpha2PredictorSpec(
                min_replicas=mykfservice.canary_service.min_replicas,
                max_replicas=mykfservice.canary_service.max_replicas,
                custom=V1alpha2CustomSpec(container=make_container(
                    mykfservice.canary_service,
                    mykfservice)))) if mykfservice.canary_service else None

        metadata = kubernetes.client.V1ObjectMeta(
            name=mykfservice.name,
            labels={
                "app": mykfservice.name,
                "rtx-user": mykfservice.created_by.username
            },
            namespace=namespace)

        isvc = V1alpha2InferenceService(
            api_version=api_version,
            kind=crd_info['kind'],
            metadata=metadata,
            spec=V1alpha2InferenceServiceSpec(
                default=default_endpoint_spec,
                canary=canary_endpoint_spec,
                canary_traffic_percent=mykfservice.canary_traffic_percent))

        KFServing = KFServingClient()
        try:
            KFServing.delete(mykfservice.name,
                             namespace=namespace,
                             version=crd_info['version'])
        except Exception as e:
            print(e)

        KFServing.create(isvc,
                         namespace=namespace,
                         version=crd_info['version'])

        flash(category='warning', message='部署启动,一分钟后部署完成')  # "Deployment started; it will finish in about a minute"
        return redirect('/kfservice_modelview/list/')
Code example #10
    # Create Canary deployment if canary model uri is provided.
    if framework != 'custom' and canary_model_uri:
        canary_model_spec = ModelSpec(framework, canary_model_uri)
        kfsvc = kfserving_deployment(metadata, default_model_spec,
                                     canary_model_spec, canary_model_traffic)
    elif framework == 'custom' and canary_custom_model_spec:
        canary_model_spec = customModelSpec(canary_custom_model_spec)
        kfsvc = kfserving_deployment(metadata, default_model_spec,
                                     canary_model_spec, canary_model_traffic)
    else:
        kfsvc = kfserving_deployment(metadata, default_model_spec)

    KFServing = KFServingClient()

    if action == 'create':
        KFServing.create(kfsvc)
    elif action == 'update':
        KFServing.patch(model_name, kfsvc)
    elif action == 'delete':
        KFServing.delete(model_name, namespace=namespace)
    else:
        raise ("Error: No matching action: " + action)

    model_status = KFServing.get(model_name, namespace=namespace)
    print(model_status)

    if not os.path.exists(os.path.dirname(output_path)):
        os.makedirs(os.path.dirname(output_path))
    with open(output_path, "w") as report:
        report.write(json.dumps(model_status))
Code example #11
File: kfserving-s3.py  Project: mojokb/kubexxx-off
default_endpoint_spec = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
    tensorflow=V1alpha2TensorflowSpec(
        storage_uri="s3://anonymous-model-result/result/saved_model",
        resources=V1ResourceRequirements(requests={
            "cpu": "100m",
            "memory": "1Gi"
        },
                                         limits={
                                             "cpu": "100m",
                                             "memory": "1Gi"
                                         }))))

isvc = V1alpha2InferenceService(
    api_version=api_version,
    kind=constants.KFSERVING_KIND,
    metadata=client.V1ObjectMeta(name="mnist-kfserving", namespace=namespace),
    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

KFServing = KFServingClient()
KFServing.set_credentials(
    storage_type="S3",
    namespace='anonymous',
    credentials_file='credentials',
    s3_profile="default",
    s3_endpoint="minio-service.kubeflow.svc.cluster.local:9000",
    s3_region="us-west-1",
    s3_use_https="0",
    s3_verify_ssl="0")

KFServing.create(isvc)
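The snippet above returns as soon as the create call is issued. A hedged follow-up, mirroring the watch pattern used in the other examples here and assuming the anonymous namespace passed to set_credentials, would be:

# Block until the InferenceService reports readiness; the timeout value is arbitrary.
KFServing.get("mnist-kfserving", namespace="anonymous", watch=True, timeout_seconds=300)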
Code example #12
def main():

    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(custom=V1alpha2CustomSpec(
            container=V1Container(
                name="kfserving-container",
                image=FLAGS.image,
                env=[{
                    "name": "STORAGE_URI",
                    "value": "%s" % FLAGS.storage_uri
                }],
                resources=V1ResourceRequirements(
                    limits={"nvidia.com/gpu": FLAGS.gpus_to_inference}),
                command=["python"],
                args=[
                    "model.py",
                    "--model-name",
                    "%s" % FLAGS.inference_name,
                    "--out_dir",
                    "%s" % FLAGS.model_path,
                    "--classes_file",
                    "%s" % FLAGS.classes_file,
                ]))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=k8s_client.V1ObjectMeta(name=FLAGS.inference_name,
                                         namespace=FLAGS.namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))
    # Create inference service
    KFServing = KFServingClient()
    KFServing.create(isvc)
    time.sleep(2)

    # Check inference service
    KFServing.get(FLAGS.inference_name,
                  namespace=FLAGS.namespace,
                  watch=True,
                  timeout_seconds=180)

    model_status = KFServing.get(FLAGS.inference_name,
                                 namespace=FLAGS.namespace)

    for condition in model_status["status"]["conditions"]:
        if condition['type'] == 'Ready':
            if condition['status'] == 'True':
                print('Model is ready')
                break
            else:
                print(
                    'Model deployment timed out, please check the InferenceService events for more details.'
                )
                exit(1)
    try:
        print(
            model_status["status"]["url"] +
            " is the knative domain header. $ISTIO_INGRESS_ENDPOINT are defined in the below commands"
        )
        print("Sample test commands: ")
        print(
            "# Note: If Istio Ingress gateway is not served with LoadBalancer, use $CLUSTER_NODE_IP:31380 as the ISTIO_INGRESS_ENDPOINT"
        )
        print(
            "ISTIO_INGRESS_ENDPOINT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.status.loadBalancer.ingress[0].ip}')"
        )
        # model_status['status']['url'] is like http://flowers-sample.kubeflow.example.com/v1/models/flowers-sample
        url = re.compile(r"https?://")
        host, path = url.sub("", model_status["status"]["url"]).split("/", 1)
        print('curl -X GET -H "Host: ' + host +
              '" http://$ISTIO_INGRESS_ENDPOINT/' + path)
    except Exception:
        print("Model is not ready, check the logs for the Knative URL status.")
        exit(1)