def deploy_model(action, model_name, default_model_uri, canary_model_uri,
                 canary_model_traffic, namespace, framework,
                 default_custom_model_spec, canary_custom_model_spec,
                 autoscaling_target=0):
    if int(autoscaling_target) != 0:
        annotations = {"autoscaling.knative.dev/target": str(autoscaling_target)}
    else:
        annotations = None
    metadata = client.V1ObjectMeta(name=model_name, namespace=namespace,
                                   annotations=annotations)

    if framework != 'custom':
        default_model_spec = ModelSpec(framework, default_model_uri)
    else:
        default_model_spec = customModelSpec(default_custom_model_spec)

    # Create Canary deployment if canary model uri is provided.
    if framework != 'custom' and canary_model_uri:
        canary_model_spec = ModelSpec(framework, canary_model_uri)
        kfsvc = kfserving_deployment(metadata, default_model_spec,
                                     canary_model_spec, canary_model_traffic)
    elif framework == 'custom' and canary_custom_model_spec:
        canary_model_spec = customModelSpec(canary_custom_model_spec)
        kfsvc = kfserving_deployment(metadata, default_model_spec,
                                     canary_model_spec, canary_model_traffic)
    else:
        kfsvc = kfserving_deployment(metadata, default_model_spec)

    KFServing = KFServingClient()

    if action == 'create':
        KFServing.create(kfsvc)
    elif action == 'update':
        KFServing.patch(model_name, kfsvc)
    elif action == 'delete':
        KFServing.delete(model_name, namespace=namespace)
    else:
        raise ValueError("Error: No matching action: " + action)

    model_status = KFServing.get(model_name, namespace=namespace)
    return model_status
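# A minimal usage sketch (an assumption, not from the source): one way the
# deploy_model function above might be invoked to create a TensorFlow
# InferenceService. The model name, bucket URI, and namespace below are
# hypothetical placeholder values.
if __name__ == "__main__":
    status = deploy_model(
        action="create",
        model_name="flower-sample",                        # hypothetical name
        default_model_uri="gs://my-bucket/flowers/model",  # hypothetical URI
        canary_model_uri=None,
        canary_model_traffic=0,
        namespace="kubeflow",                              # hypothetical namespace
        framework="tensorflow",
        default_custom_model_spec=None,
        canary_custom_model_spec=None,
        autoscaling_target=0,
    )
    print(status)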
def delete_isvc(self, name, namespace):
    """Delete the provided InferenceService in the specified namespace.

    :param name: The name of the InferenceService to delete.
    :param namespace: The namespace the InferenceService lives in.
    :returns: object: The deleted InferenceService.
    """
    KFServing = KFServingClient()
    return KFServing.delete(name, namespace=namespace)
def delete_isvc(self, name, namespace):
    """Delete the provided InferenceService in the specified namespace.

    :param name: The name of the InferenceService to delete.
    :param namespace: The namespace the InferenceService lives in.
    :returns: object: The deleted InferenceService.
    """
    KFServing = KFServingClient(
        config_file=self.config_file,
        context=self.context,
        client_configuration=self.client_configuration,
        persist_config=self.persist_config)
    return KFServing.delete(name, namespace=namespace)
def deploy_model(action, model_name, default_model_uri, canary_model_uri,
                 canary_model_traffic, namespace, framework,
                 default_custom_model_spec, canary_custom_model_spec,
                 service_account, autoscaling_target=0,
                 enable_istio_sidecar=True, inferenceservice_yaml={},
                 watch_timeout=120, min_replicas=0, max_replicas=0):
    KFServing = KFServingClient()

    if inferenceservice_yaml:
        # Overwrite name and namespace if they were provided.
        if namespace:
            inferenceservice_yaml['metadata']['namespace'] = namespace
        if model_name:
            inferenceservice_yaml['metadata']['name'] = model_name
        kfsvc = inferenceservice_yaml
    else:
        # Create annotations.
        annotations = {}
        if int(autoscaling_target) != 0:
            annotations["autoscaling.knative.dev/target"] = str(autoscaling_target)
        if not enable_istio_sidecar:
            annotations["sidecar.istio.io/inject"] = 'false'
        if not annotations:
            annotations = None
        metadata = client.V1ObjectMeta(name=model_name, namespace=namespace,
                                       annotations=annotations)

        # Create Default deployment if default model uri is provided.
        if framework != "custom" and default_model_uri:
            default_model_spec = EndpointSpec(framework, default_model_uri,
                                              service_account, min_replicas,
                                              max_replicas)
        elif framework == "custom" and default_custom_model_spec:
            default_model_spec = customEndpointSpec(default_custom_model_spec,
                                                    service_account,
                                                    min_replicas, max_replicas)

        # Create Canary deployment if canary model uri is provided.
        if framework != "custom" and canary_model_uri:
            canary_model_spec = EndpointSpec(framework, canary_model_uri,
                                             service_account, min_replicas,
                                             max_replicas)
            kfsvc = InferenceService(metadata, default_model_spec,
                                     canary_model_spec, canary_model_traffic)
        elif framework == "custom" and canary_custom_model_spec:
            canary_model_spec = customEndpointSpec(canary_custom_model_spec,
                                                   service_account,
                                                   min_replicas, max_replicas)
            kfsvc = InferenceService(metadata, default_model_spec,
                                     canary_model_spec, canary_model_traffic)
        else:
            kfsvc = InferenceService(metadata, default_model_spec)

    def create(kfsvc, model_name, namespace):
        KFServing.create(kfsvc, namespace=namespace)
        time.sleep(1)
        KFServing.get(model_name, namespace=namespace, watch=True,
                      timeout_seconds=watch_timeout)

    def update(kfsvc, model_name, namespace):
        KFServing.patch(model_name, kfsvc, namespace=namespace)
        time.sleep(1)
        KFServing.get(model_name, namespace=namespace, watch=True,
                      timeout_seconds=watch_timeout)

    if action == "create":
        create(kfsvc, model_name, namespace)
    elif action == "update":
        update(kfsvc, model_name, namespace)
    elif action == "apply":
        try:
            create(kfsvc, model_name, namespace)
        except Exception:
            update(kfsvc, model_name, namespace)
    elif action == "rollout":
        if inferenceservice_yaml:
            raise RuntimeError("Rollout is not supported for inferenceservice yaml")
        KFServing.rollout_canary(
            model_name,
            canary=canary_model_spec,
            percent=canary_model_traffic,
            namespace=namespace,
            watch=True,
            timeout_seconds=watch_timeout,
        )
    elif action == "promote":
        KFServing.promote(model_name, namespace=namespace, watch=True,
                          timeout_seconds=watch_timeout)
    elif action == "delete":
        KFServing.delete(model_name, namespace=namespace)
    else:
        raise ValueError("Error: No matching action: " + action)

    model_status = KFServing.get(model_name, namespace=namespace)
    return model_status
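# A hedged usage sketch (an assumption, not from the source): calling the
# deploy_model variant above with a pre-built InferenceService dict via
# `inferenceservice_yaml`, which bypasses the EndpointSpec construction path.
# The dict layout and all field values below are hypothetical placeholders.
example_isvc = {
    "apiVersion": "serving.kubeflow.org/v1alpha2",
    "kind": "InferenceService",
    "metadata": {"name": "placeholder", "namespace": "placeholder"},
    "spec": {
        "default": {
            "predictor": {
                "tensorflow": {"storageUri": "gs://my-bucket/model"}  # hypothetical URI
            }
        }
    },
}

status = deploy_model(
    action="apply",
    model_name="flower-sample",        # overwrites metadata.name above
    default_model_uri=None,
    canary_model_uri=None,
    canary_model_traffic=0,
    namespace="kubeflow",              # overwrites metadata.namespace above
    framework="custom",
    default_custom_model_spec=None,
    canary_custom_model_spec=None,
    service_account=None,
    inferenceservice_yaml=example_isvc,
    watch_timeout=300,
)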
def perform_action(action, model_name, model_uri, canary_traffic_percent,
                   namespace, framework, custom_model_spec, service_account,
                   inferenceservice_yaml, request_timeout, autoscaling_target=0,
                   enable_istio_sidecar=True, watch_timeout=300,
                   min_replicas=0, max_replicas=0):
    """
    Perform the specified action. If the action is not 'delete' and
    `inferenceservice_yaml` was provided, the dict representation of the YAML
    will be sent directly to the Kubernetes API. Otherwise, a
    V1beta1InferenceService object will be built using the provided inputs and
    then sent for creation/update.
    :return InferenceService JSON output
    """
    kfs_client = KFServingClient()

    if inferenceservice_yaml:
        # Overwrite name and namespace if they were provided.
        if namespace:
            inferenceservice_yaml['metadata']['namespace'] = namespace

        if model_name:
            inferenceservice_yaml['metadata']['name'] = model_name
        else:
            model_name = inferenceservice_yaml['metadata']['name']

        kfsvc = inferenceservice_yaml

    elif action != 'delete':
        # Create annotations.
        annotations = {}
        if int(autoscaling_target) != 0:
            annotations["autoscaling.knative.dev/target"] = str(autoscaling_target)
        if not enable_istio_sidecar:
            annotations["sidecar.istio.io/inject"] = 'false'
        if not annotations:
            annotations = None
        metadata = client.V1ObjectMeta(
            name=model_name, namespace=namespace, annotations=annotations
        )

        # If a custom model container spec was provided, build the V1Container
        # object using it.
        containers = []
        if custom_model_spec:
            containers = [create_custom_container_spec(custom_model_spec)]

        # Build the V1beta1PredictorSpec.
        predictor_spec = create_predictor_spec(
            framework, model_uri, canary_traffic_percent, service_account,
            min_replicas, max_replicas, containers, request_timeout
        )

        kfsvc = create_inference_service(metadata, predictor_spec)

    if action == "create":
        submit_api_request(kfs_client, 'create', model_name, kfsvc, namespace,
                           watch=True, timeout_seconds=watch_timeout)
    elif action == "update":
        submit_api_request(kfs_client, 'update', model_name, kfsvc, namespace,
                           watch=True, timeout_seconds=watch_timeout)
    elif action == "apply":
        try:
            submit_api_request(kfs_client, 'create', model_name, kfsvc, namespace,
                               watch=True, timeout_seconds=watch_timeout)
        except Exception:
            submit_api_request(kfs_client, 'update', model_name, kfsvc, namespace,
                               watch=True, timeout_seconds=watch_timeout)
    elif action == "delete":
        kfs_client.delete(model_name, namespace=namespace)
    else:
        raise ValueError("Error: No matching action: " + action)

    model_status = kfs_client.get(model_name, namespace=namespace)
    return model_status
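# A minimal sketch (an assumption, not from the source): how perform_action
# above might be driven for the V1beta1 path when no inferenceservice_yaml is
# supplied. All argument values are hypothetical placeholders.
status = perform_action(
    action="create",
    model_name="sklearn-iris",                   # hypothetical name
    model_uri="gs://my-bucket/sklearn/iris",     # hypothetical URI
    canary_traffic_percent=100,
    namespace="kubeflow",                        # hypothetical namespace
    framework="sklearn",
    custom_model_spec={},
    service_account="",
    inferenceservice_yaml={},
    request_timeout=60,
    autoscaling_target=0,
    enable_istio_sidecar=True,
    watch_timeout=300,
    min_replicas=0,
    max_replicas=1,
)
print(status)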
def deploy_model(action, model_name, default_model_uri, canary_model_uri,
                 canary_model_traffic, namespace, framework,
                 default_custom_model_spec, canary_custom_model_spec,
                 autoscaling_target=0):
    if int(autoscaling_target) != 0:
        annotations = {
            "autoscaling.knative.dev/target": str(autoscaling_target)
        }
    else:
        annotations = None
    metadata = client.V1ObjectMeta(name=model_name, namespace=namespace,
                                   annotations=annotations)

    # Create Default deployment if default model uri is provided.
    if framework != 'custom' and default_model_uri:
        default_model_spec = EndpointSpec(framework, default_model_uri)
    elif framework == 'custom' and default_custom_model_spec:
        default_model_spec = customEndpointSpec(default_custom_model_spec)

    # Create Canary deployment if canary model uri is provided.
    if framework != 'custom' and canary_model_uri:
        canary_model_spec = EndpointSpec(framework, canary_model_uri)
        kfsvc = InferenceService(metadata, default_model_spec,
                                 canary_model_spec, canary_model_traffic)
    elif framework == 'custom' and canary_custom_model_spec:
        canary_model_spec = customEndpointSpec(canary_custom_model_spec)
        kfsvc = InferenceService(metadata, default_model_spec,
                                 canary_model_spec, canary_model_traffic)
    else:
        kfsvc = InferenceService(metadata, default_model_spec)

    KFServing = KFServingClient()

    if action == 'create':
        KFServing.create(kfsvc, watch=True, timeout_seconds=120)
    elif action == 'update':
        KFServing.patch(model_name, kfsvc)
    elif action == 'rollout':
        KFServing.rollout_canary(model_name, canary=canary_model_spec,
                                 percent=canary_model_traffic,
                                 namespace=namespace, watch=True,
                                 timeout_seconds=120)
    elif action == 'promote':
        KFServing.promote(model_name, namespace=namespace, watch=True,
                          timeout_seconds=120)
    elif action == 'delete':
        KFServing.delete(model_name, namespace=namespace)
    else:
        raise ValueError("Error: No matching action: " + action)

    model_status = KFServing.get(model_name, namespace=namespace)
    return model_status
def deploy(self, kfservice_id):
    mykfservice = db.session.query(KfService).filter_by(
        id=kfservice_id).first()

    namespace = conf.get('KFSERVING_NAMESPACE')
    crd_info = conf.get('CRD_INFO')['inferenceservice']

    # Build a container spec from the given service definition.
    def make_container(service, mykfservice):
        from myapp.utils.py.py_k8s import K8s
        k8s = K8s()  # Nothing is deployed here, so no cluster configuration is needed.
        container = k8s.make_container(
            name=mykfservice.name + "-" + service.name,
            command=["sh", "-c", service.command] if service.command else None,
            args=None,
            volume_mount=None,
            image_pull_policy=conf.get('IMAGE_PULL_POLICY', 'Always'),
            image=service.images,
            working_dir=service.working_dir if service.working_dir else None,
            env=service.env,
            resource_memory=service.resource_memory,
            resource_cpu=service.resource_cpu,
            resource_gpu=service.resource_gpu,
            username=service.created_by.username)
        return container

    api_version = crd_info['group'] + '/' + crd_info['version']

    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=mykfservice.default_service.min_replicas,
            max_replicas=mykfservice.default_service.max_replicas,
            custom=V1alpha2CustomSpec(container=make_container(
                mykfservice.default_service, mykfservice)))) \
        if mykfservice.default_service else None

    canary_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(
            min_replicas=mykfservice.canary_service.min_replicas,
            max_replicas=mykfservice.canary_service.max_replicas,
            custom=V1alpha2CustomSpec(container=make_container(
                mykfservice.canary_service, mykfservice)))) \
        if mykfservice.canary_service else None

    metadata = kubernetes.client.V1ObjectMeta(
        name=mykfservice.name,
        labels={
            "app": mykfservice.name,
            "rtx-user": mykfservice.created_by.username
        },
        namespace=namespace)

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=crd_info['kind'],
        metadata=metadata,
        spec=V1alpha2InferenceServiceSpec(
            default=default_endpoint_spec,
            canary=canary_endpoint_spec,
            canary_traffic_percent=mykfservice.canary_traffic_percent))

    KFServing = KFServingClient()
    try:
        KFServing.delete(mykfservice.name, namespace=namespace,
                         version=crd_info['version'])
    except Exception as e:
        print(e)

    KFServing.create(isvc, namespace=namespace, version=crd_info['version'])

    flash(category='warning',
          message='Deployment started; it should complete in about a minute.')
    return redirect('/kfservice_modelview/list/')
def deploy_model(
    action,
    model_name,
    default_model_uri,
    canary_model_uri,
    canary_model_traffic,
    namespace,
    framework,
    default_custom_model_spec,
    canary_custom_model_spec,
    service_account,
    autoscaling_target=0,
):
    if int(autoscaling_target) != 0:
        annotations = {"autoscaling.knative.dev/target": str(autoscaling_target)}
    else:
        annotations = None
    metadata = client.V1ObjectMeta(
        name=model_name, namespace=namespace, annotations=annotations
    )

    # Create Default deployment if default model uri is provided.
    if framework != "custom" and default_model_uri:
        default_model_spec = EndpointSpec(framework, default_model_uri, service_account)
    elif framework == "custom" and default_custom_model_spec:
        default_model_spec = customEndpointSpec(
            default_custom_model_spec, service_account
        )

    # Create Canary deployment if canary model uri is provided.
    if framework != "custom" and canary_model_uri:
        canary_model_spec = EndpointSpec(framework, canary_model_uri, service_account)
        kfsvc = InferenceService(
            metadata, default_model_spec, canary_model_spec, canary_model_traffic
        )
    elif framework == "custom" and canary_custom_model_spec:
        canary_model_spec = customEndpointSpec(
            canary_custom_model_spec, service_account
        )
        kfsvc = InferenceService(
            metadata, default_model_spec, canary_model_spec, canary_model_traffic
        )
    else:
        kfsvc = InferenceService(metadata, default_model_spec)

    KFServing = KFServingClient()

    def create(kfsvc, model_name, namespace):
        KFServing.create(kfsvc)
        time.sleep(1)
        KFServing.get(model_name, namespace=namespace, watch=True, timeout_seconds=120)

    def update(kfsvc, model_name, namespace):
        KFServing.patch(model_name, kfsvc)
        time.sleep(1)
        KFServing.get(model_name, namespace=namespace, watch=True, timeout_seconds=120)

    if action == "create":
        create(kfsvc, model_name, namespace)
    elif action == "update":
        update(kfsvc, model_name, namespace)
    elif action == "apply":
        try:
            create(kfsvc, model_name, namespace)
        except Exception:
            update(kfsvc, model_name, namespace)
    elif action == "rollout":
        KFServing.rollout_canary(
            model_name,
            canary=canary_model_spec,
            percent=canary_model_traffic,
            namespace=namespace,
            watch=True,
            timeout_seconds=120,
        )
    elif action == "promote":
        KFServing.promote(
            model_name, namespace=namespace, watch=True, timeout_seconds=120
        )
    elif action == "delete":
        KFServing.delete(model_name, namespace=namespace)
    else:
        raise ValueError("Error: No matching action: " + action)

    model_status = KFServing.get(model_name, namespace=namespace)
    return model_status
# Create Canary deployment if canary model uri is provided.
if framework != 'custom' and canary_model_uri:
    canary_model_spec = ModelSpec(framework, canary_model_uri)
    kfsvc = kfserving_deployment(metadata, default_model_spec,
                                 canary_model_spec, canary_model_traffic)
elif framework == 'custom' and canary_custom_model_spec:
    canary_model_spec = customModelSpec(canary_custom_model_spec)
    kfsvc = kfserving_deployment(metadata, default_model_spec,
                                 canary_model_spec, canary_model_traffic)
else:
    kfsvc = kfserving_deployment(metadata, default_model_spec)

KFServing = KFServingClient()

if action == 'create':
    KFServing.create(kfsvc)
elif action == 'update':
    KFServing.patch(model_name, kfsvc)
elif action == 'delete':
    KFServing.delete(model_name, namespace=namespace)
else:
    raise ValueError("Error: No matching action: " + action)

model_status = KFServing.get(model_name, namespace=namespace)
print(model_status)

if not os.path.exists(os.path.dirname(output_path)):
    os.makedirs(os.path.dirname(output_path))
with open(output_path, "w") as report:
    report.write(json.dumps(model_status))