Code example #1
0
File: kfservingdeployer.py  Project: mlx-bot/katalog
def deploy_model(action, model_name, default_model_uri, canary_model_uri,
                 canary_model_traffic, namespace, framework,
                 default_custom_model_spec, canary_custom_model_spec,
                 autoscaling_target=0):
    """Create, update or delete a KFServing deployment for a model.

    Args:
        action: One of 'create', 'update' or 'delete'.
        model_name: Name of the KFServing service.
        default_model_uri: Storage URI of the default (primary) model.
        canary_model_uri: Storage URI of the canary model (framework mode).
        canary_model_traffic: Percentage of traffic routed to the canary.
        namespace: Kubernetes namespace to deploy into.
        framework: Serving framework name, or 'custom' for a custom spec.
        default_custom_model_spec: Custom container spec for the default model
            (used only when framework == 'custom').
        canary_custom_model_spec: Custom container spec for the canary model
            (used only when framework == 'custom').
        autoscaling_target: Knative concurrency target; 0 disables the
            autoscaling annotation.

    Returns:
        The service status as returned by ``KFServingClient.get``.

    Raises:
        ValueError: If ``action`` is not one of the supported verbs.
    """
    # Only attach the Knative autoscaling annotation when a non-zero
    # target was requested.
    if int(autoscaling_target) != 0:
        annotations = {"autoscaling.knative.dev/target": str(autoscaling_target)}
    else:
        annotations = None
    metadata = client.V1ObjectMeta(name=model_name, namespace=namespace,
                                   annotations=annotations)

    # Default (primary) model: framework-managed spec or fully custom spec.
    if framework != 'custom':
        default_model_spec = ModelSpec(framework, default_model_uri)
    else:
        default_model_spec = customModelSpec(default_custom_model_spec)

    # Create Canary deployment if canary model uri is provided.
    if framework != 'custom' and canary_model_uri:
        canary_model_spec = ModelSpec(framework, canary_model_uri)
        kfsvc = kfserving_deployment(metadata, default_model_spec,
                                     canary_model_spec, canary_model_traffic)
    elif framework == 'custom' and canary_custom_model_spec:
        canary_model_spec = customModelSpec(canary_custom_model_spec)
        kfsvc = kfserving_deployment(metadata, default_model_spec,
                                     canary_model_spec, canary_model_traffic)
    else:
        kfsvc = kfserving_deployment(metadata, default_model_spec)

    KFServing = KFServingClient()

    if action == 'create':
        KFServing.create(kfsvc)
    elif action == 'update':
        KFServing.patch(model_name, kfsvc)
    elif action == 'delete':
        KFServing.delete(model_name, namespace=namespace)
    else:
        # BUG FIX: the original `raise("...")` tried to raise a plain string,
        # which itself raises TypeError ("exceptions must derive from
        # BaseException") and hides the intended message.
        raise ValueError("Error: No matching action: " + action)

    model_status = KFServing.get(model_name, namespace=namespace)
    return model_status
Code example #2
0
def deploy_model(namespace, trained_model_path):
    """Deploy the model stored at *trained_model_path* as a TensorFlow
    KFServing InferenceService in *namespace* and wait until it is ready."""
    logging.basicConfig(level=logging.INFO)
    logging.info('Starting deploy model step ..')
    logging.info('Input data ..')
    logging.info('namespace:{}'.format(namespace))
    logging.info('trained_model_path:{}'.format(trained_model_path))

    logging.info('STEP: DEPLOY MODEL (1/2) Generating definition..')
    group_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    timestamp = datetime.datetime.utcnow().strftime("%Y%m%d%H%M%S")
    # Unique service name per invocation via a UTC timestamp suffix.
    service_name = 'simple-model' + timestamp

    # Identical requests and limits: the predictor runs with exactly
    # 100m CPU and 1Gi of memory.
    compute_resources = V1ResourceRequirements(
        requests={'cpu': '100m', 'memory': '1Gi'},
        limits={'cpu': '100m', 'memory': '1Gi'})
    predictor = V1alpha2PredictorSpec(
        tensorflow=V1alpha2TensorflowSpec(
            storage_uri=trained_model_path,
            resources=compute_resources))
    endpoint_spec = V1alpha2EndpointSpec(predictor=predictor)

    # velascoluis: sidecar is disabled by
    # https://github.com/knative/serving/issues/6829
    # Note: make sure trained model path starts with file:// or gs://
    service_metadata = client.V1ObjectMeta(
        name=service_name,
        namespace=namespace,
        annotations={
            'sidecar.istio.io/inject': 'false',
            'autoscaling.knative.dev/target': '1'
        })
    inference_service = V1alpha2InferenceService(
        api_version=group_version,
        kind=constants.KFSERVING_KIND,
        metadata=service_metadata,
        spec=V1alpha2InferenceServiceSpec(default=endpoint_spec))

    kfs_client = KFServingClient()
    logging.info('STEP: DEPLOY MODEL (2/2) Creating inference service..')
    kfs_client.create(inference_service)
    logging.info('Inference service ' + service_name + " created ...")
    # Block (up to two minutes) until the service reports readiness.
    kfs_client.get(service_name, namespace=namespace, watch=True,
                   timeout_seconds=120)
    logging.info('Deploy model step finished')
Code example #3
0
api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION

# BUG FIX: the `tensorflow` predictor field expects a V1alpha2TensorflowSpec;
# V1alpha2CustomSpec takes a `container` argument and does not accept
# `storage_uri`/`resources`, so the original call would fail.
default_endpoint_spec = V1alpha2EndpointSpec(predictor=V1alpha2PredictorSpec(
    tensorflow=V1alpha2TensorflowSpec(
        storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
        resources=V1ResourceRequirements(
            requests={'cpu': '100m', 'memory': '1Gi'},
            limits={'cpu': '100m', 'memory': '1Gi'}))))

isvc = V1alpha2InferenceService(
    api_version=api_version,
    kind=constants.KFSERVING_KIND,
    metadata=client.V1ObjectMeta(name='flower-sample', namespace=namespace),
    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

KFServing = KFServingClient()

KFServing.create(isvc)

# Wait (up to two minutes) for the service to become ready.
KFServing.get('flower-sample',
              namespace=namespace,
              watch=True,
              timeout_seconds=120)

# KFServing.delete('flower-sample', namespace=namespace)
Code example #4
0
def main():
    """Deploy a custom-container InferenceService from FLAGS, wait for it to
    become ready, then print sample curl commands for querying it.

    Exits with status 1 if the model does not reach Ready, or if its status
    URL cannot be parsed.
    """
    api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
    default_endpoint_spec = V1alpha2EndpointSpec(
        predictor=V1alpha2PredictorSpec(custom=V1alpha2CustomSpec(
            container=V1Container(
                name="kfserving-container",
                image=FLAGS.image,
                # The storage initializer pulls the model from STORAGE_URI.
                env=[{
                    "name": "STORAGE_URI",
                    "value": "%s" % FLAGS.storage_uri
                }],
                resources=V1ResourceRequirements(
                    limits={"nvidia.com/gpu": FLAGS.gpus_to_inference}),
                command=["python"],
                args=[
                    "model.py",
                    "--model-name",
                    "%s" % FLAGS.inference_name,
                    "--out_dir",
                    "%s" % FLAGS.model_path,
                    "--classes_file",
                    "%s" % FLAGS.classes_file,
                ]))))

    isvc = V1alpha2InferenceService(
        api_version=api_version,
        kind=constants.KFSERVING_KIND,
        metadata=k8s_client.V1ObjectMeta(name=FLAGS.inference_name,
                                         namespace=FLAGS.namespace),
        spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))
    # Create inference service
    KFServing = KFServingClient()
    KFServing.create(isvc)
    time.sleep(2)

    # Check inference service (watch until ready or timeout)
    KFServing.get(FLAGS.inference_name,
                  namespace=FLAGS.namespace,
                  watch=True,
                  timeout_seconds=180)

    model_status = KFServing.get(FLAGS.inference_name,
                                 namespace=FLAGS.namespace)

    for condition in model_status["status"]["conditions"]:
        if condition['type'] == 'Ready':
            if condition['status'] == 'True':
                print('Model is ready')
                break
            else:
                print(
                    'Model is timed out, please check the inferenceservice events for more details.'
                )
                exit(1)
    try:
        print(
            model_status["status"]["url"] +
            " is the knative domain header. $ISTIO_INGRESS_ENDPOINT are defined in the below commands"
        )
        print("Sample test commands: ")
        print(
            "# Note: If Istio Ingress gateway is not served with LoadBalancer, use $CLUSTER_NODE_IP:31380 as the ISTIO_INGRESS_ENDPOINT"
        )
        print(
            "ISTIO_INGRESS_ENDPOINT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.status.loadBalancer.ingress[0].ip}')"
        )
        # model_status['status']['url'] is like http://flowers-sample.kubeflow.example.com/v1/models/flowers-sample
        url = re.compile(r"https?://")
        host, path = url.sub("", model_status["status"]["url"]).split("/", 1)
        print('curl -X GET -H "Host: ' + host +
              '" http://$ISTIO_INGRESS_ENDPOINT/' + path)
    # BUG FIX: was a bare `except:`, which also swallows SystemExit and
    # KeyboardInterrupt. Catch only what the block can raise: a missing
    # status key (KeyError/TypeError) or an un-splittable URL (ValueError).
    except (KeyError, TypeError, ValueError):
        print("Model is not ready, check the logs for the Knative URL status.")
        exit(1)