def create_outlier_cls():
    @model(
        name="outlier",
        platform=ModelFramework.Custom,
        protocol=KFServingV2Protocol(),
        uri="s3://tempo/outlier/cifar10/outlier",
        local_folder=os.path.join(ARTIFACTS_FOLDER, OUTLIER_FOLDER),
    )
    class OutlierModel(object):
        def __init__(self):
            from alibi_detect.utils.saving import load_detector

            model = self.get_tempo()
            models_folder = model.details.local_folder
            print(f"Loading from {models_folder}")
            self.od = load_detector(os.path.join(models_folder, "cifar10"))

        @predictmethod
        def outlier(self, payload: np.ndarray) -> dict:
            od_preds = self.od.predict(
                payload,
                outlier_type="instance",  # use 'feature' or 'instance' level
                return_feature_score=True,  # scores used to determine outliers
                return_instance_score=True,
            )
            return json.loads(json.dumps(od_preds, cls=NumpyEncoder))

    return OutlierModel
def __init__(
    self,
    name: str,
    protocol: Protocol = KFServingV2Protocol(),
    local_folder: str = None,
    uri: str = None,
    platform: ModelFramework = None,
    inputs: ModelDataType = None,
    outputs: ModelDataType = None,
    model_func: Callable[..., Any] = None,
    conda_env: str = None,
    runtime_options: RuntimeOptions = RuntimeOptions(),
    description: str = "",
):
    """
    Parameters
    ----------
    name
     Name of the model. Needs to be Kubernetes compliant.
    protocol
     :class:`tempo.serve.protocol.Protocol`. Defaults to KFServing V2.
    local_folder
     Location of local artifacts.
    uri
     Location of remote artifacts.
    platform
     The :class:`tempo.serve.metadata.ModelFramework`
    inputs
     The input types.
    outputs
     The output types.
    model_func
     The function to run, for custom models.
    conda_env
     The conda environment name to use. If not specified will look for
     conda.yaml in local_folder or generate from current running environment.
    runtime_options
     The runtime options. Can be left empty and set when creating a runtime.
    description
     The description of the model
    """
    super().__init__(
        name,  # TODO: Should we unify names?
        user_func=model_func,
        local_folder=local_folder,
        uri=uri,
        platform=platform,
        inputs=inputs,
        outputs=outputs,
        conda_env=conda_env,
        protocol=protocol,
        runtime_options=runtime_options,
        description=description,
    )
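# A minimal usage sketch for the constructor documented above, assuming it is
# the `Model` constructor (as the `model_func` parameter suggests). The name,
# URI and local folder below are illustrative assumptions, not values from the
# source; the keyword arguments mirror the documented signature.
example_model = Model(
    name="example-sklearn",
    protocol=KFServingV2Protocol(),
    platform=ModelFramework.SKLearn,
    uri="gs://example-bucket/sklearn/model",
    local_folder="/tmp/sklearn/model",
)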
def test_kubernetes_spec_pipeline():
    details = ModelDetails(
        name="inference-pipeline",
        platform=ModelFramework.TempoPipeline,
        uri="gs://seldon/tempo",
        local_folder="",
        inputs=ModelDataArgs(args=[]),
        outputs=ModelDataArgs(args=[]),
    )
    options = KubernetesOptions(namespace="production", replicas=1)
    k8s_object = KubernetesSpec(details, KFServingV2Protocol(), options)
    container_spec = _V2ContainerFactory.get_container_spec(details)
    container_env = [{"name": name, "value": value} for name, value in container_spec["environment"].items()]

    expected = {
        "apiVersion": "machinelearning.seldon.io/v1",
        "kind": "SeldonDeployment",
        "metadata": {"name": details.name, "namespace": options.namespace},
        "spec": {
            "protocol": "kfserving",
            "predictors": [
                {
                    "componentSpecs": [
                        {
                            "spec": {
                                "containers": [
                                    {
                                        "name": "classifier",
                                        "image": container_spec["image"],
                                        "env": container_env,
                                        "args": [],
                                    }
                                ]
                            }
                        }
                    ],
                    "graph": {
                        "modelUri": details.uri,
                        "name": "classifier",
                        "type": "MODEL",
                        "implementation": "TRITON_SERVER",
                        "serviceAccountName": "tempo-pipeline",
                    },
                    "name": "default",
                    "replicas": options.replicas,
                }
            ],
        },
    }

    assert k8s_object.spec == expected
def test_custom_model(v2_input, expected):
    @model(
        name="custom",
        protocol=KFServingV2Protocol(),
        platform=ModelFramework.Custom,
    )
    def custom_model(a: np.ndarray) -> np.ndarray:
        return a

    response = custom_model.request(v2_input)
    assert response == expected
def test_lambda(input, expected):
    model = Model(
        name="test-iris-sklearn",
        runtime=SeldonDockerRuntime(protocol=KFServingV2Protocol()),
        platform=ModelFramework.SKLearn,
        uri="gs://seldon-models/sklearn",
        local_folder="sklearn/model",
        model_func=lambda x: np.array([[0, 0, 1]]),
    )

    response = model(input)
    np.testing.assert_allclose(response, expected, atol=1e-2)
def test_custom_model(v2_input, expected):
    @model(
        name="custom",
        runtime=SeldonDockerRuntime(protocol=KFServingV2Protocol()),
        platform=ModelFramework.Custom,
        uri="gs://seldon-models/custom",
        local_folder="custom_iris_path",
    )
    def custom_model(a: np.ndarray) -> np.ndarray:
        return a

    response = custom_model.request(v2_input)
    assert response == expected
def test_custom_multiheaded_model_list(v2_input, expected):
    @model(
        name="multi-headed",
        runtime=SeldonDockerRuntime(protocol=KFServingV2Protocol()),
        platform=ModelFramework.Custom,
        uri="gs://seldon-models/custom",
        local_folder="custom_iris_path",
    )
    def custom_multiheaded_model_list(a: np.ndarray, b: np.ndarray) -> List[np.ndarray]:
        return [a, b]

    response = custom_multiheaded_model_list.request(v2_input)
    assert response == expected
def test_custom_multiheaded_model_tuple(v2_input, expected):
    @model(
        name="test-iris-sklearn",
        runtime=SeldonDockerRuntime(protocol=KFServingV2Protocol(model_name="multi-headed")),
        platform=ModelFramework.Custom,
        uri="gs://seldon-models/custom",
        local_folder="custom_iris_path",
    )
    def custom_multiheaded_model_tuple(a: np.ndarray, b: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        return a, b

    response = custom_multiheaded_model_tuple.request(v2_input)
    assert response == expected
def inference_pipeline() -> Pipeline:
    # NOTE: We define the class inside the scope of the fixture to make sure
    # that the serialisation works correctly.
    # This way, we simulate a remote host, without access to the actual class
    # definition.
    runtime = SeldonDockerRuntime(KFServingV2Protocol())

    @pipeline(
        name="inference-pipeline",
        runtime=runtime,
    )
    def _pipeline(payload: np.ndarray) -> np.ndarray:
        return payload.sum(keepdims=True)

    return _pipeline
def test_custom_model_decorator_types(v2_input, expected):
    @model(
        name="test-iris-sklearn",
        runtime=SeldonDockerRuntime(protocol=KFServingV2Protocol(model_name="custom")),
        platform=ModelFramework.Custom,
        uri="gs://seldon-models/custom",
        local_folder="custom_iris_path",
        inputs=np.ndarray,
        outputs=np.ndarray,
    )
    def custom_model_decorator_types(a):
        return a

    response = custom_model_decorator_types.request(v2_input)
    assert response == expected
def test_model_spec():
    ms = ModelSpec(
        model_details=ModelDetails(
            name="test",
            local_folder="",
            uri="",
            platform=ModelFramework.XGBoost,
            inputs=ModelDataArgs(args=[ModelDataArg(ty=str)]),
            outputs=ModelDataArgs(args=[]),
        ),
        protocol=KFServingV2Protocol(),
        runtime_options=RuntimeOptions(),
    )
    s = ms.json()
    j = json.loads(s)
    ms2 = ModelSpec(**j)
    assert isinstance(ms2.protocol, KFServingV2Protocol)
    assert ms2.model_details.inputs.args[0].ty == str
def create_svc_cls(outlier, model):
    @pipeline(
        name="cifar10-service",
        protocol=KFServingV2Protocol(),
        uri="s3://tempo/outlier/cifar10/svc",
        local_folder=os.path.join(ARTIFACTS_FOLDER, "svc"),
        models=PipelineModels(outlier=outlier, cifar10=model),
    )
    class Cifar10Svc(object):
        @predictmethod
        def predict(self, payload: np.ndarray) -> np.ndarray:
            r = self.models.outlier(payload=payload)
            if r["data"]["is_outlier"][0]:
                return np.array([])
            else:
                return self.models.cifar10(payload)

    return Cifar10Svc
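# A hedged sketch of wiring the two factories above together. `cifar10_model`
# stands in for the CIFAR-10 classifier model (an assumption; it is not
# defined in this excerpt), and the zero payload simply follows the usual
# CIFAR-10 image layout of 32x32 RGB.
OutlierModel = create_outlier_cls()
outlier = OutlierModel()
Cifar10Svc = create_svc_cls(outlier, cifar10_model)
svc = Cifar10Svc()
prediction = svc.predict(payload=np.zeros((1, 32, 32, 3), dtype=np.float32))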
def k8s_runtime_v2(k8s_namespace: str) -> SeldonKubernetesRuntime:
    return SeldonKubernetesRuntime(
        k8s_options=KubernetesOptions(namespace=k8s_namespace),
        protocol=KFServingV2Protocol(),
    )
def docker_runtime_v2() -> Generator[SeldonDockerRuntime, None, None]:
    runtime = SeldonDockerRuntime(protocol=KFServingV2Protocol())
    yield runtime
def test_v2_from_any(data, expected):
    d = KFServingV2Protocol.create_v2_from_any(data, "a")
    assert d["name"] == "a"
    assert d["data"] == expected
    assert d["datatype"] == "BYTES"
def test_convert_from_bytes(data, ty, expected):
    output = {"data": data}
    res = KFServingV2Protocol.convert_from_bytes(output, ty)
    assert res == expected
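# A hedged round-trip sketch pairing the two static helpers exercised above.
# The exact encoding is an assumption based on the V2 "BYTES" datatype: a
# string is carried as its UTF-8 code points.
#
#   d = KFServingV2Protocol.create_v2_from_any("abc", "a")
#   d["data"]                                                         # -> [97, 98, 99]
#   KFServingV2Protocol.convert_from_bytes({"data": d["data"]}, str)  # -> "abc"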
def test_v2_from_protocol_response():
    # [97, 98, 99] are the UTF-8 code points for "abc"
    res = {"outputs": [{"name": "a", "data": [97, 98, 99], "datatype": "BYTES"}]}
    model_ty_args = ModelDataArgs(args=[ModelDataArg(ty=str, name=None)])
    v2 = KFServingV2Protocol()
    response = v2.from_protocol_response(res, model_ty_args)
    print(response)
def test_kubernetes_spec_pipeline():
    details = ModelDetails(
        name="inference-pipeline",
        platform=ModelFramework.TempoPipeline,
        uri="gs://seldon/tempo",
        local_folder="",
        inputs=ModelDataArgs(args=[]),
        outputs=ModelDataArgs(args=[]),
    )
    options = KubernetesOptions(namespace="production", replicas=1)
    protocol = KFServingV2Protocol()
    runtime_options = RuntimeOptions(k8s_options=options)
    model_spec = ModelSpec(model_details=details, protocol=protocol, runtime_options=runtime_options)
    k8s_object = KubernetesSpec(model_spec)

    expected = {
        "apiVersion": "machinelearning.seldon.io/v1",
        "kind": "SeldonDeployment",
        "metadata": {
            "annotations": {
                "seldon.io/tempo-description": "",
                "seldon.io/tempo-model": '{"model_details": '
                '{"name": "inference-pipeline", '
                '"local_folder": "", '
                '"uri": "gs://seldon/tempo", '
                '"platform": "tempo", '
                '"inputs": {"args": []}, '
                '"outputs": {"args": []}, '
                '"description": ""}, '
                '"protocol": "tempo.kfserving.protocol.KFServingV2Protocol", '
                '"runtime_options": {"runtime": null, '
                '"docker_options": {"defaultRuntime": "tempo.seldon.SeldonDockerRuntime"}, '
                '"k8s_options": {"replicas": 1, '
                '"minReplicas": null, '
                '"maxReplicas": null, '
                '"authSecretName": null, '
                '"serviceAccountName": null, '
                '"defaultRuntime": "tempo.seldon.SeldonKubernetesRuntime", '
                '"namespace": "production"}, '
                '"ingress_options": {"ingress": "tempo.ingress.istio.IstioIngress", '
                '"ssl": false, '
                '"verify_ssl": true}}}',
            },
            "labels": {"seldon.io/tempo": "true"},
            "name": "inference-pipeline",
            "namespace": "production",
        },
        "spec": {
            "protocol": "kfserving",
            "predictors": [
                {
                    "graph": {
                        "modelUri": details.uri,
                        "name": "inference-pipeline",
                        "type": "MODEL",
                        "implementation": "TEMPO_SERVER",
                        "serviceAccountName": "tempo-pipeline",
                    },
                    "name": "default",
                    "replicas": options.replicas,
                }
            ],
        },
    }

    assert k8s_object.spec == expected