Example #1
    def __init__(self, provider: str, api_spec: dict, model_dir: str):
        """
        Args:
            provider: "aws" or "gcp".
            api_spec: API configuration.
            model_dir: Where the models are stored on disk.
        """

        self.provider = provider

        self.type = predictor_type_from_api_spec(api_spec)
        self.path = api_spec["predictor"]["path"]
        self.config = api_spec["predictor"].get("config", {})

        self.api_spec = api_spec

        self.crons = []
        if not are_models_specified(self.api_spec):
            return

        self.model_dir = model_dir

        self.caching_enabled = self._is_model_caching_enabled()
        self.multiple_processes = self.api_spec["predictor"]["processes_per_replica"] > 1

        # model caching can only be enabled when processes_per_replica is 1
        # model side-reloading is supported for any number of processes_per_replica

        if self.caching_enabled:
            if self.type == PythonPredictorType:
                mem_cache_size = self.api_spec["predictor"]["multi_model_reloading"]["cache_size"]
                disk_cache_size = self.api_spec["predictor"]["multi_model_reloading"]["disk_cache_size"]
            else:
                mem_cache_size = self.api_spec["predictor"]["models"]["cache_size"]
                disk_cache_size = self.api_spec["predictor"]["models"]["disk_cache_size"]
            self.models = ModelsHolder(
                self.type,
                self.model_dir,
                mem_cache_size=mem_cache_size,
                disk_cache_size=disk_cache_size,
                on_download_callback=model_downloader,
            )
        elif self.type not in [TensorFlowPredictorType, TensorFlowNeuronPredictorType]:
            self.models = ModelsHolder(self.type, self.model_dir)
        else:
            self.models = None

        if self.multiple_processes:
            self.models_tree = None
        else:
            self.models_tree = ModelsTree()
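
A minimal usage sketch for the constructor above. The class name Predictor and the api_spec layout are assumptions inferred from the keys the constructor reads, not taken from a real cortex.yaml:

    # Hypothetical api_spec -- shaped after the keys accessed in __init__
    api_spec = {
        "kind": "RealtimeAPI",
        "predictor": {
            "type": "python",
            "path": "predictor.py",
            "config": {"threshold": 0.5},
            "processes_per_replica": 1,
        },
    }

    # "Predictor" is assumed to be the class these methods belong to
    predictor = Predictor(provider="aws", api_spec=api_spec, model_dir="/mnt/model")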
Example #2
    def initialize_client(
        self,
        tf_serving_host: Optional[str] = None,
        tf_serving_port: Optional[str] = None
    ) -> Union[PythonClient, TensorFlowClient, ONNXClient]:
        """
        Initialize client that gives access to models specified in the API spec (cortex.yaml).
        Only applies when models are provided in the API spec.

        Args:
            tf_serving_host: Host of the TF Serving server. Only used with the TensorFlow predictor type.
            tf_serving_port: Port of the TF Serving server. Only used with the TensorFlow predictor type.

        Returns:
            The client for the respective predictor type.
        """

        client = None

        if are_models_specified(self.api_spec):
            if self.type == PythonPredictorType:
                client = PythonClient(self.api_spec, self.models, self.model_dir, self.models_tree)

            if self.type in [TensorFlowPredictorType, TensorFlowNeuronPredictorType]:
                tf_serving_address = tf_serving_host + ":" + tf_serving_port
                client = TensorFlowClient(
                    tf_serving_address,
                    self.api_spec,
                    self.models,
                    self.model_dir,
                    self.models_tree,
                )
                if not self.caching_enabled:
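                    # with caching disabled, a cron keeps each serving thread's
                    # TF Serving client up to date (refreshes every 5 seconds)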
                    cron = TFSAPIServingThreadUpdater(interval=5.0, client=client._client)
                    cron.start()

            if self.type == ONNXPredictorType:
                client = ONNXClient(self.api_spec, self.models, self.model_dir, self.models_tree)

        return client
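
A hedged sketch of calling this method for the TensorFlow predictor type; the host and port values are placeholders, and predictor is the hypothetical instance from the previous sketch:

    # Placeholders -- in a real deployment these point at the TF Serving sidecar
    client = predictor.initialize_client(tf_serving_host="localhost", tf_serving_port="9000")

    # initialize_client() returns None when no models are specified in the API
    # spec; for the Python predictor type it is called with no arguments
    if client is not None:
        print(type(client).__name__)  # e.g. TensorFlowClient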
Example #3
    def initialize_impl(
        self,
        project_dir: str,
        client: Union[PythonClient, TensorFlowClient, ONNXClient],
        metrics_client: DogStatsd,
        job_spec: Optional[Dict[str, Any]] = None,
        proto_module_pb2: Optional[Any] = None,
    ):
        """
        Initialize the predictor class provided by the user.

        job_spec is a dictionary when the "kind" of the API is set to "BatchAPI". Otherwise, it's None.
        proto_module_pb2 is a module of the compiled proto when grpc is enabled for the "RealtimeAPI" kind. Otherwise, it's None.

        Can raise UserRuntimeException/UserException/CortexException.
        """

        # build args
        class_impl = self.class_impl(project_dir)
        constructor_args = inspect.getfullargspec(class_impl.__init__).args
        config = deepcopy(self.config)
        args = {}
        if job_spec is not None and job_spec.get("config") is not None:
            util.merge_dicts_in_place_overwrite(config, job_spec["config"])
        if "config" in constructor_args:
            args["config"] = config
        if "job_spec" in constructor_args:
            args["job_spec"] = job_spec
        if "metrics_client" in constructor_args:
            args["metrics_client"] = metrics_client
        if "proto_module_pb2" in constructor_args:
            args["proto_module_pb2"] = proto_module_pb2

        # initialize predictor class
        try:
            if self.type == PythonPredictorType:
                if are_models_specified(self.api_spec):
                    args["python_client"] = client
                    # set load method to enable the use of the client in the constructor
                    # setting/getting from self in load_model won't work because self will be set to None
                    client.set_load_method(
                        lambda model_path: class_impl.load_model(None, model_path)
                    )
                    initialized_impl = class_impl(**args)
                    client.set_load_method(initialized_impl.load_model)
                else:
                    initialized_impl = class_impl(**args)
            if self.type in [TensorFlowPredictorType, TensorFlowNeuronPredictorType]:
                args["tensorflow_client"] = client
                initialized_impl = class_impl(**args)
            if self.type == ONNXPredictorType:
                # mirrors the TensorFlow branch so the ONNX client returned by
                # initialize_client() is handled; the "onnx_client" argument name
                # follows the python_client/tensorflow_client naming convention
                args["onnx_client"] = client
                initialized_impl = class_impl(**args)
        except Exception as e:
            raise UserRuntimeException(self.path, "__init__", str(e)) from e

        # initialize the crons if models have been specified and if the API kind is RealtimeAPI
        if are_models_specified(self.api_spec) and self.api_spec["kind"] == "RealtimeAPI":
            if not self.multiple_processes and self.caching_enabled:
                self.crons += [
                    ModelTreeUpdater(
                        interval=10,
                        api_spec=self.api_spec,
                        tree=self.models_tree,
                        ondisk_models_dir=self.model_dir,
                    ),
                    ModelsGC(
                        interval=10,
                        api_spec=self.api_spec,
                        models=self.models,
                        tree=self.models_tree,
                    ),
                ]

            if not self.caching_enabled and self.type == PythonPredictorType:
                self.crons += [
                    FileBasedModelsGC(interval=10, models=self.models, download_dir=self.model_dir)
                ]

        for cron in self.crons:
            cron.start()

        return initialized_impl
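
Putting the three methods together, a sketch of the full initialization flow for a RealtimeAPI with the Python predictor type. The DogStatsd configuration, the project path, and the user-defined predict() method are placeholders rather than values taken from the examples above:

    from datadog.dogstatsd import DogStatsd

    metrics_client = DogStatsd(host="127.0.0.1", port=8125)

    client = predictor.initialize_client()  # Python predictor type: no TF Serving args
    impl = predictor.initialize_impl(
        project_dir="/mnt/project",
        client=client,
        metrics_client=metrics_client,
        job_spec=None,          # only set for the BatchAPI kind
        proto_module_pb2=None,  # only set when gRPC is enabled for RealtimeAPI
    )

    # the returned object is an instance of the user's predictor class
    prediction = impl.predict(payload={"text": "hello world"})  # hypothetical user method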