def _config_inference_apis(self):
    self._inference_apis = []

    for _, function in inspect.getmembers(
        self.__class__,
        predicate=lambda x: inspect.isfunction(x) or inspect.ismethod(x),
    ):
        if hasattr(function, "_is_api"):
            api_name = getattr(function, "_api_name")
            route = getattr(function, "_api_route", None)
            api_doc = getattr(function, "_api_doc")
            input_adapter = getattr(function, "_input_adapter")
            output_adapter = getattr(function, "_output_adapter")
            mb_max_latency = getattr(function, "_mb_max_latency")
            mb_max_batch_size = getattr(function, "_mb_max_batch_size")
            batch = getattr(function, "_batch")

            # Bind api method call with self (BentoService instance)
            user_func = function.__get__(self)

            self._inference_apis.append(
                InferenceAPI(
                    self,
                    api_name,
                    api_doc,
                    input_adapter=input_adapter,
                    user_func=user_func,
                    output_adapter=output_adapter,
                    mb_max_latency=mb_max_latency,
                    mb_max_batch_size=mb_max_batch_size,
                    batch=batch,
                    route=route,
                )
            )
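# For context, a minimal sketch of the decorator that attaches the private
# attributes the reflection loop above reads. This is an illustrative
# assumption, not BentoML's actual implementation; the names `api`,
# `input_adapter`, and the default values here are hypothetical. Note that
# every attribute except `_api_route` is set unconditionally, matching the
# loop's use of a default only in `getattr(function, "_api_route", None)`.
def api(
    input_adapter,
    output_adapter=None,
    api_name=None,
    api_doc=None,
    mb_max_latency=10000,
    mb_max_batch_size=2000,
    batch=False,
    route=None,
):
    def decorator(func):
        # Mark the plain function so _config_inference_apis can discover it
        # via inspect.getmembers on the class.
        func._is_api = True
        func._api_name = api_name or func.__name__
        func._api_doc = api_doc or (func.__doc__ or "").strip()
        func._input_adapter = input_adapter
        func._output_adapter = output_adapter
        func._mb_max_latency = mb_max_latency
        func._mb_max_batch_size = mb_max_batch_size
        func._batch = batch
        if route is not None:
            func._api_route = route
        return func

    return decorator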
def _config_inference_apis(self):
    self._inference_apis = []

    for _, function in inspect.getmembers(
        self.__class__,
        predicate=lambda x: inspect.isfunction(x) or inspect.ismethod(x),
    ):
        if hasattr(function, "_is_api"):
            api_name = getattr(function, "_api_name")
            route = getattr(function, "_api_route", None)
            api_doc = getattr(function, "_api_doc")
            http_methods = getattr(function, "_http_methods")
            # TODO: Add these while scaling
            # mb_max_latency = getattr(function, "_mb_max_latency")
            # mb_max_batch_size = getattr(function, "_mb_max_batch_size")

            # Bind api method call with self (BentoService instance)
            user_func = function.__get__(self)

            self._inference_apis.append(
                InferenceAPI(
                    self,
                    api_name,
                    api_doc,
                    user_func=user_func,
                    http_methods=http_methods,
                    # TODO: Add these while scaling
                    # mb_max_latency=mb_max_latency,
                    # mb_max_batch_size=mb_max_batch_size,
                    route=route,
                )
            )
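# A minimal sketch of the InferenceAPI container that the simplified method
# above constructs. This is an assumption for illustration only, shaped to
# match the call site (positional service/name/doc, keyword user_func,
# http_methods, route); the real class would also handle request parsing,
# adapters, and docs generation.
class InferenceAPI:
    def __init__(
        self,
        service,
        name,
        doc,
        user_func,
        http_methods=("POST",),
        route=None,
    ):
        self.service = service
        self.name = name
        self.doc = doc
        # user_func is already bound to the service instance via
        # function.__get__(self) in _config_inference_apis.
        self.user_func = user_func
        self.http_methods = http_methods
        # Default the route to the API name when none was supplied.
        self.route = route if route is not None else name

    def __call__(self, *args, **kwargs):
        # Invoking the API delegates straight to the user's bound method.
        return self.user_func(*args, **kwargs)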