def predict(self, batch):
    """
    PREDICT COMMAND = {
        "command": "predict",
        "batch": [ REQUEST_INPUT ]
    }

    :param batch: list of requests
    :return: inference response message
    """
    headers, input_batch, req_id_map = Service.retrieve_data_for_inference(batch)

    self.context.request_ids = req_id_map
    self.context.request_processor = headers
    metrics = MetricsStore(req_id_map, self.context.model_name)
    self.context.metrics = metrics

    start_time = time.time()

    # noinspection PyBroadException
    try:
        ret = self._entry_point(input_batch, self.context)
    except MemoryError:
        logger.error("System out of memory", exc_info=True)
        return create_predict_response(None, req_id_map, "Out of resources", 507)
    except Exception as e:  # pylint: disable=broad-except
        logger.warning("Invoking custom service failed.", exc_info=True)
        message = "Prediction failed: " + str(e) + " \n " + str(traceback.format_exc())
        return create_predict_response(None, req_id_map, message, 503)

    if not isinstance(ret, list):
        logger.warning("model: %s, Invalid return type: %s.",
                       self.context.model_name, type(ret))
        return create_predict_response(None, req_id_map,
                                       "Invalid model predict output", 503)

    if len(ret) != len(input_batch):
        logger.warning(
            "model: %s, number of batch response mismatched, expect: %d, got: %d.",
            self.context.model_name, len(input_batch), len(ret))
        return create_predict_response(
            None, req_id_map, "number of batch response mismatched", 503)

    duration = round((time.time() - start_time) * 1000, 2)
    metrics.add_time(PREDICTION_METRIC, duration)

    return create_predict_response(ret, req_id_map, "Prediction success", 200,
                                   context=self.context)
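# A minimal sketch of the contract predict() enforces on a handler entry point,
# assuming the batch layout produced by Service.retrieve_data_for_inference.
# `echo_handler` is a hypothetical handler for illustration, not part of TorchServe.
def echo_handler(input_batch, context):
    # predict() requires a list with exactly one response per request;
    # a non-list return or a length mismatch is rejected with a 503.
    return list(input_batch)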
def load(self, model_name, model_dir, handler, gpu_id, batch_size, envelope=None):
    """
    Load TorchServe 1.0 model from file.

    :param model_name: name of the model
    :param model_dir: directory containing the model artifacts
    :param handler: handler file or module name, optionally with ":function"
    :param gpu_id: GPU device id, or None for CPU
    :param batch_size: maximum batch size
    :param envelope: optional name of a request/response envelope
    :return: an initialized Service instance
    """
    logging.debug("Loading model - working dir: %s", os.getcwd())
    # TODO: Request ID is not given. UUID is a temp UUID.
    metrics = MetricsStore(uuid.uuid4(), model_name)
    manifest_file = os.path.join(model_dir, "MAR-INF/MANIFEST.json")
    manifest = None
    if os.path.exists(manifest_file):
        with open(manifest_file) as f:
            manifest = json.load(f)

    function_name = None
    try:
        module, function_name = self._load_handler_file(handler)
    except ImportError:
        module = self._load_default_handler(handler)

    if module is None:
        raise ValueError(
            "Unable to load module {}, make sure it is added to python path".format(handler))

    envelope_class = None
    if envelope is not None:
        envelope_class = self._load_default_envelope(envelope)

    function_name = function_name or "handle"
    if hasattr(module, function_name):
        entry_point, initialize_fn = self._get_function_entry_point(module, function_name)
    else:
        entry_point, initialize_fn = self._get_class_entry_point(module)

    # Wrap the raw entry point so the envelope handles (de)serialization around it
    if envelope_class is not None:
        envelope_instance = envelope_class(entry_point)
        entry_point = envelope_instance.handle

    service = Service(model_name, model_dir, manifest, entry_point, gpu_id, batch_size)
    service.context.metrics = metrics
    initialize_fn(service.context)

    return service
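# A hedged usage sketch for the refactored load() above; the loader class name,
# paths, and handler/envelope names are placeholders, not verified TorchServe API.
#
#   loader = ModelLoader()                      # hypothetical owning class
#   service = loader.load(
#       model_name="mnist",
#       model_dir="/tmp/models/mnist",          # placeholder path
#       handler="image_classifier",             # resolved via _load_default_handler
#       gpu_id=0,
#       batch_size=1,
#       envelope="json",                        # optional, resolved via _load_default_envelope
#   )
#   response = service.predict(batch)           # batch as built by the worker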
def __init__(self, model_pt_file='model.pt',
             model_dir='ts/torch_handler/unit_tests/models/tmp',
             model_file='model.py',
             gpu_id='0',
             model_name="mnist"):
    self.manifest = {
        'model': {
            'serializedFile': model_pt_file,
        }
    }
    if model_file:
        self.manifest['model']['modelFile'] = model_file

    self.system_properties = {'model_dir': model_dir}
    if torch.cuda.is_available() and gpu_id:
        self.system_properties['gpu_id'] = gpu_id

    self.metrics = MetricsStore(uuid.uuid4(), model_name)
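# A minimal sketch of how this test context is used, assuming the class above
# is a mock of ts.context.Context (its name is not shown here); the handler
# below is hypothetical.
#
#   context = MockContext(model_name="mnist")   # hypothetical class name
#   handler = SomeHandler()                     # handler under test
#   handler.initialize(context)                 # reads manifest / system_properties
#   assert context.system_properties["model_dir"] == 'ts/torch_handler/unit_tests/models/tmp'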
def test_metrics(caplog):
    """
    Test that the metric classes' methods behave as expected.
    Also checks the global metric service methods.
    """
    # Create a batch of request ids
    request_ids = {0: 'abcd', 1: "xyz", 2: "qwerty", 3: "hjshfj"}
    all_req_ids = ','.join(request_ids.values())
    model_name = "dummy model"

    # Create a metrics object
    metrics = MetricsStore(request_ids, model_name)

    # Counter tests
    metrics.add_counter('CorrectCounter', 1, 1)
    test_metric = metrics.cache[get_model_key('CorrectCounter', 'count', 'xyz', model_name)]
    assert 'CorrectCounter' == test_metric.name
    metrics.add_counter('CorrectCounter', 1, 1)
    metrics.add_counter('CorrectCounter', 1, 3)
    metrics.add_counter('CorrectCounter', 1)
    test_metric = metrics.cache[get_model_key('CorrectCounter', 'count', all_req_ids, model_name)]
    assert 'CorrectCounter' == test_metric.name
    metrics.add_counter('CorrectCounter', 3)
    test_metric = metrics.cache[get_model_key('CorrectCounter', 'count', 'xyz', model_name)]
    assert test_metric.value == 2
    test_metric = metrics.cache[get_model_key('CorrectCounter', 'count', 'hjshfj', model_name)]
    assert test_metric.value == 1
    test_metric = metrics.cache[get_model_key('CorrectCounter', 'count', all_req_ids, model_name)]
    assert test_metric.value == 4

    # Check that what is emitted is correct
    emit_metrics(metrics.store)
    assert "hjshfj" in caplog.text
    assert "ModelName:dummy model" in caplog.text

    # Adding other types of metrics
    # Check for time metric
    with pytest.raises(Exception) as e_info:
        metrics.add_time('WrongTime', 20, 1, 'ns')
    assert "the unit for a timed metric should be one of ['ms', 's']" == e_info.value.args[0]

    metrics.add_time('CorrectTime', 20, 2, 's')
    metrics.add_time('CorrectTime', 20, 0)
    test_metric = metrics.cache[get_model_key('CorrectTime', 'ms', 'abcd', model_name)]
    assert test_metric.value == 20
    assert test_metric.unit == 'Milliseconds'
    test_metric = metrics.cache[get_model_key('CorrectTime', 's', 'qwerty', model_name)]
    assert test_metric.value == 20
    assert test_metric.unit == 'Seconds'

    # Size based metrics
    with pytest.raises(Exception) as e_info:
        metrics.add_size('WrongSize', 20, 1, 'TB')
    assert "The unit for size based metric is one of ['MB','kB', 'GB', 'B']" == e_info.value.args[0]

    metrics.add_size('CorrectSize', 200, 0, 'GB')
    metrics.add_size('CorrectSize', 10, 2)
    test_metric = metrics.cache[get_model_key('CorrectSize', 'GB', 'abcd', model_name)]
    assert test_metric.value == 200
    assert test_metric.unit == 'Gigabytes'
    test_metric = metrics.cache[get_model_key('CorrectSize', 'MB', 'qwerty', model_name)]
    assert test_metric.value == 10
    assert test_metric.unit == 'Megabytes'

    # Check a percentage metric
    metrics.add_percent('CorrectPercent', 20.0, 3)
    test_metric = metrics.cache[get_model_key('CorrectPercent', 'percent', 'hjshfj', model_name)]
    assert test_metric.value == 20.0
    assert test_metric.unit == 'Percent'

    # Check an error metric
    metrics.add_error('CorrectError', 'Wrong values')
    test_metric = metrics.cache[get_error_key('CorrectError', '')]
    assert test_metric.value == 'Wrong values'
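# A short sketch of the same MetricsStore API as used from handler code; the
# metric names and values below are chosen for illustration only, but the call
# signatures mirror the ones exercised in the test above.
def record_example_metrics(metrics):
    metrics.add_counter('InferenceRequests', 1, 0)   # count metric for request index 0
    metrics.add_time('PreprocessTime', 12, 0, 'ms')  # timed metric, unit 'ms' or 's'
    metrics.add_size('PayloadSize', 2, 0, 'MB')      # size metric: 'MB', 'kB', 'GB' or 'B'
    metrics.add_percent('GPUUtil', 75.0, 0)          # percentage metric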
def load(self, model_name, model_dir, handler, gpu_id, batch_size):
    """
    Load TorchServe 1.0 model from file.

    :param model_name: name of the model
    :param model_dir: directory containing the model artifacts
    :param handler: handler file or module name, optionally with ":function"
    :param gpu_id: GPU device id, or None for CPU
    :param batch_size: maximum batch size
    :return: an initialized Service instance
    """
    logging.debug("Loading model - working dir: %s", os.getcwd())
    # TODO: Request ID is not given. UUID is a temp UUID.
    metrics = MetricsStore(uuid.uuid4(), model_name)
    manifest_file = os.path.join(model_dir, "MAR-INF/MANIFEST.json")
    manifest = None
    if os.path.exists(manifest_file):
        with open(manifest_file) as f:
            manifest = json.load(f)

    try:
        # Handler may be given as "module[:function]" or a path ending in .py
        temp = handler.split(":", 1)
        module_name = temp[0]
        function_name = None if len(temp) == 1 else temp[1]
        if module_name.endswith(".py"):
            module_name = module_name[:-3]
        module_name = module_name.split("/")[-1]
        module = importlib.import_module(module_name)
    except ImportError:
        # Fall back to a default handler shipped with ts.torch_handler
        module_name = ".{0}".format(handler)
        module = importlib.import_module(module_name, 'ts.torch_handler')
        function_name = None

    if module is None:
        raise ValueError(
            "Unable to load module {}, make sure it is added to python path".format(module_name))

    if function_name is None:
        function_name = "handle"

    if hasattr(module, function_name):
        # Function-style handler: the module exposes a callable entry point
        entry_point = getattr(module, function_name)
        service = Service(model_name, model_dir, manifest, entry_point, gpu_id, batch_size)

        service.context.metrics = metrics
        # initialize model at load time
        entry_point(None, service.context)
    else:
        # Class-style handler: expect exactly one service class in the module
        model_class_definitions = list_classes_from_module(module)
        if len(model_class_definitions) != 1:
            raise ValueError(
                "Expected only one class in custom service code or a function entry point {}".format(
                    model_class_definitions))

        model_class = model_class_definitions[0]
        model_service = model_class()
        handle = getattr(model_service, "handle", None)
        if handle is None:
            raise ValueError("Expect handle method in class {}".format(str(model_class)))

        service = Service(model_name, model_dir, manifest, model_service.handle,
                          gpu_id, batch_size)
        initialize = getattr(model_service, "initialize", None)
        if initialize is not None:
            model_service.initialize(service.context)
        else:
            raise ValueError("Expect initialize method in class {}".format(str(model_class)))

    return service
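# A minimal class-style handler satisfying the contract the legacy load() above
# enforces: exactly one class in the handler module, with initialize() and
# handle() methods. The class name and body are illustrative only.
class EchoService:
    def __init__(self):
        self.initialized = False

    def initialize(self, context):
        # Called once by load() with the freshly built service context
        self.initialized = True

    def handle(self, data, context):
        # One response per request, mirroring predict()'s batch contract
        return list(data) if data is not None else None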