Beispiel #1
0
    def predict(self, batch):
        """
        Run the model entry point on a batch and wrap the outcome in a response.

        PREDICT COMMAND = {
            "command": "predict",
            "batch": [ REQUEST_INPUT ]
        }

        :param batch: list of request
        :return: the value built by ``create_predict_response``: code 200 when
            the entry point returns a list with one item per input, code 507
            when it raises MemoryError, code 503 for any other failure

        """
        request_headers, inputs, id_map = Service.retrieve_data_for_inference(
            batch)

        def error_response(text, status):
            # Every failure path answers with no payload for the same request ids.
            return create_predict_response(None, id_map, text, status)

        # Refresh the per-request state on the context before invoking the handler.
        self.context.request_ids = id_map
        self.context.request_processor = request_headers
        store = MetricsStore(id_map, self.context.model_name)
        self.context.metrics = store

        began_at = time.time()

        # noinspection PyBroadException
        try:
            outputs = self._entry_point(inputs, self.context)
        except MemoryError:
            logger.error("System out of memory", exc_info=True)
            return error_response("Out of resources", 507)
        except Exception as err:  # pylint: disable=broad-except
            logger.warning("Invoking custom service failed.", exc_info=True)
            failure_text = "".join((
                "Prediction failed: ",
                str(err),
                " \n ",
                str(traceback.format_exc()),
            ))
            return error_response(failure_text, 503)

        # The entry point must hand back a list ...
        if not isinstance(outputs, list):
            logger.warning("model: %s, Invalid return type: %s.",
                           self.context.model_name, type(outputs))
            return error_response("Invalid model predict output", 503)

        # ... holding exactly one result per input.
        expected_count = len(inputs)
        actual_count = len(outputs)
        if actual_count != expected_count:
            logger.warning(
                "model: %s, number of batch response mismatched, expect: %d, got: %d.",
                self.context.model_name, expected_count, actual_count)
            return error_response("number of batch response mismatched", 503)

        # Only successful predictions record their elapsed time (milliseconds).
        elapsed_ms = round((time.time() - began_at) * 1000, 2)
        store.add_time(PREDICTION_METRIC, elapsed_ms)

        return create_predict_response(outputs,
                                       id_map,
                                       "Prediction success",
                                       200,
                                       context=self.context)
Beispiel #2
0
    def load(self,
             model_name,
             model_dir,
             handler,
             gpu_id,
             batch_size,
             envelope=None):
        """
        Load TorchServe 1.0 model from file.

        Reads ``MAR-INF/MANIFEST.json`` under ``model_dir`` when it exists,
        resolves the handler module and its entry point, optionally wraps the
        entry point in an envelope, builds the ``Service`` and finally calls
        the handler's initialize function with the service context.

        :param model_name: name given to the metrics store and the service
        :param model_dir: directory searched for ``MAR-INF/MANIFEST.json``;
            also passed to the service
        :param handler: handler specification handed to the handler loaders
        :param gpu_id: forwarded unchanged to ``Service``
        :param batch_size: forwarded unchanged to ``Service``
        :param envelope: optional envelope specification; when not None the
            loaded envelope class wraps the entry point and its ``handle``
            method becomes the service entry point
        :return: the initialized ``Service``
        :raises ValueError: if no handler module could be loaded
        """
        logging.debug("Loading model - working dir: %s", os.getcwd())
        # TODO: Request ID is not given. UUID is a temp UUID.
        metrics = MetricsStore(uuid.uuid4(), model_name)
        manifest_file = os.path.join(model_dir, "MAR-INF/MANIFEST.json")
        manifest = None
        if os.path.exists(manifest_file):
            with open(manifest_file) as f:
                manifest = json.load(f)

        # The default-handler fallback yields no function name, so it stays
        # None and "handle" is used further down.
        function_name = None
        try:
            module, function_name = self._load_handler_file(handler)
        except ImportError:
            module = self._load_default_handler(handler)

        if module is None:
            # Report the handler specification: no ``module_name`` exists in
            # this method, so formatting it would raise NameError instead of
            # the intended ValueError.
            raise ValueError(
                "Unable to load module {}, make sure it is added to python path"
                .format(handler))

        envelope_class = None
        if envelope is not None:
            envelope_class = self._load_default_envelope(envelope)

        # A module-level function with the requested name wins; otherwise the
        # module is expected to provide a handler class.
        function_name = function_name or "handle"
        if hasattr(module, function_name):
            entry_point, initialize_fn = self._get_function_entry_point(
                module, function_name)
        else:
            entry_point, initialize_fn = self._get_class_entry_point(module)

        if envelope_class is not None:
            envelope_instance = envelope_class(entry_point)
            entry_point = envelope_instance.handle

        service = Service(model_name, model_dir, manifest, entry_point, gpu_id,
                          batch_size)
        # Attach the metrics store before initialization so it is already on
        # the context when the initialize function receives it.
        service.context.metrics = metrics
        initialize_fn(service.context)

        return service
Beispiel #3
0
    def __init__(self,
                 model_pt_file='model.pt',
                 model_dir='ts/torch_handler/unit_tests/models/tmp',
                 model_file='model.py',
                 gpu_id='0',
                 model_name="mnist"):
        """
        Populate the manifest, system properties and metrics store.

        :param model_pt_file: stored as the manifest's ``serializedFile``
        :param model_dir: stored as the ``model_dir`` system property
        :param model_file: stored as the manifest's ``modelFile`` when truthy
        :param gpu_id: stored as the ``gpu_id`` system property only when CUDA
            is available and the value is truthy
        :param model_name: name given to the metrics store
        """
        model_entry = {'serializedFile': model_pt_file}
        if model_file:
            model_entry['modelFile'] = model_file
        self.manifest = {'model': model_entry}

        properties = {'model_dir': model_dir}
        # CUDA availability is queried first, exactly as before, so the check
        # runs regardless of the gpu_id value.
        if torch.cuda.is_available() and gpu_id:
            properties['gpu_id'] = gpu_id
        self.system_properties = properties

        # No real request id exists here, so a random UUID stands in for it.
        self.metrics = MetricsStore(uuid.uuid4(), model_name)
Beispiel #4
0
def test_metrics(caplog):
    """
    Walk a MetricsStore through counter, time, size, percent and error
    metrics, checking the cached entries after each step, and verify that
    emitting the store writes the expected text to the captured log.
    """
    model_name = "dummy model"
    # Batch of request ids, keyed by the index used in the add_* calls below
    request_ids = {0: 'abcd', 1: "xyz", 2: "qwerty", 3: "hjshfj"}
    all_req_ids = ','.join(request_ids.values())

    metrics = MetricsStore(request_ids, model_name)

    def cached(metric_name, unit, req_id):
        # Fetch the model-level metric stored under the given name/unit/id.
        key = get_model_key(metric_name, unit, req_id, model_name)
        return metrics.cache[key]

    # --- Counters ---
    metrics.add_counter('CorrectCounter', 1, 1)
    assert 'CorrectCounter' == cached('CorrectCounter', 'count', 'xyz').name

    for request_index in (1, 3):
        metrics.add_counter('CorrectCounter', 1, request_index)
    # Without an index the counter is cached under the joined request ids
    metrics.add_counter('CorrectCounter', 1)
    assert 'CorrectCounter' == cached('CorrectCounter', 'count',
                                      all_req_ids).name

    metrics.add_counter('CorrectCounter', 3)
    assert cached('CorrectCounter', 'count', 'xyz').value == 2
    assert cached('CorrectCounter', 'count', 'hjshfj').value == 1
    assert cached('CorrectCounter', 'count', all_req_ids).value == 4

    # --- Emission ---
    emit_metrics(metrics.store)

    assert "hjshfj" in caplog.text
    assert "ModelName:dummy model" in caplog.text

    # --- Time metrics ---
    with pytest.raises(Exception) as time_error:
        metrics.add_time('WrongTime', 20, 1, 'ns')
    expected_time_message = (
        "the unit for a timed metric should be one of ['ms', 's']")
    assert expected_time_message == time_error.value.args[0]

    metrics.add_time('CorrectTime', 20, 2, 's')
    metrics.add_time('CorrectTime', 20, 0)

    millis_metric = cached('CorrectTime', 'ms', 'abcd')
    assert millis_metric.value == 20
    assert millis_metric.unit == 'Milliseconds'

    seconds_metric = cached('CorrectTime', 's', 'qwerty')
    assert seconds_metric.value == 20
    assert seconds_metric.unit == 'Seconds'

    # --- Size metrics ---
    with pytest.raises(Exception) as size_error:
        metrics.add_size('WrongSize', 20, 1, 'TB')
    expected_size_message = (
        "The unit for size based metric is one of ['MB','kB', 'GB', 'B']")
    assert expected_size_message == size_error.value.args[0]

    metrics.add_size('CorrectSize', 200, 0, 'GB')
    metrics.add_size('CorrectSize', 10, 2)

    gigabyte_metric = cached('CorrectSize', 'GB', 'abcd')
    assert gigabyte_metric.value == 200
    assert gigabyte_metric.unit == 'Gigabytes'

    megabyte_metric = cached('CorrectSize', 'MB', 'qwerty')
    assert megabyte_metric.value == 10
    assert megabyte_metric.unit == 'Megabytes'

    # --- Percentage metric ---
    metrics.add_percent('CorrectPercent', 20.0, 3)
    percent_metric = cached('CorrectPercent', 'percent', 'hjshfj')
    assert percent_metric.value == 20.0
    assert percent_metric.unit == 'Percent'

    # --- Error metric (cached under an error key, not a model key) ---
    metrics.add_error('CorrectError', 'Wrong values')
    error_metric = metrics.cache[get_error_key('CorrectError', '')]
    assert error_metric.value == 'Wrong values'
    def load(self, model_name, model_dir, handler, gpu_id, batch_size):
        """
        Load TorchServe 1.0 model from file.

        Reads ``MAR-INF/MANIFEST.json`` under ``model_dir`` when it exists,
        imports the handler module, then builds a ``Service`` around either a
        module-level function or the single class found in the module, and
        initializes the model before returning.

        :param model_name: name given to the metrics store and the service
        :param model_dir: directory searched for ``MAR-INF/MANIFEST.json``;
            also passed to the service
        :param handler: ``module[:function]`` specification; a trailing
            ``.py`` and any leading directory path are stripped from the
            module part. If that import fails, ``handler`` is imported as a
            submodule of ``ts.torch_handler`` instead
        :param gpu_id: forwarded unchanged to ``Service``
        :param batch_size: forwarded unchanged to ``Service``
        :return: the initialized ``Service``
        :raises ValueError: if the module could not be loaded, if the module
            has no matching function and does not contain exactly one class,
            or if that class lacks a ``handle`` or ``initialize`` method
        """
        logging.debug("Loading model - working dir: %s", os.getcwd())
        # TODO: Request ID is not given. UUID is a temp UUID.
        metrics = MetricsStore(uuid.uuid4(), model_name)
        manifest_file = os.path.join(model_dir, "MAR-INF/MANIFEST.json")
        manifest = None
        if os.path.exists(manifest_file):
            with open(manifest_file) as f:
                manifest = json.load(f)

        try:
            temp = handler.split(":", 1)
            module_name = temp[0]
            function_name = None if len(temp) == 1 else temp[1]
            if module_name.endswith(".py"):
                module_name = module_name[:-3]
            module_name = module_name.split("/")[-1]
            module = importlib.import_module(module_name)
        except ImportError:
            # Fall back to a handler module inside the ts.torch_handler
            # package; those are always used through the default entry name.
            module_name = ".{0}".format(handler)
            module = importlib.import_module(module_name, 'ts.torch_handler')
            function_name = None

        if module is None:
            raise ValueError(
                "Unable to load module {}, make sure it is added to python path"
                .format(module_name))
        if function_name is None:
            function_name = "handle"
        if hasattr(module, function_name):
            entry_point = getattr(module, function_name)
            service = Service(model_name, model_dir, manifest, entry_point,
                              gpu_id, batch_size)

            service.context.metrics = metrics
            # initialize model at load time
            entry_point(None, service.context)
        else:
            model_class_definitions = list_classes_from_module(module)
            if len(model_class_definitions) != 1:
                raise ValueError(
                    "Expected only one class in custom service code or a function entry point {}"
                    .format(model_class_definitions))

            model_class = model_class_definitions[0]
            model_service = model_class()
            # getattr needs an explicit default: without it a missing method
            # raises AttributeError and the ValueError below is unreachable.
            handle = getattr(model_service, "handle", None)
            if handle is None:
                raise ValueError("Expect handle method in class {}".format(
                    str(model_class)))

            service = Service(model_name, model_dir, manifest,
                              model_service.handle, gpu_id, batch_size)
            # Attach the metrics store here too, matching the function
            # entry-point branch, so it is on the context during initialize.
            service.context.metrics = metrics
            initialize = getattr(model_service, "initialize", None)
            if initialize is not None:
                model_service.initialize(service.context)
            else:
                raise ValueError("Expect initialize method in class {}".format(
                    str(model_class)))

        return service