Example #1
  def evaluate_tflite(self, tflite_filepath, data):
    """Evaluates the tflite model.

    Args:
      tflite_filepath: File path to the TFLite model.
      data: Data to be evaluated.

    Returns:
      The evaluation result of TFLite model - accuracy.
    """
    ds = data.gen_dataset(
        batch_size=1, is_training=False, preprocess=self.preprocess)

    predictions, labels = [], []

    lite_runner = model_util.get_lite_runner(tflite_filepath, self.model_spec)
    for i, (feature, label) in enumerate(data_util.generate_elements(ds)):
      log_steps = 1000
      tf.compat.v1.logging.log_every_n(tf.compat.v1.logging.INFO,
                                       'Processing example: #%d\n%s', log_steps,
                                       i, feature)

      probabilities = lite_runner.run(feature)
      predictions.append(np.argmax(probabilities))

      # Gets the ground-truth labels.
      label = label[0]
      if label.size > 1:  # one-hot tensor.
        label = np.argmax(label)
      labels.append(label)

    predictions, labels = np.array(predictions), np.array(labels)
    result = {'accuracy': (predictions == labels).mean()}
    return result
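
The ground-truth handling in the loop above accepts both one-hot and scalar labels. The following self-contained sketch, with made-up values, shows that both forms reduce to the same class id:

import numpy as np

# Illustration of the label handling above (values are invented for this
# sketch): a one-hot label tensor is collapsed to a class id via argmax,
# while a scalar label is used as-is.
one_hot_label = np.array([0., 0., 1., 0.])
scalar_label = np.array(2)

for label in (one_hot_label, scalar_label):
  if label.size > 1:  # one-hot tensor.
    label = np.argmax(label)
  print(int(label))  # Prints 2 in both cases.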

Example #2
  def evaluate_tflite(self, tflite_filepath, data):
    """Evaluates the tflite model.

    The data is padded to the required length, and multiple metrics are
    evaluated.

    Args:
      tflite_filepath: File path to the TFLite model.
      data: Data to be evaluated.

    Returns:
      Dict of (metric, value), evaluation result of TFLite model.
    """
    lite_runner = model_util.get_lite_runner(tflite_filepath, self.model_spec)
    ds = data.gen_dataset(batch_size=1, is_training=False)

    max_output_size = data.max_vocab_id + 1  # +1 because 0 is reserved for OOV.
    eval_top_k = self.model_spec.params['eval_top_k']
    metrics = [
        _metrics.GlobalRecall(name=f'Global_Recall/Recall_{k}', top_k=k)
        for k in eval_top_k
    ]
    for feature, label in data_util.generate_elements(ds):
      context = feature['context'][0].astype(np.int32)
      # x: shape [max_history_length], padded with OOV_ID.
      if len(context) == self._max_history_length:
        x = context
      elif len(context) > self._max_history_length:
        x = context[:self._max_history_length]
      else:
        # Padding.
        x = np.empty([self._max_history_length], dtype=np.int32)
        x.fill(self.OOV_ID)
        x[:len(context)] = context

      # ids, scores: shape [top_k]
      ids, scores = lite_runner.run(x)

      # y_true: shape [1, 1]
      y_true = label
      # y_pred: shape [1, max_output_size]; only the top-k ids get scores.
      y_pred = np.zeros([1, max_output_size])
      for i, score in zip(ids, scores):
        if i in data.vocab:  # Only set if id is in vocab.
          y_pred[0, i] = score

      # Update metrics.
      for m in metrics:
        m.update_state(y_true, y_pred)
    result = collections.OrderedDict([(m.name, m.result()) for m in metrics])
    return result
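
The truncate-or-pad step in the example above can be isolated as a small helper. A minimal sketch, with a made-up fill value standing in for self.OOV_ID:

import numpy as np

# Standalone sketch of the truncate-or-pad logic above. The fill value 0 is
# an assumption for this illustration; the example above uses self.OOV_ID.
def pad_or_truncate(context, max_history_length, fill_id=0):
  context = np.asarray(context, dtype=np.int32)
  if len(context) >= max_history_length:
    return context[:max_history_length]
  x = np.full([max_history_length], fill_id, dtype=np.int32)
  x[:len(context)] = context
  return x

print(pad_or_truncate([5, 7, 9], max_history_length=5))  # [5 7 9 0 0]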

Example #3
  def evaluate_tflite(self, tflite_filepath, data):
    """Evaluates the tflite model.

    The data is padded to the required length, and multiple metrics are
    evaluated.

    Args:
      tflite_filepath: File path to the TFLite model.
      data: Data to be evaluated.

    Returns:
      Dict of (metric, value), evaluation result of TFLite model.
    """
    label_name = self.input_spec.label_feature.feature_name
    lite_runner = model_util.get_lite_runner(tflite_filepath, self.model_spec)
    ds = data.gen_dataset(batch_size=1, is_training=False)

    max_output_size = data.max_vocab_id + 1  # +1 because 0 is reserved for OOV.
    eval_top_k = self.model_hparams.eval_top_k
    metrics = [
        _metrics.GlobalRecall(top_k=k, name=f'Global_Recall/Recall_{k}')
        for k in eval_top_k
    ]
    for feature, y_true in data_util.generate_elements(ds):
      feature.pop(label_name)
      x = feature
      ids, scores = lite_runner.run(x)

      # y_true: shape [1, 1]
      # y_pred: shape [1, max_output_size]; only the top-k ids get scores.
      y_pred = np.zeros([1, max_output_size])
      for i, score in zip(ids, scores):
        if i in data.vocab:  # Only set if id is in vocab.
          y_pred[0, i] = score

      # Update metrics.
      for m in metrics:
        m.update_state(y_true, y_pred)
    result = collections.OrderedDict([(m.name, m.result()) for m in metrics])
    return result
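
For reference, one common way to read the recall numbers produced above: an example counts as a hit when its true item id lands among the k highest-scoring ids in y_pred. The actual _metrics.GlobalRecall implementation may differ; this is only a sketch with made-up values:

import numpy as np

# Hedged sketch of recall@k over dense score vectors like y_pred above.
# This mirrors one common definition and is not taken from the library.
def recall_at_k(y_true_ids, y_pred_scores, k):
  hits = 0
  for true_id, scores in zip(y_true_ids, y_pred_scores):
    top_k_ids = np.argsort(scores)[::-1][:k]
    hits += int(true_id in top_k_ids)
  return hits / len(y_true_ids)

y_true_ids = [3, 1]
y_pred_scores = np.array([[0.0, 0.2, 0.0, 0.9],   # Top-1 id is 3 -> hit.
                          [0.1, 0.0, 0.8, 0.0]])  # Top-1 id is 2 -> miss.
print(recall_at_k(y_true_ids, y_pred_scores, k=1))  # 0.5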

Example #4
  def evaluate_tflite(self, tflite_filepath, data):
    """Evaluates the tflite model.

    Args:
      tflite_filepath: File path to the TFLite model.
      data: Data to be evaluated.

    Returns:
      The evaluation result of TFLite model - accuracy.
    """
    ds = data.gen_dataset(batch_size=1, is_training=False)

    predictions, labels = [], []

    lite_runner = model_util.get_lite_runner(tflite_filepath, self.model_spec)
    for i, (feature, label) in enumerate(data_util.generate_elements(ds)):
      log_steps = 1000
      tf.compat.v1.logging.log_every_n(tf.compat.v1.logging.INFO,
                                       'Processing example: #%d\n%s', log_steps,
                                       i, feature)

      probability = lite_runner.run(feature)  # Shape: (batch=1, 1)
      probability = probability.flatten()[0]  # Get the scalar value.
      predictions.append(probability > 0.5)

      label = label[0]
      labels.append(label)

    predictions = np.array(predictions).astype(int)
    labels = np.array(labels).astype(int)

    return {'accuracy': (predictions == labels).mean()}
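
A tiny standalone illustration of the 0.5-threshold accuracy computed above, with made-up values:

import numpy as np

# Sigmoid outputs are binarized at 0.5 and compared with integer labels.
# The numbers here are invented purely for illustration.
probabilities = np.array([0.9, 0.2, 0.6, 0.4])
labels = np.array([1, 0, 0, 0])
predictions = (probabilities > 0.5).astype(int)  # [1, 0, 1, 0]
accuracy = (predictions == labels).mean()        # 3 of 4 correct -> 0.75
print(accuracy)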