Example #1
 def CreateDecoderMetrics(self):
     base_metrics = {
         'wer': metrics.AverageMetric(),  # Word error rate.
         'cer': metrics.AverageMetric(),  # Character error rate.
         'num_samples_in_batch': metrics.AverageMetric()
     }
     return base_metrics
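
A quick usage sketch (not taken from the example above; metric names and per-batch values are hypothetical) showing how a decoder-metrics dictionary like this is typically filled in: each AverageMetric accumulates weighted values via Update(value, weight) and exposes the running weighted average as .value, matching the behavior exercised in Example #5 below.

# Hypothetical usage sketch; assumes the same `metrics` module imported by
# these examples, where AverageMetric.Update takes a value and optional weight.
decoder_metrics = {
    'wer': metrics.AverageMetric(),
    'cer': metrics.AverageMetric(),
    'num_samples_in_batch': metrics.AverageMetric(),
}
# The per-batch WER/CER values and batch sizes below are made up.
for batch_wer, batch_cer, batch_size in [(0.12, 0.05, 8), (0.20, 0.08, 8)]:
    decoder_metrics['wer'].Update(batch_wer, batch_size)
    decoder_metrics['cer'].Update(batch_cer, batch_size)
    decoder_metrics['num_samples_in_batch'].Update(batch_size)
print(decoder_metrics['wer'].value)  # Weighted average WER across both batches.
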
Example #2
    def CreateDecoderMetrics(self):
        """Decoder metrics for WaymoOpenDataset."""
        p = self.params

        waymo_metric_p = p.ap_metric.Copy().Set(
            cls=waymo_ap_metric.WaymoAPMetrics)
        waymo_metrics = waymo_metric_p.Instantiate()
        class_names = waymo_metrics.metadata.ClassNames()

        # TODO(bencaine,vrv): There's some code smell with this ap_metrics params
        # usage. We create local copies of the params to then instantiate them.
        # Failing to do this risks users editing the params after construction of
        # the object, making each object method call have the potential for side
        # effects.
        # Create a new dictionary with copies of the params converted to objects
        # so we can then add these to the decoder metrics.
        extra_ap_metrics = {}
        for k, metric_p in p.extra_ap_metrics.items():
            extra_ap_metrics[k] = metric_p.Instantiate()

        waymo_metric_bev_p = waymo_metric_p.Copy()
        waymo_metric_bev_p.box_type = '2d'
        waymo_metrics_bev = waymo_metric_bev_p.Instantiate()
        # Convert the list of class names to a dictionary mapping class_id -> name.
        class_id_to_name = dict(enumerate(class_names))

        # TODO(vrv): This uses the same top down transform as for KITTI;
        # re-visit these settings since detections can happen all around
        # the car.
        top_down_transform = transform_util.MakeCarToImageTransform(
            pixels_per_meter=32.,
            image_ref_x=512.,
            image_ref_y=1408.,
            flip_axes=True)
        decoder_metrics = py_utils.NestedMap({
            'top_down_visualization':
            (detection_3d_metrics.TopDownVisualizationMetric(
                top_down_transform,
                image_height=1536,
                image_width=1024,
                class_id_to_name=class_id_to_name)),
            'num_samples_in_batch':
            metrics.AverageMetric(),
            'waymo_metrics':
            waymo_metrics,
            'waymo_metrics_bev':
            waymo_metrics_bev,
        })
        self._update_metrics_class_keys = [
            'waymo_metrics_bev', 'waymo_metrics'
        ]
        for k, metric in extra_ap_metrics.items():
            decoder_metrics[k] = metric
            self._update_metrics_class_keys.append(k)

        decoder_metrics.mesh = detection_3d_metrics.WorldViewer()
        return decoder_metrics
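
The TODO in this example is about avoiding shared-params side effects; the minimal sketch below is not from the example itself and only assumes the Params-style Copy()/Instantiate() calls already shown above.

# Hypothetical sketch of the copy-then-instantiate pattern the TODO recommends.
shared_p = p.ap_metric               # Params shared with the task configuration.
private_p = shared_p.Copy()          # Take a private copy first...
ap_metric = private_p.Instantiate()  # ...then build the object from that copy.
# Later edits to shared_p no longer affect the already-constructed ap_metric.
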
Example #3
  def _EvalOnce(self, path, sess):
    """Runs evaluation on a single checkpoint.

    Args:
      path: checkpoint path.
      sess: the tf Session.

    Returns:
      should_stop.
    """
    if not FLAGS.evaler_in_same_address_as_controller:
      self._LoadCheckpointForEval(sess, path)

    global_step = sess.run(self._model.global_step)
    metrics_dict = {
        name: metrics.AverageMetric() for name in self._model_task.eval_metrics
    }
    num_samples_metric = metrics_dict['num_samples_in_batch']
    while (num_samples_metric.total_value <
           self._model_task.params.eval.samples_per_summary):
      if self._summary_op is None:
        # No summaries were collected.
        ans = sess.run(self._model_task.eval_metrics)
      else:
        ans, summary = sess.run(
            [self._model_task.eval_metrics, self._summary_op])
        self._summary_writer.add_summary(summary, global_step)
      for name, (value, weight) in six.iteritems(ans):
        metrics_dict[name].Update(value, weight)
      tf.logging.info('Total examples done: %d/%d',
                      num_samples_metric.total_value,
                      self._model_task.params.eval.samples_per_summary)

    # Replace average values with total values for certain metrics.
    if 'num_predictions' in metrics_dict:
      metrics_dict['num_predictions'].total_weight = 1.0
    if 'num_words' in metrics_dict:
      metrics_dict['num_words'].total_weight = 1.0

    # Once enough samples have been evaluated, generate a summary.
    self._WriteSummaries(
        self._summary_writer,
        os.path.basename(self._eval_dir),
        global_step, {k: v.Summary(k) for k, v in six.iteritems(metrics_dict)},
        text_filename=os.path.join(self._eval_dir,
                                   'score-{:08d}.txt'.format(global_step)))

    is_final = global_step >= self.params.train.max_steps
    should_stop = self._trial.ReportEvalMeasure(global_step, metrics_dict, path)
    return should_stop or is_final
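
The total_weight override above works because AverageMetric reports a weighted average; forcing total_weight to 1.0 turns the reported average into a plain total. A minimal sketch with made-up numbers, assuming .value is total_value / total_weight (consistent with the test in Example #5 below):

# Hypothetical illustration of the total_weight override used above.
m = metrics.AverageMetric()
m.Update(3.0)           # total_value = 3.0, total_weight = 1.0
m.Update(5.0)           # total_value = 8.0, total_weight = 2.0
assert m.value == 4.0   # Weighted average so far.
m.total_weight = 1.0
assert m.value == 8.0   # .value now reports the accumulated total instead.
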
Example #4
    def CreateDecoderMetrics(self):
        """Decoder metrics for WaymoOpenDataset."""
        p = self.params

        waymo_metric_p = p.ap_metric.Copy().Set(
            cls=waymo_ap_metric.WaymoAPMetrics)
        waymo_metrics = waymo_metric_p.Instantiate()
        class_names = waymo_metrics.metadata.ClassNames()

        for k, v in p.extra_ap_metrics.items():
            p.extra_ap_metrics[k] = v.Instantiate()

        waymo_metric_bev_p = waymo_metric_p.Copy()
        waymo_metric_bev_p.box_type = '2d'
        waymo_metrics_bev = waymo_metric_bev_p.Instantiate()
        # Convert the list of class names to a dictionary mapping class_id -> name.
        class_id_to_name = dict(enumerate(class_names))

        # TODO(vrv): This uses the same top down transform as for KITTI;
        # re-visit these settings since detections can happen all around
        # the car.
        top_down_transform = transform_util.MakeCarToImageTransform(
            pixels_per_meter=32.,
            image_ref_x=512.,
            image_ref_y=1408.,
            flip_axes=True)
        decoder_metrics = py_utils.NestedMap({
            'top_down_visualization':
            (detection_3d_metrics.TopDownVisualizationMetric(
                top_down_transform,
                image_height=1536,
                image_width=1024,
                class_id_to_name=class_id_to_name)),
            'num_samples_in_batch':
            metrics.AverageMetric(),
            'waymo_metrics':
            waymo_metrics,
            'waymo_metrics_bev':
            waymo_metrics_bev,
        })
        self._update_metrics_class_keys = [
            'waymo_metrics_bev', 'waymo_metrics'
        ]
        for k, v in p.extra_ap_metrics.items():
            decoder_metrics[k] = v
            self._update_metrics_class_keys.append(k)

        decoder_metrics.mesh = detection_3d_metrics.WorldViewer()
        return decoder_metrics
Example #5
  def testAverageMetric(self):
    m = metrics.AverageMetric()
    m.Update(1.0)
    m.Update(2.0, 10.0)

    self.assertEqual(1.0 + 2.0*10.0, m.total_value)
    expected_average = (1.0 + 2.0*10.0) / (1.0 + 10.0)
    self.assertEqual(expected_average, m.value)

    name = 'metric_name'
    self.assertEqual(
        tf.Summary(value=[tf.Summary.Value(tag=name,
                                           simple_value=expected_average)]),
        m.Summary(name))

    # Calling m.Summary() does not reset statistics.
    m.Update(1.0)
    self.assertEqual(1.0 + 2.0*10.0 + 1.0, m.total_value)
Example #6
    def CreateDecoderMetrics(self):
        base_metrics = {
            'num_samples_in_batch': metrics.AverageMetric(),
            'wer': metrics.AverageMetric(),  # Word error rate.
            'norm_wer': metrics.AverageMetric(),  # Normalized word error rate.
            'sacc': metrics.AverageMetric(),  # Sentence accuracy.
            'ter': metrics.AverageMetric(),  # Token error rate.
            'corpus_bleu': metrics.CorpusBleuMetric(),
            'oracle_norm_wer': metrics.AverageMetric(),
        }

        # Add any additional metrics that should be computed.
        base_metrics.update(self.CreateAdditionalDecoderMetrics())
        return base_metrics
Example #7
    def CreateMetrics(self):
        base_metrics = {
            'num_samples_in_batch': metrics.AverageMetric(),
            'norm_wer': metrics.AverageMetric(),  # Normalized word error rate.
            'corpus_bleu': metrics.CorpusBleuMetric(),
        }

        if self.params.include_auxiliary_metrics:
            base_metrics.update({
                'wer': metrics.AverageMetric(),  # Word error rate.
                'sacc': metrics.AverageMetric(),  # Sentence accuracy.
                'ter': metrics.AverageMetric(),  # Token error rate.
                'oracle_norm_wer': metrics.AverageMetric(),
            })

        return base_metrics
Example #8
    def CreateDecoderMetrics(self):
        """Decoder metrics for KITTI."""
        p = self.params

        kitti_metric_p = p.ap_metric.Copy().Set(
            cls=kitti_ap_metric.KITTIAPMetrics)
        apm = kitti_metric_p.Instantiate()
        class_names = apm.metadata.ClassNames()

        # Convert the list of class names to a dictionary mapping class_id -> name.
        class_id_to_name = dict(enumerate(class_names))

        top_down_transform = transform_util.MakeCarToImageTransform(
            pixels_per_meter=32.,
            image_ref_x=512.,
            image_ref_y=1408.,
            flip_axes=True)

        decoder_metrics = py_utils.NestedMap({
            'top_down_visualization':
            (detection_3d_metrics.TopDownVisualizationMetric(
                top_down_transform,
                image_height=1536,
                image_width=1024,
                class_id_to_name=class_id_to_name)),
            'num_samples_in_batch':
            metrics.AverageMetric(),
            'kitti_AP_v2':
            apm,
        })

        decoder_metrics.mesh = detection_3d_metrics.WorldViewer()

        if p.summarize_boxes_on_image:
            decoder_metrics.camera_visualization = (
                detection_3d_metrics.CameraVisualization(
                    bbox_score_threshold=p.visualization_classification_threshold))

        return decoder_metrics
Example #9
 def CreateDecoderMetrics(self):
   decoder_metrics = {
       'num_samples_in_batch': metrics.AverageMetric(),
       'corpus_bleu': metrics.CorpusBleuMetric(separator_type='wpm'),
   }
   return decoder_metrics
Example #10
 def CreateDecoderMetrics(self):
   return {
       'num_samples_in_batch': metrics.AverageMetric(),
   }
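
Even this minimal variant keeps 'num_samples_in_batch': the evaluation loops in Examples #3, #11, and #12 read its total_value to decide when enough samples have been processed. A rough sketch of that contract, with a made-up batch size and threshold:

# Hypothetical sketch: the eval loops key off num_samples_in_batch.total_value
# to decide when samples_per_summary examples have been processed.
metrics_dict = {'num_samples_in_batch': metrics.AverageMetric()}
samples_per_summary = 32  # Made-up threshold.
while metrics_dict['num_samples_in_batch'].total_value < samples_per_summary:
    batch_size = 8  # Made up; normally reported by the model's eval step.
    metrics_dict['num_samples_in_batch'].Update(batch_size)
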
Example #11
  def _EvalOnce(self, sess=None, path=''):
    """Eval a single checkpoint."""
    with self._cluster:
      # Attempt to restore the checkpoint
      self._checkpointer.RestoreFromPath(checkpoint_path=path)

      # Save any additional information to disk before evaluation.
      if self._eval_type == 'train':
        self._task.Export(path)

      global_step = self._model.global_step.numpy()
      if global_step < self._task.params.eval.start_eval_after:
        return

      if self._task.input.params.resettable:
        tf.logging.info('Resetting input_generator.')
        self._task.input_generator.Reset()

      metrics_dict = None
      num_samples_metric = None
      samples_per_summary = self._task.params.eval.samples_per_summary
      if samples_per_summary == 0:
        assert self._task.input.params.resettable
      while (samples_per_summary == 0 or metrics_dict is None or
             num_samples_metric.total_value < samples_per_summary):
        try:
          # Evaler calls FProp multiple times for each checkpoint. Multiple
          # summaries at the same step are often confusing.  Instead, models
          # should update eval_metrics and generate aggregate summaries. Other
          # types of summaries (images, audio etc.) will be generated for the
          # first batch only.
          eval_fn = (
              self._eval_fn_with_summary
              if metrics_dict is None else self._eval_fn)
          eval_metrics = eval_fn()

          if metrics_dict is None:
            metrics_dict = {
                name: metrics.AverageMetric() for name in eval_metrics
            }
            num_samples_metric = metrics_dict['num_samples_in_batch']

          eval_metrics = py_utils.Transform(lambda x: x.numpy(), eval_metrics)
          for name, (value, weight) in eval_metrics.items():
            metrics_dict[name].Update(value, weight)
          tf.logging.info('Total examples done: %d/%d',
                          num_samples_metric.total_value, samples_per_summary)
        except tf.errors.OutOfRangeError:
          if not self._task.input.params.resettable:
            raise
          break

      if metrics_dict is None:
        metrics_dict = {}

      # Replace average values with total values for certain metrics.
      if 'num_predictions' in metrics_dict:
        metrics_dict['num_predictions'].total_weight = 1.0
      if 'num_words' in metrics_dict:
        metrics_dict['num_words'].total_weight = 1.0

      msg = 'step:%6d' % global_step
      with self._summary_writer.as_default():
        tf.compat.v2.summary.scalar(
            'total_samples', num_samples_metric.total_value, step=global_step)
        for key, metric in sorted(metrics_dict.items()):
          msg += ' %s:%.8g' % (key, metric.value)
          tf.compat.v2.summary.scalar(key, metric.value, step=global_step)
        self._summary_writer.flush()
      self._SetStatusMessage(msg)
Example #12
  def _EvalOnce(self, path, sess):
    """Runs evaluation on a single checkpoint.

    Args:
      path: checkpoint path.
      sess: the tf Session.

    Returns:
      should_stop.
    """
    if not FLAGS.evaler_in_same_address_as_controller:
      self._LoadCheckpointForEval(sess, path)

    global_step = sess.run(self._model.global_step)
    metrics_dict = {
        name: metrics.AverageMetric() for name in self._model_task.eval_metrics
    }
    num_samples_metric = metrics_dict['num_samples_in_batch']

    while (num_samples_metric.total_value <
           self._model_task.params.eval.samples_per_summary):
      all_ops = [self._model_task.eval_metrics]
      all_keys = ['eval']
      if hasattr(self._model_task, 'last_state_group_op'):
        all_ops.append(self._model_task.last_state_group_op)
        all_keys.append('update')
      if self._summary_op is not None:
        all_ops.append(self._summary_op)
        all_keys.append('summary')

      # Run the eval metrics (plus any state-update and summary ops) in a
      # single session call; results come back in the same order as all_ops.
      ret = sess.run(all_ops)
      if 'summary' in all_keys:
        summary = ret[-1]
        self._summary_writer.add_summary(summary, global_step)
      ans = ret[0]
      for name, (value, weight) in six.iteritems(ans):
        metrics_dict[name].Update(value, weight)
      tf.logging.info('Total examples done: %d/%d',
                      num_samples_metric.total_value,
                      self._model_task.params.eval.samples_per_summary)

    # Replace average values with total values for certain metrics.
    if 'num_predictions' in metrics_dict:
      metrics_dict['num_predictions'].total_weight = 1.0
    if 'num_words' in metrics_dict:
      metrics_dict['num_words'].total_weight = 1.0

    # Once enough samples have been evaluated, generate a summary.
    self._WriteSummaries(
        self._summary_writer,
        os.path.basename(self._eval_dir),
        global_step, {k: v.Summary(k) for k, v in six.iteritems(metrics_dict)},
        text_filename=os.path.join(self._eval_dir,
                                   'score-{:08d}.txt'.format(global_step)))

    is_final = global_step >= self.params.train.max_steps
    should_stop = self._trial.ReportEvalMeasure(global_step, metrics_dict, path)
    return should_stop or is_final
Example #13
    def CreateMetrics(self):
        base_metrics = {
            'num_samples_in_batch': metrics.AverageMetric(),
            'norm_wer': metrics.AverageMetric(),  # Normalized word error rate.
            'corpus_bleu': metrics.CorpusBleuMetric(),
        }

        if self.params.include_auxiliary_metrics:
            base_metrics.update({
                # TODO(xingwu): fully replace 'wer' with 'error_rates/wer'.
                'wer': metrics.AverageMetric(),  # Word error rate.
                'error_rates/ins': metrics.AverageMetric(),  # Insertion error rate.
                'error_rates/sub': metrics.AverageMetric(),  # Substitution error rate.
                'error_rates/del': metrics.AverageMetric(),  # Deletion error rate.
                'error_rates/wer': metrics.AverageMetric(),  # Word error rate.
                # Case-insensitive counterparts of the error rates above.
                'case_insensitive_error_rates/ins': metrics.AverageMetric(),
                'case_insensitive_error_rates/sub': metrics.AverageMetric(),
                'case_insensitive_error_rates/del': metrics.AverageMetric(),
                'case_insensitive_error_rates/wer': metrics.AverageMetric(),
                'sacc': metrics.AverageMetric(),  # Sentence accuracy.
                'ter': metrics.AverageMetric(),  # Token error rate.
                'oracle_norm_wer': metrics.AverageMetric(),
                'oracle/ins': metrics.AverageMetric(),
                'oracle/sub': metrics.AverageMetric(),
                'oracle/del': metrics.AverageMetric(),
            })

        return base_metrics