def CreateDecoderMetrics(self):
  base_metrics = {
      'wer': metrics.AverageMetric(),  # Word error rate.
      'cer': metrics.AverageMetric(),  # Character error rate.
      'num_samples_in_batch': metrics.AverageMetric(),
  }
  return base_metrics
def CreateDecoderMetrics(self): """Decoder metrics for WaymoOpenDataset.""" p = self.params waymo_metric_p = p.ap_metric.Copy().Set( cls=waymo_ap_metric.WaymoAPMetrics) waymo_metrics = waymo_metric_p.Instantiate() class_names = waymo_metrics.metadata.ClassNames() # TODO(bencaine,vrv): There's some code smell with this ap_metrics params # usage. We create local copies of the params to then instantiate them. # Failing to do this risks users editing the params after construction of # the object, making each object method call have the potential for side # effects. # Create a new dictionary with copies of the params converted to objects # so we can then add these to the decoder metrics. extra_ap_metrics = {} for k, metric_p in p.extra_ap_metrics.items(): extra_ap_metrics[k] = metric_p.Instantiate() waymo_metric_bev_p = waymo_metric_p.Copy() waymo_metric_bev_p.box_type = '2d' waymo_metrics_bev = waymo_metric_bev_p.Instantiate() # Convert the list of class names to a dictionary mapping class_id -> name. class_id_to_name = dict(enumerate(class_names)) # TODO(vrv): This uses the same top down transform as for KITTI; # re-visit these settings since detections can happen all around # the car. top_down_transform = transform_util.MakeCarToImageTransform( pixels_per_meter=32., image_ref_x=512., image_ref_y=1408., flip_axes=True) decoder_metrics = py_utils.NestedMap({ 'top_down_visualization': (detection_3d_metrics.TopDownVisualizationMetric( top_down_transform, image_height=1536, image_width=1024, class_id_to_name=class_id_to_name)), 'num_samples_in_batch': metrics.AverageMetric(), 'waymo_metrics': waymo_metrics, 'waymo_metrics_bev': waymo_metrics_bev, }) self._update_metrics_class_keys = [ 'waymo_metrics_bev', 'waymo_metrics' ] for k, metric in extra_ap_metrics.items(): decoder_metrics[k] = metric self._update_metrics_class_keys.append(k) decoder_metrics.mesh = detection_3d_metrics.WorldViewer() return decoder_metrics
def _EvalOnce(self, path, sess): """Runs evaluation for a batch of samples. Args: path: checkpoint path. sess: the tf Session. Returns: should_stop. """ if not FLAGS.evaler_in_same_address_as_controller: self._LoadCheckpointForEval(sess, path) global_step = sess.run(self._model.global_step) metrics_dict = { name: metrics.AverageMetric() for name in self._model_task.eval_metrics } num_samples_metric = metrics_dict['num_samples_in_batch'] while (num_samples_metric.total_value < self._model_task.params.eval.samples_per_summary): if self._summary_op is None: # No summaries were collected. ans = sess.run(self._model_task.eval_metrics) else: ans, summary = sess.run( [self._model_task.eval_metrics, self._summary_op]) self._summary_writer.add_summary(summary, global_step) for name, (value, weight) in six.iteritems(ans): metrics_dict[name].Update(value, weight) tf.logging.info('Total examples done: %d/%d', num_samples_metric.total_value, self._model_task.params.eval.samples_per_summary) # Replace average values with total values for certain metrics. if 'num_predictions' in metrics_dict: metrics_dict['num_predictions'].total_weight = 1.0 if 'num_words' in metrics_dict: metrics_dict['num_words'].total_weight = 1.0 # When we have evaluated so many samples, generate a summary. self._WriteSummaries( self._summary_writer, os.path.basename(self._eval_dir), global_step, {k: v.Summary(k) for k, v in six.iteritems(metrics_dict)}, text_filename=os.path.join(self._eval_dir, 'score-{:08d}.txt'.format(global_step))) is_final = global_step >= self.params.train.max_steps should_stop = self._trial.ReportEvalMeasure(global_step, metrics_dict, path) return should_stop or is_final
def CreateDecoderMetrics(self): """Decoder metrics for WaymoOpenDataset.""" p = self.params waymo_metric_p = p.ap_metric.Copy().Set( cls=waymo_ap_metric.WaymoAPMetrics) waymo_metrics = waymo_metric_p.Instantiate() class_names = waymo_metrics.metadata.ClassNames() for k, v in p.extra_ap_metrics.items(): p.extra_ap_metrics[k] = v.Instantiate() waymo_metric_bev_p = waymo_metric_p.Copy() waymo_metric_bev_p.box_type = '2d' waymo_metrics_bev = waymo_metric_bev_p.Instantiate() # Convert the list of class names to a dictionary mapping class_id -> name. class_id_to_name = dict(enumerate(class_names)) # TODO(vrv): This uses the same top down transform as for KITTI; # re-visit these settings since detections can happen all around # the car. top_down_transform = transform_util.MakeCarToImageTransform( pixels_per_meter=32., image_ref_x=512., image_ref_y=1408., flip_axes=True) decoder_metrics = py_utils.NestedMap({ 'top_down_visualization': (detection_3d_metrics.TopDownVisualizationMetric( top_down_transform, image_height=1536, image_width=1024, class_id_to_name=class_id_to_name)), 'num_samples_in_batch': metrics.AverageMetric(), 'waymo_metrics': waymo_metrics, 'waymo_metrics_bev': waymo_metrics_bev, }) self._update_metrics_class_keys = [ 'waymo_metrics_bev', 'waymo_metrics' ] for k, v in p.extra_ap_metrics.items(): decoder_metrics[k] = v self._update_metrics_class_keys.append(k) decoder_metrics.mesh = detection_3d_metrics.WorldViewer() return decoder_metrics
def testAverageMetric(self):
  m = metrics.AverageMetric()
  m.Update(1.0)
  m.Update(2.0, 10.0)

  self.assertEqual(1.0 + 2.0 * 10.0, m.total_value)
  expected_average = (1.0 + 2.0 * 10.0) / (1.0 + 10.0)
  self.assertEqual(expected_average, m.value)

  name = 'metric_name'
  self.assertEqual(
      tf.Summary(
          value=[tf.Summary.Value(tag=name, simple_value=expected_average)]),
      m.Summary(name))

  # Calling m.Summary() does not reset statistics.
  m.Update(1.0)
  self.assertEqual(1.0 + 2.0 * 10.0 + 1.0, m.total_value)
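# A minimal sketch of the weighted-average arithmetic exercised by the test
# above, assuming lingvo's metrics module is importable as below:
# Update(value, weight) adds value * weight to total_value and weight to
# total_weight (weight defaults to 1.0), and .value is their ratio.
from lingvo.core import metrics

m = metrics.AverageMetric()
m.Update(1.0)        # total_value = 1.0,  total_weight = 1.0
m.Update(2.0, 10.0)  # total_value = 21.0, total_weight = 11.0
assert abs(m.value - 21.0 / 11.0) < 1e-9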
def CreateDecoderMetrics(self):
  base_metrics = {
      'num_samples_in_batch': metrics.AverageMetric(),
      'wer': metrics.AverageMetric(),  # Word error rate.
      'norm_wer': metrics.AverageMetric(),  # Normalized word error rate.
      'sacc': metrics.AverageMetric(),  # Sentence accuracy.
      'ter': metrics.AverageMetric(),  # Token error rate.
      'corpus_bleu': metrics.CorpusBleuMetric(),
      'oracle_norm_wer': metrics.AverageMetric(),
  }

  # Add any additional metrics that should be computed.
  base_metrics.update(self.CreateAdditionalDecoderMetrics())
  return base_metrics
def CreateMetrics(self):
  base_metrics = {
      'num_samples_in_batch': metrics.AverageMetric(),
      'norm_wer': metrics.AverageMetric(),  # Normalized word error rate.
      'corpus_bleu': metrics.CorpusBleuMetric(),
  }

  if self.params.include_auxiliary_metrics:
    base_metrics.update({
        'wer': metrics.AverageMetric(),  # Word error rate.
        'sacc': metrics.AverageMetric(),  # Sentence accuracy.
        'ter': metrics.AverageMetric(),  # Token error rate.
        'oracle_norm_wer': metrics.AverageMetric(),
    })

  return base_metrics
def CreateDecoderMetrics(self): """Decoder metrics for KITTI.""" p = self.params kitti_metric_p = p.ap_metric.Copy().Set( cls=kitti_ap_metric.KITTIAPMetrics) apm = kitti_metric_p.Instantiate() class_names = apm.metadata.ClassNames() # Convert the list of class names to a dictionary mapping class_id -> name. class_id_to_name = dict(enumerate(class_names)) top_down_transform = transform_util.MakeCarToImageTransform( pixels_per_meter=32., image_ref_x=512., image_ref_y=1408., flip_axes=True) decoder_metrics = py_utils.NestedMap({ 'top_down_visualization': (detection_3d_metrics.TopDownVisualizationMetric( top_down_transform, image_height=1536, image_width=1024, class_id_to_name=class_id_to_name)), 'num_samples_in_batch': metrics.AverageMetric(), 'kitti_AP_v2': apm, }) decoder_metrics.mesh = detection_3d_metrics.WorldViewer() if p.summarize_boxes_on_image: decoder_metrics.camera_visualization = ( detection_3d_metrics.CameraVisualization( bbox_score_threshold=p. visualization_classification_threshold)) return decoder_metrics
def CreateDecoderMetrics(self):
  decoder_metrics = {
      'num_samples_in_batch': metrics.AverageMetric(),
      'corpus_bleu': metrics.CorpusBleuMetric(separator_type='wpm'),
  }
  return decoder_metrics
def CreateDecoderMetrics(self):
  return {
      'num_samples_in_batch': metrics.AverageMetric(),
  }
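# A hedged sketch of how a decoder metrics dictionary like the one above is
# typically consumed: per decoded batch, a post-processing step updates each
# metric, so 'num_samples_in_batch' accumulates the number of decoded samples.
# The helper name and the 'utt_id' output key below are hypothetical,
# introduced only for illustration.
def _UpdateDecoderMetrics(dec_out_dict, dec_metrics_dict):
  """Hypothetical per-batch update of the decoder metrics."""
  batch_size = len(dec_out_dict['utt_id'])  # hypothetical decode-output key
  dec_metrics_dict['num_samples_in_batch'].Update(batch_size)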
def _EvalOnce(self, sess=None, path=''):
  """Eval a single checkpoint."""
  with self._cluster:
    # Attempt to restore the checkpoint.
    self._checkpointer.RestoreFromPath(checkpoint_path=path)

    # Save any additional information to disk before evaluation.
    if self._eval_type == 'train':
      self._task.Export(path)

    global_step = self._model.global_step.numpy()
    if global_step < self._task.params.eval.start_eval_after:
      return

    if self._task.input.params.resettable:
      tf.logging.info('Resetting input_generator.')
      self._task.input_generator.Reset()

    metrics_dict = None
    num_samples_metric = None
    samples_per_summary = self._task.params.eval.samples_per_summary
    if samples_per_summary == 0:
      assert self._task.input.params.resettable
    while (samples_per_summary == 0 or metrics_dict is None or
           num_samples_metric.total_value < samples_per_summary):
      try:
        # Evaler calls FProp multiple times for each checkpoint. Multiple
        # summaries at the same step is often confusing. Instead, models
        # should update eval_metrics and generate aggregate summaries. Other
        # types of summaries (images, audio etc.) will be generated for the
        # first batch only.
        eval_fn = (
            self._eval_fn_with_summary
            if metrics_dict is None else self._eval_fn)
        eval_metrics = eval_fn()

        if metrics_dict is None:
          metrics_dict = {
              name: metrics.AverageMetric() for name in eval_metrics
          }
          num_samples_metric = metrics_dict['num_samples_in_batch']

        eval_metrics = py_utils.Transform(lambda x: x.numpy(), eval_metrics)
        for name, (value, weight) in eval_metrics.items():
          metrics_dict[name].Update(value, weight)
        tf.logging.info('Total examples done: %d/%d',
                        num_samples_metric.total_value, samples_per_summary)
      except tf.errors.OutOfRangeError:
        if not self._task.input.params.resettable:
          raise
        break

    if metrics_dict is None:
      metrics_dict = {}

    # Replace average values with total values for certain metrics.
    if 'num_predictions' in metrics_dict:
      metrics_dict['num_predictions'].total_weight = 1.0
    if 'num_words' in metrics_dict:
      metrics_dict['num_words'].total_weight = 1.0

    msg = 'step:%6d' % global_step
    with self._summary_writer.as_default():
      tf.compat.v2.summary.scalar(
          'total_samples', num_samples_metric.total_value, step=global_step)
      for key, metric in sorted(metrics_dict.items()):
        msg += ' %s:%.8g' % (key, metric.value)
        tf.compat.v2.summary.scalar(key, metric.value, step=global_step)
    self._summary_writer.flush()
    self._SetStatusMessage(msg)
def _EvalOnce(self, path, sess): """Runs evaluation for a batch of samples. Args: path: checkpoint path. sess: the tf Session. Returns: should_stop. """ if not FLAGS.evaler_in_same_address_as_controller: self._LoadCheckpointForEval(sess, path) global_step = sess.run(self._model.global_step) metrics_dict = { name: metrics.AverageMetric() for name in self._model_task.eval_metrics } num_samples_metric = metrics_dict['num_samples_in_batch'] #from tensorflow.python import debug as tf_debug #sess = tf_debug.LocalCLIDebugWrapperSession(sess) while (num_samples_metric.total_value < self._model_task.params.eval.samples_per_summary): all_ops = [self._model_task.eval_metrics] all_keys = ['eval'] if hasattr(self._model_task, 'last_state_group_op'): all_ops.append(self._model_task.last_state_group_op) all_keys.append('update') if self._summary_op is not None: all_ops.append(self._summary_op) all_keys.append('summary') ret = sess.run(all_ops) # # No summaries were collected. # ans = sess.run([self._model_task.eval_metrics) #else: # ans, summary = sess.run( # [self._model_task.eval_metrics, self._summary_op]) # self._summary_writer.add_summary(summary, global_step) if 'summary' in all_keys: summary = ret[-1] self._summary_writer.add_summary(summary, global_step) ans = ret[0] for name, (value, weight) in six.iteritems(ans): metrics_dict[name].Update(value, weight) tf.logging.info('Total examples done: %d/%d', num_samples_metric.total_value, self._model_task.params.eval.samples_per_summary) # Replace average values with total values for certain metrics. if 'num_predictions' in metrics_dict: metrics_dict['num_predictions'].total_weight = 1.0 if 'num_words' in metrics_dict: metrics_dict['num_words'].total_weight = 1.0 # When we have evaluated so many samples, generate a summary. self._WriteSummaries( self._summary_writer, os.path.basename(self._eval_dir), global_step, {k: v.Summary(k) for k, v in six.iteritems(metrics_dict)}, text_filename=os.path.join(self._eval_dir, 'score-{:08d}.txt'.format(global_step))) is_final = global_step >= self.params.train.max_steps should_stop = self._trial.ReportEvalMeasure(global_step, metrics_dict, path) return should_stop or is_final
def CreateMetrics(self):
  base_metrics = {
      'num_samples_in_batch': metrics.AverageMetric(),
      'norm_wer': metrics.AverageMetric(),  # Normalized word error rate.
      'corpus_bleu': metrics.CorpusBleuMetric(),
  }

  if self.params.include_auxiliary_metrics:
    base_metrics.update({
        # TODO(xingwu): fully replace 'wer' with 'error_rates/wer'.
        'wer': metrics.AverageMetric(),  # Word error rate.
        'error_rates/ins': metrics.AverageMetric(),  # Insertion error rate.
        'error_rates/sub': metrics.AverageMetric(),  # Substitution error rate.
        'error_rates/del': metrics.AverageMetric(),  # Deletion error rate.
        'error_rates/wer': metrics.AverageMetric(),  # Word error rate.
        'case_insensitive_error_rates/ins':
            metrics.AverageMetric(),  # Case-insensitive insertion error rate.
        'case_insensitive_error_rates/sub':
            metrics.AverageMetric(),  # Case-insensitive substitution error rate.
        'case_insensitive_error_rates/del':
            metrics.AverageMetric(),  # Case-insensitive deletion error rate.
        'case_insensitive_error_rates/wer':
            metrics.AverageMetric(),  # Case-insensitive word error rate.
        'sacc': metrics.AverageMetric(),  # Sentence accuracy.
        'ter': metrics.AverageMetric(),  # Token error rate.
        'oracle_norm_wer': metrics.AverageMetric(),
        'oracle/ins': metrics.AverageMetric(),
        'oracle/sub': metrics.AverageMetric(),
        'oracle/del': metrics.AverageMetric(),
    })

  return base_metrics