Example #1
 def _report_benchmark(self, summary, start_time_sec, wall_time_sec):
     metrics = [{
         'name': 'train_loss',
         'value': summary['train_loss'],
     }, {
         'name': 'example_per_second',
         'value': self.timer_callback.get_examples_per_sec(
             FLAGS.train_batch_size * FLAGS.steps_per_loop)
     }, {
         'name': 'startup_time',
         'value': self.timer_callback.get_startup_time(start_time_sec)
     }, {
         'name': 'masked_lm_accuracy',
         'value': summary['masked_lm_accuracy'],
         'min_value': MIN_MLM_ACCURACY,
         'max_value': MAX_MLM_ACCURACY,
     }, {
         'name': 'next_sentence_accuracy',
         'value': summary['next_sentence_accuracy'],
         'min_value': MIN_NSP_ACCURACY,
         'max_value': MAX_NSP_ACCURACY,
     }]
     self.report_benchmark(
         iters=summary['total_training_steps'],
         wall_time=wall_time_sec,
         metrics=metrics,
         extras={'flags': flags_core.get_nondefault_flags_as_str()})
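The metrics list above follows the schema accepted by report_benchmark, which in these examples appears to be the standard tf.test.Benchmark.report_benchmark: each metric is a dict with a 'name' and a 'value', plus optional 'min_value'/'max_value' bounds used as pass/fail limits by the reporting harness. Below is a minimal, self-contained sketch of such a call; the class name, metric values and thresholds are hypothetical.

import tensorflow as tf


class MetricsSchemaSketch(tf.test.Benchmark):
  """Minimal sketch of the metrics schema used throughout these examples."""

  def run_and_report(self):
    metrics = [
        # A plain metric: just a name and a measured value.
        {'name': 'train_loss', 'value': 1.23},
        # A bounded metric: min_value/max_value act as correctness limits.
        {'name': 'masked_lm_accuracy', 'value': 0.41,
         'min_value': 0.40, 'max_value': 0.45},
    ]
    self.report_benchmark(
        iters=1000,
        wall_time=60.0,
        metrics=metrics,
        extras={'flags': '--train_batch_size=32'})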
Example #2
    def _report_benchmark(self,
                          stats,
                          wall_time_sec,
                          max_value=None,
                          min_value=None):
        """Report benchmark results by writing to local protobuf file.

    Args:
      stats: dict returned from keras models with known entries.
      wall_time_sec: the duration of the benchmark execution in seconds.
      max_value: highest passing level.
      min_value: lowest passing level.
    """

        metrics = []
        metrics.append({
            'name': 'training_loss',
            'value': stats['training_loss'],
            'min_value': min_value,
            'max_value': max_value
        })
        # These metrics are placeholders to avoid PerfZero failure.
        metrics.append({
            'name': 'exp_per_second',
            'value': 0.0,
        })
        metrics.append({
            'name': 'startup_time',
            'value': 9999.,
        })
        flags_str = flags_core.get_nondefault_flags_as_str()
        self.report_benchmark(iters=-1,
                              wall_time=wall_time_sec,
                              metrics=metrics,
                              extras={'flags': flags_str})
Example #3
    def _report_benchmark(self, stats, wall_time_sec, min_accuracy,
                          max_accuracy):
        """Report benchmark results by writing to local protobuf file.

    Args:
      stats: dict returned from BERT models with known entries.
      wall_time_sec: the duration of the benchmark execution in seconds.
      min_accuracy: Minimum classification accuracy constraint to verify
        correctness of the model.
      max_accuracy: Maximum classification accuracy constraint to verify
        correctness of the model.
    """
        metrics = [{
            'name': 'training_loss',
            'value': stats['train_loss'],
        }, {
            'name': 'exp_per_second',
            'value': self.timer_callback.get_examples_per_sec(
                FLAGS.train_batch_size)
        }]

        if 'eval_metrics' in stats:
            metrics.append({
                'name': 'eval_accuracy',
                'value': stats['eval_metrics'],
                'min_value': min_accuracy,
                'max_value': max_accuracy,
            })
        flags_str = flags_core.get_nondefault_flags_as_str()
        self.report_benchmark(iters=stats['total_training_steps'],
                              wall_time=wall_time_sec,
                              metrics=metrics,
                              extras={'flags': flags_str})
Example #4
  def _report_benchmark(self,
                        stats,
                        wall_time_sec,
                        top_1_max=None,
                        top_1_min=None,
                        total_batch_size=None,
                        log_steps=None,
                        warmup=1):
    """Report benchmark results by writing to local protobuf file.

    Args:
      stats: dict returned from keras models with known entries.
      wall_time_sec: the duration of the benchmark execution in seconds.
      top_1_max: highest passing level for top_1 accuracy.
      top_1_min: lowest passing level for top_1 accuracy.
      total_batch_size: Global batch-size.
      log_steps: How often the log was created for stats['step_timestamp_log'].
      warmup: number of entries in stats['step_timestamp_log'] to ignore.
    """

    metrics = []
    if 'eval_acc' in stats:
      metrics.append({
          'name': 'accuracy_top_1',
          'value': stats['eval_acc'],
          'min_value': top_1_min,
          'max_value': top_1_max
      })
      metrics.append({'name': 'eval_loss', 'value': stats['eval_loss']})

      metrics.append({
          'name': 'top_1_train_accuracy',
          'value': stats['train_acc']
      })
      metrics.append({'name': 'train_loss', 'value': stats['train_loss']})

    if (warmup and 'step_timestamp_log' in stats and
        len(stats['step_timestamp_log']) > warmup):
      # The first entry in the time_log is the start of step 1. The rest of
      # the entries are the end of each step recorded.
      time_log = stats['step_timestamp_log']
      elapsed = time_log[-1].timestamp - time_log[warmup].timestamp
      num_examples = (
          total_batch_size * log_steps * (len(time_log) - warmup - 1))
      examples_per_sec = num_examples / elapsed
      metrics.append({'name': 'exp_per_second', 'value': examples_per_sec})

    if 'avg_exp_per_second' in stats:
      metrics.append({
          'name': 'avg_exp_per_second',
          'value': stats['avg_exp_per_second']
      })

    flags_str = flags_core.get_nondefault_flags_as_str()
    self.report_benchmark(
        iters=-1,
        wall_time=wall_time_sec,
        metrics=metrics,
        extras={'flags': flags_str})
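The exp_per_second computation in the block above can be traced with a small stand-alone sketch. BatchTimestamp below is a hypothetical stand-in for the records stored in stats['step_timestamp_log']; only a .timestamp attribute is needed for the calculation.

import collections

# Hypothetical stand-in for an entry of stats['step_timestamp_log'].
BatchTimestamp = collections.namedtuple('BatchTimestamp',
                                        ['batch_index', 'timestamp'])


def examples_per_sec(time_log, total_batch_size, log_steps, warmup=1):
  """Examples/sec over the logged window, skipping the first `warmup` entries.

  The first entry marks the start of step 1; each later entry marks the end of
  a block of `log_steps` steps, so the elapsed time covers
  (len(time_log) - warmup - 1) blocks of total_batch_size * log_steps examples.
  """
  elapsed = time_log[-1].timestamp - time_log[warmup].timestamp
  num_examples = total_batch_size * log_steps * (len(time_log) - warmup - 1)
  return num_examples / elapsed


# Four log entries at 0s, 10s, 20s and 30s with batch size 128 and log_steps=100:
# two full blocks of 12800 examples over 20 seconds -> 1280 examples/sec.
log = [BatchTimestamp(i * 100, float(t)) for i, t in enumerate([0, 10, 20, 30])]
print(examples_per_sec(log, total_batch_size=128, log_steps=100))  # 1280.0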
Example #5
    def _run_and_report_benchmark(self,
                                  top_1_train_min=0.923,
                                  top_1_train_max=0.93,
                                  warmup=1,
                                  log_steps=100):
        """Report benchmark results by writing to local protobuf file.

    Average epoch time is calculated by skipping the first epoch. The average
    ignores time spent between epochs, since it is computed from the begin and
    end timestamps recorded for each epoch. To skip the accuracy check, set
    `top_1_train_min=None`.

    Args:
      top_1_train_min: lowest passing value.
      top_1_train_max: highest passing value.
      warmup: number of entries in `timestamp_log` to ignore.
      log_steps: How often the log was created for `timestamp_log`.
    """
        total_batch_size = FLAGS.batch_size
        metrics = []
        start_time_sec = time.time()
        stats = shakespeare_main.run(FLAGS)
        wall_time_sec = time.time() - start_time_sec

        if top_1_train_min:
            metrics.append({
                'name': 'accuracy_top_1_train',
                'value': stats['history']['RecallAt1'][-1],
                'min_value': top_1_train_min,
                'max_value': top_1_train_max
            })

        # Look for the time history callback which was used during keras.fit
        for callback in stats['callbacks']:
            if isinstance(callback, keras_utils.TimeHistory):
                epoch_timings = callback.epoch_runtime_log
                average_time = sum(epoch_timings[1:]) / len(epoch_timings[1:])
                metrics.append({
                    'name': 'avg_epoch_time',
                    'value': average_time
                })

                # First entry in timestamp_log is the start of step 1. The rest
                # of the entries are the end of each step recorded.
                time_log = callback.timestamp_log
                elapsed = time_log[-1].timestamp - time_log[warmup].timestamp
                num_examples = (total_batch_size * log_steps *
                                (len(time_log) - warmup - 1))
                examples_per_sec = num_examples / elapsed
                metrics.append({
                    'name': 'exp_per_second',
                    'value': examples_per_sec
                })

        flags_str = flags_core.get_nondefault_flags_as_str()
        self.report_benchmark(iters=-1,
                              wall_time=wall_time_sec,
                              metrics=metrics,
                              extras={'flags': flags_str})
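The avg_epoch_time metric above drops the first epoch, which typically includes one-off costs such as tracing and compilation. A short sketch with hypothetical per-epoch runtimes:

# Hypothetical per-epoch runtimes in seconds; the first epoch is excluded.
epoch_timings = [42.0, 30.5, 29.8, 30.1]
avg_epoch_time = sum(epoch_timings[1:]) / len(epoch_timings[1:])
print(avg_epoch_time)  # roughly 30.13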
Example #6
  def _run_and_report_benchmark(self,
                                bleu_max=None,
                                bleu_min=None,
                                log_steps=None,
                                total_batch_size=None,
                                warmup=1):
    """Report benchmark results by writing to local protobuf file.

    Args:
      bleu_max: highest passing level for bleu score.
      bleu_min: lowest passing level for bleu score.
      log_steps: How often the log was created for stats['step_timestamp_log'].
      total_batch_size: Global batch-size.
      warmup: number of entries in stats['step_timestamp_log'] to ignore.
    """
    start_time_sec = time.time()
    task = transformer_main.TransformerTask(FLAGS)
    stats = task.train()
    wall_time_sec = time.time() - start_time_sec

    metrics = []
    if 'bleu_uncased' in stats:
      if 'bleu_uncased_history' in stats:
        bleu_uncased_best = max(stats['bleu_uncased_history'],
                                key=lambda x: x[1])
        metrics.append({'name': 'bleu_uncased',
                        'value': bleu_uncased_best[1],
                        'min_value': bleu_min,
                        'max_value': bleu_max})
        metrics.append({'name': 'bleu_best_score_iteration',
                        'value': bleu_uncased_best[0]})
        metrics.append({'name': 'bleu_uncased_last',
                        'value': stats['bleu_uncased']})
      else:
        metrics.append({'name': 'bleu_uncased',
                        'value': stats['bleu_uncased'],
                        'min_value': bleu_min,
                        'max_value': bleu_max})

    if (warmup and 'step_timestamp_log' in stats and
        len(stats['step_timestamp_log']) > warmup):
      # The first entry in the time_log is the start of step 1. The rest of
      # the entries are the end of each step recorded.
      time_log = stats['step_timestamp_log']
      elapsed = time_log[-1].timestamp - time_log[warmup].timestamp
      num_examples = (
          total_batch_size * log_steps * (len(time_log) - warmup - 1))
      examples_per_sec = num_examples / elapsed
      metrics.append({'name': 'exp_per_second',
                      'value': examples_per_sec})

    if 'avg_exp_per_second' in stats:
      metrics.append({'name': 'avg_exp_per_second',
                      'value': stats['avg_exp_per_second']})

    flags_str = flags_core.get_nondefault_flags_as_str()
    self.report_benchmark(iters=-1, wall_time=wall_time_sec, metrics=metrics,
                          extras={'flags': flags_str})
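The BLEU bookkeeping above reports the score at the final checkpoint as well as the best score seen during training and the iteration at which it occurred. A minimal sketch with a hypothetical (iteration, uncased BLEU) history:

# Hypothetical (iteration, uncased BLEU) pairs collected during training.
bleu_uncased_history = [(1000, 24.1), (2000, 26.3), (3000, 25.9)]

# Best entry by score (the second tuple element), mirroring the
# max(..., key=...) call in the example above.
best_iteration, best_score = max(bleu_uncased_history, key=lambda x: x[1])
print(best_iteration, best_score)  # 2000 26.3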
Example #7
    def _run_and_report_benchmark(self, summary_path: str):
        """Runs and reports the benchmark given the provided configuration."""
        distribution = distribution_utils.get_distribution_strategy(
            distribution_strategy='tpu', tpu_address=FLAGS.tpu)
        logging.info('Flags: %s', flags_core.get_nondefault_flags_as_str())
        start_time_sec = time.time()
        run_pretraining.run_bert_pretrain(strategy=distribution,
                                          custom_callbacks=self.timer_callback)
        wall_time_sec = time.time() - start_time_sec

        with tf.io.gfile.GFile(summary_path, 'rb') as reader:
            summary = json.loads(reader.read().decode('utf-8'))
        self._report_benchmark(summary, start_time_sec, wall_time_sec)
Example #8
    def _report_benchmark(self, stats, start_time_sec, wall_time_sec, min_ap,
                          max_ap, warmup):
        """Report benchmark results by writing to local protobuf file.

    Args:
      stats: dict returned from Detection models with known entries.
      start_time_sec: the start of the benchmark execution in seconds
      wall_time_sec: the duration of the benchmark execution in seconds
      min_ap: Minimum detection AP constraint to verify correctness of the
        model.
      max_ap: Maximum detection AP accuracy constraint to verify correctness of
        the model.
      warmup: Number of time log entries to ignore when computing examples/sec.
    """
        metrics = [{
            'name': 'total_loss',
            'value': stats['total_loss'],
        }]
        if self.timer_callback:
            metrics.append({
                'name': 'exp_per_second',
                'value': self.timer_callback.get_examples_per_sec(warmup)
            })
            metrics.append({
                'name': 'startup_time',
                'value': self.timer_callback.get_startup_time(start_time_sec)
            })
        else:
            metrics.append({
                'name': 'exp_per_second',
                'value': 0.0,
            })

        if 'eval_metrics' in stats:
            metrics.append({
                'name': 'AP',
                'value': stats['AP'],
                'min_value': min_ap,
                'max_value': max_ap,
            })
        flags_str = flags_core.get_nondefault_flags_as_str()
        self.report_benchmark(iters=stats['total_steps'],
                              wall_time=wall_time_sec,
                              metrics=metrics,
                              extras={'flags': flags_str})
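Several of these examples call self.timer_callback.get_examples_per_sec(...) and get_startup_time(...) without showing the callback itself; it is defined elsewhere in the benchmark code. The sketch below is a hypothetical, simplified Keras callback that could back those two calls; the class name, constructor, and method internals are assumptions rather than the actual implementation, and the real get_examples_per_sec is called with different arguments in different examples.

import time

import tensorflow as tf


class TimerCallbackSketch(tf.keras.callbacks.Callback):
  """Hypothetical sketch only; not the benchmark's real timer callback."""

  def __init__(self):
    super().__init__()
    self.batch_start_times = []
    self.batch_end_times = []

  def on_train_batch_begin(self, batch, logs=None):
    self.batch_start_times.append(time.time())

  def on_train_batch_end(self, batch, logs=None):
    self.batch_end_times.append(time.time())

  def get_examples_per_sec(self, examples_per_recorded_step):
    """Average examples/sec across all recorded steps (assumed semantics)."""
    elapsed = self.batch_end_times[-1] - self.batch_start_times[0]
    return examples_per_recorded_step * len(self.batch_end_times) / elapsed

  def get_startup_time(self, program_start_time_sec):
    """Seconds from program start until the first training batch began."""
    return self.batch_start_times[0] - program_start_time_sec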
Example #9
    def _report_benchmark(self,
                          stats,
                          wall_time_sec,
                          min_ap,
                          max_ap,
                          train_batch_size=None):
        """Report benchmark results by writing to local protobuf file.

    Args:
      stats: dict returned from Detection models with known entries.
      wall_time_sec: the duration of the benchmark execution in seconds.
      min_ap: Minimum detection AP constraint to verify correctness of the
        model.
      max_ap: Maximum detection AP accuracy constraint to verify correctness of
        the model.
      train_batch_size: Train batch size. It is needed for computing
        exp_per_second.
    """
        metrics = [{
            'name': 'total_loss',
            'value': stats['total_loss'],
        }]
        if self.timer_callback:
            metrics.append({
                'name': 'exp_per_second',
                'value': self.timer_callback.get_examples_per_sec(
                    FLAGS.train_batch_size)
            })
        else:
            metrics.append({
                'name': 'exp_per_second',
                'value': 0.0,
            })

        if 'eval_metrics' in stats:
            metrics.append({
                'name': 'AP',
                'value': stats['AP'],
                'min_value': min_ap,
                'max_value': max_ap,
            })
        flags_str = flags_core.get_nondefault_flags_as_str()
        self.report_benchmark(iters=stats['total_steps'],
                              wall_time=wall_time_sec,
                              metrics=metrics,
                              extras={'flags': flags_str})
Example #10
    def _report_benchmark(self,
                          stats,
                          wall_time_sec,
                          top_1_max=None,
                          top_1_min=None):
        """Report benchmark results by writing to local protobuf file.

    Args:
      stats: dict returned from estimator models with known entries.
      wall_time_sec: the duration of the benchmark execution in seconds.
      top_1_max: highest passing level for top_1 accuracy.
      top_1_min: lowest passing level for top_1 accuracy.
    """

        examples_per_sec_hook = None
        for hook in stats['train_hooks']:
            if isinstance(hook, hooks.ExamplesPerSecondHook):
                examples_per_sec_hook = hook
                break

        eval_results = stats['eval_results']
        metrics = []
        if 'accuracy' in eval_results:
            metrics.append({
                'name': 'accuracy_top_1',
                'value': eval_results['accuracy'].item(),
                'min_value': top_1_min,
                'max_value': top_1_max
            })
        if 'accuracy_top_5' in eval_results:
            metrics.append({
                'name': 'accuracy_top_5',
                'value': eval_results['accuracy_top_5'].item()
            })

        if examples_per_sec_hook:
            exp_per_second_list = examples_per_sec_hook.current_examples_per_sec_list
            # ExamplesPerSecondHook skips the first 10 steps.
            exp_per_sec = sum(exp_per_second_list) / (len(exp_per_second_list))
            metrics.append({'name': 'exp_per_second', 'value': exp_per_sec})
        flags_str = flags_core.get_nondefault_flags_as_str()
        self.report_benchmark(iters=eval_results.get('global_step', None),
                              wall_time=wall_time_sec,
                              metrics=metrics,
                              extras={'flags': flags_str})
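The example above calls .item() on the eval values because Estimator eval results are NumPy scalars; .item() converts them to plain Python numbers so they serialize cleanly in the report. A tiny illustration with a hypothetical eval dict:

import numpy as np

# Hypothetical Estimator-style eval results: values are NumPy scalars.
eval_results = {'accuracy': np.float32(0.7612), 'global_step': np.int64(90000)}

top_1 = eval_results['accuracy'].item()
print(type(top_1).__name__, top_1)  # prints the plain Python type and value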
Example #11
    def test_get_nondefault_flags_as_str(self):
        defaults = dict(clean=True,
                        data_dir="abc",
                        hooks=["LoggingTensorHook"],
                        stop_threshold=1.5,
                        use_synthetic_data=False)
        flags_core.set_defaults(**defaults)
        flags_core.parse_flags()

        expected_flags = ""
        self.assertEqual(flags_core.get_nondefault_flags_as_str(),
                         expected_flags)

        flags.FLAGS.clean = False
        expected_flags += "--noclean"
        self.assertEqual(flags_core.get_nondefault_flags_as_str(),
                         expected_flags)

        flags.FLAGS.data_dir = "xyz"
        expected_flags += " --data_dir=xyz"
        self.assertEqual(flags_core.get_nondefault_flags_as_str(),
                         expected_flags)

        flags.FLAGS.hooks = ["aaa", "bbb", "ccc"]
        expected_flags += " --hooks=aaa,bbb,ccc"
        self.assertEqual(flags_core.get_nondefault_flags_as_str(),
                         expected_flags)

        flags.FLAGS.stop_threshold = 3.
        expected_flags += " --stop_threshold=3.0"
        self.assertEqual(flags_core.get_nondefault_flags_as_str(),
                         expected_flags)

        flags.FLAGS.use_synthetic_data = True
        expected_flags += " --use_synthetic_data"
        self.assertEqual(flags_core.get_nondefault_flags_as_str(),
                         expected_flags)

        # Assert that explicitly setting a flag to its default value does not
        # cause it to appear in the string.
        flags.FLAGS.use_synthetic_data = False
        expected_flags = expected_flags[:-len(" --use_synthetic_data")]
        self.assertEqual(flags_core.get_nondefault_flags_as_str(),
                         expected_flags)
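All of the benchmarks above attach flags_core.get_nondefault_flags_as_str() to the report's extras so that the exact run configuration can be replayed later. A small sketch of that round trip, assuming only the absl-style flag string format demonstrated by the test above; the script name is hypothetical.

import shlex

# A flag string as produced above after overriding a few defaults.
flags_str = '--noclean --data_dir=xyz --hooks=aaa,bbb,ccc --stop_threshold=3.0'

# Replaying the run: split the string back into argv-style tokens and append
# them to whatever command launched the benchmark.
replay_argv = ['python', 'benchmark_main.py'] + shlex.split(flags_str)
print(replay_argv)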