def run_epoch(supervisor, sess, m, dataset, hparams, eval_op, experiment_type,
              epoch_count):
  """Runs an epoch of training or evaluate the model on given data."""
  # reduce variance in validation loss by fixing the seed
  data_seed = 123 if experiment_type == 'valid' else None
  with lib_util.numpy_seed(data_seed):
    batches = (
        dataset.get_featuremaps().batches(
            size=m.batch_size, shuffle=True, shuffle_rng=data_seed))

  losses = lib_util.AggregateMean('losses')
  losses_total = lib_util.AggregateMean('losses_total')
  losses_mask = lib_util.AggregateMean('losses_mask')
  losses_unmask = lib_util.AggregateMean('losses_unmask')

  start_time = time.time()
  for unused_step, batch in enumerate(batches):
    # Evaluate the graph and run backpropagation.
    fetches = [
        m.loss, m.loss_total, m.loss_mask, m.loss_unmask, m.reduced_mask_size,
        m.reduced_unmask_size, m.learning_rate, eval_op
    ]
    feed_dict = batch.get_feed_dict(m.placeholders)
    (loss, loss_total, loss_mask, loss_unmask, reduced_mask_size,
     reduced_unmask_size, learning_rate, _) = sess.run(
         fetches, feed_dict=feed_dict)

    # Aggregate performances.
    losses_total.add(loss_total, 1)
    # Multiply the mean loss_mask by reduced_mask_size before aggregating,
    # since the mask size can differ from batch to batch.
    losses_mask.add(loss_mask * reduced_mask_size, reduced_mask_size)
    losses_unmask.add(loss_unmask * reduced_unmask_size, reduced_unmask_size)

    if hparams.optimize_mask_only:
      losses.add(loss * reduced_mask_size, reduced_mask_size)
    else:
      losses.add(loss, 1)

  # Collect run statistics.
  run_stats = dict()
  run_stats['loss_mask'] = losses_mask.mean
  run_stats['loss_unmask'] = losses_unmask.mean
  run_stats['loss_total'] = losses_total.mean
  run_stats['loss'] = losses.mean
  if experiment_type == 'train':
    run_stats['learning_rate'] = float(learning_rate)

  # Make summaries.
  if FLAGS.log_progress:
    summaries = tf.Summary()
    for stat_name, stat in run_stats.items():
      value = summaries.value.add()
      value.tag = '%s_%s' % (stat_name, experiment_type)
      value.simple_value = stat
    supervisor.summary_computed(sess, summaries, epoch_count)

  tf.logging.info('%s, epoch %d: loss (mask): %.4f, loss (unmask): %.4f, '
                  'loss (total): %.4f, log2 lr: %.4f, time taken: %.4f',
                  experiment_type, epoch_count, run_stats['loss_mask'],
                  run_stats['loss_unmask'], run_stats['loss_total'],
                  np.log2(run_stats['learning_rate'])
                  if 'learning_rate' in run_stats else 0,
                  time.time() - start_time)

  return run_stats['loss']
Example #2
def run_epoch(supervisor, sess, m, dataset, hparams, eval_op, experiment_type,
              epoch_count):
  """Runs an epoch of training or evaluate the model on given data."""
  # reduce variance in validation loss by fixing the seed
  data_seed = 123 if experiment_type == 'valid' else None
  with lib_util.numpy_seed(data_seed):
    batches = (
        dataset.get_featuremaps().batches(
            size=m.batch_size, shuffle=True, shuffle_rng=data_seed))

  losses = lib_util.AggregateMean('losses')
  losses_total = lib_util.AggregateMean('losses_total')
  losses_mask = lib_util.AggregateMean('losses_mask')
  losses_unmask = lib_util.AggregateMean('losses_unmask')

  start_time = time.time()
  for unused_step, batch in enumerate(batches):
    # Evaluate the graph and run backpropagation.
    fetches = [
        m.loss, m.loss_total, m.loss_mask, m.loss_unmask, m.reduced_mask_size,
        m.reduced_unmask_size, m.learning_rate, eval_op
    ]
    feed_dict = batch.get_feed_dict(m.placeholders)
    (loss, loss_total, loss_mask, loss_unmask, reduced_mask_size,
     reduced_unmask_size, learning_rate, _) = sess.run(
         fetches, feed_dict=feed_dict)

    # Aggregate performances.
    losses_total.add(loss_total, 1)
    # Multiply the mean loss_mask by reduced_mask_size before aggregating,
    # since the mask size can differ from batch to batch.
    losses_mask.add(loss_mask * reduced_mask_size, reduced_mask_size)
    losses_unmask.add(loss_unmask * reduced_unmask_size, reduced_unmask_size)

    if hparams.optimize_mask_only:
      losses.add(loss * reduced_mask_size, reduced_mask_size)
    else:
      losses.add(loss, 1)

  # Collect run statistics.
  run_stats = dict()
  run_stats['loss_mask'] = losses_mask.mean
  run_stats['loss_unmask'] = losses_unmask.mean
  run_stats['loss_total'] = losses_total.mean
  run_stats['loss'] = losses.mean
  if experiment_type == 'train':
    run_stats['learning_rate'] = float(learning_rate)

  # Make summaries.
  if FLAGS.log_progress:
    summaries = tf.Summary()
    for stat_name, stat in six.iteritems(run_stats):
      value = summaries.value.add()
      value.tag = '%s_%s' % (stat_name, experiment_type)
      value.simple_value = stat
    supervisor.summary_computed(sess, summaries, epoch_count)

  tf.logging.info(
      '%s, epoch %d: loss (mask): %.4f, loss (unmask): %.4f, '
      'loss (total): %.4f, log lr: %.4f, time taken: %.4f',
      experiment_type, epoch_count, run_stats['loss_mask'],
      run_stats['loss_unmask'], run_stats['loss_total'],
      np.log(run_stats['learning_rate']) if 'learning_rate' in run_stats else 0,
      time.time() - start_time)

  return run_stats['loss']