Code Example #1
# Imports assumed for this excerpt; `utils`, `models`, and METRICS_TO_SAVE
# are project-local and defined elsewhere in the module.
import collections
import csv
import re

from absl import logging  # The standard-library logging module also works.
import tensorflow as tf
from tensorflow.io import gfile


def eval_single_checkpoint(ckpt_name, output_path, model, datasets,
                           embedding_matrices):
    """Runs quantitative evaluation on a single checkpoint."""
    if gfile.exists(output_path):
        logging.info('Skipping; output already exists: "%s"', output_path)
        return

    metrics = model.create_metrics()

    logging.info('Evaluating: "%s"', ckpt_name)
    utils.do_evaluation(model, metrics, datasets, embedding_matrices)

    # This code assumes the checkpoint name contains the epoch and step in
    # the following format, e.g. 'ep0025_step01234.ckpt' as written by the
    # training loop in Code Example #2.
    path_search = re.search(r'ep(\d+)_step(\d+)', ckpt_name)
    epoch = int(path_search.group(1))
    step = int(path_search.group(2))

    to_write = collections.OrderedDict()
    to_write['checkpoint'] = ckpt_name
    to_write['epoch'] = epoch
    to_write['step'] = step
    for metric in metrics.values():
        if metric.name in METRICS_TO_SAVE:
            # Note: tf.summary.scalar is a no-op unless the caller has entered
            # a default summary writer context (see the sketch below).
            tf.summary.scalar(metric.name, metric.result(), step=step)
            to_write[metric.name] = metric.result().numpy()
        metric.reset_states()

    # Save the results to a text file.
    with gfile.GFile(output_path, 'w') as f:
        writer = csv.DictWriter(f, fieldnames=to_write.keys())
        writer.writeheader()
        writer.writerow(to_write)
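
For context, here is a minimal driver sketch for eval_single_checkpoint. It is not from the source: the helper name eval_all_checkpoints, the output-file naming, and the restore wiring are illustrative assumptions; only the checkpoint naming scheme ('ep%04d_step%05d.ckpt') comes from the training loop in Code Example #2.

import os

import tensorflow as tf


def eval_all_checkpoints(ckpt_dir, output_dir, model, datasets,
                         embedding_matrices):
    """Hypothetical helper: evaluates every checkpoint under ckpt_dir."""
    # Restore only the model weights; optimizer state is not needed for eval.
    checkpoint = tf.train.Checkpoint(model=model)
    state = tf.train.get_checkpoint_state(ckpt_dir)
    if state is None:
        return
    summary_writer = tf.summary.create_file_writer(
        os.path.join(output_dir, 'summaries'))
    with summary_writer.as_default():
        for ckpt_path in state.all_model_checkpoint_paths:
            checkpoint.restore(ckpt_path).expect_partial()
            ckpt_name = os.path.basename(ckpt_path)
            output_path = os.path.join(output_dir, ckpt_name + '_eval.csv')
            eval_single_checkpoint(ckpt_name, output_path, model, datasets,
                                   embedding_matrices)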
Code Example #2
# `os` and `time` are assumed here, along with the imports from Code Example
# #1; `prepare_datasets` is a project-local helper.
import os
import time


def train(save_dir, num_epochs=300,
          learning_rate=0.0001, save_every_n_epochs=25):
  """Train pipeline for next sentence embedding prediction on ROCStories."""
  #### LOAD DATA ####
  datasets, embedding_matrices = prepare_datasets()

  #### CREATE MODEL AND OPTIMIZER ####
  # The second dimension of the first dataset component is the number of
  # input sentences per example.
  num_input_sentences = tf.compat.v1.data.get_output_shapes(
      datasets['train'])[0][1]
  model = models.build_model(
      num_input_sentences=num_input_sentences,
      embedding_matrix=embedding_matrices['train'])
  metrics = model.create_metrics()
  # Note: 'lr' is a deprecated alias for 'learning_rate' in tf.keras.
  optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
  checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)

  num_train_steps = 0

  #### DO TRAINING ####
  summary_writer = tf.summary.create_file_writer(
      os.path.join(save_dir, 'summaries'))
  with summary_writer.as_default():
    logging.info('Starting training.')
    for epoch in range(1, num_epochs+1):
      for x, labels in datasets['train']:
        utils.train_step(model, optimizer, x, labels, metrics)
        num_train_steps += 1

      start_time = time.time()
      utils.do_evaluation(model, metrics, datasets, embedding_matrices)
      logging.info('Eval took %f seconds.', (time.time() - start_time))

      to_log = ['%s=%f' % (m.name, m.result()) for m in metrics.values()]
      logging.info('Epoch %d: %s', epoch, ', '.join(to_log))

      # Write each metric to TensorBoard, then reset it for the next epoch.
      for metric in metrics.values():
        tf.summary.scalar(
            metric.name, metric.result(), step=optimizer.iterations)
        metric.reset_states()

      # lr = cur_learning_rate(optimizer)
      # tf.summary.scalar('learning_rate', lr, step=optimizer.iterations)

      if epoch % save_every_n_epochs == 0:
        prefix = os.path.join(
            save_dir, 'ep%04d_step%05d.ckpt' % (epoch, num_train_steps))
        logging.info('Saving checkpoint: %s', prefix)
        checkpoint.save(file_prefix=prefix)

  #### SAVE HYPERPARAMETERS AND FINAL EVAL RESULTS TO FILE ####
  to_save = {}
  for metric in metrics.values():
    metric.reset_states()
  utils.do_evaluation(model, metrics, datasets, embedding_matrices)
  for metric in metrics.values():
    to_save['metric_' + metric.name] = metric.result().numpy()
  results_file_path = os.path.join(save_dir, 'final_eval.tsv')
  with gfile.GFile(results_file_path, 'w') as f:
    for name, value in to_save.items():  # dict.iteritems() is Python 2 only.
      f.write('%s\t%s\n' % (name, str(value)))
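
As a usage sketch (not part of the source), the pipeline could be launched with an absl entry point like the one below. The flag names and defaults are illustrative assumptions, not from the source.

from absl import app
from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_string('save_dir', '/tmp/rocstories',
                    'Directory for checkpoints, summaries, and final_eval.tsv.')
flags.DEFINE_integer('num_epochs', 300, 'Number of training epochs.')
flags.DEFINE_float('learning_rate', 0.0001, 'Adam learning rate.')


def main(argv):
  del argv  # Unused.
  train(save_dir=FLAGS.save_dir,
        num_epochs=FLAGS.num_epochs,
        learning_rate=FLAGS.learning_rate)


if __name__ == '__main__':
  app.run(main)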