Example 1
def _create_all_examples(
    task,
    vocab_file,
    test_mode,
    output_dir,
    test_batch_size,
):
  """Converts interactions to TF examples."""
  interaction_dir = task_utils.get_interaction_dir(output_dir)
  example_dir = os.path.join(output_dir, 'tf_examples')
  file_utils.make_directories(example_dir)

  _create_examples(
      interaction_dir,
      example_dir,
      vocab_file,
      task_utils.get_train_filename(task),
      batch_size=None,
      test_mode=test_mode)
  _create_examples(interaction_dir, example_dir, vocab_file,
                   task_utils.get_dev_filename(task), test_batch_size,
                   test_mode)
  _create_examples(interaction_dir, example_dir, vocab_file,
                   task_utils.get_test_filename(task), test_batch_size,
                   test_mode)
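
A minimal call site for the helper above, with purely hypothetical paths and batch size (the enum value comes from the same `tasks` module used in Example 3):

_create_all_examples(
    task=tasks.Task.SQA,
    vocab_file='/path/to/vocab.txt',
    test_mode=False,
    output_dir='/path/to/output',
    test_batch_size=32,
)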
Example 2
def create_interactions(supervision_modes, input_dir: Text, output_dir: Text):
  """Converts data in SQA format to Interaction protos.

  Args:
    supervision_modes: For WikiSQL imports, decides whether supervision is
      removed.
    input_dir: SQA data.
    output_dir: Where interactions will be written.
  """
  file_utils.make_directories(output_dir)

  interaction_dict = _read_interactions(input_dir)
  _add_tables(input_dir, interaction_dict)
  _parse_questions(interaction_dict, supervision_modes,
                   os.path.join(output_dir, 'report.tsv'))
  for filename, interactions in interaction_dict.items():
    _write_tfrecord(interactions,
                    _get_output_filename(output_dir, filename))
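
A sketch of how the converter above might be invoked; the paths are placeholders and `supervision_modes` is whatever per-file supervision configuration the surrounding pipeline builds (it is only forwarded to `_parse_questions`):

create_interactions(
    supervision_modes=supervision_modes,  # placeholder; built by the caller
    input_dir='/path/to/sqa_data',
    output_dir='/path/to/interactions',
)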
Example 3
def _train_and_predict(
    task,
    tpu_options,
    test_batch_size,
    train_batch_size,
    gradient_accumulation_steps,
    bert_config_file,
    init_checkpoint,
    test_mode,
    mode,
    output_dir,
    model_dir,
):
  """Trains, produces test predictions and eval metric."""
  file_utils.make_directories(model_dir)

  if task == tasks.Task.SQA:
    num_aggregation_labels = 0
    do_model_aggregation = False
    use_answer_as_supervision = None
  elif task in [
      tasks.Task.WTQ, tasks.Task.WIKISQL, tasks.Task.WIKISQL_SUPERVISED
  ]:
    num_aggregation_labels = 4
    do_model_aggregation = True
    use_answer_as_supervision = task != tasks.Task.WIKISQL_SUPERVISED
  else:
    raise ValueError(f'Unknown task: {task.name}')

  hparams = hparam_utils.get_hparams(task)
  if test_mode:
    if train_batch_size is None:
      train_batch_size = 1
    test_batch_size = 1
    num_train_steps = 10
    num_warmup_steps = 1
  else:
    if train_batch_size is None:
      train_batch_size = hparams['train_batch_size']
    num_train_examples = hparams['num_train_examples']
    num_train_steps = int(num_train_examples / train_batch_size)
    num_warmup_steps = int(num_train_steps * hparams['warmup_ratio'])

  bert_config = modeling.BertConfig.from_json_file(bert_config_file)
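  # Task-specific values (learning rate, cell-selection and answer-loss
  # settings) come from `hparams`; the remaining fields are fixed defaults.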
  tapas_config = tapas_classifier_model.TapasClassifierConfig(
      bert_config=bert_config,
      init_checkpoint=init_checkpoint,
      learning_rate=hparams['learning_rate'],
      num_train_steps=num_train_steps,
      num_warmup_steps=num_warmup_steps,
      use_tpu=tpu_options.use_tpu,
      positive_weight=10.0,
      num_aggregation_labels=num_aggregation_labels,
      num_classification_labels=0,
      aggregation_loss_importance=1.0,
      use_answer_as_supervision=use_answer_as_supervision,
      answer_loss_importance=1.0,
      use_normalized_answer_loss=False,
      huber_loss_delta=hparams.get('huber_loss_delta'),
      temperature=hparams.get('temperature', 1.0),
      agg_temperature=1.0,
      use_gumbel_for_cells=False,
      use_gumbel_for_agg=False,
      average_approximation_function=(
          tapas_classifier_model.AverageApproximationFunction.RATIO),
      cell_select_pref=hparams.get('cell_select_pref'),
      answer_loss_cutoff=hparams.get('answer_loss_cutoff'),
      grad_clipping=hparams.get('grad_clipping'),
      disabled_features=[],
      max_num_rows=64,
      max_num_columns=32,
      average_logits_per_cell=False,
      init_cell_selection_weights_to_zero=(
          hparams['init_cell_selection_weights_to_zero']),
      select_one_column=hparams['select_one_column'],
      allow_empty_column_selection=hparams['allow_empty_column_selection'],
      disable_position_embeddings=False)

  model_fn = tapas_classifier_model.model_fn_builder(tapas_config)

  is_per_host = tf.estimator.tpu.InputPipelineConfig.PER_HOST_V2

  tpu_cluster_resolver = None
  if tpu_options.use_tpu and tpu_options.tpu_name:
    tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
        tpu=tpu_options.tpu_name,
        zone=tpu_options.tpu_zone,
        project=tpu_options.gcp_project,
    )

  run_config = tf.estimator.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      master=tpu_options.master,
      model_dir=model_dir,
      tf_random_seed=FLAGS.tf_random_seed,
      save_checkpoints_steps=1000,
      keep_checkpoint_max=5,
      keep_checkpoint_every_n_hours=4.0,
      tpu_config=tf.estimator.tpu.TPUConfig(
          iterations_per_loop=tpu_options.iterations_per_loop,
          num_shards=tpu_options.num_tpu_cores,
          per_host_input_for_training=is_per_host))

  # If TPU is not available, this will fall back to normal Estimator on CPU/GPU.
  estimator = tf.estimator.tpu.TPUEstimator(
      params={'gradient_accumulation_steps': gradient_accumulation_steps},
      use_tpu=tpu_options.use_tpu,
      model_fn=model_fn,
      config=run_config,
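      # The per-step batch is train_batch_size // gradient_accumulation_steps;
      # 'gradient_accumulation_steps' is forwarded via `params` above so the
      # model_fn can accumulate gradients back up to the requested batch size.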
      train_batch_size=train_batch_size // gradient_accumulation_steps,
      eval_batch_size=None,
      predict_batch_size=test_batch_size)

  if mode == Mode.TRAIN:
    _print('Training')
    bert_config.to_json_file(os.path.join(model_dir, 'bert_config.json'))
    tapas_config.to_json_file(os.path.join(model_dir, 'tapas_config.json'))
    train_input_fn = functools.partial(
        tapas_classifier_model.input_fn,
        name='train',
        file_patterns=_get_train_examples_file(task, output_dir),
        data_format='tfrecord',
        compression_type=FLAGS.compression_type,
        is_training=True,
        max_seq_length=FLAGS.max_seq_length,
        max_predictions_per_seq=_MAX_PREDICTIONS_PER_SEQ,
        add_aggregation_function_id=do_model_aggregation,
        add_classification_labels=False,
        add_answer=use_answer_as_supervision,
        include_id=False,
    )
    estimator.train(
        input_fn=train_input_fn,
        max_steps=tapas_config.num_train_steps,
    )

  elif mode == Mode.PREDICT_AND_EVALUATE:

    # Runs a continuous eval loop: start from the latest checkpoint and keep
    # going until a checkpoint trained for 'num_train_steps' steps is reached.
    prev_checkpoint = None
    while True:
      checkpoint = estimator.latest_checkpoint()

      if checkpoint == prev_checkpoint:
        _print('Sleeping 5 mins before predicting')
        time.sleep(5 * 60)
        continue

      current_step = int(os.path.basename(checkpoint).split('-')[1])
      _predict(
          estimator,
          task,
          output_dir,
          model_dir,
          do_model_aggregation,
          use_answer_as_supervision,
          use_tpu=tapas_config.use_tpu,
          global_step=current_step,
      )
      _eval(
          task=task,
          output_dir=output_dir,
          model_dir=model_dir,
          global_step=current_step)
      if current_step >= tapas_config.num_train_steps:
        _print(f'Evaluation finished after training step {current_step}.')
        break

  else:
    raise ValueError(f'Unexpected mode: {mode}.')
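
For intuition, a worked example of the schedule computed above, using made-up hyperparameter values (the real numbers come from hparam_utils.get_hparams):

num_train_examples = 512_000          # made-up value
train_batch_size = 128                # made-up value
warmup_ratio = 0.01                   # made-up value

num_train_steps = int(num_train_examples / train_batch_size)    # 4000
num_warmup_steps = int(num_train_steps * warmup_ratio)          # 40

# With gradient accumulation, the estimator sees a smaller per-step batch:
gradient_accumulation_steps = 2
per_step_batch = train_batch_size // gradient_accumulation_steps  # 64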
Example 4
def _create_dirs(output_dir):
  file_utils.make_directories(os.path.join(output_dir, _TABLE_DIR_NAME))