Example #1
        def create_model(self):
            """Builds an AlbertModel over random inputs and collects its outputs."""
            input_ids = AlbertModelTest.ids_tensor(
                [self.batch_size, self.seq_length], self.vocab_size)

            input_mask = None
            if self.use_input_mask:
                input_mask = AlbertModelTest.ids_tensor(
                    [self.batch_size, self.seq_length], vocab_size=2)

            token_type_ids = None
            if self.use_token_type_ids:
                token_type_ids = AlbertModelTest.ids_tensor(
                    [self.batch_size, self.seq_length], self.type_vocab_size)

            config = modeling.AlbertConfig(
                vocab_size=self.vocab_size,
                embedding_size=self.embedding_size,
                hidden_size=self.hidden_size,
                num_hidden_layers=self.num_hidden_layers,
                num_attention_heads=self.num_attention_heads,
                intermediate_size=self.intermediate_size,
                hidden_act=self.hidden_act,
                hidden_dropout_prob=self.hidden_dropout_prob,
                attention_probs_dropout_prob=self.attention_probs_dropout_prob,
                max_position_embeddings=self.max_position_embeddings,
                type_vocab_size=self.type_vocab_size,
                initializer_range=self.initializer_range,
            )

            model = modeling.AlbertModel(
                config=config,
                is_training=self.is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=token_type_ids,
                scope=self.scope,
            )

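            # Collect the tensors the tests assert on: the embedding output,
            # per-layer encoder activations, and the final sequence/pooled outputs.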
            outputs = {
                "embedding_output": model.get_embedding_output(),
                "sequence_output": model.get_sequence_output(),
                "pooled_output": model.get_pooled_output(),
                "all_encoder_layers": model.get_all_encoder_layers(),
            }
            return outputs
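AlbertModelTest.ids_tensor is referenced above but not shown on this page. A minimal sketch of what such a helper plausibly looks like, written here as a module-level function and assuming it mirrors the ids_tensor utility in the upstream BERT/ALBERT test code (uniform random int32 ids below vocab_size):

import random

import tensorflow as tf  # TensorFlow 1.x, matching the examples


def ids_tensor(shape, vocab_size, rng=None, name=None):
    """Creates a random int32 tensor of `shape` with values in [0, vocab_size)."""
    if rng is None:
        rng = random.Random()
    total_dims = 1
    for dim in shape:
        total_dims *= dim
    values = [rng.randint(0, vocab_size - 1) for _ in range(total_dims)]
    return tf.constant(value=values, dtype=tf.int32, shape=shape, name=name)

In the actual test this would be a classmethod on AlbertModelTest rather than a free function.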
Example #2
# Imports assumed by this excerpt; the module path below is a guess at the
# ALBERT repo layout, and tf.gfile requires TensorFlow 1.x.
import tensorflow as tf

from albert import modeling


def _create_config_file(filename, max_seq_length, vocab_size):
    """Creates a small AlbertConfig and saves it to `filename` as JSON."""
    albert_config = modeling.AlbertConfig(
        vocab_size,
        embedding_size=5,
        hidden_size=14,
        num_hidden_layers=3,
        num_hidden_groups=1,
        num_attention_heads=2,
        intermediate_size=19,
        inner_group_num=1,
        down_scale_factor=1,
        hidden_act="gelu",
        hidden_dropout_prob=0,
        attention_probs_dropout_prob=0,
        max_position_embeddings=max_seq_length,
        type_vocab_size=2,
        initializer_range=0.02)
    with tf.gfile.Open(filename, "w") as outfile:
        outfile.write(albert_config.to_json_string())
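A file written this way would typically be read back through the config class itself. A usage sketch, assuming modeling.AlbertConfig exposes a from_json_file classmethod as BertConfig does:

# Hypothetical round trip; the /tmp path and sizes are illustrative only.
_create_config_file("/tmp/albert_config.json", max_seq_length=64, vocab_size=30)
config = modeling.AlbertConfig.from_json_file("/tmp/albert_config.json")
assert config.max_position_embeddings == 64
assert config.vocab_size == 30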
Example #3
    def test_config_to_json_string(self):
        config = modeling.AlbertConfig(vocab_size=99, hidden_size=37)
        obj = json.loads(config.to_json_string())
        self.assertEqual(obj["vocab_size"], 99)
        self.assertEqual(obj["hidden_size"], 37)
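A hypothetical companion test for the opposite direction, assuming AlbertConfig also provides a from_dict classmethod as BertConfig does:

    def test_config_from_dict_round_trip(self):
        # Hypothetical test, not from the source page; assumes
        # modeling.AlbertConfig.from_dict exists, mirroring BertConfig.
        config = modeling.AlbertConfig(vocab_size=99, hidden_size=37)
        rebuilt = modeling.AlbertConfig.from_dict(
            json.loads(config.to_json_string()))
        self.assertEqual(rebuilt.vocab_size, 99)
        self.assertEqual(rebuilt.hidden_size, 37)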
Example #4
# Imports assumed by this excerpt; tf.contrib and tf.logging require
# TensorFlow 1.x, and the module paths are a guess at the repo layout.
import os
import time

import tensorflow as tf

from albert import modeling
from albert import run_pretraining


def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  if not FLAGS.do_train and not FLAGS.do_eval:
    raise ValueError("At least one of `do_train` or `do_eval` must be True.")

  albert_config = modeling.AlbertConfig(
      100,
      embedding_size=7,
      hidden_size=26,
      num_hidden_layers=3,
      num_hidden_groups=1,
      num_attention_heads=13,
      intermediate_size=29,
      inner_group_num=1,
      down_scale_factor=1,
      hidden_act="gelu",
      hidden_dropout_prob=0,
      attention_probs_dropout_prob=0,
      max_position_embeddings=512,
      type_vocab_size=2,
      initializer_range=0.02)

  tf.io.gfile.makedirs(FLAGS.output_dir)

  # Create some dummy input files instead of reading from actual data.
  input_files = list(_make_dummy_input_files(2, 5, 100))

  tf.logging.info("*** Input Files ***")
  for input_file in input_files:
    tf.logging.info("  %s" % input_file)

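  # Resolve a TPU cluster only when one is requested; local CPU/GPU runs
  # leave the resolver as None.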
  tpu_cluster_resolver = None
  if FLAGS.use_tpu and FLAGS.tpu_name:
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

  is_per_host = tf.estimator.tpu.InputPipelineConfig.PER_HOST_V2
  run_config = tf.estimator.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      master=FLAGS.master,
      model_dir=FLAGS.output_dir,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps,
      tpu_config=tf.estimator.tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_tpu_cores,
          per_host_input_for_training=is_per_host))

  model_fn = run_pretraining.model_fn_builder(
      albert_config=albert_config,
      init_checkpoint=FLAGS.init_checkpoint,
      learning_rate=FLAGS.learning_rate,
      num_train_steps=FLAGS.num_train_steps,
      num_warmup_steps=FLAGS.num_warmup_steps,
      use_tpu=FLAGS.use_tpu,
      use_one_hot_embeddings=FLAGS.use_tpu,
      optimizer=FLAGS.optimizer,
      poly_power=FLAGS.poly_power,
      start_warmup_step=FLAGS.start_warmup_step)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  estimator = tf.estimator.tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size)

  if FLAGS.do_train:
    tf.logging.info("***** Running training *****")
    tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    train_input_fn = run_pretraining.input_fn_builder(
        input_files=input_files,
        max_seq_length=FLAGS.max_seq_length,
        max_predictions_per_seq=FLAGS.max_predictions_per_seq,
        is_training=True)
    estimator.train(input_fn=train_input_fn, max_steps=FLAGS.num_train_steps)

  if FLAGS.do_eval:
    tf.logging.info("***** Running evaluation *****")
    tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)
    global_step = -1
    output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
    writer = tf.io.gfile.GFile(output_eval_file, "w")
    tf.io.gfile.makedirs(FLAGS.export_dir)
    eval_input_fn = run_pretraining.input_fn_builder(
        input_files=input_files,
        max_seq_length=FLAGS.max_seq_length,
        max_predictions_per_seq=FLAGS.max_predictions_per_seq,
        is_training=False)
    while global_step < FLAGS.num_train_steps:
      if estimator.latest_checkpoint() is None:
        tf.logging.info("No checkpoint found yet. Sleeping.")
        time.sleep(1)
      else:
        result = estimator.evaluate(
            input_fn=eval_input_fn, steps=FLAGS.max_eval_steps)
        global_step = result["global_step"]
        tf.logging.info("***** Eval results *****")
        for key in sorted(result.keys()):
          tf.logging.info("  %s = %s", key, str(result[key]))
          writer.write("%s = %s\n" % (key, str(result[key])))
    writer.close()  # the original excerpt never closed the file; close to flush
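The FLAGS used throughout main are defined elsewhere in the test module. A minimal, hypothetical sketch of the entry point and a few of the flag definitions, using the absl flags API that TF 1.x delegates to (the names match flags referenced above; the defaults are illustrative only):

from absl import flags

FLAGS = flags.FLAGS

flags.DEFINE_bool("do_train", True, "Whether to run training.")
flags.DEFINE_bool("do_eval", True, "Whether to run evaluation.")
flags.DEFINE_bool("use_tpu", False, "Whether to use a TPU.")
flags.DEFINE_string("output_dir", "/tmp/albert_pretraining", "Model directory.")

if __name__ == "__main__":
  tf.app.run()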