def create_model(self):
    """Builds an AlbertModel over random ids and returns its key outputs."""
    # Random token ids in [0, vocab_size).
    input_ids = AlbertModelTest.ids_tensor(
        [self.batch_size, self.seq_length], self.vocab_size)

    # Optional attention mask of 0s and 1s.
    input_mask = None
    if self.use_input_mask:
        input_mask = AlbertModelTest.ids_tensor(
            [self.batch_size, self.seq_length], vocab_size=2)

    # Optional segment ids in [0, type_vocab_size).
    token_type_ids = None
    if self.use_token_type_ids:
        token_type_ids = AlbertModelTest.ids_tensor(
            [self.batch_size, self.seq_length], self.type_vocab_size)

    config = modeling.AlbertConfig(
        vocab_size=self.vocab_size,
        embedding_size=self.embedding_size,
        hidden_size=self.hidden_size,
        num_hidden_layers=self.num_hidden_layers,
        num_attention_heads=self.num_attention_heads,
        intermediate_size=self.intermediate_size,
        hidden_act=self.hidden_act,
        hidden_dropout_prob=self.hidden_dropout_prob,
        attention_probs_dropout_prob=self.attention_probs_dropout_prob,
        max_position_embeddings=self.max_position_embeddings,
        type_vocab_size=self.type_vocab_size,
        initializer_range=self.initializer_range)

    model = modeling.AlbertModel(
        config=config,
        is_training=self.is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=token_type_ids,
        scope=self.scope)

    outputs = {
        "embedding_output": model.get_embedding_output(),
        "sequence_output": model.get_sequence_output(),
        "pooled_output": model.get_pooled_output(),
        "all_encoder_layers": model.get_all_encoder_layers(),
    }
    return outputs
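
`AlbertModelTest.ids_tensor` is not shown in this excerpt. Below is a minimal sketch of what it presumably does, modeled on the helper of the same name in BERT's modeling test; in the original it is a classmethod on the test class, and the `rng` and `name` parameters are assumptions.

import random

import tensorflow.compat.v1 as tf


def ids_tensor(shape, vocab_size, rng=None, name=None):
    """Creates a random int32 tensor with values drawn from [0, vocab_size)."""
    if rng is None:
        rng = random.Random()

    # Count how many random ids the requested shape needs.
    total_dims = 1
    for dim in shape:
        total_dims *= dim

    values = [rng.randint(0, vocab_size - 1) for _ in range(total_dims)]
    return tf.constant(value=values, dtype=tf.int32, shape=shape, name=name)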
Example #2
def _create_config_file(filename, max_seq_length, vocab_size):
    """Creates an AlbertConfig and saves it to file."""
    albert_config = modeling.AlbertConfig(
        vocab_size,
        embedding_size=5,
        hidden_size=14,
        num_hidden_layers=3,
        num_hidden_groups=1,
        num_attention_heads=2,
        intermediate_size=19,
        inner_group_num=1,
        down_scale_factor=1,
        hidden_act="gelu",
        hidden_dropout_prob=0,
        attention_probs_dropout_prob=0,
        max_position_embeddings=max_seq_length,
        type_vocab_size=2,
        initializer_range=0.02)
    with tf.gfile.Open(filename, "w") as outfile:
        outfile.write(albert_config.to_json_string())
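
A quick way to sanity-check this helper is to round-trip the file. The sketch below assumes `AlbertConfig` exposes the same `from_json_file` classmethod as BERT's `BertConfig`; the temp-directory handling is illustrative only.

import os
import tempfile

config_path = os.path.join(tempfile.mkdtemp(), "albert_config.json")
_create_config_file(config_path, max_seq_length=128, vocab_size=30000)

# from_json_file is assumed to mirror BertConfig's classmethod of the same name.
restored = modeling.AlbertConfig.from_json_file(config_path)
assert restored.hidden_size == 14
assert restored.max_position_embeddings == 128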
Example #3
def test_config_to_json_string(self):
    config = modeling.AlbertConfig(vocab_size=99, hidden_size=37)
    obj = json.loads(config.to_json_string())
    self.assertEqual(obj["vocab_size"], 99)
    self.assertEqual(obj["hidden_size"], 37)
Example #4
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    albert_config = modeling.AlbertConfig(100,
                                          embedding_size=7,
                                          hidden_size=26,
                                          num_hidden_layers=3,
                                          num_hidden_groups=1,
                                          num_attention_heads=13,
                                          intermediate_size=29,
                                          inner_group_num=1,
                                          down_scale_factor=1,
                                          hidden_act="gelu",
                                          hidden_dropout_prob=0,
                                          attention_probs_dropout_prob=0,
                                          max_position_embeddings=512,
                                          type_vocab_size=2,
                                          initializer_range=0.02)

    tf.io.gfile.makedirs(FLAGS.output_dir)

    # Create some dummy input files instead of reading from actual data.
    input_files = list(_make_dummy_input_files(2, 5, 100))

    tf.logging.info("*** Input Files ***")
    for input_file in input_files:
        tf.logging.info("  %s" % input_file)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.estimator.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.estimator.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tf.estimator.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    model_fn = run_pretraining.model_fn_builder(
        albert_config=albert_config,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=FLAGS.num_train_steps,
        num_warmup_steps=FLAGS.num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        optimizer=FLAGS.optimizer,
        poly_power=FLAGS.poly_power,
        start_warmup_step=FLAGS.start_warmup_step)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.estimator.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size)

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        train_input_fn = run_pretraining.input_fn_builder(
            input_files=input_files,
            max_seq_length=FLAGS.max_seq_length,
            max_predictions_per_seq=FLAGS.max_predictions_per_seq,
            is_training=True)
        estimator.train(input_fn=train_input_fn,
                        max_steps=FLAGS.num_train_steps)

    if FLAGS.do_eval:
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)
        global_step = -1
        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        writer = tf.io.gfile.GFile(output_eval_file, "w")
        tf.io.gfile.makedirs(FLAGS.export_dir)
        eval_input_fn = run_pretraining.input_fn_builder(
            input_files=input_files,
            max_seq_length=FLAGS.max_seq_length,
            max_predictions_per_seq=FLAGS.max_predictions_per_seq,
            is_training=False)
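        # Poll for checkpoints until training reaches num_train_steps,
        # evaluating each new checkpoint as it appears.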
        while global_step < FLAGS.num_train_steps:
            if estimator.latest_checkpoint() is None:
                tf.logging.info("No checkpoint found yet. Sleeping.")
                time.sleep(1)
            else:
                result = estimator.evaluate(input_fn=eval_input_fn,
                                            steps=FLAGS.max_eval_steps)
                global_step = result["global_step"]
                tf.logging.info("***** Eval results *****")
                for key in sorted(result.keys()):
                    tf.logging.info("  %s = %s", key, str(result[key]))
                    writer.write("%s = %s\n" % (key, str(result[key])))