Example #1
    def test_network_invocation(self):
        config = bert.BertPretrainerConfig(
            encoder=encoders.TransformerEncoderConfig(vocab_size=10,
                                                      num_layers=1))
        _ = bert.instantiate_bertpretrainer_from_cfg(config)

        # Invokes with classification heads.
        config = bert.BertPretrainerConfig(
            encoder=encoders.TransformerEncoderConfig(vocab_size=10,
                                                      num_layers=1),
            cls_heads=[
                bert.ClsHeadConfig(inner_dim=10,
                                   num_classes=2,
                                   name="next_sentence")
            ])
        _ = bert.instantiate_bertpretrainer_from_cfg(config)

        with self.assertRaises(ValueError):
            config = bert.BertPretrainerConfig(
                encoder=encoders.TransformerEncoderConfig(vocab_size=10,
                                                          num_layers=1),
                cls_heads=[
                    bert.ClsHeadConfig(inner_dim=10,
                                       num_classes=2,
                                       name="next_sentence"),
                    bert.ClsHeadConfig(inner_dim=10,
                                       num_classes=2,
                                       name="next_sentence")
                ])
            _ = bert.instantiate_bertpretrainer_from_cfg(config)
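
Note: the snippets in this collection come from the TensorFlow Model Garden and assume its `official` package. Below is a minimal, self-contained sketch (tensor shapes are illustrative) of the imports they rely on and of how a ClsHeadConfig maps onto a Keras classification-head layer:

import tensorflow as tf

from official.nlp.configs import bert
from official.nlp.modeling import layers

# A tiny head config, mirroring the test above.
head_cfg = bert.ClsHeadConfig(inner_dim=10, num_classes=2, name="next_sentence")

# The pretrainer builders turn each ClsHeadConfig into a ClassificationHead
# layer; the config fields map one-to-one onto the layer constructor.
head = layers.ClassificationHead(**head_cfg.as_dict())

# The head pools the [CLS] token of a sequence of hidden states and projects
# it to `num_classes` logits.
dummy_states = tf.zeros((1, 8, 16))  # (batch, seq_len, hidden)
logits = head(dummy_states)          # shape (1, 2)
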
Example #2
  def test_task(self, init_cls_pooler):
    # Saves a checkpoint.
    pretrain_cfg = bert.PretrainerConfig(
        encoder=encoders.EncoderConfig(
            bert=encoders.BertEncoderConfig(vocab_size=30522, num_layers=1)),
        cls_heads=[
            bert.ClsHeadConfig(
                inner_dim=768, num_classes=2, name="next_sentence")
        ])
    pretrain_model = masked_lm.MaskedLMTask(None).build_model(pretrain_cfg)
    # The model variables will be created after the forward call.
    _ = pretrain_model(pretrain_model.inputs)
    ckpt = tf.train.Checkpoint(
        model=pretrain_model, **pretrain_model.checkpoint_items)
    init_path = ckpt.save(self.get_temp_dir())

    # Creates the task.
    config = sentence_prediction.SentencePredictionConfig(
        init_checkpoint=init_path,
        model=self.get_model_config(num_classes=2),
        train_data=self._train_data_config,
        init_cls_pooler=init_cls_pooler)
    task = sentence_prediction.SentencePredictionTask(config)
    model = task.build_model()
    metrics = task.build_metrics()
    dataset = task.build_inputs(config.train_data)

    iterator = iter(dataset)
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
    task.initialize(model)
    task.train_step(next(iterator), model, optimizer, metrics=metrics)
    task.validation_step(next(iterator), model, metrics=metrics)
Example #3
    def test_task(self):
        config = electra_task.ElectraPretrainConfig(
            model=electra.ElectraPretrainerConfig(
                generator_encoder=encoders.EncoderConfig(
                    bert=encoders.BertEncoderConfig(vocab_size=30522,
                                                    num_layers=1)),
                discriminator_encoder=encoders.EncoderConfig(
                    bert=encoders.BertEncoderConfig(vocab_size=30522,
                                                    num_layers=1)),
                num_masked_tokens=20,
                sequence_length=128,
                cls_heads=[
                    bert.ClsHeadConfig(inner_dim=10,
                                       num_classes=2,
                                       name="next_sentence")
                ]),
            train_data=pretrain_dataloader.BertPretrainDataConfig(
                input_path="dummy",
                max_predictions_per_seq=20,
                seq_length=128,
                global_batch_size=1))
        task = electra_task.ElectraPretrainTask(config)
        model = task.build_model()
        metrics = task.build_metrics()
        dataset = task.build_inputs(config.train_data)

        iterator = iter(dataset)
        optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
        task.train_step(next(iterator), model, optimizer, metrics=metrics)
        task.validation_step(next(iterator), model, metrics=metrics)
Example #4
  def test_task(self):
    config = sentence_prediction.SentencePredictionConfig(
        init_checkpoint=self.get_temp_dir(),
        model=self.get_model_config(2),
        train_data=self._train_data_config)
    task = sentence_prediction.SentencePredictionTask(config)
    model = task.build_model()
    metrics = task.build_metrics()
    dataset = task.build_inputs(config.train_data)

    iterator = iter(dataset)
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
    task.train_step(next(iterator), model, optimizer, metrics=metrics)
    task.validation_step(next(iterator), model, metrics=metrics)

    # Saves a checkpoint.
    pretrain_cfg = bert.BertPretrainerConfig(
        encoder=encoders.TransformerEncoderConfig(
            vocab_size=30522, num_layers=1),
        cls_heads=[
            bert.ClsHeadConfig(
                inner_dim=10, num_classes=3, name="next_sentence")
        ])
    pretrain_model = bert.instantiate_pretrainer_from_cfg(pretrain_cfg)
    ckpt = tf.train.Checkpoint(
        model=pretrain_model, **pretrain_model.checkpoint_items)
    ckpt.save(config.init_checkpoint)
    task.initialize(model)
Example #5
    def test_task_determinism(self):
        config = masked_lm.MaskedLMConfig(
            init_checkpoint=self.get_temp_dir(),
            scale_loss=True,
            model=bert.PretrainerConfig(
                encoder=encoders.EncoderConfig(bert=encoders.BertEncoderConfig(
                    vocab_size=30522, num_layers=1)),
                cls_heads=[
                    bert.ClsHeadConfig(inner_dim=10,
                                       num_classes=2,
                                       name="next_sentence")
                ]),
            train_data=pretrain_dataloader.BertPretrainDataConfig(
                max_predictions_per_seq=20,
                seq_length=128,
                global_batch_size=1))

        tf.keras.utils.set_random_seed(1)
        logs1, validation_logs1, weights1 = self._build_and_run_model(config)
        tf.keras.utils.set_random_seed(1)
        logs2, validation_logs2, weights2 = self._build_and_run_model(config)

        self.assertEqual(logs1["loss"], logs2["loss"])
        self.assertEqual(validation_logs1["loss"], validation_logs2["loss"])
        for weight1, weight2 in zip(weights1, weights2):
            self.assertAllEqual(weight1, weight2)
Example #6
  def test_task(self):
    config = masked_lm.MaskedLMConfig(
        init_checkpoint=self.get_temp_dir(),
        scale_loss=True,
        model=bert.PretrainerConfig(
            encoder=encoders.EncoderConfig(
                bert=encoders.BertEncoderConfig(vocab_size=30522,
                                                num_layers=1)),
            cls_heads=[
                bert.ClsHeadConfig(
                    inner_dim=10, num_classes=2, name="next_sentence")
            ]),
        train_data=pretrain_dataloader.BertPretrainDataConfig(
            input_path="dummy",
            max_predictions_per_seq=20,
            seq_length=128,
            global_batch_size=1))
    task = masked_lm.MaskedLMTask(config)
    model = task.build_model()
    metrics = task.build_metrics()
    dataset = task.build_inputs(config.train_data)

    iterator = iter(dataset)
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
    task.train_step(next(iterator), model, optimizer, metrics=metrics)
    task.validation_step(next(iterator), model, metrics=metrics)

    # Saves a checkpoint.
    ckpt = tf.train.Checkpoint(model=model, **model.checkpoint_items)
    ckpt.save(config.init_checkpoint)
    task.initialize(model)
Example #7
    def test_task(self):
        # Saves a checkpoint.
        pretrain_cfg = bert.BertPretrainerConfig(encoder=self._encoder_config,
                                                 num_masked_tokens=20,
                                                 cls_heads=[
                                                     bert.ClsHeadConfig(
                                                         inner_dim=10,
                                                         num_classes=3,
                                                         name="next_sentence")
                                                 ])
        pretrain_model = bert.instantiate_bertpretrainer_from_cfg(pretrain_cfg)
        ckpt = tf.train.Checkpoint(model=pretrain_model,
                                   **pretrain_model.checkpoint_items)
        saved_path = ckpt.save(self.get_temp_dir())

        config = question_answering.QuestionAnsweringConfig(
            init_checkpoint=saved_path,
            network=self._encoder_config,
            train_data=self._train_data_config)
        task = question_answering.QuestionAnsweringTask(config)
        model = task.build_model()
        metrics = task.build_metrics()
        dataset = task.build_inputs(config.train_data)

        iterator = iter(dataset)
        optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
        task.train_step(next(iterator), model, optimizer, metrics=metrics)
        task.validation_step(next(iterator), model, metrics=metrics)
        task.initialize(model)
Example #8
 def setUp(self):
   super(ProgressiveMaskedLMTest, self).setUp()
   self.task_config = progressive_masked_lm.ProgMaskedLMConfig(
       model=bert.PretrainerConfig(
           encoder=encoders.EncoderConfig(
               bert=encoders.BertEncoderConfig(vocab_size=30522,
                                               num_layers=2)),
           cls_heads=[
               bert.ClsHeadConfig(
                   inner_dim=10, num_classes=2, name="next_sentence")
           ]),
       train_data=pretrain_dataloader.BertPretrainDataConfig(
           input_path="dummy",
           max_predictions_per_seq=20,
           seq_length=128,
           global_batch_size=1),
       validation_data=pretrain_dataloader.BertPretrainDataConfig(
           input_path="dummy",
           max_predictions_per_seq=20,
           seq_length=128,
           global_batch_size=1),
       stage_list=[
           progressive_masked_lm.StackingStageConfig(
               num_layers=1, num_steps=4),
           progressive_masked_lm.StackingStageConfig(
               num_layers=2, num_steps=8),
           ],
       )
   self.exp_config = cfg.ExperimentConfig(
       task=self.task_config,
       trainer=prog_trainer_lib.ProgressiveTrainerConfig())
Example #9
def roformer_pretraining() -> cfg.ExperimentConfig:
  """BERT pretraining experiment."""
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(enable_xla=True),
      task=masked_lm.MaskedLMConfig(
          model=bert.PretrainerConfig(
              encoder=encoders.EncoderConfig(
                  type='any', any=roformer.RoformerEncoderConfig()),
              cls_heads=[
                  bert.ClsHeadConfig(
                      inner_dim=768,
                      num_classes=2,
                      dropout_rate=0.1,
                      name='next_sentence')
              ]),
          train_data=pretrain_dataloader.BertPretrainDataConfig(
              use_v2_feature_names=True),
          validation_data=pretrain_dataloader.BertPretrainDataConfig(
              use_v2_feature_names=True, is_training=False)),
      trainer=cfg.TrainerConfig(
          optimizer_config=RoformerOptimizationConfig(), train_steps=1000000),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
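
In the Model Garden, a factory function like the one above is normally registered with exp_factory so trainer binaries can look it up by name. A short sketch of that wiring; the registration name 'roformer/pretraining' is an assumption, not taken from the snippet:

from official.core import exp_factory


@exp_factory.register_config_factory('roformer/pretraining')  # assumed name
def roformer_pretraining_experiment():
  return roformer_pretraining()


# A trainer binary can then retrieve the experiment config by name.
experiment = exp_factory.get_exp_config('roformer/pretraining')
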
Example #10
class MaskedLMConfig(cfg.TaskConfig):
  """The model config."""
  network: bert.BertPretrainerConfig = bert.BertPretrainerConfig(cls_heads=[
      bert.ClsHeadConfig(
          inner_dim=768, num_classes=2, dropout_rate=0.1, name='next_sentence')
  ])
  train_data: cfg.DataConfig = cfg.DataConfig()
  validation_data: cfg.DataConfig = cfg.DataConfig()
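
A short sketch (paths and sizes are placeholders) of constructing this config with its defaults overridden, e.g. a different head and concrete data files:

config = MaskedLMConfig(
    network=bert.BertPretrainerConfig(cls_heads=[
        bert.ClsHeadConfig(
            inner_dim=512, num_classes=2, dropout_rate=0.1,
            name='next_sentence')
    ]),
    train_data=cfg.DataConfig(input_path='/tmp/train*', global_batch_size=32),
    validation_data=cfg.DataConfig(input_path='/tmp/eval*', is_training=False))
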
Example #11
 def get_model_config(self, num_classes):
     return bert.BertPretrainerConfig(
         encoder=encoders.TransformerEncoderConfig(vocab_size=30522,
                                                   num_layers=1),
         num_masked_tokens=0,
         cls_heads=[
             bert.ClsHeadConfig(inner_dim=10,
                                num_classes=num_classes,
                                name="sentence_prediction")
         ])
Example #12
class ElectraPretrainConfig(cfg.TaskConfig):
    """The model config."""
    model: electra.ElectraPretrainerConfig = electra.ElectraPretrainerConfig(
        cls_heads=[
            bert.ClsHeadConfig(inner_dim=768,
                               num_classes=2,
                               dropout_rate=0.1,
                               name='next_sentence')
        ])
    train_data: cfg.DataConfig = cfg.DataConfig()
    validation_data: cfg.DataConfig = cfg.DataConfig()
Example #13
class MaskedLMConfig(cfg.TaskConfig):
  """The model config."""
  model: bert.PretrainerConfig = bert.PretrainerConfig(cls_heads=[
      bert.ClsHeadConfig(
          inner_dim=768, num_classes=2, dropout_rate=0.1, name='next_sentence')
  ])
  # TODO(b/154564893): Mathematically, scale_loss should be True.
  # However, it works better with scale_loss being False.
  scale_loss: bool = False
  train_data: cfg.DataConfig = cfg.DataConfig()
  validation_data: cfg.DataConfig = cfg.DataConfig()
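
For context, scale_loss controls whether the per-replica loss is divided by the replica count before gradients are computed. A sketch of that logic under the usual tf.distribute pattern (not the task's exact code):

import tensorflow as tf


def maybe_scale_loss(loss: tf.Tensor, scale_loss: bool) -> tf.Tensor:
  """Divides the loss by the number of replicas when scale_loss is enabled."""
  if scale_loss:
    num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
    loss = loss / tf.cast(num_replicas, loss.dtype)
  return loss
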
Example #14
 def test_checkpoint_items(self):
     config = bert.BertPretrainerConfig(
         encoder=encoders.TransformerEncoderConfig(vocab_size=10,
                                                   num_layers=1),
         cls_heads=[
             bert.ClsHeadConfig(inner_dim=10,
                                num_classes=2,
                                name="next_sentence")
         ])
     encoder = bert.instantiate_bertpretrainer_from_cfg(config)
     self.assertSameElements(encoder.checkpoint_items.keys(),
                             ["encoder", "next_sentence.pooler_dense"])
Example #15
 def setUp(self):
     super(SentencePredictionTaskTest, self).setUp()
     self._network_config = bert.BertPretrainerConfig(
         encoder=encoders.TransformerEncoderConfig(vocab_size=30522,
                                                   num_layers=1),
         num_masked_tokens=0,
         cls_heads=[
             bert.ClsHeadConfig(inner_dim=10,
                                num_classes=3,
                                name="sentence_prediction")
         ])
     self._train_data_config = bert.SentencePredictionDataConfig(
         input_path="dummy", seq_length=128, global_batch_size=1)
Example #16
    def test_masked_lm(self, use_v2_feature_names):
        if use_v2_feature_names:
            input_word_ids_field = "input_word_ids"
            input_type_ids_field = "input_type_ids"
        else:
            input_word_ids_field = "input_ids"
            input_type_ids_field = "segment_ids"
        config = masked_lm.MaskedLMConfig(model=bert.PretrainerConfig(
            encoder=encoders.EncoderConfig(bert=encoders.BertEncoderConfig(
                vocab_size=30522, num_layers=1)),
            cls_heads=[
                bert.ClsHeadConfig(
                    inner_dim=10, num_classes=2, name="next_sentence")
            ]))
        task = masked_lm.MaskedLMTask(config)
        model = task.build_model()
        params = serving_modules.MaskedLM.Params(
            parse_sequence_length=10,
            max_predictions_per_seq=5,
            use_v2_feature_names=use_v2_feature_names)
        export_module = serving_modules.MaskedLM(params=params, model=model)
        functions = export_module.get_inference_signatures({
            "serve": "serving_default",
            "serve_examples": "serving_examples"
        })
        self.assertSameElements(functions.keys(),
                                ["serving_default", "serving_examples"])
        dummy_ids = tf.ones((10, 10), dtype=tf.int32)
        dummy_pos = tf.ones((10, 5), dtype=tf.int32)
        outputs = functions["serving_default"](input_word_ids=dummy_ids,
                                               input_mask=dummy_ids,
                                               input_type_ids=dummy_ids,
                                               masked_lm_positions=dummy_pos)
        self.assertEqual(outputs["classification"].shape, (10, 2))

        dummy_ids = tf.ones((10, ), dtype=tf.int32)
        dummy_pos = tf.ones((5, ), dtype=tf.int32)
        examples = _create_fake_serialized_examples({
            input_word_ids_field: dummy_ids,
            "input_mask": dummy_ids,
            input_type_ids_field: dummy_ids,
            "masked_lm_positions": dummy_pos
        })
        outputs = functions["serving_examples"](examples)
        self.assertEqual(outputs["classification"].shape, (10, 2))
Example #17
 def test_task_with_hub(self):
     hub_module_url = self._export_bert_tfhub()
     config = sentence_prediction.SentencePredictionConfig(
         hub_module_url=hub_module_url,
         network=bert.BertPretrainerConfig(
             encoders.TransformerEncoderConfig(vocab_size=30522,
                                               num_layers=1),
             num_masked_tokens=0,
             cls_heads=[
                 bert.ClsHeadConfig(inner_dim=10,
                                    num_classes=3,
                                    name="sentence_prediction")
             ]),
         train_data=bert.BertSentencePredictionDataConfig(
             input_path="dummy", seq_length=128, global_batch_size=10))
     self._run_task(config)
Example #18
class SentencePredictionConfig(cfg.TaskConfig):
    """The model config."""
    # At most one of `pretrain_checkpoint_dir` and `hub_module_url` can
    # be specified.
    pretrain_checkpoint_dir: str = ''
    hub_module_url: str = ''
    network: bert.BertPretrainerConfig = bert.BertPretrainerConfig(
        num_masked_tokens=0,
        cls_heads=[
            bert.ClsHeadConfig(inner_dim=768,
                               num_classes=3,
                               dropout_rate=0.1,
                               name='sentence_prediction')
        ])
    train_data: cfg.DataConfig = cfg.DataConfig()
    validation_data: cfg.DataConfig = cfg.DataConfig()
Example #19
class SentencePredictionConfig(cfg.TaskConfig):
    """The model config."""
    # At most one of `init_checkpoint` and `hub_module_url` can
    # be specified.
    init_checkpoint: str = ''
    hub_module_url: str = ''
    metric_type: str = 'accuracy'
    network: bert.BertPretrainerConfig = bert.BertPretrainerConfig(
        num_masked_tokens=0,  # No masked language modeling head.
        cls_heads=[
            bert.ClsHeadConfig(inner_dim=768,
                               num_classes=3,
                               dropout_rate=0.1,
                               name='sentence_prediction')
        ])
    train_data: cfg.DataConfig = cfg.DataConfig()
    validation_data: cfg.DataConfig = cfg.DataConfig()
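
The comment above states a constraint rather than behavior; a minimal sketch of enforcing it at runtime (a hypothetical helper, not part of the task):

def validate_init_source(config: SentencePredictionConfig) -> None:
    """Raises if both an init checkpoint and a TF-Hub module are given."""
    if config.init_checkpoint and config.hub_module_url:
        raise ValueError(
            'At most one of `init_checkpoint` and `hub_module_url` can be '
            'specified.')
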
Example #20
class MaskedLMConfig(cfg.TaskConfig):
    """The model config."""
    init_checkpoint: str = ''
    model: bert.PretrainerConfig = bert.PretrainerConfig(
        cls_heads=[
            bert.ClsHeadConfig(inner_dim=768,
                               num_classes=2,
                               dropout_rate=0.1,
                               name='next_sentence')
        ],
        encoder=encoders.EncoderConfig(bert=encoders.BertEncoderConfig()))
    scale_loss: bool = False
    train_data: pretrain_dataloader.BertPretrainDataConfig = (
        pretrain_dataloader.BertPretrainDataConfig())
    small_train_data: pretrain_dataloader.BertPretrainDataConfig = (
        pretrain_dataloader.BertPretrainDataConfig())
    validation_data: pretrain_dataloader.BertPretrainDataConfig = (
        pretrain_dataloader.BertPretrainDataConfig())
Example #21
  def test_task(self, version_2_with_negative, tokenization):
    # Saves a checkpoint.
    pretrain_cfg = bert.PretrainerConfig(
        encoder=self._encoder_config,
        cls_heads=[
            bert.ClsHeadConfig(
                inner_dim=10, num_classes=3, name="next_sentence")
        ])
    pretrain_model = masked_lm.MaskedLMTask(None).build_model(pretrain_cfg)
    ckpt = tf.train.Checkpoint(
        model=pretrain_model, **pretrain_model.checkpoint_items)
    saved_path = ckpt.save(self.get_temp_dir())

    config = question_answering.QuestionAnsweringConfig(
        init_checkpoint=saved_path,
        model=question_answering.ModelConfig(encoder=self._encoder_config),
        train_data=self._train_data_config,
        validation_data=self._get_validation_data_config(
            version_2_with_negative))
    self._run_task(config)
Example #22
  def test_network_invocation(self):
    config = electra.ELECTRAPretrainerConfig(
        generator_encoder=encoders.TransformerEncoderConfig(
            vocab_size=10, num_layers=1),
        discriminator_encoder=encoders.TransformerEncoderConfig(
            vocab_size=10, num_layers=2),
    )
    _ = electra.instantiate_pretrainer_from_cfg(config)

    # Invokes with classification heads.
    config = electra.ELECTRAPretrainerConfig(
        generator_encoder=encoders.TransformerEncoderConfig(
            vocab_size=10, num_layers=1),
        discriminator_encoder=encoders.TransformerEncoderConfig(
            vocab_size=10, num_layers=2),
        cls_heads=[
            bert.ClsHeadConfig(
                inner_dim=10, num_classes=2, name="next_sentence")
        ])
    _ = electra.instantiate_pretrainer_from_cfg(config)
Example #23
 def test_masked_lm(self):
   config = masked_lm.MaskedLMConfig(
       model=bert.PretrainerConfig(
           encoder=encoders.EncoderConfig(
               bert=encoders.BertEncoderConfig(vocab_size=30522,
                                               num_layers=1)),
           cls_heads=[
               bert.ClsHeadConfig(inner_dim=10, num_classes=2, name="foo")
           ]))
   task = masked_lm.MaskedLMTask(config)
   model = task.build_model()
   ckpt = tf.train.Checkpoint(model=model)
   ckpt_path = ckpt.save(self.get_temp_dir())
   export_module_cls = export_savedmodel.lookup_export_module(task)
   serving_params = {
       "cls_head_name": "foo",
       "parse_sequence_length": 10,
       "max_predictions_per_seq": 5
   }
   params = export_module_cls.Params(**serving_params)
   export_module = export_module_cls(params=params, model=model)
   export_dir = export_savedmodel_util.export(
       export_module,
       function_keys={
           "serve": "serving_default",
           "serve_examples": "serving_examples"
       },
       checkpoint_path=ckpt_path,
       export_savedmodel_dir=self.get_temp_dir())
   imported = tf.saved_model.load(export_dir)
   self.assertSameElements(imported.signatures.keys(),
                           ["serving_default", "serving_examples"])
   serving_fn = imported.signatures["serving_default"]
   dummy_ids = tf.ones((1, 10), dtype=tf.int32)
   dummy_pos = tf.ones((1, 5), dtype=tf.int32)
   outputs = serving_fn(
       input_word_ids=dummy_ids,
       input_mask=dummy_ids,
       input_type_ids=dummy_ids,
       masked_lm_positions=dummy_pos)
   self.assertEqual(outputs["classification"].shape, (1, 2))
Example #24
    def test_task(self):
        config = sentence_prediction.SentencePredictionConfig(
            network=bert.BertPretrainerConfig(
                encoders.TransformerEncoderConfig(vocab_size=30522,
                                                  num_layers=1),
                num_masked_tokens=0,
                cls_heads=[
                    bert.ClsHeadConfig(inner_dim=10,
                                       num_classes=3,
                                       name="sentence_prediction")
                ]),
            train_data=bert.BertSentencePredictionDataConfig(
                input_path="dummy", seq_length=128, global_batch_size=1))
        task = sentence_prediction.SentencePredictionTask(config)
        model = task.build_model()
        metrics = task.build_metrics()
        dataset = task.build_inputs(config.train_data)

        iterator = iter(dataset)
        optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
        task.train_step(next(iterator), model, optimizer, metrics=metrics)
        task.validation_step(next(iterator), model, metrics=metrics)
Example #25
    def test_distribution_strategy(self, distribution_strategy):
        max_seq_length = 128
        batch_size = 8
        input_path = os.path.join(self.get_temp_dir(), 'train.tf_record')
        _create_fake_dataset(input_path,
                             seq_length=60,
                             num_masked_tokens=20,
                             max_seq_length=max_seq_length,
                             num_examples=batch_size)
        data_config = pretrain_dynamic_dataloader.BertPretrainDataConfig(
            is_training=False,
            input_path=input_path,
            seq_bucket_lengths=[64, 128],
            global_batch_size=batch_size)
        dataloader = pretrain_dynamic_dataloader.PretrainingDynamicDataLoader(
            data_config)
        distributed_ds = orbit.utils.make_distributed_dataset(
            distribution_strategy, dataloader.load)
        train_iter = iter(distributed_ds)
        with distribution_strategy.scope():
            config = masked_lm.MaskedLMConfig(
                init_checkpoint=self.get_temp_dir(),
                model=bert.PretrainerConfig(
                    encoders.EncoderConfig(bert=encoders.BertEncoderConfig(
                        vocab_size=30522, num_layers=1)),
                    cls_heads=[
                        bert.ClsHeadConfig(inner_dim=10,
                                           num_classes=2,
                                           name='next_sentence')
                    ]),
                train_data=data_config)
            task = masked_lm.MaskedLMTask(config)
            model = task.build_model()
            metrics = task.build_metrics()

        @tf.function
        def step_fn(features):
            return task.validation_step(features, model, metrics=metrics)

        distributed_outputs = distribution_strategy.run(
            step_fn, args=(next(train_iter), ))
        local_results = tf.nest.map_structure(
            distribution_strategy.experimental_local_results,
            distributed_outputs)
        logging.info('Dynamic padding:  local_results= %s', str(local_results))
        dynamic_metrics = {}
        for metric in metrics:
            dynamic_metrics[metric.name] = metric.result()

        data_config = pretrain_dataloader.BertPretrainDataConfig(
            is_training=False,
            input_path=input_path,
            seq_length=max_seq_length,
            max_predictions_per_seq=20,
            global_batch_size=batch_size)
        dataloader = pretrain_dataloader.BertPretrainDataLoader(data_config)
        distributed_ds = orbit.utils.make_distributed_dataset(
            distribution_strategy, dataloader.load)
        train_iter = iter(distributed_ds)
        with distribution_strategy.scope():
            metrics = task.build_metrics()

        @tf.function
        def step_fn_b(features):
            return task.validation_step(features, model, metrics=metrics)

        distributed_outputs = distribution_strategy.run(
            step_fn_b, args=(next(train_iter), ))
        local_results = tf.nest.map_structure(
            distribution_strategy.experimental_local_results,
            distributed_outputs)
        logging.info('Static padding:  local_results= %s', str(local_results))
        static_metrics = {}
        for metric in metrics:
            static_metrics[metric.name] = metric.result()
        for key in static_metrics:
            # We need to investigate the differences on losses.
            if key != 'next_sentence_loss':
                self.assertEqual(dynamic_metrics[key], static_metrics[key])
Example #26
from official.core import task_factory
from official.modeling.hyperparams import config_definitions as cfg
from official.nlp.configs import bert
from official.nlp.data import data_loader_factory


@dataclasses.dataclass
class MaskedLMConfig(cfg.TaskConfig):
  """The model config."""
  init_checkpoint: str = ''
  model: bert.BertPretrainerConfig = bert.BertPretrainerConfig(cls_heads=[
      bert.ClsHeadConfig(
          inner_dim=768, num_classes=2, dropout_rate=0.1, name='next_sentence')
  ])
  train_data: cfg.DataConfig = cfg.DataConfig()
  validation_data: cfg.DataConfig = cfg.DataConfig()


@task_factory.register_task_cls(MaskedLMConfig)
class MaskedLMTask(base_task.Task):
  """Mock task object for testing."""

  def build_model(self, params=None):
    params = params or self.task_config.model
Example #27
    def prepare_config(self, teacher_block_num, student_block_num,
                       transfer_teacher_layers):
        # using small model for testing
        task_config = distillation.BertDistillationTaskConfig(
            teacher_model=bert.PretrainerConfig(
                encoder=encoders.EncoderConfig(
                    type='mobilebert',
                    mobilebert=encoders.MobileBertEncoderConfig(
                        num_blocks=teacher_block_num)),
                cls_heads=[
                    bert.ClsHeadConfig(
                        inner_dim=256,
                        num_classes=2,
                        dropout_rate=0.1,
                        name='next_sentence')
                ],
                mlm_activation='gelu'),
            student_model=bert.PretrainerConfig(
                encoder=encoders.EncoderConfig(
                    type='mobilebert',
                    mobilebert=encoders.MobileBertEncoderConfig(
                        num_blocks=student_block_num)),
                cls_heads=[
                    bert.ClsHeadConfig(
                        inner_dim=256,
                        num_classes=2,
                        dropout_rate=0.1,
                        name='next_sentence')
                ],
                mlm_activation='relu'),
            train_data=pretrain_dataloader.BertPretrainDataConfig(
                input_path='dummy',
                max_predictions_per_seq=76,
                seq_length=512,
                global_batch_size=10),
            validation_data=pretrain_dataloader.BertPretrainDataConfig(
                input_path='dummy',
                max_predictions_per_seq=76,
                seq_length=512,
                global_batch_size=10))

        # set only 1 step for each stage
        progressive_config = distillation.BertDistillationProgressiveConfig()
        progressive_config.layer_wise_distill_config.transfer_teacher_layers = (
            transfer_teacher_layers)
        progressive_config.layer_wise_distill_config.num_steps = 1
        progressive_config.pretrain_distill_config.num_steps = 1

        optimization_config = optimization.OptimizationConfig(
            optimizer=optimization.OptimizerConfig(
                type='lamb',
                lamb=optimization.LAMBConfig(weight_decay_rate=0.0001,
                                             exclude_from_weight_decay=[
                                                 'LayerNorm', 'layer_norm',
                                                 'bias', 'no_norm'
                                             ])),
            learning_rate=optimization.LrConfig(
                type='polynomial',
                polynomial=optimization.PolynomialLrConfig(
                    initial_learning_rate=1.5e-3,
                    decay_steps=10000,
                    end_learning_rate=1.5e-3)),
            warmup=optimization.WarmupConfig(
                type='linear',
                linear=optimization.LinearWarmupConfig(
                    warmup_learning_rate=0)))

        exp_config = cfg.ExperimentConfig(
            task=task_config,
            trainer=prog_trainer_lib.ProgressiveTrainerConfig(
                progressive=progressive_config,
                optimizer_config=optimization_config))

        # Create a teacher model checkpoint.
        teacher_encoder = encoders.build_encoder(
            task_config.teacher_model.encoder)
        pretrainer_config = task_config.teacher_model
        if pretrainer_config.cls_heads:
            teacher_cls_heads = [
                layers.ClassificationHead(**head_cfg.as_dict())
                for head_cfg in pretrainer_config.cls_heads
            ]
        else:
            teacher_cls_heads = []

        masked_lm = layers.MobileBertMaskedLM(
            embedding_table=teacher_encoder.get_embedding_table(),
            activation=tf_utils.get_activation(
                pretrainer_config.mlm_activation),
            initializer=tf.keras.initializers.TruncatedNormal(
                stddev=pretrainer_config.mlm_initializer_range),
            name='cls/predictions')
        teacher_pretrainer = models.BertPretrainerV2(
            encoder_network=teacher_encoder,
            classification_heads=teacher_cls_heads,
            customized_masked_lm=masked_lm)

        # The model variables will be created after the forward call.
        _ = teacher_pretrainer(teacher_pretrainer.inputs)
        teacher_pretrainer_ckpt = tf.train.Checkpoint(
            **teacher_pretrainer.checkpoint_items)
        teacher_ckpt_path = os.path.join(self.get_temp_dir(),
                                         'teacher_model.ckpt')
        teacher_pretrainer_ckpt.save(teacher_ckpt_path)
        exp_config.task.teacher_model_init_checkpoint = self.get_temp_dir()

        return exp_config
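
As a follow-up sketch (names mirror the snippet above; this is not the distillation task's exact code), the saved teacher weights can be read back by checkpoint_items key, e.g. to warm-start a fresh teacher encoder:

new_teacher_encoder = encoders.build_encoder(task_config.teacher_model.encoder)
tf.train.Checkpoint(encoder=new_teacher_encoder).read(
    tf.train.latest_checkpoint(
        exp_config.task.teacher_model_init_checkpoint)).expect_partial()
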