def test_task(self):
    """Smoke-tests MaskedLMTask end to end.

    Builds the model, metrics, and input pipeline from a tiny config, runs
    one train step and one validation step, then saves a checkpoint and
    restores it via task.initialize().
    """
    config = masked_lm.MaskedLMConfig(
        init_checkpoint=self.get_temp_dir(),
        scale_loss=True,
        model=bert.PretrainerConfig(
            encoder=encoders.EncoderConfig(
                # Single-layer encoder keeps the test fast.
                bert=encoders.BertEncoderConfig(vocab_size=30522,
                                                num_layers=1)),
            cls_heads=[
                bert.ClsHeadConfig(
                    inner_dim=10, num_classes=2, name="next_sentence")
            ]),
        train_data=pretrain_dataloader.BertPretrainDataConfig(
            input_path="dummy",
            max_predictions_per_seq=20,
            seq_length=128,
            global_batch_size=1))
    task = masked_lm.MaskedLMTask(config)
    model = task.build_model()
    metrics = task.build_metrics()
    dataset = task.build_inputs(config.train_data)

    iterator = iter(dataset)
    # `learning_rate` is the supported keyword; `lr` is deprecated in TF2.
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
    task.train_step(next(iterator), model, optimizer, metrics=metrics)
    task.validation_step(next(iterator), model, metrics=metrics)

    # Saves a checkpoint.
    ckpt = tf.train.Checkpoint(model=model, **model.checkpoint_items)
    ckpt.save(config.init_checkpoint)
    task.initialize(model)
# Ejemplo n.º 2
# 0
def bert_pretraining() -> cfg.ExperimentConfig:
    """BERT pretraining experiment."""
    # AdamW with weight decay excluded for normalization and bias variables,
    # polynomial learning-rate decay from 1e-4 to 0, and polynomial warmup.
    optimizer_config = optimization.OptimizationConfig({
        'optimizer': {
            'type': 'adamw',
            'adamw': {
                'weight_decay_rate': 0.01,
                'exclude_from_weight_decay':
                    ['LayerNorm', 'layer_norm', 'bias'],
            }
        },
        'learning_rate': {
            'type': 'polynomial',
            'polynomial': {
                'initial_learning_rate': 1e-4,
                'end_learning_rate': 0.0,
            }
        },
        'warmup': {
            'type': 'polynomial'
        }
    })
    task_config = masked_lm.MaskedLMConfig(
        train_data=pretrain_dataloader.BertPretrainDataConfig(),
        validation_data=pretrain_dataloader.BertPretrainDataConfig(
            is_training=False))
    return cfg.ExperimentConfig(
        task=task_config,
        trainer=cfg.TrainerConfig(
            train_steps=1000000, optimizer_config=optimizer_config),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
    def test_task_determinism(self):
        """Runs the task twice from the same seed and expects identical results."""
        encoder_config = encoders.EncoderConfig(
            bert=encoders.BertEncoderConfig(vocab_size=30522, num_layers=1))
        heads = [
            bert.ClsHeadConfig(
                inner_dim=10, num_classes=2, name="next_sentence")
        ]
        config = masked_lm.MaskedLMConfig(
            init_checkpoint=self.get_temp_dir(),
            scale_loss=True,
            model=bert.PretrainerConfig(
                encoder=encoder_config, cls_heads=heads),
            train_data=pretrain_dataloader.BertPretrainDataConfig(
                max_predictions_per_seq=20,
                seq_length=128,
                global_batch_size=1))

        # Two runs seeded identically must produce bit-identical outputs.
        tf.keras.utils.set_random_seed(1)
        logs1, validation_logs1, weights1 = self._build_and_run_model(config)
        tf.keras.utils.set_random_seed(1)
        logs2, validation_logs2, weights2 = self._build_and_run_model(config)

        self.assertEqual(logs1["loss"], logs2["loss"])
        self.assertEqual(validation_logs1["loss"], validation_logs2["loss"])
        for weight1, weight2 in zip(weights1, weights2):
            self.assertAllEqual(weight1, weight2)
# Ejemplo n.º 4
# 0
def roformer_pretraining() -> cfg.ExperimentConfig:
  """RoFormer pretraining experiment."""
  config = cfg.ExperimentConfig(
      # XLA compilation enabled for this experiment.
      runtime=cfg.RuntimeConfig(enable_xla=True),
      task=masked_lm.MaskedLMConfig(
          model=bert.PretrainerConfig(
              # The RoFormer encoder is plugged into the BERT pretrainer via
              # the generic 'any' encoder slot.
              encoder=encoders.EncoderConfig(
                  type='any', any=roformer.RoformerEncoderConfig()),
              cls_heads=[
                  bert.ClsHeadConfig(
                      inner_dim=768,
                      num_classes=2,
                      dropout_rate=0.1,
                      name='next_sentence')
              ]),
          train_data=pretrain_dataloader.BertPretrainDataConfig(
              use_v2_feature_names=True),
          validation_data=pretrain_dataloader.BertPretrainDataConfig(
              use_v2_feature_names=True, is_training=False)),
      trainer=cfg.TrainerConfig(
          optimizer_config=RoformerOptimizationConfig(), train_steps=1000000),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
# Ejemplo n.º 5
# 0
def bert_pretraining() -> cfg.ExperimentConfig:
    """BERT pretraining experiment."""
    task_config = masked_lm.MaskedLMConfig(
        train_data=pretrain_dataloader.BertPretrainDataConfig(),
        validation_data=pretrain_dataloader.BertPretrainDataConfig(
            is_training=False))
    # Trainer settings come from the shared module-level _TRAINER config.
    return cfg.ExperimentConfig(
        task=task_config,
        trainer=_TRAINER,
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
    def test_masked_lm(self, use_v2_feature_names):
        """Builds the MaskedLM serving module and exercises both signatures.

        Creates a tiny pretrainer model, wraps it in the MaskedLM serving
        module, then calls the dense-tensor signature ('serving_default') and
        the serialized tf.Example signature ('serving_examples'), checking the
        classification output shape for each.
        """
        # Feature-name variants: v2 names vs. legacy v1 names.
        if use_v2_feature_names:
            input_word_ids_field = "input_word_ids"
            input_type_ids_field = "input_type_ids"
        else:
            input_word_ids_field = "input_ids"
            input_type_ids_field = "segment_ids"
        config = masked_lm.MaskedLMConfig(model=bert.PretrainerConfig(
            encoder=encoders.EncoderConfig(bert=encoders.BertEncoderConfig(
                vocab_size=30522, num_layers=1)),
            cls_heads=[
                bert.ClsHeadConfig(
                    inner_dim=10, num_classes=2, name="next_sentence")
            ]))
        task = masked_lm.MaskedLMTask(config)
        model = task.build_model()
        params = serving_modules.MaskedLM.Params(
            parse_sequence_length=10,
            max_predictions_per_seq=5,
            use_v2_feature_names=use_v2_feature_names)
        export_module = serving_modules.MaskedLM(params=params, model=model)
        functions = export_module.get_inference_signatures({
            "serve":
            "serving_default",
            "serve_examples":
            "serving_examples"
        })
        self.assertSameElements(functions.keys(),
                                ["serving_default", "serving_examples"])
        # Dense-tensor signature: batch 10, sequence length 10, 5 mask slots.
        dummy_ids = tf.ones((10, 10), dtype=tf.int32)
        dummy_pos = tf.ones((10, 5), dtype=tf.int32)
        outputs = functions["serving_default"](input_word_ids=dummy_ids,
                                               input_mask=dummy_ids,
                                               input_type_ids=dummy_ids,
                                               masked_lm_positions=dummy_pos)
        self.assertEqual(outputs["classification"].shape, (10, 2))

        # tf.Example signature: per-example 1-D features serialized by the
        # helper; field names follow the chosen v1/v2 naming.
        dummy_ids = tf.ones((10, ), dtype=tf.int32)
        dummy_pos = tf.ones((5, ), dtype=tf.int32)
        examples = _create_fake_serialized_examples({
            input_word_ids_field:
            dummy_ids,
            "input_mask":
            dummy_ids,
            input_type_ids_field:
            dummy_ids,
            "masked_lm_positions":
            dummy_pos
        })
        outputs = functions["serving_examples"](examples)
        self.assertEqual(outputs["classification"].shape, (10, 2))
# Ejemplo n.º 7
# 0
def bert_dynamic() -> cfg.ExperimentConfig:
    """BERT base with dynamic input sequences.

    TPU needs to run with tf.data service with round-robin behavior.
    """
    # Train split uses the dynamic (bucketed) dataloader; validation uses
    # the standard fixed-length loader.
    task_config = masked_lm.MaskedLMConfig(
        train_data=pretrain_dynamic_dataloader.BertPretrainDataConfig(),
        validation_data=pretrain_dataloader.BertPretrainDataConfig(
            is_training=False))
    return cfg.ExperimentConfig(
        task=task_config,
        trainer=_TRAINER,
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
# Ejemplo n.º 8
# 0
 def test_masked_lm(self):
   """Round-trips a MaskedLM export module through SavedModel and serves it."""
   config = masked_lm.MaskedLMConfig(
       model=bert.PretrainerConfig(
           encoder=encoders.EncoderConfig(
               bert=encoders.BertEncoderConfig(vocab_size=30522,
                                               num_layers=1)),
           cls_heads=[
               bert.ClsHeadConfig(inner_dim=10, num_classes=2, name="foo")
           ]))
   task = masked_lm.MaskedLMTask(config)
   model = task.build_model()
   # Save a checkpoint for the export utility to restore from.
   ckpt = tf.train.Checkpoint(model=model)
   ckpt_path = ckpt.save(self.get_temp_dir())
   export_module_cls = export_savedmodel.lookup_export_module(task)
   serving_params = {
       "cls_head_name": "foo",
       "parse_sequence_length": 10,
       "max_predictions_per_seq": 5
   }
   params = export_module_cls.Params(**serving_params)
   export_module = export_module_cls(params=params, model=model)
   export_dir = export_savedmodel_util.export(
       export_module,
       function_keys={
           "serve": "serving_default",
           "serve_examples": "serving_examples"
       },
       checkpoint_path=ckpt_path,
       export_savedmodel_dir=self.get_temp_dir())
   # Reload the SavedModel and call the dense-tensor signature.
   imported = tf.saved_model.load(export_dir)
   self.assertSameElements(imported.signatures.keys(),
                           ["serving_default", "serving_examples"])
   serving_fn = imported.signatures["serving_default"]
   # One example, sequence length 10, 5 masked positions.
   dummy_ids = tf.ones((1, 10), dtype=tf.int32)
   dummy_pos = tf.ones((1, 5), dtype=tf.int32)
   outputs = serving_fn(
       input_word_ids=dummy_ids,
       input_mask=dummy_ids,
       input_type_ids=dummy_ids,
       masked_lm_positions=dummy_pos)
   self.assertEqual(outputs["classification"].shape, (1, 2))
# Ejemplo n.º 9
# 0
    def test_task(self):
        """Builds the masked LM task and runs one train and validation step."""
        config = masked_lm.MaskedLMConfig(
            model=bert.BertPretrainerConfig(
                encoders.TransformerEncoderConfig(vocab_size=30522,
                                                  num_layers=1),
                num_masked_tokens=20,
                cls_heads=[
                    bert.ClsHeadConfig(inner_dim=10,
                                       num_classes=2,
                                       name="next_sentence")
                ]),
            train_data=bert.BertPretrainDataConfig(input_path="dummy",
                                                   max_predictions_per_seq=20,
                                                   seq_length=128,
                                                   global_batch_size=1))
        task = masked_lm.MaskedLMTask(config)
        model = task.build_model()
        metrics = task.build_metrics()
        dataset = task.build_inputs(config.train_data)

        iterator = iter(dataset)
        # `learning_rate` replaces the deprecated `lr` keyword in TF2 Keras.
        optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
        task.train_step(next(iterator), model, optimizer, metrics=metrics)
        task.validation_step(next(iterator), model, metrics=metrics)
    def test_distribution_strategy(self, distribution_strategy):
        """Compares validation metrics between dynamic and static padding.

        Runs a validation step over the same fake dataset twice under the
        given distribution strategy -- once with the dynamic (bucketed)
        dataloader and once with the static fixed-length dataloader -- and
        checks that the resulting metrics agree (losses excepted).
        """
        max_seq_length = 128
        batch_size = 8
        input_path = os.path.join(self.get_temp_dir(), 'train.tf_record')
        # Examples are 60 tokens, shorter than max_seq_length, so the two
        # loaders pad them differently.
        _create_fake_dataset(input_path,
                             seq_length=60,
                             num_masked_tokens=20,
                             max_seq_length=max_seq_length,
                             num_examples=batch_size)
        # First pass: dynamic padding via sequence-length buckets.
        data_config = pretrain_dynamic_dataloader.BertPretrainDataConfig(
            is_training=False,
            input_path=input_path,
            seq_bucket_lengths=[64, 128],
            global_batch_size=batch_size)
        dataloader = pretrain_dynamic_dataloader.PretrainingDynamicDataLoader(
            data_config)
        distributed_ds = orbit.utils.make_distributed_dataset(
            distribution_strategy, dataloader.load)
        train_iter = iter(distributed_ds)
        with distribution_strategy.scope():
            config = masked_lm.MaskedLMConfig(
                init_checkpoint=self.get_temp_dir(),
                model=bert.PretrainerConfig(
                    encoders.EncoderConfig(bert=encoders.BertEncoderConfig(
                        vocab_size=30522, num_layers=1)),
                    cls_heads=[
                        bert.ClsHeadConfig(inner_dim=10,
                                           num_classes=2,
                                           name='next_sentence')
                    ]),
                train_data=data_config)
            task = masked_lm.MaskedLMTask(config)
            model = task.build_model()
            metrics = task.build_metrics()

        @tf.function
        def step_fn(features):
            return task.validation_step(features, model, metrics=metrics)

        distributed_outputs = distribution_strategy.run(
            step_fn, args=(next(train_iter), ))
        local_results = tf.nest.map_structure(
            distribution_strategy.experimental_local_results,
            distributed_outputs)
        logging.info('Dynamic padding:  local_results= %s', str(local_results))
        dynamic_metrics = {}
        for metric in metrics:
            dynamic_metrics[metric.name] = metric.result()

        # Second pass: static padding to max_seq_length, same model.
        data_config = pretrain_dataloader.BertPretrainDataConfig(
            is_training=False,
            input_path=input_path,
            seq_length=max_seq_length,
            max_predictions_per_seq=20,
            global_batch_size=batch_size)
        dataloader = pretrain_dataloader.BertPretrainDataLoader(data_config)
        distributed_ds = orbit.utils.make_distributed_dataset(
            distribution_strategy, dataloader.load)
        train_iter = iter(distributed_ds)
        with distribution_strategy.scope():
            # Fresh metrics so the two passes do not accumulate together.
            metrics = task.build_metrics()

        @tf.function
        def step_fn_b(features):
            return task.validation_step(features, model, metrics=metrics)

        distributed_outputs = distribution_strategy.run(
            step_fn_b, args=(next(train_iter), ))
        local_results = tf.nest.map_structure(
            distribution_strategy.experimental_local_results,
            distributed_outputs)
        logging.info('Static padding:  local_results= %s', str(local_results))
        static_metrics = {}
        for metric in metrics:
            static_metrics[metric.name] = metric.result()
        for key in static_metrics:
            # We need to investigate the differences on losses.
            if key != 'next_sentence_loss':
                self.assertEqual(dynamic_metrics[key], static_metrics[key])