Example 1
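These snippets appear to be drawn from the sentence-prediction task and serving tests in the TensorFlow Model Garden. None of them is self-contained: each assumes a shared import preamble plus a test fixture that provides self._train_data_config and get_model_config, and the extra test-method arguments (num_classes, init_cls_pooler, metric_type, inputs_only, use_v2_feature_names) imply @parameterized.parameters decorators that this listing has stripped. A minimal sketch of that scaffolding, assuming the Model Garden module layout and plausible fixture values:

import functools
import os

from absl.testing import parameterized
import numpy as np
import orbit
import tensorflow as tf

# NOTE: the exact import paths are assumptions based on the Model Garden
# layout and may need adjusting for a given release.
from official.core import export_savedmodel_util
from official.nlp.configs import bert
from official.nlp.configs import encoders
from official.nlp.data import sentence_prediction_dataloader
from official.nlp.serving import export_savedmodel
from official.nlp.serving import serving_modules
from official.nlp.tasks import masked_lm
from official.nlp.tasks import sentence_prediction


class SentencePredictionTaskTest(tf.test.TestCase, parameterized.TestCase):

  def setUp(self):
    super().setUp()
    # Fixture values (seq_length, batch size) are assumptions.
    self._train_data_config = (
        sentence_prediction_dataloader.SentencePredictionDataConfig(
            input_path="dummy", seq_length=128, global_batch_size=1))

  def get_model_config(self, num_classes):
    return sentence_prediction.ModelConfig(
        encoder=encoders.EncoderConfig(
            bert=encoders.BertEncoderConfig(vocab_size=30522, num_layers=1)),
        num_classes=num_classes)

Each parameterized test below would then carry a decorator such as @parameterized.parameters(1, 2) supplying its extra argument.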
  def test_prediction(self, num_classes):
    task_config = sentence_prediction.SentencePredictionConfig(
        model=self.get_model_config(num_classes=num_classes),
        train_data=self._train_data_config)
    task = sentence_prediction.SentencePredictionTask(task_config)
    model = task.build_model()

    test_data_path = os.path.join(self.get_temp_dir(), "test.tf_record")
    seq_length = 16
    num_examples = 100
    _create_fake_dataset(
        test_data_path,
        seq_length=seq_length,
        num_classes=num_classes,
        num_examples=num_examples)

    test_data_config = (
        sentence_prediction_dataloader.SentencePredictionDataConfig(
            input_path=test_data_path,
            seq_length=seq_length,
            is_training=False,
            label_type="int" if num_classes > 1 else "float",
            global_batch_size=16,
            drop_remainder=False,
            include_example_id=True))

    predictions = sentence_prediction.predict(task, test_data_config, model)
    self.assertLen(predictions, num_examples)
    for prediction in predictions:
      self.assertEqual(prediction.dtype,
                       tf.int64 if num_classes > 1 else tf.float32)
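Example 1 also calls a module-level _create_fake_dataset helper that the listing omits. A plausible reconstruction that writes random records in the feature layout the data config above expects; the feature names and the random-value scheme are assumptions:

def _create_fake_dataset(output_path, seq_length, num_classes, num_examples):
  """Writes `num_examples` random examples to a TFRecord file."""

  def create_int_feature(values):
    return tf.train.Feature(
        int64_list=tf.train.Int64List(value=np.asarray(values).tolist()))

  def create_float_feature(values):
    return tf.train.Feature(
        float_list=tf.train.FloatList(value=np.asarray(values).tolist()))

  with tf.io.TFRecordWriter(output_path) as writer:
    for i in range(num_examples):
      input_ids = np.random.randint(100, size=(seq_length,))
      features = {
          "input_ids": create_int_feature(input_ids),
          "input_mask": create_int_feature(np.ones_like(input_ids)),
          "segment_ids": create_int_feature(np.ones_like(input_ids)),
          # include_example_id=True in the data config requires this field.
          "example_id": create_int_feature([i]),
      }
      if num_classes == 1:
        # Regression case, matching label_type="float" above.
        features["label_ids"] = create_float_feature([np.random.random()])
      else:
        features["label_ids"] = create_int_feature(
            [np.random.randint(num_classes)])
      tf_example = tf.train.Example(
          features=tf.train.Features(feature=features))
      writer.write(tf_example.SerializeToString())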
Example 2
  def test_task(self, init_cls_pooler):
    # Saves a checkpoint.
    pretrain_cfg = bert.PretrainerConfig(
        encoder=encoders.EncoderConfig(
            bert=encoders.BertEncoderConfig(vocab_size=30522, num_layers=1)),
        cls_heads=[
            bert.ClsHeadConfig(
                inner_dim=768, num_classes=2, name="next_sentence")
        ])
    pretrain_model = masked_lm.MaskedLMTask(None).build_model(pretrain_cfg)
    # The model variables are created by this first forward call.
    _ = pretrain_model(pretrain_model.inputs)
    ckpt = tf.train.Checkpoint(
        model=pretrain_model, **pretrain_model.checkpoint_items)
    init_path = ckpt.save(self.get_temp_dir())

    # Creates the task.
    config = sentence_prediction.SentencePredictionConfig(
        init_checkpoint=init_path,
        model=self.get_model_config(num_classes=2),
        train_data=self._train_data_config,
        init_cls_pooler=init_cls_pooler)
    task = sentence_prediction.SentencePredictionTask(config)
    model = task.build_model()
    metrics = task.build_metrics()
    dataset = task.build_inputs(config.train_data)

    iterator = iter(dataset)
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
    task.initialize(model)
    task.train_step(next(iterator), model, optimizer, metrics=metrics)
    task.validation_step(next(iterator), model, metrics=metrics)
Example 3
  def test_metrics_and_losses(self, num_classes):
    config = sentence_prediction.SentencePredictionConfig(
        init_checkpoint=self.get_temp_dir(),
        model=self.get_model_config(num_classes),
        train_data=self._train_data_config)
    task = sentence_prediction.SentencePredictionTask(config)
    model = task.build_model()
    metrics = task.build_metrics()
    if num_classes == 1:
      self.assertIsInstance(metrics[0], tf.keras.metrics.MeanSquaredError)
    else:
      self.assertIsInstance(metrics[0],
                            tf.keras.metrics.SparseCategoricalAccuracy)

    dataset = task.build_inputs(config.train_data)
    iterator = iter(dataset)
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
    task.train_step(next(iterator), model, optimizer, metrics=metrics)

    logs = task.validation_step(next(iterator), model, metrics=metrics)
    loss = logs["loss"].numpy()
    if num_classes == 1:
      self.assertGreater(loss, 1.0)
    else:
      self.assertLess(loss, 1.0)
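The asymmetric thresholds in Example 3's final assertions are deliberate: with num_classes == 2, an untrained softmax classifier's cross-entropy starts near ln 2 ≈ 0.69, safely below 1.0, while the single-output regression head (num_classes == 1) typically starts with a mean-squared error well above 1.0 on the random float labels.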
Example 4
  def test_sentence_prediction(self):
    config = sentence_prediction.SentencePredictionConfig(
        model=sentence_prediction.ModelConfig(
            encoder=encoders.EncoderConfig(
                bert=encoders.BertEncoderConfig(vocab_size=30522,
                                                num_layers=1)),
            num_classes=2))
    task = sentence_prediction.SentencePredictionTask(config)
    model = task.build_model()
    ckpt = tf.train.Checkpoint(model=model)
    ckpt_path = ckpt.save(self.get_temp_dir())
    export_module_cls = export_savedmodel.lookup_export_module(task)
    serving_params = {"inputs_only": False}
    params = export_module_cls.Params(**serving_params)
    export_module = export_module_cls(params=params, model=model)
    export_dir = export_savedmodel_util.export(
        export_module,
        function_keys=["serve"],
        checkpoint_path=ckpt_path,
        export_savedmodel_dir=self.get_temp_dir())
    imported = tf.saved_model.load(export_dir)
    serving_fn = imported.signatures["serving_default"]

    dummy_ids = tf.ones((1, 5), dtype=tf.int32)
    inputs = dict(
        input_word_ids=dummy_ids,
        input_mask=dummy_ids,
        input_type_ids=dummy_ids)
    ref_outputs = model(inputs)
    outputs = serving_fn(**inputs)
    self.assertAllClose(ref_outputs, outputs["outputs"])
    self.assertEqual(outputs["outputs"].shape, (1, 2))
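In Example 4, export_savedmodel.lookup_export_module resolves the task to its registered serving module (apparently the same serving_modules.SentencePrediction exercised directly in Examples 6 and 13), and because inputs_only=False the exported serving_default signature accepts the full input_word_ids/input_mask/input_type_ids triple.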
Example 5
  def test_task(self):
    config = sentence_prediction.SentencePredictionConfig(
        init_checkpoint=self.get_temp_dir(),
        model=self.get_model_config(2),
        train_data=self._train_data_config)
    task = sentence_prediction.SentencePredictionTask(config)
    model = task.build_model()
    metrics = task.build_metrics()
    dataset = task.build_inputs(config.train_data)

    iterator = iter(dataset)
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
    task.train_step(next(iterator), model, optimizer, metrics=metrics)
    task.validation_step(next(iterator), model, metrics=metrics)

    # Saves a checkpoint.
    pretrain_cfg = bert.BertPretrainerConfig(
        encoder=encoders.TransformerEncoderConfig(
            vocab_size=30522, num_layers=1),
        cls_heads=[
            bert.ClsHeadConfig(
                inner_dim=10, num_classes=3, name="next_sentence")
        ])
    pretrain_model = bert.instantiate_pretrainer_from_cfg(pretrain_cfg)
    ckpt = tf.train.Checkpoint(
        model=pretrain_model, **pretrain_model.checkpoint_items)
    ckpt.save(config.init_checkpoint)
    task.initialize(model)
Example 6
  def test_sentence_prediction_text(self, inputs_only):
    vocab_file_path = os.path.join(self.get_temp_dir(), "vocab.txt")
    _create_fake_vocab_file(vocab_file_path)
    config = sentence_prediction.SentencePredictionConfig(
        model=sentence_prediction.ModelConfig(
            encoder=encoders.EncoderConfig(bert=encoders.BertEncoderConfig(
                vocab_size=30522, num_layers=1)),
            num_classes=2))
    task = sentence_prediction.SentencePredictionTask(config)
    model = task.build_model()
    params = serving_modules.SentencePrediction.Params(
        inputs_only=inputs_only,
        parse_sequence_length=10,
        text_fields=["foo", "bar"],
        vocab_file=vocab_file_path)
    export_module = serving_modules.SentencePrediction(
        params=params, model=model)
    examples = _create_fake_serialized_examples({
        "foo": b"hello world",
        "bar": b"hello world"
    })
    functions = export_module.get_inference_signatures(
        {"serve_text_examples": "serving_default"})
    outputs = functions["serving_default"](examples)
    self.assertEqual(outputs["outputs"].shape, (10, 2))
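Examples 6 and 13 also depend on two module-level helpers the listing omits: _create_fake_vocab_file and _create_fake_serialized_examples. A sketch consistent with how they are used here; the batch size of 10 follows from the (10, 2) shape assertions, while the vocabulary contents are assumptions:

def _create_fake_vocab_file(vocab_file_path):
  # A tiny BERT-style vocabulary: special tokens plus the two words the
  # fake text examples use.
  tokens = ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]", "hello", "world"]
  with tf.io.gfile.GFile(vocab_file_path, "w") as outfile:
    outfile.write("\n".join(tokens))


def _create_fake_serialized_examples(features_dict):
  """Serializes a batch of 10 tf.train.Example protos from a feature spec."""

  def create_bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

  def create_int_feature(values):
    return tf.train.Feature(
        int64_list=tf.train.Int64List(value=np.asarray(values).tolist()))

  examples = []
  for _ in range(10):
    features = {}
    for key, value in features_dict.items():
      if isinstance(value, bytes):
        features[key] = create_bytes_feature(value)
      else:
        features[key] = create_int_feature(value)
    example = tf.train.Example(features=tf.train.Features(feature=features))
    examples.append(example.SerializeToString())
  return tf.constant(examples)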
Example 7
  def test_task_with_fit(self):
    config = sentence_prediction.SentencePredictionConfig(
        model=self.get_model_config(2), train_data=self._train_data_config)
    task = sentence_prediction.SentencePredictionTask(config)
    model = task.build_model()
    model = task.compile_model(
        model,
        optimizer=tf.keras.optimizers.SGD(learning_rate=0.1),
        train_step=task.train_step,
        metrics=task.build_metrics())
    dataset = task.build_inputs(config.train_data)
    logs = model.fit(dataset, epochs=1, steps_per_epoch=2)
    self.assertIn("loss", logs.history)
Example 8
    def _run_task(self, config):
        task = sentence_prediction.SentencePredictionTask(config)
        model = task.build_model()
        metrics = task.build_metrics()

        strategy = tf.distribute.get_strategy()
        dataset = strategy.distribute_datasets_from_function(
            functools.partial(task.build_inputs, config.train_data))

        iterator = iter(dataset)
        optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
        task.train_step(next(iterator), model, optimizer, metrics=metrics)
        return task.validation_step(next(iterator), model, metrics=metrics)
Example 9
    def _run_task(self, config):
        task = sentence_prediction.SentencePredictionTask(config)
        model = task.build_model()
        metrics = task.build_metrics()

        strategy = tf.distribute.get_strategy()
        dataset = orbit.utils.make_distributed_dataset(strategy,
                                                       task.build_inputs,
                                                       config.train_data)

        iterator = iter(dataset)
        optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
        task.train_step(next(iterator), model, optimizer, metrics=metrics)
        task.validation_step(next(iterator), model, metrics=metrics)
Example 10
    def _run_task(self, config):
        task = sentence_prediction.SentencePredictionTask(config)
        model = task.build_model()
        metrics = task.build_metrics()

        strategy = tf.distribute.get_strategy()
        dataset = strategy.distribute_datasets_from_function(
            functools.partial(task.build_inputs, config.train_data))

        iterator = iter(dataset)
        optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
        task.train_step(next(iterator), model, optimizer, metrics=metrics)
        model.save(os.path.join(self.get_temp_dir(), "saved_model"))
        return task.validation_step(next(iterator), model, metrics=metrics)
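Examples 8 through 10 run the same distributed smoke test with two equivalent ways of building the input pipeline: Strategy.distribute_datasets_from_function, called directly in Examples 8 and 10 (the experimental_-prefixed spelling of that method was deprecated in TF 2.4), and orbit.utils.make_distributed_dataset in Example 9, which is effectively a thin Orbit wrapper around the same mechanism. Example 10 additionally exports the model with model.save.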
Example 11
  def test_np_metrics(self, metric_type, num_classes):
    config = sentence_prediction.SentencePredictionConfig(
        metric_type=metric_type,
        init_checkpoint=self.get_temp_dir(),
        model=self.get_model_config(num_classes),
        train_data=self._train_data_config)
    task = sentence_prediction.SentencePredictionTask(config)
    model = task.build_model()
    dataset = task.build_inputs(config.train_data)

    iterator = iter(dataset)
    strategy = tf.distribute.get_strategy()
    distributed_outputs = strategy.run(
        functools.partial(task.validation_step, model=model),
        args=(next(iterator),))
    outputs = tf.nest.map_structure(strategy.experimental_local_results,
                                    distributed_outputs)
    aggregated = task.aggregate_logs(step_outputs=outputs)
    aggregated = task.aggregate_logs(state=aggregated, step_outputs=outputs)
    self.assertIn(metric_type, task.reduce_aggregated_logs(aggregated))
Example 12
    def test_task(self):
        config = sentence_prediction.SentencePredictionConfig(
            network=bert.BertPretrainerConfig(
                encoders.TransformerEncoderConfig(vocab_size=30522,
                                                  num_layers=1),
                num_masked_tokens=0,
                cls_heads=[
                    bert.ClsHeadConfig(inner_dim=10,
                                       num_classes=3,
                                       name="sentence_prediction")
                ]),
            train_data=bert.BertSentencePredictionDataConfig(
                input_path="dummy", seq_length=128, global_batch_size=1))
        task = sentence_prediction.SentencePredictionTask(config)
        model = task.build_model()
        metrics = task.build_metrics()
        dataset = task.build_inputs(config.train_data)

        iterator = iter(dataset)
        optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
        task.train_step(next(iterator), model, optimizer, metrics=metrics)
        task.validation_step(next(iterator), model, metrics=metrics)
Example 13
    def test_sentence_prediction(self, use_v2_feature_names):
        if use_v2_feature_names:
            input_word_ids_field = "input_word_ids"
            input_type_ids_field = "input_type_ids"
        else:
            input_word_ids_field = "input_ids"
            input_type_ids_field = "segment_ids"

        config = sentence_prediction.SentencePredictionConfig(
            model=sentence_prediction.ModelConfig(
                encoder=encoders.EncoderConfig(bert=encoders.BertEncoderConfig(
                    vocab_size=30522, num_layers=1)),
                num_classes=2))
        task = sentence_prediction.SentencePredictionTask(config)
        model = task.build_model()
        params = serving_modules.SentencePrediction.Params(
            inputs_only=True,
            parse_sequence_length=10,
            use_v2_feature_names=use_v2_feature_names)
        export_module = serving_modules.SentencePrediction(params=params,
                                                           model=model)
        functions = export_module.get_inference_signatures({
            "serve": "serving_default",
            "serve_examples": "serving_examples"
        })
        self.assertSameElements(functions.keys(),
                                ["serving_default", "serving_examples"])
        dummy_ids = tf.ones((10, 10), dtype=tf.int32)
        outputs = functions["serving_default"](dummy_ids)
        self.assertEqual(outputs["outputs"].shape, (10, 2))

        params = serving_modules.SentencePrediction.Params(
            inputs_only=False,
            parse_sequence_length=10,
            use_v2_feature_names=use_v2_feature_names)
        export_module = serving_modules.SentencePrediction(params=params,
                                                           model=model)
        functions = export_module.get_inference_signatures({
            "serve": "serving_default",
            "serve_examples": "serving_examples"
        })
        outputs = functions["serving_default"](input_word_ids=dummy_ids,
                                               input_mask=dummy_ids,
                                               input_type_ids=dummy_ids)
        self.assertEqual(outputs["outputs"].shape, (10, 2))

        dummy_ids = tf.ones((10,), dtype=tf.int32)
        examples = _create_fake_serialized_examples({
            input_word_ids_field: dummy_ids,
            "input_mask": dummy_ids,
            input_type_ids_field: dummy_ids
        })
        outputs = functions["serving_examples"](examples)
        self.assertEqual(outputs["outputs"].shape, (10, 2))

        with self.assertRaises(ValueError):
            _ = export_module.get_inference_signatures({"foo": None})