Example no. 1
0
def _make_seq2seq_model(temp_dir):
    """Build and initialize a small Transformer seq2seq model for tests.

    The source and target share a single 4-token vocabulary written under
    ``temp_dir``.
    """
    vocab_path = os.path.join(temp_dir, "vocab.txt")
    shared_vocab = test_util.make_vocab(vocab_path, ["1", "2", "3", "4"])
    seq2seq = models.Transformer(
        source_inputter=inputters.WordEmbedder(20),
        target_inputter=inputters.WordEmbedder(20),
        num_layers=3,
        num_units=20,
        num_heads=4,
        ffn_inner_dim=40,
    )
    seq2seq.initialize(
        dict(source_vocabulary=shared_vocab, target_vocabulary=shared_vocab)
    )
    return seq2seq
Example no. 2
0
 def testWordEmbedderForDecoder(self):
     """Check the decoder-mode features produced by WordEmbedder."""
     vocab_path = os.path.join(self.get_temp_dir(), "vocab.txt")
     vocabulary = test_util.make_vocab(
         vocab_path, ["the", "world", "hello", "toto"])
     inputter = text_inputter.WordEmbedder(embedding_size=10)
     inputter.set_decoder_mode(mark_start=True, mark_end=True)
     inputter.initialize({"vocabulary": vocabulary})
     features = self.evaluate(
         inputter.make_features(tf.constant("hello world")))
     # 2 tokens plus the start marker in "ids"; the end marker only
     # appears in the shifted "ids_out" sequence.
     self.assertEqual(features["length"], 3)
     self.assertEqual(
         inputter.get_length(features, ignore_special_tokens=True), 2)
     self.assertAllEqual(features["ids"], [1, 5, 4])
     self.assertAllEqual(features["ids_out"], [5, 4, 2])
Example no. 3
0
    def testWordEmbedderForDecoder(self):
        """Check decoder-mode features, lengths, and OOV token extraction."""
        vocabulary = test_util.make_vocab(
            os.path.join(self.get_temp_dir(), "vocab.txt"),
            ["the", "world", "hello", "toto"],
        )
        inputter = text_inputter.WordEmbedder(embedding_size=10)
        inputter.set_decoder_mode(mark_start=True, mark_end=True)
        inputter.initialize({"vocabulary": vocabulary})
        features = inputter.make_features(tf.constant("hello world !"))
        # 3 tokens plus the start marker; the end marker only appears in
        # the shifted "ids_out" sequence.
        self.assertEqual(features["length"], 4)
        self.assertEqual(inputter.get_length(features, ignore_special_tokens=True), 3)
        self.assertAllEqual(features["ids"], [1, 5, 4, 7])
        self.assertAllEqual(features["ids_out"], [5, 4, 7, 2])

        # "!" is not in the vocabulary, so it should be reported as OOV.
        oov = inputter.get_oov_tokens(features)
        self.assertListEqual(oov.numpy().flatten().tolist(), [b"!"])
Example no. 4
0
 def testBeamSearchWithMultiSourceEncoder(self):
     """Ensure the serving function traces with beam search and two sources."""
     vocab_path = test_util.make_vocab(
         os.path.join(self.get_temp_dir(), "vocab.txt"), ["1", "2", "3"])
     # All inputs share the same vocabulary file.
     data_config = {
         "source_1_vocabulary": vocab_path,
         "source_2_vocabulary": vocab_path,
         "target_vocabulary": vocab_path,
     }
     source_inputter = inputters.ParallelInputter(
         [inputters.WordEmbedder(32), inputters.WordEmbedder(32)])
     model = models.Transformer(
         source_inputter,
         inputters.WordEmbedder(32),
         num_layers=3,
         num_units=32,
         num_heads=8,
         ffn_inner_dim=64)
     model.initialize(data_config, params={"beam_width": 2})
     # Tracing the concrete function is the actual check: it should not raise.
     model.serve_function().get_concrete_function()
Example no. 5
0
  def testLanguageModelInputter(self):
    """Check LM features/labels with and without explicit sequence controls."""
    vocab_path = test_util.make_vocab(
        os.path.join(self.get_temp_dir(), "vocab.txt"), ["a", "b", "c"])

    # Explicit sequence controls: start marker enabled, end marker disabled.
    lm_inputter = models.LanguageModelInputter(embedding_size=10)
    lm_inputter.initialize({
        "vocabulary": vocab_path,
        "sequence_controls": {"start": True, "end": False}})
    features, labels = self.evaluate(
        lm_inputter.make_features(tf.constant("a b c")))
    self.assertAllEqual(features["ids"], [1, 3, 4, 5])
    self.assertEqual(features["length"], 4)
    self.assertAllEqual(labels["ids"], [1, 3, 4])
    self.assertAllEqual(labels["ids_out"], [3, 4, 5])
    self.assertEqual(labels["length"], 3)

    # Backward compatibility mode.
    lm_inputter = models.LanguageModelInputter(embedding_size=10)
    lm_inputter.initialize({"vocabulary": vocab_path})
    features, labels = self.evaluate(
        lm_inputter.make_features(tf.constant("a b c")))
    self.assertAllEqual(features["ids"], [3, 4, 5])
    self.assertEqual(features["length"], 3)
    self.assertAllEqual(labels["ids"], [3, 4, 5])
    self.assertAllEqual(labels["ids_out"], [4, 5, 2])
    self.assertEqual(labels["length"], 3)
Example no. 6
0
    def testCheckpointExport(self):
        """Export a checkpoint, then verify a fresh model restores the same weights."""
        vocab = test_util.make_vocab(
            os.path.join(self.get_temp_dir(), "vocab.txt"), ["a", "b", "c"])
        data_config = dict(source_vocabulary=vocab, target_vocabulary=vocab)

        def build_model():
            # Create an initialized TransformerBase with its variables built.
            m = models.TransformerBase()
            m.initialize(data_config)
            m.create_variables()
            return m

        exported_model = build_model()
        original_embedding = exported_model.features_inputter.embedding.numpy()

        export_dir = self.get_temp_dir()
        exporters.CheckpointExporter().export(exported_model, export_dir)

        # A second, freshly initialized model gets its weights from the export.
        restored_model = build_model()
        tf.train.Checkpoint(model=restored_model).restore(
            os.path.join(export_dir, "ckpt"))

        restored_embedding = restored_model.features_inputter.embedding.numpy()
        self.assertAllEqual(restored_embedding, original_embedding)
Example no. 7
0
def _create_vocab(temp_dir):
    """Write a small 3-token test vocabulary under ``temp_dir``.

    Returns a tuple of (make_vocab result, path the file was written to).
    """
    path = os.path.join(temp_dir, "vocab.txt")
    created = test_util.make_vocab(path, ["a", "b", "c"])
    return created, path