Example #1
0
  def testConcatInDepthWithSequence(self):
    """Depth-wise concat zero-pads positions beyond each sequence length."""
    inputs_a = [[[1], [-1], [-1]],
                [[1], [2], [3]],
                [[1], [2], [-1]]]
    inputs_b = [[[1], [2], [3], [4]],
                [[1], [2], [-1], [-1]],
                [[1], [2], [-1], [-1]]]
    lengths_a = [1, 3, 2]
    lengths_b = [4, 2, 2]
    expected = [[[1, 1], [0, 2], [0, 3], [0, 4]],
                [[1, 1], [2, 2], [3, 0], [0, 0]],
                [[1, 1], [2, 2], [0, 0], [0, 0]]]

    depth_concat = reducer.ConcatReducer()
    reduced, length = depth_concat.reduce_sequence(
        [tf.constant(inputs_a, dtype=tf.float32),
         tf.constant(inputs_b, dtype=tf.float32)],
        [tf.constant(lengths_a), tf.constant(lengths_b)])

    # Output depth is the sum of the input depths (1 + 1).
    self.assertEqual(2, reduced.get_shape().as_list()[-1])

    with self.test_session() as sess:
      reduced_value, length_value = sess.run([reduced, length])
      self.assertAllEqual(expected, reduced_value)
      self.assertAllEqual([4, 3, 2], length_value)
Example #2
0
    def testMixedInputter(self):
        """A word + character mixed inputter produces one reduced output."""
        word_vocab = self._makeTextFile("vocab.txt",
                                        ["the", "world", "hello", "toto"])
        char_vocab = self._makeTextFile("vocab_alt.txt",
                                        ["h", "e", "l", "w", "o"])
        corpus = self._makeTextFile("data.txt", ["hello world !"])

        embedders = [
            text_inputter.WordEmbedder(embedding_size=10),
            text_inputter.CharConvEmbedder(10, 5),
        ]
        mixed_inputter = inputter.MixedInputter(
            embedders, reducer=reducer.ConcatReducer())
        self.assertEqual(mixed_inputter.num_outputs, 1)

        data_config = {
            "1_vocabulary": word_vocab,
            "2_vocabulary": char_vocab,
        }
        expected_shapes = {
            "char_ids": [None, None, None],
            "ids": [None, None],
            "length": [None],
        }
        features, transformed = self._makeDataset(
            mixed_inputter,
            corpus,
            data_config=data_config,
            shapes=expected_shapes)
        # Depth 15 is presumably 10 (word) + 5 (char) embeddings concatenated.
        self.assertAllEqual([1, 3, 15], transformed.shape)
Example #3
0
    def testMixedInputter(self):
        """Word + character inputs are embedded and depth-concatenated."""
        word_vocab = self._makeTextFile("vocab.txt",
                                        ["the", "world", "hello", "toto"])
        char_vocab = self._makeTextFile("vocab_alt.txt",
                                        ["h", "e", "l", "w", "o"])
        corpus = self._makeTextFile("data.txt", ["hello world !"])

        mixed_inputter = inputter.MixedInputter(
            [
                text_inputter.WordEmbedder(
                    "vocabulary_file_1", embedding_size=10),
                text_inputter.CharConvEmbedder("vocabulary_file_2", 10, 5),
            ],
            reducer=reducer.ConcatReducer())
        self.assertEqual(mixed_inputter.num_outputs, 1)

        metadata = {
            "vocabulary_file_1": word_vocab,
            "vocabulary_file_2": char_vocab,
        }
        shapes = {
            "char_ids": [None, None, None],
            "ids": [None, None],
            "length": [None],
        }
        features, transformed = self._makeDataset(
            mixed_inputter, corpus, metadata=metadata, shapes=shapes)

        with self.test_session() as sess:
            # Initialize lookup tables and variables before fetching tensors.
            sess.run(tf.tables_initializer())
            sess.run(tf.global_variables_initializer())
            features, transformed = sess.run([features, transformed])
            self.assertAllEqual([1, 3, 15], transformed.shape)
Example #4
0
    def testConcatInDepthWithSequence(self):
        """Depth-wise concat zero-pads positions beyond each sequence length."""
        inputs_a = [[[1], [-1], [-1]], [[1], [2], [3]], [[1], [2], [-1]]]
        inputs_b = [[[1], [2], [3], [4]], [[1], [2], [-1], [-1]],
                    [[1], [2], [-1], [-1]]]
        lengths_a = [1, 3, 2]
        lengths_b = [4, 2, 2]
        expected = [
            [[1, 1], [0, 2], [0, 3], [0, 4]],
            [[1, 1], [2, 2], [3, 0], [0, 0]],
            [[1, 1], [2, 2], [0, 0], [0, 0]],
        ]

        depth_concat = reducer.ConcatReducer()
        reduced, length = depth_concat(
            [tf.constant(inputs_a, dtype=tf.float32),
             tf.constant(inputs_b, dtype=tf.float32)],
            [tf.constant(lengths_a), tf.constant(lengths_b)])

        # Output depth is the sum of the input depths (1 + 1).
        self.assertEqual(2, reduced.shape[-1])
        reduced, length = self.evaluate([reduced, length])
        self.assertAllEqual(expected, reduced)
        self.assertAllEqual([4, 3, 2], length)
Example #5
0
    def testConcatInTimeWithSequenceAndMaxTimeMismatch(self):
        """Time-axis concat repacks valid steps even when paddings differ."""
        inputs_a = [[[1], [-1], [-1]], [[1], [2], [3]], [[1], [2], [-1]]]
        inputs_b = [
            [[1], [2], [3], [4], [-1], [-1]],
            [[1], [2], [-1], [-1], [-1], [-1]],
            [[1], [2], [-1], [-1], [-1], [-1]],
        ]
        lengths_a = [1, 3, 2]
        lengths_b = [4, 2, 2]
        expected = [
            [[1], [1], [2], [3], [4]],
            [[1], [2], [3], [1], [2]],
            [[1], [2], [1], [2], [0]],
        ]

        time_concat = reducer.ConcatReducer(axis=1)
        reduced, length = time_concat(
            [tf.constant(inputs_a, dtype=tf.float32),
             tf.constant(inputs_b, dtype=tf.float32)],
            [tf.constant(lengths_a), tf.constant(lengths_b)])

        # Concatenating along time leaves the depth dimension unchanged.
        self.assertEqual(1, reduced.shape[-1])
        reduced, length = self.evaluate([reduced, length])
        self.assertAllEqual(expected, reduced)
        self.assertAllEqual([5, 5, 4], length)
Example #6
0
 def __init__(self,
              num_layers,
              num_units,
              bidirectional=False,
              reducer=reducer_lib.ConcatReducer(),
              dropout=0,
              residual_connections=False,
              **kwargs):
     """Initializes the layer.

     Args:
       num_layers: Number of stacked GRU layers.
       num_units: Dimension of the output space of each GRU.
       bidirectional: Make each layer bidirectional.
       reducer: A :class:`opennmt.layers.Reducer` instance to merge
         the bidirectional states and outputs of each layer.
       dropout: The probability to drop units in each layer output.
       residual_connections: If ``True``, each layer input will be added to its
         output.
       **kwargs: Additional layer arguments.
     """
     super(GRU, self).__init__(**kwargs)
     # NOTE(review): the default ``reducer`` instance is created once at
     # function-definition time and shared by every GRU built with the
     # default; confirm ConcatReducer is stateless before relying on that.
     rnn_layers = [
         _RNNWrapper(
             tf.keras.layers.GRU(num_units,
                                 return_sequences=True,
                                 return_state=True),
             bidirectional=bidirectional,
             reducer=reducer)
         for _ in range(num_layers)
     ]
     # Wrap each GRU with output dropout and an optional residual connection.
     self.layers = [
         common.LayerWrapper(layer,
                             output_dropout=dropout,
                             residual_connection=residual_connections)
         for layer in rnn_layers
     ]
Example #7
0
    def testNestedInputtersWithFlatDataFiles(self):
        """Nested parallel inputters accept a flat list of data files."""
        inner = inputter.ParallelInputter(
            [
                record_inputter.SequenceRecordInputter(10),
                record_inputter.SequenceRecordInputter(10),
            ],
            reducer=reducer.SumReducer())
        inputters = inputter.ParallelInputter(
            [record_inputter.SequenceRecordInputter(10), inner],
            reducer=reducer.ConcatReducer())

        self.assertListEqual(inputters._structure(), [None, [None, None]])

        empty_file = os.path.join(self.get_temp_dir(), "test.txt")
        open(empty_file, "w").close()

        # Two files do not match the three leaf inputters.
        with self.assertRaises(ValueError):
            inputters.make_inference_dataset(
                [empty_file, empty_file], batch_size=2)
        # A flat list of three files is accepted.
        inputters.make_inference_dataset(
            [empty_file, empty_file, empty_file], batch_size=2)
Example #8
0
  def testConcatInTimeWithSequenceAndMaxTimeMismatch(self):
    """Time-axis concat repacks valid steps even when paddings differ."""
    inputs_a = [[[1], [-1], [-1]],
                [[1], [2], [3]],
                [[1], [2], [-1]]]
    inputs_b = [[[1], [2], [3], [4], [-1], [-1]],
                [[1], [2], [-1], [-1], [-1], [-1]],
                [[1], [2], [-1], [-1], [-1], [-1]]]
    lengths_a = [1, 3, 2]
    lengths_b = [4, 2, 2]
    expected = [[[1], [1], [2], [3], [4]],
                [[1], [2], [3], [1], [2]],
                [[1], [2], [1], [2], [0]]]

    time_concat = reducer.ConcatReducer(axis=1)
    reduced, length = time_concat.reduce_sequence(
        [tf.constant(inputs_a, dtype=tf.float32),
         tf.constant(inputs_b, dtype=tf.float32)],
        [tf.constant(lengths_a), tf.constant(lengths_b)])

    # Concatenating along time leaves the depth dimension unchanged.
    self.assertEqual(1, reduced.get_shape().as_list()[-1])

    with self.test_session() as sess:
      reduced_value, length_value = sess.run([reduced, length])
      self.assertAllEqual(expected, reduced_value)
      self.assertAllEqual([5, 5, 4], length_value)
Example #9
0
 def testParallelEncoder(self):
   """Parallel dense encoders merged with a time-axis concat reducer."""
   lengths = [[3, 5, 2], [6, 6, 4]]
   inputs = [tf.zeros([3, 5, 10]), tf.zeros([3, 6, 10])]
   columns = [DenseEncoder(1, 20), DenseEncoder(2, 20)]
   encoder = encoders.ParallelEncoder(
       columns, outputs_reducer=reducer.ConcatReducer(axis=1))
   outputs, state, encoded_length = encoder(inputs, sequence_length=lengths)
   self.assertEqual(len(state), 3)
   outputs, encoded_length = self.evaluate([outputs, encoded_length])
   # Time dimensions (5 and 6) are concatenated; depth stays at 20.
   self.assertAllEqual([3, 11, 20], outputs.shape)
   self.assertAllEqual([9, 11, 6], encoded_length)
Example #10
0
 def testBRNN(self):
     """A bidirectional RNN with a concat reducer doubles the output depth."""
     cell = rnn.make_rnn_cell(3, 10, dropout=0.1, residual_connections=True)
     brnn = rnn.RNN(
         cell, bidirectional=True, reducer=reducer.ConcatReducer())
     inputs = tf.random.uniform([4, 5, 5])
     outputs, states = brnn(inputs, training=True)
     # Forward and backward outputs (10 units each) concatenate to 20.
     self.assertListEqual(outputs.shape.as_list(), [4, 5, 20])
     self.assertIsInstance(states, tuple)
     self.assertEqual(len(states), 3)
     self.assertEqual(len(states[0]), 2)
     self.assertListEqual(states[0][0].shape.as_list(), [4, 20])
Example #11
0
 def _encodeInParallel(self,
                       inputs,
                       sequence_length=None,
                       outputs_layer_fn=None,
                       combined_output_layer_fn=None):
   """Runs two parallel dense encoder columns and returns evaluated outputs."""
   encoder = encoders.ParallelEncoder(
       [DenseEncoder(1, 20), DenseEncoder(1, 20)],
       outputs_reducer=reducer.ConcatReducer(),
       outputs_layer_fn=outputs_layer_fn,
       combined_output_layer_fn=combined_output_layer_fn)
   merged, _, _ = encoder(inputs, sequence_length=sequence_length)
   return self.evaluate(merged)
Example #12
0
 def _encodeInParallel(self,
                       inputs,
                       sequence_length=None,
                       outputs_layer_fn=None,
                       combined_output_layer_fn=None):
   """Encodes *inputs* with two parallel unidirectional RNN columns."""
   encoder = encoders.ParallelEncoder(
       [encoders.UnidirectionalRNNEncoder(1, 20),
        encoders.UnidirectionalRNNEncoder(1, 20)],
       outputs_reducer=reducer.ConcatReducer(),
       outputs_layer_fn=outputs_layer_fn,
       combined_output_layer_fn=combined_output_layer_fn)
   return encoder.encode(inputs, sequence_length=sequence_length)
Example #13
0
  def __init__(self, cell, bidirectional=False, reducer=reducer_lib.ConcatReducer(), **kwargs):
    """Initializes the layer.

    Args:
      cell: The RNN cell to use.
      bidirectional: Make this layer bidirectional.
      reducer: A :class:`opennmt.layers.Reducer` instance to merge
        bidirectional states and outputs.
      **kwargs: Additional layer arguments.

    See Also:
      :func:`opennmt.layers.make_rnn_cell`
    """
    # Wrap the cell so the layer returns both the full sequence and the state.
    rnn_layer = tf.keras.layers.RNN(
        cell, return_sequences=True, return_state=True)
    super(RNN, self).__init__(
        rnn_layer, bidirectional=bidirectional, reducer=reducer, **kwargs)
Example #14
0
 def testParallelEncoder(self):
   """Parallel dense encoders merged along time, in TF1 or TF2 mode."""
   lengths = [[3, 5, 2], [6, 6, 4]]
   inputs = [tf.zeros([3, 5, 10]), tf.zeros([3, 6, 10])]
   columns = [DenseEncoder(1, 20), DenseEncoder(2, 20)]
   encoder = encoders.ParallelEncoder(
       columns, outputs_reducer=reducer.ConcatReducer(axis=1))
   outputs, state, encoded_length = encoder.encode(
       inputs, sequence_length=lengths)
   self.assertEqual(len(state), 3)
   if not compat.is_tf2():
     # Graph mode requires explicit variable initialization before eval.
     with self.test_session() as sess:
       sess.run(tf.global_variables_initializer())
   outputs, encoded_length = self.evaluate([outputs, encoded_length])
   # Time dimensions (5 and 6) are concatenated; depth stays at 20.
   self.assertAllEqual([3, 11, 20], outputs.shape)
   self.assertAllEqual([9, 11, 6], encoded_length)
Example #15
0
 def _encodeInParallel(self,
                       inputs,
                       sequence_length=None,
                       outputs_layer_fn=None,
                       combined_output_layer_fn=None):
   """Runs two parallel dense encoder columns and returns evaluated outputs."""
   encoder = encoders.ParallelEncoder(
       [DenseEncoder(1, 20), DenseEncoder(1, 20)],
       outputs_reducer=reducer.ConcatReducer(),
       outputs_layer_fn=outputs_layer_fn,
       combined_output_layer_fn=combined_output_layer_fn)
   merged, _, _ = encoder.encode(inputs, sequence_length=sequence_length)
   if not compat.is_tf2():
     # Graph mode requires explicit variable initialization before eval.
     with self.test_session() as sess:
       sess.run(tf.global_variables_initializer())
   return self.evaluate(merged)
Example #16
0
 def testParallelEncoderSameInput(self):
     """Feeding one input to two parallel columns concatenates their depths."""
     sequence_length = [17, 21, 20]
     inputs = _build_dummy_sequences(sequence_length)
     columns = [
         encoders.UnidirectionalRNNEncoder(1, 20),
         encoders.UnidirectionalRNNEncoder(1, 20),
     ]
     encoder = encoders.ParallelEncoder(
         columns, outputs_reducer=reducer.ConcatReducer())
     outputs, _, encoded_length = encoder.encode(
         inputs, sequence_length=sequence_length)
     with self.test_session() as sess:
         sess.run(tf.global_variables_initializer())
         outputs, encoded_length = sess.run([outputs, encoded_length])
         # Two 20-unit columns concatenated in depth -> 40.
         self.assertAllEqual([3, 21, 40], outputs.shape)
         self.assertAllEqual(sequence_length, encoded_length)
Example #17
0
  def __init__(self, rnn, bidirectional=False, reducer=reducer_lib.ConcatReducer(), **kwargs):
    """Initializes the layer.

    Args:
      rnn: The RNN layer to extend, built with ``return_sequences`` and
        ``return_state`` enabled.
      bidirectional: Make this layer bidirectional.
      reducer: A :class:`opennmt.layers.Reducer` instance to merge
        bidirectional states and outputs.
      **kwargs: Additional layer arguments.
    """
    super(_RNNWrapper, self).__init__(**kwargs)
    self.bidirectional = bidirectional
    self.reducer = reducer
    # merge_mode=None keeps forward/backward outputs separate so the
    # configured reducer can merge them instead.
    self.rnn = (
        tf.keras.layers.Bidirectional(rnn, merge_mode=None)
        if bidirectional else rnn)
Example #18
0
 def testParallelEncoder(self):
   """Two RNN columns over distinct inputs, concatenated along time."""
   sequence_lengths = [[17, 21, 20], [10, 9, 15]]
   inputs = [_build_dummy_sequences(length) for length in sequence_lengths]
   columns = [
       encoders.UnidirectionalRNNEncoder(1, 20),
       encoders.UnidirectionalRNNEncoder(1, 20),
   ]
   encoder = encoders.ParallelEncoder(
       columns, outputs_reducer=reducer.ConcatReducer(axis=1))
   outputs, state, encoded_length = encoder.encode(
       inputs, sequence_length=sequence_lengths)
   self.assertEqual(2, len(state))
   for s in state:
     self.assertIsInstance(s, tf.contrib.rnn.LSTMStateTuple)
   with self.test_session() as sess:
     sess.run(tf.global_variables_initializer())
     outputs, encoded_length = sess.run([outputs, encoded_length])
     self.assertAllEqual([3, 35, 20], outputs.shape)
     self.assertAllEqual([27, 30, 35], encoded_length)
Example #19
0
    def testMixedInputter(self):
        """Word and char embedders over one sentence, concatenated in depth."""
        vocab_file = os.path.join(self.get_temp_dir(), "vocab.txt")
        vocab_alt_file = os.path.join(self.get_temp_dir(), "vocab_alt.txt")
        data_file = os.path.join(self.get_temp_dir(), "data.txt")

        with open(vocab_file, "w") as vocab:
            vocab.write("the\nworld\nhello\ntoto\n")
        with open(vocab_alt_file, "w") as vocab_alt:
            vocab_alt.write("h\ne\nl\nw\no\n")
        with open(data_file, "w") as data:
            data.write("hello world !\n")

        mixed_inputter = inputter.MixedInputter(
            [
                text_inputter.WordEmbedder(
                    "vocabulary_file_1", embedding_size=10),
                text_inputter.CharConvEmbedder("vocabulary_file_2", 10, 5),
            ],
            reducer=reducer.ConcatReducer())
        metadata = {
            "vocabulary_file_1": vocab_file,
            "vocabulary_file_2": vocab_alt_file,
        }
        shapes = {
            "char_ids": [None, None, None],
            "ids": [None, None],
            "length": [None],
        }
        features, transformed = self._makeDataset(
            mixed_inputter, data_file, metadata=metadata, shapes=shapes)

        self.assertNotIn("tokens", features)

        with self.test_session() as sess:
            # Initialize lookup tables and variables before fetching tensors.
            sess.run(tf.tables_initializer())
            sess.run(tf.global_variables_initializer())
            features, transformed = sess.run([features, transformed])
            self.assertAllEqual([1, 3, 15], transformed.shape)
Example #20
0
  def testMixedInputter(self):
    """Checks the serving receiver and features of a word + char inputter."""
    # NOTE(review): vocab_file, vocab_alt_file and data_file are defined
    # outside this method -- presumably shared fixtures; confirm in the file.
    with open(vocab_file, "w") as vocab:
      vocab.write("the\nworld\nhello\ntoto\n")
    with open(vocab_alt_file, "w") as vocab_alt:
      vocab_alt.write("h\ne\nl\nw\no\n")
    with open(data_file, "w") as data:
      data.write("hello world !\n")

    mixed_inputter = inputter.MixedInputter(
        [
            text_inputter.WordEmbedder("vocabulary_file_1", embedding_size=10),
            text_inputter.CharConvEmbedder("vocabulary_file_2", 10, 5),
        ],
        reducer=reducer.ConcatReducer())

    metadata = {
        "vocabulary_file_1": vocab_file,
        "vocabulary_file_2": vocab_alt_file,
    }
    data, transformed = _first_element(mixed_inputter, data_file, metadata)

    # The serving receiver must expose both word and character inputs.
    input_receiver = mixed_inputter.get_serving_input_receiver()
    self.assertIn("ids", input_receiver.features)
    self.assertIn("char_ids", input_receiver.features)

    with self.test_session() as sess:
      # Initialize lookup tables and variables before fetching tensors.
      sess.run(tf.tables_initializer())
      sess.run(tf.global_variables_initializer())
      data, transformed = sess.run([data, transformed])
      self.assertNotIn("raw", data)
      self.assertNotIn("tokens", data)
      self.assertIn("ids", data)
      self.assertIn("char_ids", data)
      # Depth 15 is presumably 10 (word) + 5 (char) embeddings concatenated.
      self.assertAllEqual([1, 3, 15], transformed.shape)