def test_trim_and_pad_dataset(self):
    """Check trim_and_pad_dataset against hand-computed padded examples.

    Lengths are requested for "inputs" (7) and "targets" (3) only.
    Shorter sequences are expected to be right-padded with 0, longer
    ones cut down to the requested length, and "idx" (no length
    requested) is expected to come back unchanged.
    """
    raw_examples = [
        {"inputs": [7, 8, 5, 6, 1], "targets": [3, 9, 1], "idx": [0]},
        {
            "inputs": [8, 4, 9, 3, 5, 7, 9, 1],
            "targets": [4, 1],
            "idx": [1, 2],
        },
    ]
    requested_lengths = {"inputs": 7, "targets": 3}
    expected_dtypes = {"inputs": tf.int32, "targets": tf.int32}

    source_ds = create_default_dataset(
        raw_examples, feature_names=("inputs", "targets", "idx"))
    result_ds = utils.trim_and_pad_dataset(
        source_ds, feature_lengths=requested_lengths)

    first_expected = {
        "inputs": [7, 8, 5, 6, 1, 0, 0],
        "targets": [3, 9, 1],
        "idx": [0],
    }
    second_expected = {
        # EOS is trimmed
        "inputs": [8, 4, 9, 3, 5, 7, 9],
        "targets": [4, 1, 0],
        "idx": [1, 2],
    }
    assert_dataset(result_ds, [first_expected, second_expected],
                   expected_dtypes)
Example #2
0
 def _pack_or_pad(self, ds: tf.data.Dataset,
                  packed_lengths: Mapping[str, int]) -> tf.data.Dataset:
     """Return `ds` trimmed to `packed_lengths`, then packed or padded.

     When `self.pack` is truthy the dataset goes through
     `utils.trim_and_pack_dataset` (forwarding
     `self._use_custom_packing_ops`); otherwise it goes through
     `utils.trim_and_pad_dataset`.
     """
     if not self.pack:
         return utils.trim_and_pad_dataset(ds, packed_lengths)
     return utils.trim_and_pack_dataset(
         ds, packed_lengths, self._use_custom_packing_ops)