예제 #1
0
 def IdsToStrings(self, ids, lens):
     """Decodes rows of integer ids into strings, one string per row.

     Each row of `ids` is truncated to the matching entry of `lens` and then
     handed to `self._wpm_encoder.Decode`.

     Args:
       ids: Integer ids; per the original docstring, a matrix with one
         sequence per row.
       lens: Per-row lengths. Must have the same 0-th dim size as `ids`.

     Returns:
       A tf.string tensor with one decoded entry per row of `ids`.
     """
     dim0_check = py_utils.assert_same_dim0([ids, lens])
     # Tie the dim-0 check to `ids` so it runs before any row is decoded.
     ids = py_utils.with_dependencies([dim0_check], ids)

     def _DecodeRow(row_and_len):
         row = row_and_len[0]
         row_len = row_and_len[1]
         # Keep only the first `row_len` ids of the row before decoding.
         return self._wpm_encoder.Decode(row[:row_len])

     return tf.map_fn(
         _DecodeRow,
         (ids, lens),
         dtype=tf.string,
         parallel_iterations=30,
         back_prop=False)
예제 #2
0
    def IdsToStrings(self, ids, lens):
        """Converts rows of token ids into approximate detokenized strings.

        Each row of `ids` is truncated to the matching entry of `lens`,
        detokenized with `self._tokenizer.detokenize`, and the resulting
        tokens are joined with single spaces.

        Args:
          ids: Token ids (int32 per the original docstring), one sequence
            per row.
          lens: Per-row lengths. Must have the same 0-th dim size as `ids`.

        Returns:
          A tf.string tensor with one joined string per row of `ids`.
        """
        same_dim0 = py_utils.assert_same_dim0([ids, lens])
        # Tie the dim-0 check to `ids` so it runs before any row is processed.
        ids = py_utils.with_dependencies([same_dim0], ids)

        def _DetokenizeRow(row_and_len):
            row_len = row_and_len[1]
            # Reshape the truncated row to [1, -1] before detokenizing.
            batched_row = tf.reshape(row_and_len[0][:row_len], [1, -1])
            pieces = self._tokenizer.detokenize(batched_row)
            return tf.strings.reduce_join(pieces.flat_values, separator=' ')

        return tf.map_fn(
            _DetokenizeRow,
            (ids, lens),
            dtype=tf.string,
            parallel_iterations=30,
            back_prop=False)
예제 #3
0
def _SeqLenDim(nmap):
  """Returns the size of the 0-th dim shared by the tensors in nmap.

  That size is the max sequence length implied by the input shapes.

  Args:
    nmap: A `.NestedMap` of tensors whose 0-th dims all have the same size.
      Must not be empty.

  Returns:
    A scalar tensor holding the 0-th dim size of the first flattened tensor,
    computed under a control dependency on the same-dim0 assertion.
  """
  tensors = nmap.Flatten()
  assert tensors, 'nmap is empty.'
  dim0_check = py_utils.assert_same_dim0(tensors, msg='recurrent._SeqLen')
  # Create the shape op under the assertion so the check gates the result.
  with tf.control_dependencies([dim0_check]):
    seq_len = tf.shape(tensors[0])[0]
  return seq_len