def IdsToStrings(self, ids, lens):
  """Takes integer matrices and returns vectors of strings."""
  # Guard: every row of `ids` must have a matching entry in `lens`.
  ids = py_utils.with_dependencies(
      [py_utils.assert_same_dim0([ids, lens])], ids)

  def _DecodeRow(inputs):
    # Decode only the first `row_len` ids of the row; the rest is padding.
    row_ids, row_len = inputs
    return self._wpm_encoder.Decode(row_ids[:row_len])

  return tf.map_fn(
      _DecodeRow, (ids, lens),
      dtype=tf.string,
      parallel_iterations=30,
      back_prop=False)
def IdsToStrings(self, ids, lens):
  """Takes int32 token ids and returns approximate detokenized strings."""
  # Guard: `ids` and `lens` must agree on the batch dimension.
  ids = py_utils.with_dependencies(
      [py_utils.assert_same_dim0([ids, lens])], ids)

  def _DetokenizeRow(inputs):
    row_ids, row_len = inputs
    # Drop padding, then give the detokenizer a rank-2 [1, row_len] input.
    clipped = tf.reshape(row_ids[:row_len], [1, -1])
    tokens = self._tokenizer.detokenize(clipped)
    # Join the ragged token pieces into one space-separated string.
    return tf.strings.reduce_join(tokens.flat_values, separator=' ')

  return tf.map_fn(
      _DetokenizeRow, (ids, lens),
      dtype=tf.string,
      parallel_iterations=30,
      back_prop=False)
def _SeqLenDim(nmap):
  """Returns the 0-th dim size of tensors in nmap.

  This is the max sequence length according to the shape of the inputs.

  Args:
    nmap: A `.NestedMap` of tensors. Every tensor's 0-th dim has the same
      size.

  Returns:
    A scalar tensor which is the size of 0-th dim of every tensors in nmap.
  """
  tensors = nmap.Flatten()
  assert tensors, 'nmap is empty.'
  # Assert all leading dims agree before reading the first tensor's shape.
  same_dim0 = py_utils.assert_same_dim0(tensors, msg='recurrent._SeqLen')
  with tf.control_dependencies([same_dim0]):
    return tf.shape(tensors[0])[0]