def _testTokensToChars(self, tokens, expected_chars, expected_lengths):
    """Checks the dense characters and row lengths from `text.tokens_to_chars`.

    Args:
      tokens: Values used to build the string constant that is passed to
        `text.tokens_to_chars`.
      expected_chars: Expected nested list of characters; every leaf is
        converted to bytes before the comparison.
      expected_lengths: Expected list of row lengths.
    """
    expected_bytes = tf.nest.map_structure(tf.compat.as_bytes, expected_chars)

    token_tensor = tf.constant(tokens, dtype=tf.string)
    chars_by_token = text.tokens_to_chars(token_tensor)
    dense_chars = chars_by_token.to_tensor()
    row_lengths = chars_by_token.row_lengths()

    # Evaluate both tensors in a single call, then compare as plain lists.
    dense_values, length_values = self.evaluate([dense_chars, row_lengths])
    self.assertListEqual(expected_bytes, dense_values.tolist())
    self.assertListEqual(expected_lengths, length_values.tolist())
def make_features(self, element=None, features=None, training=None):
    """Converts words to characters.

    Args:
      element: Forwarded to the parent ``make_features`` when neither
        ``"char_ids"`` nor ``"chars"`` is already present.
      features: Optional dict of features; a new dict is created when ``None``.
      training: Forwarded to the parent ``make_features``.

    Returns:
      The features dict. It is returned untouched when it already holds
      ``"char_ids"``; otherwise ``"char_ids"`` is set to the result of
      ``self.tokens_to_ids.lookup`` on the characters.
    """
    features = {} if features is None else features

    # Nothing to do when the ids were already computed.
    if "char_ids" in features:
        return features

    if "chars" not in features:
        # Derive the characters from the tokens produced by the parent class.
        features = super(CharEmbedder, self).make_features(
            element=element, features=features, training=training)
        char_tokens = text.tokens_to_chars(features["tokens"])
        char_tokens = char_tokens.to_tensor(
            default_value=constants.PADDING_TOKEN)
    else:
        char_tokens = features["chars"]

    features["char_ids"] = self.tokens_to_ids.lookup(char_tokens)
    return features
def _testTokensToChars(self, tokens, expected_chars):
    """Checks the nested list produced by `text.tokens_to_chars`.

    Args:
      tokens: Values used to build the string constant that is passed to
        `text.tokens_to_chars`.
      expected_chars: Expected nested list of characters; every leaf is
        converted to bytes before the comparison.
    """
    expected_bytes = tf.nest.map_structure(tf.compat.as_bytes, expected_chars)

    token_tensor = tf.constant(tokens, dtype=tf.string)
    actual_chars = text.tokens_to_chars(token_tensor).to_list()

    self.assertListEqual(actual_chars, expected_bytes)