def AddMultiCurveSubplot(fig, tensors, paddings, labels, xlabels=None, **kwargs):
    """Adds a multi curve subplot to Matplotlib figure.

    Plots one line for each non-None entry in `tensors` and assigns a plot
    label legend. Entries of `tensors` that are None are skipped together with
    their corresponding label.

    Args:
      fig: The Matplotlib figure. Must provide an `AddSubplot` method.
      tensors: List of tensors of shape [batch, length]. Entries may be None.
      paddings: Paddings for 'tensors' with shape [batch, length] with 0. in
        valid positions and 1. in invalid.
      labels: A list of tensor names (strings) of the same length as
        'tensors'.
      xlabels: A string tensor of shape [batch] with an xlabel per batch.
      **kwargs: With optional, title, xlabel, ylabel, fontsize. Forwarded
        unchanged to `fig.AddSubplot`.

    Raises:
      ValueError: If `tensors` holds no non-None entry, so there is nothing to
        plot.
    """
    # Keep each curve paired with its label so that dropping a None tensor
    # also drops the matching legend entry.
    kept = [(t, l) for t, l in zip(tensors, labels) if t is not None]
    if not kept:
        # Previously this case surfaced as an opaque IndexError on `data[0]`.
        raise ValueError(
            'AddMultiCurveSubplot requires at least one non-None tensor to '
            'plot, but every entry of `tensors` was None (or it was empty).')

    data = [py_utils.ApplyPadding(paddings, t) for t, _ in kept]
    row_labels = [l for _, l in kept]

    shape = py_utils.GetShape(data[0], 2)
    # Concatenating along the last axis lays the curves out back to back per
    # batch element; the reshape then splits them into one row per curve:
    # [batch, num_curves, length].
    data = tf.reshape(tf.concat(data, -1), [shape[0], len(data), shape[1]])

    args = [data, py_utils.LengthsFromPaddings(paddings)]
    if xlabels is not None:
        args.append(xlabels)
    fig.AddSubplot(
        args, plot_func=_AddMultiCurveRowPlots, row_labels=row_labels, **kwargs)
def tokenize_words(words_t):
    """Tokenizes words into ragged token ids and their token strings.

    Relies on names from the enclosing scope: `str_to_vocab_tokens`,
    `vocab_id_to_token`, `longest_word_length`, `vocab_tokens` and
    `FLAGS.in_units_txt`.

    Args:
        words_t: Passed as `labels` to `str_to_vocab_tokens`; presumably a
            string tensor with one word per entry -- TODO confirm.

    Returns:
        A tuple `(ragged_ids, ragged_letters)`:
        ragged_ids is the ragged token-id tensor with the first token of every
        row removed; ragged_letters is the result of `vocab_id_to_token` on
        the flat ids, regrouped into rows using the original lengths minus
        one.
    """
    padded_ids, _, paddings = str_to_vocab_tokens(
        labels=words_t,
        maxlen=longest_word_length,
        append_eos=True,
        pad_to_maxlen=True,
        vocab_filepath=FLAGS.in_units_txt,
        load_token_ids_from_vocab=False,
        delimiter="",
    )

    # NOTE(review): the original author suspected that either the lengths or
    # the paddings produced here are incorrect -- unverified.
    full_lengths = py_utils.LengthsFromPaddings(paddings)

    # Build the ragged view, then slice off the first token of each row (the
    # original comment identifies it as the start-of-sentence token).
    ragged_ids = tf.RaggedTensor.from_tensor(
        padded_ids, lengths=full_lengths)[:, 1:]
    # One token was removed per row, so the row lengths shrink by one.
    trimmed_lengths = full_lengths - 1

    letters = vocab_id_to_token(
        id=ragged_ids.flat_values,
        vocab=vocab_tokens,
        load_token_ids_from_vocab=False,
    )
    ragged_letters = tf.RaggedTensor.from_row_lengths(letters, trimmed_lengths)

    # NOTE(review): open question left by the original author -- is
    # capitalization the problem?
    return ragged_ids, ragged_letters