Example #1
0
def AddMultiCurveSubplot(fig,
                         tensors,
                         paddings,
                         labels,
                         xlabels=None,
                         **kwargs):
    """Adds a multi curve subplot to Matplotlib figure.

    Plots one line for each non-None entry in `tensors` and attaches the
    corresponding entry of `labels` as its legend label.

    Args:
      fig: The Matplotlib figure.
      tensors: List of tensors of shape [batch, length].
      paddings: Paddings for 'tensors' with shape [batch, length] with 0. in
        valid positions and 1. in invalid.
      labels: A list of tensor names (strings) of the same length as 'tensors'.
      xlabels: A string tensor of shape [batch] with an xlabel per batch.
      **kwargs: With optional, title, xlabel, ylabel, fontsize.
    """
    # Keep only the tensors that are actually present, together with the
    # legend label for each (order preserved).
    kept = [(py_utils.ApplyPadding(paddings, t), name)
            for t, name in zip(tensors, labels) if t is not None]
    curves = [c for c, _ in kept]
    row_labels = [name for _, name in kept]

    # Stack the [batch, length] curves into a single [batch, num_curves,
    # length] tensor, as expected by the row plotting function.
    batch, length = py_utils.GetShape(curves[0], 2)
    stacked = tf.reshape(tf.concat(curves, -1), [batch, len(curves), length])

    plot_args = [stacked, py_utils.LengthsFromPaddings(paddings)]
    if xlabels is not None:
        plot_args.append(xlabels)
    fig.AddSubplot(plot_args,
                   plot_func=_AddMultiCurveRowPlots,
                   row_labels=row_labels,
                   **kwargs)
Example #2
0
 def tokenize_words(words_t):
     """Tokenizes a batch of words into per-character token ids and letters.

     Args:
       words_t: A string tensor of words to tokenize — presumably shape
         [batch]; TODO confirm against the caller.

     Returns:
       A pair (ragged_tokenized_t, ragged_letters_t):
         ragged_tokenized_t: RaggedTensor of token ids per word, with the
           leading token dropped.
         ragged_letters_t: RaggedTensor of the corresponding token strings,
           looked up via vocab_id_to_token.
     """
     # Tokenize each word to a fixed-width [batch, longest_word_length]
     # tensor (padded to maxlen, with EOS appended).
     padded_tokenized_t, _, paddings_t = str_to_vocab_tokens(
         labels=words_t,
         maxlen=longest_word_length,
         append_eos=True,
         pad_to_maxlen=True,
         vocab_filepath=FLAGS.in_units_txt,
         load_token_ids_from_vocab=False,
         delimiter="",
     )
     # NOTE(review): leftover debug note from the original author — "either
     # lengths or paddings are incorrect" here. The mismatch has not been
     # root-caused; verify LengthsFromPaddings against str_to_vocab_tokens'
     # padding convention.
     lengths_t = py_utils.LengthsFromPaddings(paddings_t)
     # Convert the dense padded ids to a ragged tensor so each row keeps
     # only its valid tokens.
     ragged_tokenized_t = tf.RaggedTensor.from_tensor(padded_tokenized_t,
                                                      lengths=lengths_t)
     # Drop the leading token of every row (labeled start-of-sentence by the
     # original author; note str_to_vocab_tokens was called with append_eos,
     # so confirm the first token really is SOS) and shrink lengths to match.
     ragged_tokenized_t = ragged_tokenized_t[:, 1:]
     lengths_t -= 1
     # Map the remaining flat token ids back to their string tokens.
     letters_t = vocab_id_to_token(
         id=ragged_tokenized_t.flat_values,
         vocab=vocab_tokens,
         load_token_ids_from_vocab=False,
     )
     # Re-ragged the flat letter strings using the adjusted row lengths.
     ragged_letters_t = tf.RaggedTensor.from_row_lengths(
         letters_t, lengths_t)
     # NOTE(review): original debug note asked "is capitalization the
     # problem?" — casing of the vocab vs. input words is unverified here.
     return ragged_tokenized_t, ragged_letters_t