Example #1
def _process(record):
  # Parse the serialized record into a scalar float via a Python callback.
  num, = tf.py_func(str_to_num, [record], [tf.float32])
  # The example carries the value and its square.
  num = tf.stack([num, tf.square(num)])
  # Return the processed tensors together with a constant bucket key of 1.
  if use_nested_map:
    return py_utils.NestedMap(record=record, num=num), 1
  else:
    return [record, num], 1
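Here `str_to_num`, `use_nested_map`, and `py_utils` come from the surrounding code. A minimal self-contained sketch of the same py_func pattern, with a hypothetical `str_to_num` and assuming TensorFlow 1.x behavior via `tf.compat.v1`:

import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def str_to_num(s):
  # py_func hands tf.string inputs to the callback as Python bytes.
  return np.float32(s.decode('utf-8'))

record = tf.constant('3.5')
num, = tf.py_func(str_to_num, [record], [tf.float32])
num.set_shape([])  # py_func outputs have unknown static shape
features = tf.stack([num, tf.square(num)])

with tf.Session() as sess:
  print(sess.run(features))  # [ 3.5  12.25]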
Example #2
  def FinalizeImage(self):
    """Finishes creation of the overall figure, returning the image tensor."""
    subplot_grid_shape = self._subplot_grid_shape
    if subplot_grid_shape is None:
      subplot_grid_shape = (len(self._subplots), 1)

    # Due to restrictions of py_func, AddMatplotlibFigureSummary only supports a
    # flattened list of tensors, so we must do some bookkeeping here to maintain
    # the mapping from each _SubplotMetadata object to its slice of
    # flattened_tensors.
    subplot_slices = []
    flattened_tensors = []
    for subplot in self._subplots:
      start = len(flattened_tensors)
      subplot_slices.append((start, start + len(subplot.tensor_list)))
      flattened_tensors.extend(subplot.tensor_list)

    def PlotFunc(fig, *numpy_data_list):
      gs = gridspec.GridSpec(*subplot_grid_shape, **self._gridspec_kwargs)
      for n, subplot in enumerate(self._subplots):
        axes = fig.add_subplot(gs[n])
        start, end = subplot_slices[n]
        subplot_data = numpy_data_list[start:end]
        subplot.plot_func(fig, axes, *subplot_data)

    func = functools.partial(_RenderMatplotlibFigures, self._figsize,
                             self._max_outputs, PlotFunc)
    batch_sizes = [tf.shape(t)[0] for t in flattened_tensors]
    num_tensors = len(flattened_tensors)
    with tf.control_dependencies([
        tf.assert_equal(
            batch_sizes, [batch_sizes[0]] * num_tensors, summarize=num_tensors)
    ]):
      return tf.py_func(
          func, flattened_tensors, tf.uint8, name='RenderMatplotlibFigures')
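`_RenderMatplotlibFigures` and the `_subplots` bookkeeping belong to the surrounding class; the core py_func idea here is rendering a matplotlib figure to a uint8 image tensor inside the graph. A rough self-contained sketch of that idea (hypothetical `_RenderFigure`, assuming TensorFlow 1.x and matplotlib 3.x with the Agg backend):

import matplotlib
matplotlib.use('Agg')  # headless rendering
import matplotlib.pyplot as plt
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def _RenderFigure(values):
  """Plots `values` and returns the rendered figure as an HWC uint8 array."""
  fig, axes = plt.subplots(figsize=(4, 3))
  axes.plot(values)
  fig.canvas.draw()
  image = np.asarray(fig.canvas.buffer_rgba())[..., :3].copy()  # drop alpha
  plt.close(fig)
  return image

values = tf.constant([1.0, 4.0, 9.0, 16.0])
image = tf.py_func(_RenderFigure, [values], tf.uint8, name='RenderFigure')

with tf.Session() as sess:
  print(sess.run(image).shape)  # e.g. (300, 400, 3)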
Example #3
def _process(source_id, record):
  # Parse the serialized record into a scalar float via a Python callback.
  num, = tf.py_func(str_to_num, [record], [tf.float32])
  num = tf.stack([num, tf.square(num)])
  # Unlike Example #1, the bucket key is computed from the parsed value.
  if use_nested_map:
    return py_utils.NestedMap(
        source_id=source_id, record=record, num=num), bucket_fn(num)
  else:
    return [source_id, record, num], bucket_fn(num)
Example #4
    def _ProcessLine(self, line):
        """A single-text-line processor.

        Gets a string tensor representing a line of text that has been read from
        the input file, and splits it into graphemes (characters).
        We use the original characters as the target labels, and the lowercased,
        punctuation-removed characters as the source labels.

        Args:
          line: a 1D string tensor.

        Returns:
          A list of tensors, in the order expected by __init__.
        """
        # Tokenize the input into integer ids.
        # tgt_ids has the start-of-sentence token prepended, and tgt_labels has the
        # end-of-sentence token appended.
        tgt_ids, tgt_labels, tgt_paddings = self.StringsToIds(
            tf.convert_to_tensor([line]))

        def Normalize(line):
            # Lowercase and remove punctuation.
            line = line.lower().translate(None,
                                          string.punctuation.encode('utf-8'))
            # Convert multiple consecutive spaces to a single one.
            line = b' '.join(line.split())
            return line

        normalized_line = tf.py_func(
            Normalize, [line], tf.string, stateful=False)
        _, src_labels, src_paddings = self.StringsToIds(
            tf.convert_to_tensor([normalized_line]), is_source=True)
        # The model expects the source without a start-of-sentence token.
        src_ids = src_labels

        # Compute the length for bucketing.
        bucket_key = tf.cast(
            tf.round(
                tf.maximum(tf.reduce_sum(1.0 - src_paddings),
                           tf.reduce_sum(1.0 - tgt_paddings))), tf.int32)
        tgt_weights = 1.0 - tgt_paddings

        # Return tensors in an order consistent with __init__.
        out_tensors = [
            src_ids, src_paddings, tgt_ids, tgt_paddings, tgt_labels,
            tgt_weights
        ]
        return [tf.squeeze(t, axis=0) for t in out_tensors], bucket_key
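Because the input is a `tf.string` tensor, the `Normalize` callback receives Python `bytes`, which is why it joins with `b' '` and encodes the punctuation table. A stripped-down, runnable sketch of just that py_func step (assuming TensorFlow 1.x):

import string
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def Normalize(line):
  # `line` arrives as bytes; lowercase, strip punctuation, collapse spaces.
  line = line.lower().translate(None, string.punctuation.encode('utf-8'))
  return b' '.join(line.split())

line = tf.constant('Hello,   World!')
normalized = tf.py_func(Normalize, [line], tf.string, stateful=False)

with tf.Session() as sess:
  print(sess.run(normalized))  # b'hello world'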
Example #5
def bleu_score(predictions, labels, **unused_kwargs):
    """BLEU score computation between labels and predictions.

    An approximate BLEU scoring method, since we do not glue word pieces or
    decode the ids and tokenize the output. By default, we use an ngram order
    of 4 and apply the brevity penalty. Also, this does not use beam search.

    Args:
      predictions: tensor, model predictions.
      labels: tensor, gold output.

    Returns:
      bleu: a scalar float32 tensor, the approximate BLEU score, paired with a
        constant weight of 1.0.
    """
    outputs = tf.to_int32(tf.argmax(predictions, axis=-1))
    # Convert the outputs and labels to a [batch_size, input_length] tensor.
    outputs = tf.squeeze(outputs, axis=[-1, -2])
    labels = tf.squeeze(labels, axis=[-1, -2])

    bleu = tf.py_func(compute_bleu, (labels, outputs), tf.float32)
    return bleu, tf.constant(1.0)
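`compute_bleu` here is a NumPy-side function; the point of `tf.py_func` is that the metric runs in plain Python on the id matrices. A sketch of the same wrapping with a stand-in NumPy metric (hypothetical `token_accuracy`, not the real BLEU computation, assuming TensorFlow 1.x):

import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def token_accuracy(labels, outputs):
  # Stand-in for compute_bleu: any NumPy-only metric can be wrapped this way.
  return np.float32(np.mean(labels == outputs))

labels = tf.constant([[1, 2, 3], [4, 5, 6]], dtype=tf.int32)
outputs = tf.constant([[1, 2, 0], [4, 5, 6]], dtype=tf.int32)

score = tf.py_func(token_accuracy, (labels, outputs), tf.float32)

with tf.Session() as sess:
  print(sess.run(score))  # ~0.8333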
Example #6
def _process(record):
  # Each record is a pickled NumPy int32 array; deserialize it in Python.
  num = tf.py_func(pickle.loads, [record], tf.int32)
  # Bucket by the leading dimension and also emit a transposed view.
  bucket_key = tf.shape(num)[0]
  return [num, tf.transpose(num, [1, 0, 2])], bucket_key
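Each record here is assumed to be a pickled NumPy int32 array, deserialized on the fly by `tf.py_func(pickle.loads, ...)`. A self-contained sketch of that pattern (assuming TensorFlow 1.x):

import pickle
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# A 'record' as it might come out of a file: a pickled int32 array.
record = tf.constant(
    pickle.dumps(np.arange(24, dtype=np.int32).reshape(2, 3, 4)))

num = tf.py_func(pickle.loads, [record], tf.int32)
bucket_key = tf.shape(num)[0]
transposed = tf.transpose(num, [1, 0, 2])

with tf.Session() as sess:
  print(sess.run([tf.shape(num), tf.shape(transposed), bucket_key]))
  # [array([2, 3, 4]), array([3, 2, 4]), 2]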
Example #7
def _process(record):
  # Each record is a pickled boolean array; every example shares bucket key 1.
  bucket_key = 1
  num, = tf.py_func(pickle.loads, [record], [tf.bool])
  return [num], bucket_key
Example #8
    def _ProcessLine(self, line):
        """A single-text-line processor.

        Gets a string tensor representing a line of text that has been read from
        the input file, and splits it into graphemes (characters).
        We use the original characters as the target labels, and the lowercased,
        punctuation-removed characters as the source labels.

        Args:
          line: a 1D string tensor.

        Returns:
          A NestedMap containing the processed example.
        """
        p = self.params
        # Tokenize the input into integer ids.
        # tgt_ids has the start-of-sentence token prepended, and tgt_labels has the
        # end-of-sentence token appended.
        tgt_ids, tgt_labels, tgt_paddings = self.tokenizer.StringsToIds(
            tf.convert_to_tensor([line]), p.target_max_length)
        # Because StringsToIds requires a vector but _ProcessLine is called for
        # individual lines, we need to manually remove the batch dimension.
        tgt_ids = tgt_ids[0]
        tgt_labels = tgt_labels[0]
        tgt_paddings = tgt_paddings[0]

        # This normalization function produces the "source" text from which the
        # Punctuator task is trained to reproduce the original "target" text.
        def Normalize(line):
            # Lowercase and remove punctuation.
            line = line.lower().translate(None,
                                          string.punctuation.encode('utf-8'))
            # Convert multiple consecutive spaces to a single one.
            line = b' '.join(line.split())
            return line

        normalized_line = tf.py_func(
            Normalize, [line], tf.string, stateful=False)
        _, src_labels, src_paddings = self.tokenizer.StringsToIds(
            tf.convert_to_tensor([normalized_line]), p.source_max_length)
        # Because StringsToIds requires a vector but _ProcessLine is called for
        # individual lines, we need to manually remove the batch dimension.
        src_labels = src_labels[0]
        src_paddings = src_paddings[0]
        # The model expects the source without a start-of-sentence token.
        src_ids = src_labels
        tgt_weights = 1.0 - tgt_paddings

        ret = py_utils.NestedMap()

        ret.src = py_utils.NestedMap()
        ret.src.ids = tf.cast(src_ids, dtype=tf.int32)
        ret.src.paddings = src_paddings

        ret.tgt = py_utils.NestedMap()
        ret.tgt.ids = tgt_ids
        ret.tgt.labels = tf.cast(tgt_labels, dtype=tf.int32)
        ret.tgt.weights = tgt_weights
        ret.tgt.paddings = tgt_paddings
        return ret
Example #9
def Process(source_id, record):
  del source_id  # Unused.
  # Convert the record's string contents to an int64 scalar in Python.
  [num] = tf.py_func(int, [record], [tf.int64])
  return py_utils.NestedMap(data=num), 1
Example #10
def Wrap(val):
  # Capture the NumPy value `val` so it is fetched from Python at run time.
  dtype = tf.as_dtype(val.dtype)
  assert dtype != tf.string  # tf.string is not supported by py_func.
  return tf.py_func(lambda: val, [], dtype)
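One possible use of `Wrap`: embedding a NumPy value in the graph through a zero-argument py_func, so the value is fetched from Python at run time instead of being baked in as a constant. A small sketch reusing the definition above (assuming TensorFlow 1.x):

import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def Wrap(val):
  dtype = tf.as_dtype(val.dtype)
  assert dtype != tf.string  # tf.string is not supported by py_func.
  return tf.py_func(lambda: val, [], dtype)

weights = np.array([[0.1, 0.2], [0.3, 0.4]], dtype=np.float32)
weights_t = Wrap(weights)

with tf.Session() as sess:
  print(sess.run(weights_t))  # [[0.1 0.2] [0.3 0.4]]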
Example #11
def MyFn():
  # Pull the next (float32, float32) pair from the Python iterator `it`.
  return tf.py_func(lambda: next(it), [], [tf.float32, tf.float32])
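`it` is a Python iterator captured by the lambda; every execution of the py_func op pulls the next pair of float32 values from it. A self-contained sketch (hypothetical iterator contents, assuming TensorFlow 1.x):

import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# An iterator yielding (feature, label) pairs of float32 values.
it = iter([(np.float32(i), np.float32(i * i)) for i in range(3)])

def MyFn():
  return tf.py_func(lambda: next(it), [], [tf.float32, tf.float32])

feature, label = MyFn()

with tf.Session() as sess:
  for _ in range(3):
    print(sess.run([feature, label]))  # [0.0, 0.0], [1.0, 1.0], [2.0, 4.0]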