Example #1
def SequenceAppendToken(x, x_paddings, token, extend=False):
    """Appends <token> to sequence `x`.

  Args:
    x: A sequence of tokens of shape [batch_size, x_len_max].
    x_paddings: The paddings of `x`.
    token: The token to append (of type integer).
    extend: Whether to extend `x` along the length dimension, this must be true
      for any sequence length in `x` that is `x_len_max` or else an invalid
      sequence will be emitted.

  Returns:
    A tuple.
      - The new sequence, Tensor of shape [batch_size, x_len_max].
      - The new paddings, Tensor of shape [batch_size, x_len_max].
  """
    batch_size = py_utils.GetShape(x)[0]
    x_len = tf.cast(tf.round(tf.reduce_sum(1 - x_paddings, 1)), tf.int32)
    if extend:
        x = tf.pad(x, [[0, 0], [0, 1]])
    # Mask all invalid entries of `x` to 0.
    x *= tf.sequence_mask(x_len, py_utils.GetShape(x)[1], x.dtype)
    # Append the <token> based on `x_len`.
    x += tf.scatter_nd(tf.stack([tf.range(batch_size), x_len], axis=1),
                       tf.cast(tf.fill([batch_size], token), x.dtype),
                       py_utils.GetShape(x))
    x_paddings = 1 - tf.sequence_mask(x_len + 1,
                                      py_utils.GetShape(x)[1],
                                      x_paddings.dtype)
    return x, x_paddings
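
A minimal standalone sketch of the same append pattern (lengths recovered from the paddings, then tf.scatter_nd writes one token per row); the tensors and the token id 2 below are made-up illustration values, run eagerly and without the Lingvo py_utils helper:

import tensorflow as tf

x = tf.constant([[7, 8, 0, 0], [5, 0, 0, 0]], tf.int32)         # [batch, x_len_max]
x_paddings = tf.constant([[0., 0., 1., 1.], [0., 1., 1., 1.]])  # 0 = valid, 1 = padding
token = 2                                                        # e.g. an <eos> id
batch_size, x_len_max = 2, 4

x_len = tf.cast(tf.round(tf.reduce_sum(1 - x_paddings, 1)), tf.int32)  # [2, 1]
# Zero out invalid entries, then write `token` at position x_len[b] of row b.
x *= tf.sequence_mask(x_len, x_len_max, x.dtype)
x += tf.scatter_nd(
    tf.stack([tf.range(batch_size), x_len], axis=1),
    tf.fill([batch_size], token), [batch_size, x_len_max])
x_paddings = 1 - tf.sequence_mask(x_len + 1, x_len_max, x_paddings.dtype)
print(x.numpy())           # [[7 8 2 0] [5 2 0 0]]
print(x_paddings.numpy())  # [[0. 0. 0. 1.] [0. 0. 1. 1.]]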
Example #2
  def testContiguousCanvasUnderUniformRollinPolicy(self):
    """Tests for valid canvas size."""
    with self.session(use_gpu=True):
      params = insertion.SymbolInsertionLayer.Params()
      params.name = 'insertion'
      params.rollin_policy = 'oracle'
      params.oracle_policy = 'uniform'

      insertion_layer = insertion.SymbolInsertionLayer(params)

      batch_size = 4
      time_dim = 10

      inputs = tf.tile(
          tf.expand_dims(tf.range(time_dim), 0) + 100, [batch_size, 1])
      inputs_len = tf.random.uniform([batch_size], 0, time_dim, tf.int32)
      paddings = 1 - tf.sequence_mask(inputs_len, time_dim, tf.int32)
      spec = insertion_layer.FProp(
          None, inputs, paddings, force_sample_last_token=False)

      for _ in range(1000):
        canvas, canvas_paddings = self.evaluate(
            [spec.canvas, spec.canvas_paddings])

        for b in range(batch_size):
          length = np.sum(1 - canvas_paddings[b, :]).astype(np.int32)
          # Check for valid part of the canvas and padding.
          for l in range(length):
            self.assertEqual(canvas_paddings[b, l], 0)
            self.assertNotEqual(canvas[b, l], 0)
          # Check for invalid part of the canvas and padding.
          for l in range(length, canvas.shape[1]):
            self.assertEqual(canvas_paddings[b, l], 1)
            self.assertEqual(canvas[b, l], 0)
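
The lengths-to-paddings conversion this test relies on can be checked in isolation; a tiny sketch with arbitrary lengths:

import tensorflow as tf

inputs_len = tf.constant([3, 0, 5], tf.int32)  # per-example valid lengths
time_dim = 5
# tf.sequence_mask marks valid positions with 1; paddings are its complement.
paddings = 1 - tf.sequence_mask(inputs_len, time_dim, tf.int32)
print(paddings.numpy())
# [[0 0 0 1 1]
#  [1 1 1 1 1]
#  [0 0 0 0 0]]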
Example #3
  def _StringsToIdsImpl(self, strs, max_length, append_eos, languages):
    del append_eos
    del languages

    p = self.params

    tokens = self._tokenizer.tokenize(strs)
    num_tokens = tokens.row_lengths(-1)

    if max_length is None:
      labels = tokens.to_tensor(default_value=p.target_unk_id)
    else:
      output_shape = tf.convert_to_tensor(strs).shape + [max_length]
      labels = tokens.to_tensor(
          default_value=p.target_unk_id, shape=output_shape)
      num_tokens = tf.minimum(num_tokens, max_length)

    ids = tf.concat([
        tf.expand_dims(tf.ones_like(strs, tf.int32) * p.target_sos_id, -1),
        labels[:, :-1]
    ], -1)

    paddings = 1.0 - tf.sequence_mask(
        num_tokens, maxlen=max_length, dtype=tf.float32)
    return ids, labels, paddings
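
A self-contained sketch of the same ids/labels/paddings construction, with a tf.RaggedTensor standing in for the tokenizer output; the sos/unk ids and max_length are illustrative values, not the layer's actual params:

import tensorflow as tf

sos_id, unk_id, max_length = 1, 3, 6
tokens = tf.ragged.constant([[11, 12, 13], [21, 22]], tf.int32)  # stand-in tokenizer output
num_tokens = tokens.row_lengths(-1)

labels = tokens.to_tensor(default_value=unk_id, shape=[2, max_length])
num_tokens = tf.minimum(num_tokens, max_length)

# ids are the labels shifted right by one step, with <sos> prepended.
ids = tf.concat([tf.fill([2, 1], sos_id), labels[:, :-1]], -1)
paddings = 1.0 - tf.sequence_mask(num_tokens, maxlen=max_length, dtype=tf.float32)
print(ids.numpy())       # [[ 1 11 12 13  3  3] [ 1 21 22  3  3  3]]
print(paddings.numpy())  # [[0. 0. 0. 1. 1. 1.] [0. 0. 1. 1. 1. 1.]]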
Example #4
  def UnstackFeatures(self, src_inputs, src_paddings):
    """Unstacks src_inputs and src_paddings based off the stack height."""
    sh = self.params.stack_height
    bs, old_series_length, _, channels = py_utils.GetShape(src_inputs)
    unstacked_series_length = old_series_length * sh
    src_inputs = tf.reshape(src_inputs,
                            [bs, unstacked_series_length, -1, channels])
    content = 1 - src_paddings
    lengths = tf.cast(sh * tf.reduce_sum(content, axis=1), tf.int32)
    mask = tf.sequence_mask(lengths, maxlen=unstacked_series_length)
    src_paddings = 1 - tf.cast(mask, tf.int32)
    return src_inputs, src_paddings
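
A standalone sketch of the unstacking arithmetic with concrete (made-up) shapes and a stack height of 2, showing how the per-example lengths scale by the stack height:

import tensorflow as tf

sh = 2                                     # stack height (illustrative)
bs, old_series_length, channels = 3, 4, 1
src_inputs = tf.zeros([bs, old_series_length, sh * 5, channels])
src_paddings = tf.constant([[0., 0., 1., 1.],
                            [0., 1., 1., 1.],
                            [0., 0., 0., 0.]])

unstacked_series_length = old_series_length * sh
src_inputs = tf.reshape(
    src_inputs, [bs, unstacked_series_length, -1, channels])  # -> [3, 8, 5, 1]
# Each stacked frame holds `sh` original frames, so lengths scale by `sh`.
lengths = tf.cast(sh * tf.reduce_sum(1 - src_paddings, axis=1), tf.int32)  # [4 2 8]
src_paddings = 1 - tf.cast(
    tf.sequence_mask(lengths, maxlen=unstacked_series_length), tf.int32)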
Example #5
  def testLayerNormalizedLSTMCellLeanExt(self):
    cell_p = self._GetParams()

    seqlen, batch, input_dim = 4, 2, 2
    inputs = tf.convert_to_tensor(
        np.random.rand(seqlen, batch, input_dim).astype(np.float32))
    input_lens = np.random.randint(1, seqlen + 1, size=batch)
    paddings = 1. - tf.sequence_mask(
        input_lens, maxlen=seqlen, dtype=tf.float32)
    paddings = tf.transpose(paddings)
    reset_mask = tf.zeros((seqlen, batch), tf.float32)
    m0 = tf.convert_to_tensor(
        np.random.rand(batch, input_dim).astype(np.float32))
    c0 = tf.convert_to_tensor(
        np.random.rand(batch, input_dim).astype(np.float32))
    state0 = py_utils.NestedMap(m=m0, c=c0)

    with self.session():
      cell = cell_p.Instantiate()
      self.evaluate(tf.global_variables_initializer())

      # The canonical path
      state = state0
      for i in range(seqlen):
        state, _ = cell.FPropDefaultTheta(
            state,
            py_utils.NestedMap(
                act=[inputs[i, :, :]],
                padding=paddings[i, :, tf.newaxis],
                reset_mask=reset_mask[i, :, tf.newaxis]))
      expected_state = self.evaluate(state)

      # Taking input projection outside of the loop.
      cell_theta = cell.theta.copy()
      cell_theta.wm_i = cell_theta.wm[:cell.params.num_input_nodes, :]
      cell_theta.wm_h = cell_theta.wm[cell.params.num_input_nodes:, :]
      proj_inputs = cell.ProjectInputSequence(cell_theta,
                                              py_utils.NestedMap(act=[inputs]))
      state = state0
      for i in range(seqlen):
        state, _ = cell.FPropWithProjectedInput(
            cell_theta, state,
            py_utils.NestedMap(
                proj_inputs=proj_inputs[i, :, :],
                padding=paddings[i, :, tf.newaxis],
                reset_mask=reset_mask[i, :, tf.newaxis]))
      actual_state = self.evaluate(state)

    tf.logging.info('expected_state:{}'.format(expected_state))
    tf.logging.info('actual_state:{}'.format(actual_state))
    self.assertAllClose(expected_state.m, actual_state.m)
    self.assertAllClose(expected_state.c, actual_state.c)
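
The padding construction used in this test (batch-major mask, then a transpose to the time-major layout the per-step cell loop expects) can be checked on its own; the lengths below are arbitrary:

import tensorflow as tf
import numpy as np

seqlen, batch = 4, 2
input_lens = np.array([2, 4])
# tf.sequence_mask yields [batch, seqlen]; the per-step loop wants [seqlen, batch].
paddings = 1. - tf.sequence_mask(input_lens, maxlen=seqlen, dtype=tf.float32)
paddings = tf.transpose(paddings)
print(paddings.numpy())
# [[0. 0.]
#  [0. 0.]
#  [1. 0.]
#  [1. 0.]]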
Example #6
def SequenceConcat(x, x_paddings, y, y_paddings, pad=0):
    """Concats sequence `x` with sequence `y`.

    This function is length aware (based off the paddings).

    Args:
      x: A sequence of tokens of shape [batch_size, x_len_max].
      x_paddings: The paddings of `x`.
      y: A sequence of tokens of shape [batch_size, y_len_max].
      y_paddings: The paddings of `y`.
      pad: The <pad> token to fill the concatenated sequence (of type integer).

    Returns:
      A tuple.
        - Concatenation of `x` and `y` of shape
          [batch_size, x_len_max + y_len_max].
        - Paddings of the concatenation of shape
          [batch_size, x_len_max + y_len_max].
    """
    # Get the length (w/ eos).
    x_len = tf.cast(tf.round(tf.reduce_sum(1 - x_paddings, 1)), tf.int32)
    y_len = tf.cast(tf.round(tf.reduce_sum(1 - y_paddings, 1)), tf.int32)

    batch_size = py_utils.GetShape(x)[0]
    y_len_max = py_utils.GetShape(y)[1]

    # Pad `x` with necessary <pad>.
    x = tf.concat([x, tf.fill(py_utils.GetShape(y), pad)], 1)
    # Replace all <pad> with 0.
    x = tf.where(tf.not_equal(x, pad), x, tf.fill(py_utils.GetShape(x), 0))

    # Compute the write indices of `y` in `xy`.
    indices = tf.stack([
        tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, y_len_max]),
        (tf.tile(tf.expand_dims(tf.range(y_len_max), 0), [batch_size, 1]) +
         tf.expand_dims(x_len, 1)),
    ], 2)

    xy = x + tf.scatter_nd(indices, y, py_utils.GetShape(x))

    # We need to remap all <pad> to `pad`.
    xy = tf.where(
        tf.less(tf.expand_dims(tf.range(py_utils.GetShape(xy)[1]), 0),
                tf.expand_dims(x_len + y_len, 1)), xy,
        tf.fill(py_utils.GetShape(xy), pad))
    xy_paddings = 1 - tf.sequence_mask(x_len + y_len,
                                       py_utils.GetShape(xy)[1],
                                       x_paddings.dtype)
    return xy, xy_paddings
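
The core of this concat is the tf.scatter_nd write, where each row of `y` is offset by that row's `x_len`. A self-contained sketch of just that step, with illustrative tensors:

import tensorflow as tf

x_len = tf.constant([2, 1])                   # valid lengths of `x`
y = tf.constant([[4, 5], [6, 7]], tf.int32)   # [batch, y_len_max]
batch_size, y_len_max, total_len = 2, 2, 5

# indices[b, j] = (b, x_len[b] + j): row b of `y` lands right after row b of `x`.
indices = tf.stack([
    tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, y_len_max]),
    (tf.tile(tf.expand_dims(tf.range(y_len_max), 0), [batch_size, 1]) +
     tf.expand_dims(x_len, 1)),
], 2)
print(tf.scatter_nd(indices, y, [batch_size, total_len]).numpy())
# [[0 0 4 5 0]
#  [0 6 7 0 0]]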
Example #7
  def _GetTestInputs(self, packed_input):
    seqlen, batch, input_dim, output_dim = 4, 5, 7, 9
    inputs = tf.convert_to_tensor(
        np.random.rand(seqlen, batch, input_dim).astype(np.float32))
    input_lens = np.random.randint(1, seqlen + 1, size=batch)
    padding = 1. - tf.sequence_mask(input_lens, maxlen=seqlen, dtype=tf.float32)
    padding = tf.transpose(padding)[:, :, tf.newaxis]
    segment_id = None
    if packed_input:
      segment_id = tf.convert_to_tensor(
          np.random.randint(0, seqlen, (seqlen, batch, 1), np.int32))

    m = tf.convert_to_tensor(
        np.random.rand(batch, output_dim).astype(np.float32))
    c = tf.convert_to_tensor(
        np.random.rand(batch, output_dim).astype(np.float32))
    return inputs, padding, m, c, segment_id
Example #8
def SequenceTrimLastToken(x, x_paddings):
    """Trims the last token off of sequence `x`, and set trimmed elements to 0.

  Args:
    x: A sequence of tokens of shape [batch_size, x_len_max].
    x_paddings: The paddings of `x`.

  Returns:
    A tuple.
      - The new sequence, Tensor of shape [batch_size, x_len_max].
      - The new paddings, Tensor of shape [batch_size, x_len_max].
  """
    x_len = tf.reduce_sum(1 - x_paddings, 1)
    x_len_max = py_utils.GetShape(x)[1]
    x_trimmed_len = tf.maximum(x_len - 1, 0)
    x_trimmed_paddings = tf.sequence_mask(x_trimmed_len, x_len_max,
                                          x_paddings.dtype)
    x_trimmed = x * tf.cast(x_trimmed_paddings, x.dtype)
    return x_trimmed, 1 - x_trimmed_paddings
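
A quick standalone run of the same trim logic with concrete values (the shape is inlined instead of calling py_utils.GetShape):

import tensorflow as tf

x = tf.constant([[7, 8, 9, 0], [5, 0, 0, 0]], tf.int32)
x_paddings = tf.constant([[0, 0, 0, 1], [0, 1, 1, 1]], tf.int32)
x_len_max = 4

x_len = tf.reduce_sum(1 - x_paddings, 1)            # [3 1]
x_trimmed_len = tf.maximum(x_len - 1, 0)            # [2 0]
keep = tf.sequence_mask(x_trimmed_len, x_len_max, x_paddings.dtype)
x_trimmed = x * tf.cast(keep, x.dtype)
print(x_trimmed.numpy())       # [[7 8 0 0] [0 0 0 0]]
print((1 - keep).numpy())      # [[0 0 1 1] [1 1 1 1]]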
Example #9
    def _MaybePadSourceInputs(self, src_inputs, src_paddings):
        p = self.params
        if not p.append_eos_frame:
            return src_inputs, src_paddings

        per_src_len = tf.reduce_sum(1 - src_paddings, 1)
        per_src_len += 1
        max_src_len = tf.reduce_max(per_src_len)
        input_shape = tf.shape(src_inputs)
        input_len = tf.maximum(input_shape[1], tf.cast(max_src_len, tf.int32))
        pad_steps = input_len - input_shape[1]
        src_inputs = tf.concat([
            src_inputs,
            tf.zeros(inplace_ops.inplace_update(input_shape, 1, pad_steps),
                     src_inputs.dtype)
        ], 1)
        src_paddings = 1 - tf.sequence_mask(
            tf.reshape(per_src_len, [input_shape[0]]), tf.reshape(
                input_len, []), src_paddings.dtype)
        return src_inputs, src_paddings
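
A standalone sketch of the same eos-frame padding with concrete, made-up shapes; tf.stack builds the zero-block shape that the inplace_ops call produces above:

import tensorflow as tf

src_inputs = tf.random.normal([2, 3, 8])                    # [batch, time, feat]
src_paddings = tf.constant([[0., 0., 0.], [0., 1., 1.]])

per_src_len = tf.cast(tf.reduce_sum(1 - src_paddings, 1), tf.int32) + 1  # room for eos frame
max_src_len = tf.reduce_max(per_src_len)                                 # 4
pad_steps = tf.maximum(max_src_len - tf.shape(src_inputs)[1], 0)         # 1

# Grow the time dimension by `pad_steps` zero frames, then recompute paddings.
src_inputs = tf.concat(
    [src_inputs, tf.zeros(tf.stack([2, pad_steps, 8]), src_inputs.dtype)], 1)
src_paddings = 1 - tf.sequence_mask(per_src_len, tf.shape(src_inputs)[1],
                                    src_paddings.dtype)
print(src_paddings.numpy())
# [[0. 0. 0. 0.]
#  [0. 0. 1. 1.]]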
Example #10
    def testMaxCanvasSizeUnderUniformRollinPolicy(self):
        """Tests for valid canvas size."""
        with self.session(use_gpu=True) as sess:
            params = insertion.SymbolInsertionLayer.Params()
            params.name = 'insertion'
            params.rollin_policy = 'oracle'
            params.oracle_policy = 'uniform'

            insertion_layer = insertion.SymbolInsertionLayer(params)

            batch_size = 4
            time_dim = 10

            inputs = tf.tile(tf.expand_dims(tf.range(time_dim), 0),
                             [batch_size, 1])
            inputs_len = tf.random.uniform([batch_size], 0, time_dim, tf.int32)
            paddings = 1 - tf.sequence_mask(inputs_len, time_dim, tf.int32)
            spec = insertion_layer.FProp(None,
                                         inputs,
                                         paddings,
                                         force_sample_last_token=False)

            canvas_with_max_length = False
            for _ in range(1000):
                canvas_max_len, canvas, canvas_paddings = sess.run(
                    [inputs_len, spec.canvas, spec.canvas_paddings])

                for b in range(batch_size):
                    max_len = canvas_max_len[b]
                    length = np.sum(1 - canvas_paddings[b, :]).astype(np.int32)
                    canvas_with_max_length |= length == max_len
                    self.assertLessEqual(length, max_len)
                    # Invalid entries of canvas should be 0.
                    self.assertAllEqual(canvas[b, length:],
                                        [0] * (canvas.shape[1] - length))

            # With high probability, there should be at least one canvas that is
            # of the same size as the maximum canvas size.
            self.assertEqual(canvas_with_max_length, True)
Example #11
    def FProp(self,
              theta,
              x,
              x_paddings=None,
              eos_id=1,
              force_sample_last_token=True):
        """Applies SymbolInsertionLayer.

        We take in `x`, which represents the groundtruth sequence (i.e., the
        English sequence). We return a sampled rollin (observed) canvas (i.e., a
        random subset of the English sequence), as well as the targets (indices)
        for an insertion-based model (i.e., the targets given the random
        observed subset).

        Args:
          theta: Ignored, this can be None.
          x: The symbol ids of shape `[batch_size, time_dim]`.
          x_paddings: The paddings (1 or 0) of shape `[batch_size, time_dim]`
            where 0 is valid and 1 is invalid.
          eos_id: The <eos> token id to represent end-of-slot.
          force_sample_last_token: Set True to force sample the last token of
            `x`.

        Returns:
          A `NestedMap`.
            - canvas: The canvas (based off of the `rollin_policy`) of shape
              [batch_size, c_dim]. Note that `c_dim` <= `time_dim`, but they
              need not be equal.
            - canvas_indices: The canvas indices (into `x`).
            - canvas_paddings: The paddings of `canvas_indices`.
            - target_indices: The target indices of shape [num_targets, 3].
              `num_targets` is the number of total targets in the entire batch.
              [:, 0] captures the batch, [:, 1] captures the slot, and [:, 2]
              captures the token. Each row [batch, slot, vocab] represents the
              indices of the target -- i.e., the batch, slot and vocab
              combination of the target. Typical usage of these indices is to
              tf.gather_nd the log-probs (from the softmax layer).
            - target_weights: The target weights.

        Raises:
          ValueError: If invalid params.
        """
        p = self.params

        batch_size = py_utils.GetShape(x)[0]
        time_dim = py_utils.GetShape(x)[1]

        if x_paddings is None:
            x_paddings = tf.zeros([batch_size, time_dim], tf.float32)

        oracle_policy = p.oracle_policy
        rollin_policy = (oracle_policy
                         if p.rollin_policy == 'oracle' else p.rollin_policy)

        if rollin_policy != 'uniform':
            raise ValueError('Unknown or unsupported rollin policy: %s' %
                             rollin_policy)
        if oracle_policy != 'uniform':
            raise ValueError('Unknown or unsupported oracle policy: %s' %
                             oracle_policy)

        x_len = tf.cast(tf.round(tf.reduce_sum(1 - x_paddings, 1)), tf.int32)

        # Compute the desired length per example in the batch.
        ratio = tf.random.uniform([batch_size], 0.0, 1.0, seed=p.random_seed)
        if force_sample_last_token:
            c_len = tf.minimum(
                tf.cast(ratio * tf.cast(x_len, tf.float32), tf.int32),
                x_len - 1) + 1
        else:
            c_len = tf.minimum(
                tf.cast(ratio * tf.cast(x_len + 1, tf.float32), tf.int32),
                x_len)
        # Compute the maximum length across the batch.
        c_len_max = tf.reduce_max(c_len)

        # Grab subset of random valid indices per example.
        z_logits = tf.cast(
            tf.expand_dims(tf.range(time_dim), 0) >= tf.expand_dims(x_len, 1),
            tf.float32) * -1e9
        if force_sample_last_token:
            # Force sample the last token -- i.e., as indexed by `x_len - 1`. We
            # can accomplish this by adding +LARGE_NUMBER to the logits.
            z_logits += tf.cast(
                tf.equal(tf.expand_dims(tf.range(time_dim), 0),
                         tf.expand_dims(x_len - 1, 1)), tf.float32) * 1e9
        # Gumbel-max trick to sample (we only sample valid positions per sample in
        # the batch).
        z = -tf.math.log(-tf.math.log(
            tf.random.uniform([batch_size, time_dim], seed=p.random_seed)))
        unused_c_values, c_indices = tf.nn.top_k(z_logits + z, time_dim)

        # Trim everything > c_len_max.
        c_indices = c_indices[:, :c_len_max]

        # Invalidate any indices >= c_len; we use the last index as the default
        # invalid index.
        c_indices = tf.where(
            tf.expand_dims(tf.range(c_len_max), 0) < tf.expand_dims(c_len, 1),
            c_indices, tf.fill(py_utils.GetShape(c_indices), time_dim - 1))

        # Materialize the canvas.
        c_indices = tf.sort(c_indices)
        c = tf.gather_nd(
            x,
            tf.stack([
                tf.reshape(
                    tf.tile(tf.expand_dims(tf.range(batch_size), 1),
                            [1, c_len_max]), [-1]),
                tf.reshape(c_indices, [-1])
            ], 1))
        c = tf.reshape(c, [batch_size, c_len_max])

        # Compute the paddings.
        c_paddings = 1 - tf.sequence_mask(
            c_len, c_len_max, dtype=x_paddings.dtype)
        c *= tf.cast(1 - c_paddings, tf.int32)

        indices = tf.concat([
            tf.reshape(
                tf.tile(tf.expand_dims(tf.range(batch_size), 1),
                        [1, c_len_max]), [batch_size * c_len_max, 1]),
            tf.reshape(c_indices, [batch_size * c_len_max, 1])
        ], 1)
        x_token_is_observed = tf.scatter_nd(
            indices, tf.ones([batch_size * c_len_max], tf.int32),
            py_utils.GetShape(x))
        # `x_segments` captures which slot each `x` belongs to (both observed and
        # tokens that need to be observed).
        x_segments = tf.cumsum(x_token_is_observed, 1, exclusive=True)

        x_token_is_observed = tf.cast(x_token_is_observed, tf.bool)
        prev_x_token_is_observed = tf.pad(x_token_is_observed[:, :-1],
                                          [[0, 0], [1, 0]],
                                          constant_values=True)
        x_token_is_observed = tf.reshape(x_token_is_observed, [-1])
        prev_x_token_is_observed = tf.reshape(prev_x_token_is_observed, [-1])
        x_is_valid = tf.cast(1 - x_paddings, tf.bool)
        x_is_valid = tf.reshape(x_is_valid, [-1])

        # Remap all the observed tokens to <eos>; note some of these need a zero
        # weight (or else there would be an <eos> and a valid token in the same
        # slot).
        target_indices = tf.cast(tf.reshape(x, [-1, 1]), tf.int32)
        target_indices = tf.where(
            x_token_is_observed,
            tf.fill(py_utils.GetShape(target_indices), eos_id), target_indices)

        # TODO(williamchan): We give uniform 1.0 weight, however, math suggests
        # we may want to weigh this term by the original sequence length.
        target_weights = tf.ones_like(target_indices, tf.float32)

        # We need to set to zero the weights of the <eos> entries that actually
        # have valid tokens in their slot.
        target_weights = tf.where(
            x_token_is_observed & ~prev_x_token_is_observed,
            tf.zeros_like(target_weights), target_weights)

        # TODO(williamchan): Consider dropping the entries w/ weight zero.

        # Add the batch and slot indices.
        target_indices = tf.concat([
            tf.reshape(
                tf.tile(tf.expand_dims(tf.range(batch_size), 1),
                        [1, time_dim]), [batch_size * time_dim, 1]),
            tf.reshape(x_segments, [-1, 1]), target_indices
        ], 1)

        # Select only the valid indices. The selected valid ones include slots w/
        # <eos>.
        target_indices = target_indices[x_is_valid]
        target_weights = target_weights[x_is_valid]

        return py_utils.NestedMap(canvas=c,
                                  canvas_indices=c_indices,
                                  canvas_paddings=c_paddings,
                                  target_indices=target_indices,
                                  target_weights=target_weights)
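
The subset sampling inside FProp is the Gumbel-max trick: valid positions get logit 0, invalid ones get -1e9, Gumbel noise is added, and top_k then yields a random ordering of the valid positions first. A self-contained sketch of just that sampling step (the lengths and sizes are illustrative):

import tensorflow as tf

batch_size, time_dim = 2, 6
x_len = tf.constant([4, 2])   # valid prefix length per example
c_len = tf.constant([2, 1])   # how many positions to sample per example

# Valid positions get logit 0, invalid ones a large negative number.
z_logits = tf.cast(
    tf.expand_dims(tf.range(time_dim), 0) >= tf.expand_dims(x_len, 1),
    tf.float32) * -1e9
# Gumbel-max: add -log(-log(U)) noise, then top_k samples without replacement
# among the (equal-logit) valid positions.
z = -tf.math.log(-tf.math.log(tf.random.uniform([batch_size, time_dim])))
_, c_indices = tf.nn.top_k(z_logits + z, time_dim)

# Keep the first c_len[b] sampled indices per row; clamp the rest to the last
# position, mirroring the layer above.
c_len_max = tf.reduce_max(c_len)
c_indices = c_indices[:, :c_len_max]
c_indices = tf.where(
    tf.expand_dims(tf.range(c_len_max), 0) < tf.expand_dims(c_len, 1),
    c_indices, tf.fill(tf.shape(c_indices), time_dim - 1))
c_indices = tf.sort(c_indices)  # e.g. [[0 3] [1 5]] -- random valid picks per row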