def SequenceAppendToken(x, x_paddings, token, extend=False):
  """Appends <token> to sequence `x`.

  Args:
    x: A sequence of tokens of shape [batch_size, x_len_max].
    x_paddings: The paddings of `x`.
    token: The token to append (of type integer).
    extend: Whether to extend `x` along the length dimension. This must be
      True for any sequence whose length is already `x_len_max`, or else an
      invalid sequence will be emitted.

  Returns:
    A tuple.
      - The new sequence, Tensor of shape [batch_size, x_len_max]
        ([batch_size, x_len_max + 1] if `extend` is True).
      - The new paddings, Tensor of the same shape as the new sequence.
  """
  batch_size = py_utils.GetShape(x)[0]

  x_len = tf.cast(tf.round(tf.reduce_sum(1 - x_paddings, 1)), tf.int32)

  if extend:
    x = tf.pad(x, [[0, 0], [0, 1]])

  # Mask all invalid entries of `x` to 0.
  x *= tf.sequence_mask(x_len, py_utils.GetShape(x)[1], x.dtype)

  # Append the <token> based on `x_len`.
  x += tf.scatter_nd(
      tf.stack([tf.range(batch_size), x_len], axis=1),
      tf.cast(tf.fill([batch_size], token), x.dtype), py_utils.GetShape(x))

  x_paddings = 1 - tf.sequence_mask(x_len + 1,
                                    py_utils.GetShape(x)[1],
                                    x_paddings.dtype)

  return x, x_paddings
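# Illustrative usage sketch (not part of the original module). Assumes `tf`
# and `py_utils` are imported as in the surrounding code and that eager
# execution (or a session) is available. Appending an <eos> token with id 2
# to two sequences of lengths 2 and 1:
#
#   x = tf.constant([[3, 4, 0], [5, 0, 0]])
#   x_paddings = tf.constant([[0., 0., 1.], [0., 1., 1.]])
#   new_x, new_paddings = SequenceAppendToken(x, x_paddings, token=2)
#   # new_x        -> [[3, 4, 2], [5, 2, 0]]
#   # new_paddings -> [[0., 0., 0.], [0., 0., 1.]]
#
# If any sequence already fills the last column, pass extend=True so an extra
# column is added before the token is appended.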
def testContiguousCanvasUnderUniformRollinPolicy(self):
  """Tests that the valid portion of the canvas is contiguous."""
  with self.session(use_gpu=True):
    params = insertion.SymbolInsertionLayer.Params()
    params.name = 'insertion'
    params.rollin_policy = 'oracle'
    params.oracle_policy = 'uniform'

    insertion_layer = insertion.SymbolInsertionLayer(params)

    batch_size = 4
    time_dim = 10

    inputs = tf.tile(
        tf.expand_dims(tf.range(time_dim), 0) + 100, [batch_size, 1])
    inputs_len = tf.random.uniform([batch_size], 0, time_dim, tf.int32)
    paddings = 1 - tf.sequence_mask(inputs_len, time_dim, tf.int32)

    spec = insertion_layer.FProp(
        None, inputs, paddings, force_sample_last_token=False)

    for _ in range(1000):
      canvas, canvas_paddings = self.evaluate(
          [spec.canvas, spec.canvas_paddings])

      for b in range(batch_size):
        length = np.sum(1 - canvas_paddings[b, :]).astype(np.int32)
        # Check the valid part of the canvas and paddings.
        for l in range(length):
          self.assertEqual(canvas_paddings[b, l], 0)
          self.assertNotEqual(canvas[b, l], 0)
        # Check the invalid part of the canvas and paddings.
        for l in range(length, canvas.shape[1]):
          self.assertEqual(canvas_paddings[b, l], 1)
          self.assertEqual(canvas[b, l], 0)
def _StringsToIdsImpl(self, strs, max_length, append_eos, languages):
  del append_eos
  del languages
  p = self.params

  tokens = self._tokenizer.tokenize(strs)
  num_tokens = tokens.row_lengths(-1)

  if max_length is None:
    labels = tokens.to_tensor(default_value=p.target_unk_id)
  else:
    output_shape = tf.convert_to_tensor(strs).shape + [max_length]
    labels = tokens.to_tensor(
        default_value=p.target_unk_id, shape=output_shape)
    num_tokens = tf.minimum(num_tokens, max_length)

  ids = tf.concat([
      tf.expand_dims(tf.ones_like(strs, tf.int32) * p.target_sos_id, -1),
      labels[:, :-1]
  ], -1)
  paddings = 1.0 - tf.sequence_mask(
      num_tokens, maxlen=max_length, dtype=tf.float32)
  return ids, labels, paddings
def UnstackFeatures(self, src_inputs, src_paddings):
  """Unstacks src_inputs and src_paddings based on the stack height."""
  sh = self.params.stack_height
  bs, old_series_length, _, channels = py_utils.GetShape(src_inputs)
  unstacked_series_length = old_series_length * sh
  src_inputs = tf.reshape(src_inputs,
                          [bs, unstacked_series_length, -1, channels])
  content = 1 - src_paddings
  lengths = tf.cast(sh * tf.reduce_sum(content, axis=1), tf.int32)
  mask = tf.sequence_mask(lengths, maxlen=unstacked_series_length)
  src_paddings = 1 - tf.cast(mask, tf.int32)
  return src_inputs, src_paddings
def testLayerNormalizedLSTMCellLeanExt(self):
  cell_p = self._GetParams()

  seqlen, batch, input_dim = 4, 2, 2
  inputs = tf.convert_to_tensor(
      np.random.rand(seqlen, batch, input_dim).astype(np.float32))
  input_lens = np.random.randint(1, seqlen + 1, size=batch)
  paddings = 1. - tf.sequence_mask(
      input_lens, maxlen=seqlen, dtype=tf.float32)
  paddings = tf.transpose(paddings)
  reset_mask = tf.zeros((seqlen, batch), tf.float32)
  m0 = tf.convert_to_tensor(
      np.random.rand(batch, input_dim).astype(np.float32))
  c0 = tf.convert_to_tensor(
      np.random.rand(batch, input_dim).astype(np.float32))
  state0 = py_utils.NestedMap(m=m0, c=c0)

  with self.session():
    cell = cell_p.Instantiate()
    self.evaluate(tf.global_variables_initializer())

    # The canonical path.
    state = state0
    for i in range(seqlen):
      state, _ = cell.FPropDefaultTheta(
          state,
          py_utils.NestedMap(
              act=[inputs[i, :, :]],
              padding=paddings[i, :, tf.newaxis],
              reset_mask=reset_mask[i, :, tf.newaxis]))
    expected_state = self.evaluate(state)

    # Taking the input projection outside of the loop.
    cell_theta = cell.theta.copy()
    cell_theta.wm_i = cell_theta.wm[:cell.params.num_input_nodes, :]
    cell_theta.wm_h = cell_theta.wm[cell.params.num_input_nodes:, :]
    proj_inputs = cell.ProjectInputSequence(
        cell_theta, py_utils.NestedMap(act=[inputs]))

    state = state0
    for i in range(seqlen):
      state, _ = cell.FPropWithProjectedInput(
          cell_theta, state,
          py_utils.NestedMap(
              proj_inputs=proj_inputs[i, :, :],
              padding=paddings[i, :, tf.newaxis],
              reset_mask=reset_mask[i, :, tf.newaxis]))
    actual_state = self.evaluate(state)

    tf.logging.info('expected_state:{}'.format(expected_state))
    tf.logging.info('actual_state:{}'.format(actual_state))
    self.assertAllClose(expected_state.m, actual_state.m)
    self.assertAllClose(expected_state.c, actual_state.c)
def SequenceConcat(x, x_paddings, y, y_paddings, pad=0):
  """Concatenates sequence `x` with sequence `y`.

  This function is length aware (based on the paddings).

  Args:
    x: A sequence of tokens of shape [batch_size, x_len_max].
    x_paddings: The paddings of `x`.
    y: A sequence of tokens of shape [batch_size, y_len_max].
    y_paddings: The paddings of `y`.
    pad: The <pad> token to fill the concatenated sequence (of type integer).

  Returns:
    A tuple.
      - Concatenation of `x` and `y` of shape
        [batch_size, x_len_max + y_len_max].
      - Paddings of the concatenation of shape
        [batch_size, x_len_max + y_len_max].
  """
  # Get the length (w/ eos).
  x_len = tf.cast(tf.round(tf.reduce_sum(1 - x_paddings, 1)), tf.int32)
  y_len = tf.cast(tf.round(tf.reduce_sum(1 - y_paddings, 1)), tf.int32)

  batch_size = py_utils.GetShape(x)[0]
  y_len_max = py_utils.GetShape(y)[1]

  # Pad `x` with the necessary <pad>.
  x = tf.concat([x, tf.fill(py_utils.GetShape(y), pad)], 1)
  # Replace all <pad> with 0.
  x = tf.where(tf.not_equal(x, pad), x, tf.fill(py_utils.GetShape(x), 0))

  # Compute the write indices of `y` in `xy`.
  indices = tf.stack([
      tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, y_len_max]),
      (tf.tile(tf.expand_dims(tf.range(y_len_max), 0), [batch_size, 1]) +
       tf.expand_dims(x_len, 1)),
  ], 2)

  xy = x + tf.scatter_nd(indices, y, py_utils.GetShape(x))

  # We need to remap all <pad> to `pad`.
  xy = tf.where(
      tf.less(
          tf.expand_dims(tf.range(py_utils.GetShape(xy)[1]), 0),
          tf.expand_dims(x_len + y_len, 1)), xy,
      tf.fill(py_utils.GetShape(xy), pad))
  xy_paddings = 1 - tf.sequence_mask(x_len + y_len,
                                     py_utils.GetShape(xy)[1],
                                     x_paddings.dtype)
  return xy, xy_paddings
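# Illustrative usage sketch (not part of the original module), concatenating
# two length-aware sequences with the default pad token 0:
#
#   x = tf.constant([[1, 2, 0], [3, 0, 0]])
#   x_paddings = tf.constant([[0., 0., 1.], [0., 1., 1.]])
#   y = tf.constant([[4, 5], [6, 0]])
#   y_paddings = tf.constant([[0., 0.], [0., 1.]])
#   xy, xy_paddings = SequenceConcat(x, x_paddings, y, y_paddings)
#   # xy          -> [[1, 2, 4, 5, 0], [3, 6, 0, 0, 0]]
#   # xy_paddings -> [[0., 0., 0., 0., 1.], [0., 0., 1., 1., 1.]]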
def _GetTestInputs(self, packed_input):
  seqlen, batch, input_dim, output_dim = 4, 5, 7, 9
  inputs = tf.convert_to_tensor(
      np.random.rand(seqlen, batch, input_dim).astype(np.float32))
  input_lens = np.random.randint(1, seqlen + 1, size=batch)
  padding = 1. - tf.sequence_mask(
      input_lens, maxlen=seqlen, dtype=tf.float32)
  padding = tf.transpose(padding)[:, :, tf.newaxis]
  segment_id = None
  if packed_input:
    segment_id = tf.convert_to_tensor(
        np.random.randint(0, seqlen, (seqlen, batch, 1), np.int32))

  m = tf.convert_to_tensor(
      np.random.rand(batch, output_dim).astype(np.float32))
  c = tf.convert_to_tensor(
      np.random.rand(batch, output_dim).astype(np.float32))
  return inputs, padding, m, c, segment_id
def SequenceTrimLastToken(x, x_paddings):
  """Trims the last token off of sequence `x` and sets trimmed elements to 0.

  Args:
    x: A sequence of tokens of shape [batch_size, x_len_max].
    x_paddings: The paddings of `x`.

  Returns:
    A tuple.
      - The new sequence, Tensor of shape [batch_size, x_len_max].
      - The new paddings, Tensor of shape [batch_size, x_len_max].
  """
  x_len = tf.reduce_sum(1 - x_paddings, 1)
  x_len_max = py_utils.GetShape(x)[1]
  x_trimmed_len = tf.maximum(x_len - 1, 0)
  x_trimmed_paddings = tf.sequence_mask(x_trimmed_len, x_len_max,
                                        x_paddings.dtype)
  x_trimmed = x * tf.cast(x_trimmed_paddings, x.dtype)
  return x_trimmed, 1 - x_trimmed_paddings
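# Illustrative usage sketch (not part of the original module), trimming the
# last token (e.g. a previously appended <eos>) off each sequence:
#
#   x = tf.constant([[3, 4, 2], [5, 2, 0]])
#   x_paddings = tf.constant([[0., 0., 0.], [0., 0., 1.]])
#   x_trimmed, trimmed_paddings = SequenceTrimLastToken(x, x_paddings)
#   # x_trimmed        -> [[3, 4, 0], [5, 0, 0]]
#   # trimmed_paddings -> [[0., 0., 1.], [0., 1., 1.]]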
def _MaybePadSourceInputs(self, src_inputs, src_paddings):
  p = self.params
  if not p.append_eos_frame:
    return src_inputs, src_paddings

  per_src_len = tf.reduce_sum(1 - src_paddings, 1)
  per_src_len += 1
  max_src_len = tf.reduce_max(per_src_len)
  input_shape = tf.shape(src_inputs)
  input_len = tf.maximum(input_shape[1], tf.cast(max_src_len, tf.int32))
  pad_steps = input_len - input_shape[1]
  src_inputs = tf.concat([
      src_inputs,
      tf.zeros(
          inplace_ops.inplace_update(input_shape, 1, pad_steps),
          src_inputs.dtype)
  ], 1)
  src_paddings = 1 - tf.sequence_mask(
      tf.reshape(per_src_len, [input_shape[0]]), tf.reshape(input_len, []),
      src_paddings.dtype)
  return src_inputs, src_paddings
def testMaxCanvasSizeUnderUniformRollinPolicy(self):
  """Tests for valid canvas size."""
  with self.session(use_gpu=True) as sess:
    params = insertion.SymbolInsertionLayer.Params()
    params.name = 'insertion'
    params.rollin_policy = 'oracle'
    params.oracle_policy = 'uniform'

    insertion_layer = insertion.SymbolInsertionLayer(params)

    batch_size = 4
    time_dim = 10

    inputs = tf.tile(tf.expand_dims(tf.range(time_dim), 0), [batch_size, 1])
    inputs_len = tf.random.uniform([batch_size], 0, time_dim, tf.int32)
    paddings = 1 - tf.sequence_mask(inputs_len, time_dim, tf.int32)

    spec = insertion_layer.FProp(
        None, inputs, paddings, force_sample_last_token=False)

    canvas_with_max_length = False
    for _ in range(1000):
      canvas_max_len, canvas, canvas_paddings = sess.run(
          [inputs_len, spec.canvas, spec.canvas_paddings])

      for b in range(batch_size):
        max_len = canvas_max_len[b]
        length = np.sum(1 - canvas_paddings[b, :]).astype(np.int32)
        canvas_with_max_length |= length == max_len
        self.assertLessEqual(length, max_len)
        # Invalid entries of the canvas should be 0.
        self.assertAllEqual(canvas[b, length:],
                            [0] * (canvas.shape[1] - length))

    # With high probability, there should be at least one canvas that is
    # of the same size as the maximum canvas size.
    self.assertEqual(canvas_with_max_length, True)
def FProp(self,
          theta,
          x,
          x_paddings=None,
          eos_id=1,
          force_sample_last_token=True):
  """Applies SymbolInsertionLayer.

  We take in `x`, which represents the groundtruth sequence (i.e., the
  English sequence). We return a sampled rollin (observed) canvas (i.e., a
  random subset of the English sequence), as well as the target (indices)
  for an insertion-based model (i.e., the targets given the random observed
  subset).

  Args:
    theta: Ignored, this can be None.
    x: The symbol ids of shape `[batch_size, time_dim]`.
    x_paddings: The paddings (1 or 0) of shape `[batch_size, time_dim]` where
      0 is valid and 1 is invalid.
    eos_id: The <eos> token id to represent end-of-slot.
    force_sample_last_token: Set True to force sample the last token of `x`.

  Returns:
    A `NestedMap`.
      - canvas: The canvas (based off of the `rollin_policy`) of shape
        [batch_size, c_dim]. Note that `c_dim` <= `time_dim` but they need
        not be equal.
      - canvas_indices: The canvas indices (into `x`).
      - canvas_paddings: The paddings of `canvas_indices`.
      - target_indices: The target indices of shape [num_targets, 3].
        `num_targets` is the number of total targets in the entire batch.
        [:, 0] captures the batch, [:, 1] captures the slot, and [:, 2]
        captures the token. Each row [batch, slot, vocab] represents the
        indices of the target -- i.e., the batch, slot and vocab combination
        of the target. Typical usage of these indices is to tf.gather_nd
        the log-probs (from the softmax layer).
      - target_weights: The target weights.

  Raises:
    ValueError: If invalid params.
  """
  p = self.params

  batch_size = py_utils.GetShape(x)[0]
  time_dim = py_utils.GetShape(x)[1]

  if x_paddings is None:
    x_paddings = tf.zeros([batch_size, time_dim], tf.float32)

  oracle_policy = p.oracle_policy
  rollin_policy = (
      oracle_policy if p.rollin_policy == 'oracle' else p.rollin_policy)

  if rollin_policy != 'uniform':
    raise ValueError('Unknown or unsupported rollin policy: %s' %
                     rollin_policy)
  if oracle_policy != 'uniform':
    raise ValueError('Unknown or unsupported oracle policy: %s' %
                     oracle_policy)

  x_len = tf.cast(tf.round(tf.reduce_sum(1 - x_paddings, 1)), tf.int32)

  # Compute the desired length per example in the batch.
  ratio = tf.random.uniform([batch_size], 0.0, 1.0, seed=p.random_seed)
  if force_sample_last_token:
    c_len = tf.minimum(
        tf.cast(ratio * tf.cast(x_len, tf.float32), tf.int32), x_len - 1) + 1
  else:
    c_len = tf.minimum(
        tf.cast(ratio * tf.cast(x_len + 1, tf.float32), tf.int32), x_len)
  # Compute the maximum length across the batch.
  c_len_max = tf.reduce_max(c_len)

  # Grab a subset of random valid indices per example.
  z_logits = tf.cast(
      tf.expand_dims(tf.range(time_dim), 0) >= tf.expand_dims(x_len, 1),
      tf.float32) * -1e9
  if force_sample_last_token:
    # Force sample the last token -- i.e., as indexed by `x_len - 1`. We can
    # accomplish this by adding +LARGE_NUMBER to the logits.
    z_logits += tf.cast(
        tf.equal(
            tf.expand_dims(tf.range(time_dim), 0),
            tf.expand_dims(x_len - 1, 1)), tf.float32) * 1e9
  # Gumbel-max trick to sample (we only sample valid positions per sample in
  # the batch).
  z = -tf.math.log(-tf.math.log(
      tf.random.uniform([batch_size, time_dim], seed=p.random_seed)))
  unused_c_values, c_indices = tf.nn.top_k(z_logits + z, time_dim)

  # Trim everything > c_len_max.
  c_indices = c_indices[:, :c_len_max]

  # Invalidate any indices >= c_len; we use the last index as the default
  # invalid index.
  c_indices = tf.where(
      tf.expand_dims(tf.range(c_len_max), 0) < tf.expand_dims(c_len, 1),
      c_indices, tf.fill(py_utils.GetShape(c_indices), time_dim - 1))

  # Materialize the canvas.
  c_indices = tf.sort(c_indices)
  c = tf.gather_nd(
      x,
      tf.stack([
          tf.reshape(
              tf.tile(tf.expand_dims(tf.range(batch_size), 1),
                      [1, c_len_max]), [-1]),
          tf.reshape(c_indices, [-1])
      ], 1))
  c = tf.reshape(c, [batch_size, c_len_max])

  # Compute the paddings.
  c_paddings = 1 - tf.sequence_mask(c_len, c_len_max, dtype=x_paddings.dtype)
  c *= tf.cast(1 - c_paddings, tf.int32)

  indices = tf.concat([
      tf.reshape(
          tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, c_len_max]),
          [batch_size * c_len_max, 1]),
      tf.reshape(c_indices, [batch_size * c_len_max, 1])
  ], 1)
  x_token_is_observed = tf.scatter_nd(
      indices, tf.ones([batch_size * c_len_max], tf.int32),
      py_utils.GetShape(x))
  # `x_segments` captures which slot each `x` belongs to (both observed and
  # tokens that need to be observed).
  x_segments = tf.cumsum(x_token_is_observed, 1, exclusive=True)

  x_token_is_observed = tf.cast(x_token_is_observed, tf.bool)
  prev_x_token_is_observed = tf.pad(
      x_token_is_observed[:, :-1], [[0, 0], [1, 0]], constant_values=True)

  x_token_is_observed = tf.reshape(x_token_is_observed, [-1])
  prev_x_token_is_observed = tf.reshape(prev_x_token_is_observed, [-1])
  x_is_valid = tf.cast(1 - x_paddings, tf.bool)
  x_is_valid = tf.reshape(x_is_valid, [-1])

  # Remap all the observed tokens to <eos>; note some of these need a zero
  # weight (or else there would be an <eos> and a valid token in the same
  # slot).
  target_indices = tf.cast(tf.reshape(x, [-1, 1]), tf.int32)
  target_indices = tf.where(
      x_token_is_observed,
      tf.fill(py_utils.GetShape(target_indices), eos_id), target_indices)

  # TODO(williamchan): We give uniform 1.0 weight, however, math suggests
  # we may want to weigh this term by the original sequence length.
  target_weights = tf.ones_like(target_indices, tf.float32)

  # We need to set all the weights for <eos> which actually have valid tokens
  # in the slot to zero.
  target_weights = tf.where(
      x_token_is_observed & ~prev_x_token_is_observed,
      tf.zeros_like(target_weights), target_weights)

  # TODO(williamchan): Consider dropping the entries w/ weight zero.

  # Add the batch and slot indices.
  target_indices = tf.concat([
      tf.reshape(
          tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, time_dim]),
          [batch_size * time_dim, 1]),
      tf.reshape(x_segments, [-1, 1]), target_indices
  ], 1)

  # Select only the valid indices. The selected valid ones include slots w/
  # <eos>.
  target_indices = target_indices[x_is_valid]
  target_weights = target_weights[x_is_valid]

  return py_utils.NestedMap(
      canvas=c,
      canvas_indices=c_indices,
      canvas_paddings=c_paddings,
      target_indices=target_indices,
      target_weights=target_weights)
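# Illustrative usage sketch (not part of the original module), mirroring the
# tests above; the token ids below are hypothetical:
#
#   params = insertion.SymbolInsertionLayer.Params()
#   params.name = 'insertion'
#   params.rollin_policy = 'oracle'
#   params.oracle_policy = 'uniform'
#   layer = insertion.SymbolInsertionLayer(params)
#
#   x = tf.constant([[11, 12, 13, 2]])          # one sequence, <eos> id 2
#   x_paddings = tf.zeros([1, 4], tf.float32)   # all four positions valid
#   spec = layer.FProp(None, x, x_paddings, eos_id=2)
#   # spec.canvas          -> a random subset of `x`; the last valid token is
#   #                         always kept since force_sample_last_token
#   #                         defaults to True.
#   # spec.canvas_paddings -> paddings for the canvas.
#   # spec.target_indices  -> rows of [batch, slot, token] typically used to
#   #                         tf.gather_nd log-probs from the softmax layer.
#   # spec.target_weights  -> 1.0 per target, except 0.0 for the <eos> of
#   #                         slots that still contain unobserved tokens.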