def collapse_repeated(labels, seq_length, name=None):
    """Collapse runs of identical consecutive labels into a single label.

    Args:
      labels: Tensor of shape [batch, max value in seq_length].
      seq_length: Tensor of shape [batch], sequence length of each batch
        element.
      name: A name for this `Op`. Defaults to "collapse_repeated_labels".

    Returns:
      A tuple `(collapsed_labels, new_seq_length)` where

      collapsed_labels: Tensor of shape [batch, max(new_seq_length)] holding
        the de-duplicated labels, padded on the right, eg:
        `[[A, A, B, B, A], [A, B, C, D, E]] => [[A, B, A, 0, 0], [A, B, C, D, E]]`

      new_seq_length: Tensor of shape [batch] with the collapsed sequence
        lengths, cast to the dtype of `seq_length`.
    """
    with ops.name_scope(name, "collapse_repeated_labels",
                        [labels, seq_length]):
        labels = ops.convert_to_tensor(labels, name="labels")
        seq_length = ops.convert_to_tensor(seq_length, name="seq_length")

        # A position is kept when it opens a new run: the first column always
        # does, every other column does when it differs from its left
        # neighbour.
        first_column = array_ops.ones_like(labels[:, :1], dtypes.bool)
        differs_from_left = math_ops.not_equal(labels[:, 1:], labels[:, :-1])
        keep = array_ops.concat([first_column, differs_from_left], axis=1)

        # Positions past the end of each sequence are never kept.
        padded_len = _get_dim(labels, 1)
        in_sequence = array_ops.sequence_mask(seq_length, maxlen=padded_len)
        keep = math_ops.logical_and(keep, in_sequence)

        # Number of kept positions per row is the collapsed length.
        collapsed_len = math_ops.reduce_sum(
            math_ops.cast(keep, dtypes.int32), axis=1)

        # Destination slots: the first `collapsed_len[b]` columns of row b in
        # a [batch, max(collapsed_len)] output.
        longest = math_ops.reduce_max(collapsed_len)
        dest_mask = array_ops.sequence_mask(collapsed_len, maxlen=longest)

        # Work on flattened views so one scatter moves every kept label.
        labels_1d = array_ops.reshape(labels, [-1])
        keep_1d = array_ops.reshape(keep, [-1])
        dest_1d = array_ops.reshape(dest_mask, [-1])
        positions = math_ops.range(_get_dim(dest_1d, 0))

        dest_indices = array_ops.expand_dims(
            array_ops.boolean_mask(positions, dest_1d), axis=1)
        kept_labels = array_ops.boolean_mask(labels_1d, keep_1d)
        scattered = array_ops.scatter_nd(
            indices=dest_indices,
            updates=kept_labels,
            shape=array_ops.shape(dest_1d))

        # Restore the [batch, longest] layout.
        collapsed = array_ops.reshape(
            scattered, [_get_dim(labels, 0), longest])
        return collapsed, math_ops.cast(collapsed_len, seq_length.dtype)
def Reference_embedding(inputs, input_lengths, training=True,
                        channels=(32, 64, 128), gru_unit=128,
                        name='reference_embedding', reuse=False):
    """Encode a padded batch of reference features into one vector per example.

    The input is masked along the time axis, passed through one stride-2 3x3
    convolution per entry of `channels` (each followed by batch
    normalization and re-masking), flattened per time step, fed to the
    project's `gru` helper, and the last time step of that result is projected
    by a tanh dense layer.

    Args:
      inputs: Rank-3 or rank-4 tensor with batch on axis 0 and time on axis 1.
        A rank-3 input gets a trailing channel axis appended. The feature axis
        is presumably 80 wide, because that width is hard-coded for the final
        reshape -- TODO confirm against callers.
      input_lengths: Per-example number of valid time steps; used to build the
        time masks and halved (rounding up) after every convolution.
      training: Forwarded to `tf.layers.batch_normalization`.
      channels: Sequence of filter counts, one convolution layer per entry.
        Must be non-empty because the last entry sizes the reshape. The
        default is a tuple so the default object cannot be mutated between
        calls.
      gru_unit: Unit count passed to `gru` and to the final dense layer.
      name: Variable scope name.
      reuse: Variable scope `reuse` flag.

    Returns:
      The output of the final dense layer (`gru_unit` units, tanh activation).
    """
    with tf.variable_scope(name, reuse=reuse):
        # Width of the feature axis; each stride-2 'same' convolution halves
        # it, rounding up.
        mel_dim = 80
        batch_size = tf.shape(inputs)[0]

        if len(inputs.shape) == 3:
            inputs = tf.expand_dims(inputs, axis=-1)

        # Time mask with two trailing singleton axes so it broadcasts over the
        # feature and channel axes.
        mask = tf.expand_dims(
            tf.expand_dims(
                array_ops.sequence_mask(
                    input_lengths, tf.shape(inputs)[1], tf.float32),
                axis=-1),
            axis=-1)
        conv_out = inputs * mask

        for layer_index, filters in enumerate(channels):
            conv_out = tf.layers.conv2d(
                conv_out,
                filters=filters,
                kernel_size=(3, 3),
                strides=(2, 2),
                padding='same',
                name='conv2d_{}'.format(layer_index),
                activation=tf.nn.relu)

            # The time axis was halved by the stride, so halve the lengths
            # too and rebuild the mask at the new resolution.
            input_lengths = tf.ceil(input_lengths / 2)
            mask = tf.expand_dims(
                tf.expand_dims(
                    array_ops.sequence_mask(
                        input_lengths, tf.shape(conv_out)[1], tf.float32),
                    axis=-1),
                axis=-1)
            conv_out = conv_out * mask

            conv_out = tf.layers.batch_normalization(
                conv_out, training=training)
            # Batch normalization shifts the zeroed padding, so mask again.
            conv_out = conv_out * mask

            mel_dim = math.ceil(mel_dim / 2)

        # Merge the feature and channel axes into one vector per time step.
        conv_out = tf.reshape(
            conv_out, shape=(batch_size, -1, mel_dim * channels[-1]))

        gru_output = gru(conv_out, gru_unit, sequence_length=input_lengths)
        # Move time to the front so indexing [-1] selects the last time step.
        gru_output = tf.transpose(gru_output, [1, 0, 2])
        output = gru_output[-1]
        output = tf.layers.dense(
            output,
            units=gru_unit,
            activation=tf.nn.tanh,
            name='after_dense_style_emb')
        return output
def testOneDimensional(self):
    """Check `sequence_mask` on 1-D lengths, with and without `maxlen`."""
    with self.test_session():
        # Explicit maxlen: boolean mask with a fully known static shape.
        bool_mask = array_ops.sequence_mask(
            constant_op.constant([1, 3, 2]), 5)
        self.assertAllEqual(bool_mask.get_shape(), [3, 5])
        expected_bool = [
            [True, False, False, False, False],
            [True, True, True, False, False],
            [True, True, False, False, False],
        ]
        self.assertAllEqual(bool_mask.eval(), expected_bool)

        # test dtype and default maxlen: the second static dimension is
        # unknown and the evaluated width equals the largest length.
        float_mask = array_ops.sequence_mask(
            constant_op.constant([0, 1, 4]), dtype=dtypes.float32)
        self.assertAllEqual(float_mask.get_shape().as_list(), [3, None])
        expected_float = [
            [0.0, 0.0, 0.0, 0.0],
            [1.0, 0.0, 0.0, 0.0],
            [1.0, 1.0, 1.0, 1.0],
        ]
        self.assertAllEqual(float_mask.eval(), expected_float)
def testNormal(self):
    """Exercise `sequence_mask` with an explicit and with a default maxlen."""
    with self.test_session():
        lengths = constant_op.constant([1, 3, 2])
        mask = array_ops.sequence_mask(lengths, 5)
        self.assertAllEqual(mask.get_shape(), [3, 5])
        self.assertAllEqual(
            mask.eval(),
            [[True, False, False, False, False],
             [True, True, True, False, False],
             [True, True, False, False, False]])

        # test dtype and default maxlen: without maxlen the column count is
        # only known at run time, hence the `None` in the static shape.
        lengths = constant_op.constant([0, 1, 4])
        mask = array_ops.sequence_mask(lengths, dtype=dtypes.float32)
        self.assertAllEqual(mask.get_shape().as_list(), [3, None])
        self.assertAllEqual(
            mask.eval(),
            [[0.0, 0.0, 0.0, 0.0],
             [1.0, 0.0, 0.0, 0.0],
             [1.0, 1.0, 1.0, 1.0]])
def advantage(rewards, sequence_length, max_sequence_length, weights=1.,
              discount=.95, time_major=False):
    """Compute the advantage: discounted rewards minus a running baseline.

    The baseline is the cumulative sum of the discounted rewards along the
    last axis, divided by `sequence_length` and zeroed beyond each sequence's
    length.

    Arguments:
      rewards: 1D or 2D Tensor, contiguous sequence(s) of rewards.
      sequence_length: 0D or 1D Tensor, the length of the `rewards`
        sequence(s).
      max_sequence_length: `int` or `list`, maximum length(s) of rewards.
      weights: `tf.Tensor`, the weights/mask to apply to the result.
      discount: 0D scalar, the discount factor (gamma).
      time_major: `Boolean`, if rewards is 2D and already time_major, i.e.
        [time, batch_size].

    Returns:
      Tensor with the same shape as `rewards`.
    """
    discounted = core_ops.discount_rewards(
        rewards,
        max_sequence_length=max_sequence_length,
        weights=weights,
        discount=discount,
        time_major=time_major)

    # Trailing axis lets the per-sequence length broadcast across time steps.
    lengths_column = array_ops.expand_dims(sequence_length, -1)

    running_total = math_ops.cumsum(discounted, axis=-1, reverse=False)
    denominator = math_ops.cast(lengths_column, discounted.dtype)
    baseline = running_total / denominator

    # Zero the baseline on steps past the end of each sequence.
    valid_steps = array_ops.sequence_mask(
        sequence_length, maxlen=max_sequence_length)
    baseline = baseline * math_ops.cast(valid_steps, baseline.dtype)

    return discounted - baseline
def crf_binary_score(tag_indices, sequence_lengths, transition_params):
    """Compute the binary (transition) score of each tag sequence.

    The result has shape [batch_size]; each element is the sum of the
    transition scores between consecutive tags of one sequence. A row of
    `tag_indices` with T columns has T - 1 transitions: the source tags are
    columns [0, ..., T-2] and the destination tags are columns [1, ..., T-1].
    Similar in approach to `crf_unary_score`, the per-transition scores are
    gathered from a flattened parameter table, masked by sequence length, and
    summed.

    Args:
      tag_indices: [batch_size, max_seq_len] matrix of tag indices.
      sequence_lengths: [batch_size] vector of true sequence lengths.
      transition_params: [num_tags, num_tags] matrix of transition scores.

    Returns:
      binary_scores: [batch_size] vector of summed transition scores.
    """
    # Worked example used in the comments below:
    # tag_indices is (2, 3) and transition_params is (4, 4).
    num_tags = transition_params.get_shape()[0]  # 4

    # Number of transitions per row.
    num_transitions = array_ops.shape(tag_indices)[1] - 1  # 2

    # Drop the last column for the sources and the first column for the
    # destinations, e.g. [[a, b, c], [d, e, f]] gives
    #   sources:      [[a, b], [d, e]]
    #   destinations: [[b, c], [e, f]]
    from_tags = array_ops.slice(
        tag_indices, [0, 0], [-1, num_transitions])  # (2, 2)
    to_tags = array_ops.slice(
        tag_indices, [0, 1], [-1, num_transitions])  # (2, 2)

    # Row-major position of (source, destination) in the flattened table.
    flat_lookup = from_tags * num_tags + to_tags
    flat_params = array_ops.reshape(transition_params, [-1])

    # Score of every individual transition.
    per_transition = array_ops.gather(flat_params, flat_lookup)  # (2, 2)

    step_mask = array_ops.sequence_mask(
        sequence_lengths,
        maxlen=array_ops.shape(tag_indices)[1],
        dtype=dtypes.float32)  # (2, 3)
    # A transition is valid when its destination step is valid, so drop the
    # first mask column.
    transition_mask = array_ops.slice(step_mask, [0, 1], [-1, -1])  # (2, 2)

    return math_ops.reduce_sum(per_transition * transition_mask, 1)  # (2,)
def test_Bidirectional_with_time_major_input(self, time_major):
    """Check that `Bidirectional` honours the mask for both input layouts.

    The inputs are all zeros and the cells are `_AddOneCell` instances, so the
    expected output is 1 on unmasked steps and 0 on masked steps for both
    directions.
    """
    batch_size, time, input_dim = 2, 3, 1
    inputs = array_ops.zeros((batch_size, time, input_dim))

    # length is [1 2]. Within the batch, the first element has 1 step, and
    # the second element has 2 steps.
    lengths = math_ops.range(1, 1 + batch_size)
    mask = array_ops.sequence_mask(
        lengths, maxlen=time, dtype=dtypes.float32)

    forward_cell = _AddOneCell(name='forward')
    backward_cell = _AddOneCell(name='backward')

    forward_rnn = keras.layers.RNN(
        forward_cell, time_major=time_major, return_sequences=True)
    backward_rnn = keras.layers.RNN(
        backward_cell,
        time_major=time_major,
        return_sequences=True,
        go_backwards=True)
    layer = keras.layers.Bidirectional(
        layer=forward_rnn, backward_layer=backward_rnn)

    # Switch to time-major.
    if time_major:
        inputs = array_ops.transpose(inputs, [1, 0, 2])
        mask = array_ops.transpose(mask, [1, 0])

    keras_outputs = layer(inputs, mask=mask)

    # Bring the outputs back to batch-major for the comparison.
    if time_major:
        keras_outputs = array_ops.transpose(keras_outputs, [1, 0, 2])

    # expect the first element in batch has 1 step and second element in
    # batch has 2 steps.
    expected_result = np.array([
        [[1., 1.], [0., 0.], [0., 0.]],
        [[1., 1.], [1., 1.], [0., 0.]],
    ])
    self.assertAllClose(expected_result, keras_outputs)
def crf_binary_score(tag_indices, sequence_lengths, transition_params):
    """Sum the transition potentials along each tag sequence.

    Args:
      tag_indices: A [batch_size, max_seq_len] matrix of tag indices.
      sequence_lengths: A [batch_size] vector of true sequence lengths.
      transition_params: A [num_tags, num_tags] matrix of binary potentials.

    Returns:
      binary_scores: A [batch_size] vector of binary scores.
    """
    # Static tag count and dynamic number of transitions per row.
    num_tags = transition_params.get_shape()[0]
    num_transitions = array_ops.shape(tag_indices)[1] - 1

    # Each transition goes from column i to column i + 1, so the sources are
    # all columns but the last and the targets all columns but the first.
    sources = array_ops.slice(tag_indices, [0, 0], [-1, num_transitions])
    targets = array_ops.slice(tag_indices, [0, 1], [-1, num_transitions])

    # Address (source, target) pairs inside the flattened potential table.
    pair_positions = sources * num_tags + targets
    potentials_1d = array_ops.reshape(transition_params, [-1])
    pair_scores = array_ops.gather(potentials_1d, pair_positions)

    # Only transitions whose target step lies inside the sequence count.
    valid_steps = array_ops.sequence_mask(
        sequence_lengths,
        maxlen=array_ops.shape(tag_indices)[1],
        dtype=dtypes.float32)
    valid_pairs = array_ops.slice(valid_steps, [0, 1], [-1, -1])

    binary_scores = math_ops.reduce_sum(pair_scores * valid_pairs, 1)
    return binary_scores
def __init__(self, attention_units, memory, sequence_length=None,
             time_major=True, mode=0):
    """Store the memory and pre-compute its projection for attention.

    Args:
      attention_units: Output depth of the `Ua` projection.
      memory: Rank-3 tensor with a statically known last dimension. When
        `time_major` is true it is transposed with perm (1, 0, 2) first.
      sequence_length: Optional lengths used to build `self.mask`; when
        `None`, `self.mask` is `None`.
      time_major: Whether `memory` needs its first two axes swapped.
      mode: Stored as `self.mode`; not interpreted here.
    """
    self.attention_units = attention_units
    self.enc_units = memory.get_shape()[-1].value

    if time_major:
        memory = tf.transpose(memory, perm=(1, 0, 2))

    self.enc_length = tf.shape(memory)[1]
    self.batch_size = tf.shape(memory)[0]
    self.mode = mode

    if sequence_length is None:
        self.mask = None
    else:
        self.mask = array_ops.sequence_mask(sequence_length, self.enc_length)

    # Despite its name, `tiny` is a [batch, enc_length] tensor filled with
    # negative infinity.
    fill_shape = (self.batch_size, self.enc_length)
    self.tiny = -math.inf * tf.ones(shape=fill_shape)

    # Insert a singleton axis so the memory can go through a 1x1 conv2d.
    memory_4d_shape = (
        tf.shape(memory)[0], self.enc_length, 1, self.enc_units)
    self.memory = tf.reshape(memory, memory_4d_shape)

    # pre-compute Uahj to minimize the computational cost
    with tf.variable_scope('attention'):
        projection = tf.get_variable(
            name='Ua',
            shape=(1, 1, self.enc_units, self.attention_units))
        self.hidden_feats = tf.nn.conv2d(
            self.memory, projection, [1, 1, 1, 1], "SAME")
def crf_unary_score(tag_indices, sequence_lengths, inputs):
    """Sum the unary potentials selected by each tag sequence.

    Args:
      tag_indices: A [batch_size, max_seq_len] matrix of tag indices.
      sequence_lengths: A [batch_size] vector of true sequence lengths.
      inputs: A [batch_size, max_seq_len, num_tags] tensor of unary
        potentials.

    Returns:
      unary_scores: A [batch_size] vector of unary scores.
    """
    batch_size = array_ops.shape(inputs)[0]
    max_seq_len = array_ops.shape(inputs)[1]
    num_tags = array_ops.shape(inputs)[2]

    potentials_1d = array_ops.reshape(inputs, [-1])

    # Row-major offset of (batch b, step t, tag 0) in the flattened tensor:
    # b * max_seq_len * num_tags + t * num_tags.
    batch_offsets = array_ops.expand_dims(
        math_ops.range(batch_size) * max_seq_len * num_tags, 1)
    step_offsets = array_ops.expand_dims(
        math_ops.range(max_seq_len) * num_tags, 0)
    offsets = batch_offsets + step_offsets

    # Use int32 or int64 based on tag_indices' dtype.
    if tag_indices.dtype == dtypes.int64:
        offsets = math_ops.to_int64(offsets)

    # Adding the tag index gives the position of the chosen potential.
    positions_1d = array_ops.reshape(offsets + tag_indices, [-1])
    chosen = array_ops.gather(potentials_1d, positions_1d)
    per_step_scores = array_ops.reshape(chosen, [batch_size, max_seq_len])

    # Steps beyond each sequence's true length contribute nothing.
    valid_steps = array_ops.sequence_mask(
        sequence_lengths,
        maxlen=array_ops.shape(tag_indices)[1],
        dtype=dtypes.float32)
    unary_scores = math_ops.reduce_sum(per_step_scores * valid_steps, 1)
    return unary_scores
def _maybe_mask_score(score, memory_sequence_length=None, memory_mask=None,
                      autoregressive_mask=None, score_mask_value=None):
    """Mask the attention score based on the masks.

    Positions that are masked out are replaced by `score_mask_value`.

    Args:
      score: The attention score tensor. Its axis 1 is used as `maxlen` when a
        mask is built from `memory_sequence_length`.
      memory_sequence_length: Optional lengths from which a memory mask is
        built. All values must be positive. Mutually exclusive with
        `memory_mask`.
      memory_mask: Optional explicit memory mask. Mutually exclusive with
        `memory_sequence_length`.
      autoregressive_mask: Optional mask combined (by multiplication after an
        int32 cast) with the memory mask.
      score_mask_value: Value written to masked-out positions. Must be
        provided whenever any mask is provided.

    Returns:
      `score` unchanged when no mask is given, otherwise the masked score.

    Raises:
      ValueError: If both `memory_sequence_length` and `memory_mask` are given.
    """
    if (memory_sequence_length is None and memory_mask is None
            and autoregressive_mask is None):
        return score
    if memory_sequence_length is not None and memory_mask is not None:
        raise ValueError(
            "memory_sequence_length and memory_mask can't be provided "
            "at same time.")

    if memory_sequence_length is not None:
        message = (
            "All values in memory_sequence_length must be greater than zero.")
        with ops.control_dependencies([
                check_ops.assert_positive(
                    memory_sequence_length, message=message)
        ]):
            memory_mask = array_ops.sequence_mask(
                memory_sequence_length, maxlen=array_ops.shape(score)[1])

    if memory_mask is None:
        # Only an autoregressive mask was given: start from "keep everything".
        memory_mask = tf.ones_like(score)

    # Combine with the autoregressive mask only when one was supplied.
    # Previously this multiplication ran unconditionally and crashed on
    # `tf.cast(None, ...)` whenever only a memory mask or only lengths were
    # passed.
    if autoregressive_mask is not None:
        memory_mask = tf.cast(memory_mask, tf.int32) * tf.cast(
            autoregressive_mask, tf.int32)
    memory_mask = tf.cast(memory_mask, tf.bool)

    score_mask_values = score_mask_value * array_ops.ones_like(score)
    return array_ops.where(memory_mask, score, score_mask_values)
def gather_tree_from_array(t, parent_ids, sequence_length):
    """Reorder a stacked `TensorArray` so each beam follows its ancestry.

    Args:
      t: A stacked `TensorArray` of size `max_time` that contains `Tensor`s of
        shape `[batch_size, beam_width, s]` or `[batch_size * beam_width, s]`
        where `s` is the depth shape.
      parent_ids: The parent ids of shape `[max_time, batch_size, beam_width]`.
      sequence_length: The sequence length of shape `[batch_size, beam_width]`.

    Returns:
      A `Tensor` which is a stacked `TensorArray` of the same size and type as
      `t` and where beams are sorted in each `Tensor` according to
      `parent_ids`.
    """
    # Prefer static dimensions; fall back to the dynamic shape when unknown.
    max_time = parent_ids.shape.dims[0].value or array_ops.shape(parent_ids)[0]
    batch_size = (
        parent_ids.shape.dims[1].value or array_ops.shape(parent_ids)[1])
    beam_width = (
        parent_ids.shape.dims[2].value or array_ops.shape(parent_ids)[2])

    # Identity beam ids [0, ..., beam_width - 1] replicated over time and
    # batch; gather_tree reorders them.
    identity_ids = array_ops.expand_dims(
        array_ops.expand_dims(math_ops.range(beam_width), 0), 0)
    identity_ids = array_ops.tile(identity_ids, [max_time, batch_size, 1])

    longest_per_batch = math_ops.to_int32(
        math_ops.reduce_max(sequence_length, axis=1))
    traced_ids = beam_search_ops.gather_tree(
        step_ids=identity_ids,
        parent_ids=parent_ids,
        max_sequence_lengths=longest_per_batch,
        end_token=beam_width + 1)

    # For out of range steps, simply copy the same beam.
    step_is_valid = array_ops.transpose(
        array_ops.sequence_mask(sequence_length, maxlen=max_time),
        perm=[2, 0, 1])
    traced_ids = array_ops.where(step_is_valid, x=traced_ids, y=identity_ids)

    # Build (time, batch, beam) index triples for gather_nd.
    time_index = array_ops.tile(
        array_ops.reshape(math_ops.range(max_time), [-1, 1, 1]),
        [1, batch_size, beam_width])
    batch_index = array_ops.tile(
        array_ops.reshape(math_ops.range(batch_size), [-1, 1, 1]),
        [1, max_time, beam_width])
    batch_index = array_ops.transpose(batch_index, perm=[1, 0, 2])
    gather_indices = array_ops.stack(
        [time_index, batch_index, traced_ids], -1)

    # Gather from a tensor with collapsed additional dimensions, then restore
    # the caller's original shape.
    original_shape = array_ops.shape(t)
    collapsed = array_ops.reshape(
        t, [max_time, batch_size, beam_width, -1])
    reordered = array_ops.gather_nd(collapsed, gather_indices)
    return array_ops.reshape(reordered, original_shape)
def cummean(op, length, max_length):
    """Return the cumulative sum of `op` divided by `length`, masked by length.

    The cumulative sum runs along the last axis. Every entry is divided by the
    sequence's `length` (not by the running step count), and entries at
    positions beyond `length` are zeroed.

    Args:
      op: Tensor whose last axis is the sequence axis.
      length: Per-sequence lengths.
      max_length: `maxlen` passed to `sequence_mask`.

    Returns:
      Tensor with the same dtype as `op`.
    """
    valid_steps = math_ops.cast(
        array_ops.sequence_mask(length, maxlen=max_length), op.dtype)

    # Trailing axis lets the length broadcast against the sequence axis.
    length_column = array_ops.expand_dims(length, -1)

    running_total = math_ops.cumsum(op, axis=-1, reverse=False)
    scaled = running_total / math_ops.cast(length_column, op.dtype)
    return scaled * valid_steps
def rnn_logit_fn(features, mode):
    """Recurrent Neural Network logit_fn.

    Args:
      features: This is the first item returned from the `input_fn` passed to
        `train`, `evaluate`, and `predict`. This should be a single `Tensor`
        or `dict` of same.
      mode: Optional. Specifies if this training, evaluation or prediction.
        See `ModeKeys`.

    Returns:
      A tuple of `Tensor` objects representing the logits and the sequence
      length mask.
    """
    with ops.name_scope('sequence_input_layer'):
        sequence_layer = fc.SequenceFeatures(sequence_feature_columns)
        sequence_input, sequence_length = sequence_layer(features)
        summary.histogram('sequence_length', sequence_length)

        # Context features, when configured, are merged into the sequence
        # input.
        if context_feature_columns:
            context_layer = fc.DenseFeatures(context_feature_columns)
            context_input = context_layer(features)
            sequence_input = fc.concatenate_context_input(
                context_input, sequence_input=sequence_input)

    sequence_length_mask = array_ops.sequence_mask(sequence_length)

    rnn_layer = rnn_layer_fn()
    is_training = mode == model_fn.ModeKeys.TRAIN
    rnn_outputs = rnn_layer(
        sequence_input, mask=sequence_length_mask, training=is_training)

    logits_layer = keras_layers.Dense(units=output_units, name='logits')
    logits = logits_layer(rnn_outputs)
    return logits, sequence_length_mask
def _attention(state, context):
    """Compute an attention-weighted summary of `context` for `state`.

    Args:
      state: The cell state. When the enclosing `state_size` is a tuple, the
        second element of `state` is used; otherwise `state` itself is used.
      context: Tensor whose axis 1 is attended over and whose last static
        dimension sizes the projections.

    Returns:
      A tuple `(ctx, alpha_normalized)`: the context summed over axis 1 with
      the attention weights, and the softmax-normalized weights themselves.
    """
    with vs.variable_scope("attention"):
        ctx_shape = context.get_shape().as_list()
        dim_ctx = ctx_shape[-1]

        if isinstance(state_size, tuple):
            _, m_prev = state
            _, m_size = state_size
        else:
            m_prev, m_size = state, state_size

        with vs.variable_scope("ctx_proj"):
            projected_context = dense(context, units=dim_ctx, use_bias=True)

        with vs.variable_scope("state_proj"):
            # Extra axis so the state projection broadcasts across axis 1 of
            # the projected context.
            projected_state = array_ops.expand_dims(
                _linear(m_prev, dim_ctx, bias=False), axis=1)

        with vs.variable_scope("cell_proj") as cell_proj_scope:
            combined = math_ops.tanh(projected_state + projected_context)
            alpha = math_ops.reduce_sum(combined, [2])

        if att_sequence_length is not None:
            alpha_mask = array_ops.sequence_mask(
                lengths=att_sequence_length,
                maxlen=ctx_shape[1],
                dtype=dtypes.float32)
            # Replace scores of padded positions with the most negative
            # float32 so they get negligible softmax weight.
            kept = alpha * alpha_mask
            suppressed = (1.0 - alpha_mask) * dtypes.float32.min
            alpha = kept + suppressed

        alpha_normalized = nn_ops.softmax(alpha)
        weights = array_ops.expand_dims(alpha_normalized, axis=2)
        ctx = math_ops.reduce_sum(context * weights, axis=1)
        return ctx, alpha_normalized
def scaled_dot_product_attention(inputs, memory, memory_len, hidden,
                                 keep_prob=1.0, is_train=None,
                                 scope="dot_attention"):
    """Attend from `inputs` over `memory` and gate the concatenated result.

    Both tensors go through dropout, a bias-free `Dense` projection to
    `hidden` units and a ReLU. Their scaled dot products are masked by
    `memory_len`, softmax-normalized and used to average the un-projected
    `memory`. The attended memory is concatenated to `inputs` on axis 2 and
    multiplied by a sigmoid gate computed from that concatenation.

    Args:
      inputs: Query tensor; axis 1 is the query position axis.
      memory: Memory tensor attended over.
      memory_len: Valid length of each memory row.
      hidden: Projection width; also sets the `hidden ** 0.5` scaling.
      keep_prob: Dropout keep probability.
      is_train: Forwarded to `dropout`.
      scope: Variable scope name.

    Returns:
      The gated concatenation of `inputs` and the attended memory.
    """
    with tf.variable_scope(scope):
        d_inputs = dropout(inputs, keep_prob=keep_prob, is_train=is_train)
        d_memory = dropout(memory, keep_prob=keep_prob, is_train=is_train)
        num_queries = tf.shape(inputs)[1]

        with tf.variable_scope("attention"):
            query_proj = tf.nn.relu(
                Dense(d_inputs, hidden, use_bias=False, scope="inputs"))
            memory_proj = tf.nn.relu(
                Dense(d_memory, hidden, use_bias=False, scope="memory"))
            memory_proj_t = tf.transpose(memory_proj, [0, 2, 1])
            scores = tf.matmul(query_proj, memory_proj_t) / (hidden ** 0.5)

            # Same memory mask for every query position.
            valid_memory = array_ops.sequence_mask(
                memory_len, maxlen=array_ops.shape(scores)[2])
            valid_memory = tf.tile(
                tf.expand_dims(valid_memory, axis=1), [1, num_queries, 1])

            # Padded memory positions get -inf so softmax gives them zero
            # weight.
            neg_inf_scores = float('-inf') * array_ops.ones_like(scores)
            masked_scores = array_ops.where(
                valid_memory, scores, neg_inf_scores)
            weights = tf.nn.softmax(masked_scores)

            attended = tf.matmul(weights, memory)
            res = tf.concat([inputs, attended], axis=2)

        with tf.variable_scope("gate"):
            dim = res.get_shape().as_list()[-1]
            d_res = dropout(res, keep_prob=keep_prob, is_train=is_train)
            gate = tf.nn.sigmoid(Dense(d_res, dim, use_bias=False))
            return res * gate
def dense_labels_to_sparse(dense, length):
    """Convert dense labels with sequence lengths to sparse tensor.

    Args:
      dense: tensor of shape [batch, max_length]
      length: int tensor of shape [batch]. The length of each sequence in
        dense.

    Returns:
      tf.SparseTensor with values only for the valid elements of sequences.
      Its dense shape is [batch, max(length)].
    """
    flat_values = array_ops.reshape(dense, [-1])
    flat_size = array_ops.shape(flat_values, out_type=dtypes.int64)[0]
    flat_positions = math_ops.range(flat_size)

    # True for every entry that lies inside its sequence.
    padded_width = array_ops.shape(dense)[1]
    valid = array_ops.sequence_mask(length, maxlen=padded_width)
    flat_valid = array_ops.reshape(valid, [-1])

    kept_positions = array_ops.boolean_mask(flat_positions, flat_valid)
    indices = array_ops.expand_dims(kept_positions, 1)
    values = array_ops.boolean_mask(flat_values, flat_valid)

    # Build a 1-D sparse tensor first, then reshape it to 2-D indices.
    flat_sparse = sparse_tensor.SparseTensor(
        indices=indices,
        values=math_ops.cast(values, dtypes.int32),
        dense_shape=array_ops.shape(flat_values, out_type=dtypes.int64))
    reshaped = sparse_ops.sparse_reshape(flat_sparse, array_ops.shape(dense))

    # Shrink the column count to the longest actual sequence.
    max_length = math_ops.reduce_max(length)
    trimmed_shape = [
        math_ops.cast(reshaped.dense_shape[0], dtypes.int64),
        math_ops.cast(max_length, dtypes.int64),
    ]
    return sparse_tensor.SparseTensor(
        indices=reshaped.indices,
        values=reshaped.values,
        dense_shape=trimmed_shape)
def do_attention(outputs, seq_length, name):
    """Pool `outputs` over axis 1 with learned, length-masked attention.

    Args:
      outputs: Tensor whose last axis is contracted with a
        [rnnHiddenSize, attention_size] weight and whose axis 1 is pooled
        over -- presumably [batch, time, rnnHiddenSize]; confirm with callers.
      seq_length: Valid length of each sequence; positions at or beyond it get
        a score of -inf before the softmax.
      name: Prefix for the two variables created here (`<name>_h`,
        `<name>_v`).

    Returns:
      The attention-weighted sum of `outputs` over axis 1.
    """
    # Project every step into the attention space. No bias or tanh is applied
    # here; an earlier variant with an `_h_bias` variable and a tanh was
    # disabled by the original author.
    projection = tf.get_variable(
        name=name + "_h",
        dtype=tf.float32,
        initializer=tf.random_normal_initializer(stddev=0.1),
        shape=[rnnHiddenSize, attention_size])
    projected = tf.tensordot(outputs, projection, axes=1)

    # One scalar score per step: dot product with the learned vector.
    scorer = tf.get_variable(
        name=name + "_v",
        dtype=tf.float32,
        shape=[attention_size],
        initializer=tf.random_normal_initializer(stddev=0.1))
    raw_scores = tf.reduce_sum(tf.multiply(projected, scorer), axis=-1)

    # consider the seq_length, set value=-np.inf for the index of outputs
    # greater than seq_length
    neg_inf = dtypes.as_dtype(tf.float32).as_numpy_dtype(-np.inf)
    in_sequence = array_ops.sequence_mask(
        seq_length, maxlen=array_ops.shape(raw_scores)[1])
    padding_scores = neg_inf * array_ops.ones_like(raw_scores)
    masked_scores = array_ops.where(in_sequence, raw_scores, padding_scores)

    weights = tf.nn.softmax(masked_scores, axis=-1)
    weights = tf.expand_dims(weights, axis=-1)

    weighted = tf.multiply(outputs, weights)
    return tf.reduce_sum(weighted, axis=1)
def testCtcLossDenseWithBlankIndexIsSameAsCtcLoss(self):
    """Compare `ctc_loss_dense` using a middle blank index with `ctc_loss`.

    Random logits and labels are scored by `ctc_ops.ctc_loss`. The same
    problem is then re-expressed with the last class moved to `blank_index`
    and scored by `ctc_ops.ctc_loss_dense`. Losses and gradients with respect
    to the original logits must agree.
    """
    random_seed.set_random_seed(5)

    batch_size = 8
    num_labels = 6
    label_length = 5
    num_frames = 12
    logits = random_ops.random_uniform([num_frames, batch_size, num_labels])
    # Labels are drawn from [0, num_labels - 1), so the last class is never
    # used as a label (presumably because `ctc_loss` treats it as the blank).
    labels = random_ops.random_uniform(
        [batch_size, label_length],
        minval=0,
        maxval=num_labels - 1,
        dtype=dtypes.int64)

    label_lengths = random_ops.random_uniform(
        [batch_size], minval=2, maxval=label_length, dtype=dtypes.int64)
    # Zero out label entries beyond each sequence's length.
    label_mask = array_ops.sequence_mask(
        label_lengths, maxlen=label_length, dtype=label_lengths.dtype)
    labels *= label_mask
    # Every example uses all frames.
    logit_lengths = [num_frames] * batch_size

    # Reference: sparse-label CTC loss and its gradient.
    tf_ctc_loss_labels = math_ops.cast(labels, dtypes.int32)
    tf_ctc_loss_labels = ctc_ops.dense_labels_to_sparse(
        tf_ctc_loss_labels, label_lengths)

    tf_nn_ctc_loss = ctc_ops.ctc_loss(
        labels=tf_ctc_loss_labels,
        inputs=logits,
        sequence_length=logit_lengths,
        time_major=True)
    tf_nn_ctc_grads = gradients_impl.gradients(tf_nn_ctc_loss, [logits])[0]

    # Shift the blank logits/labels to be somewhere in the middle: the last
    # class's logits are moved to position `blank_index`, and every label at
    # or above `blank_index` is incremented to make room for it.
    blank_index = 2
    shifted_logits = array_ops.concat([
        logits[:, :, :blank_index],
        logits[:, :, -1:],
        logits[:, :, blank_index:-1],
    ], axis=2)
    shifted_labels = array_ops.where(labels < blank_index, labels, labels + 1)

    ctc_loss = ctc_ops.ctc_loss_dense(
        labels=shifted_labels,
        logits=shifted_logits,
        label_length=label_lengths,
        logit_length=logit_lengths,
        blank_index=blank_index)
    # Gradient is taken with respect to the un-shifted logits so it is
    # directly comparable with the reference gradient.
    ctc_loss_grads = gradients_impl.gradients(ctc_loss, [logits])[0]

    with self.cached_session() as sess:
        # Repeat the comparison several times; each pair is fetched in a
        # single evaluate call so both sides come from the same run.
        for _ in range(32):
            self.assertAllClose(*self.evaluate([ctc_loss, tf_nn_ctc_loss]))
            self.assertAllClose(
                *self.evaluate([ctc_loss_grads, tf_nn_ctc_grads]),
                rtol=2e-06,
                atol=2e-06)
def is_sequence_right_padded(mask, time_major):
    """Tell whether a mask tensor is strictly right padded.

    The CuDNN kernel uses a sequence length parameter to skip trailing
    timesteps. If the data is left padded, or has masked values in the middle
    of a sequence, the CuDNN kernel won't work properly.

    Left padded data: [[False, False, True, True, True]].
    Right padded data: [[True, True, True, False, False]].
    Mixture of mask/unmasked data: [[True, False, True, False, False]].

    Note that for the mixed data example above, the data the RNN should
    actually see are the 2 Trues (index 0 and 2); the False at index 1 should
    be ignored and not pollute the internal states.

    Args:
      mask: the Boolean tensor with shape [batch, timestep] or
        [timestep, batch] when time_major is True.
      time_major: Boolean, whether the input mask is time major or batch
        major.

    Returns:
      boolean scalar tensor, whether the mask is strictly right padded.
    """
    if time_major:
        mask = array_ops.transpose(mask)

    num_steps = array_ops.shape(mask)[1]
    true_counts = math_ops.reduce_sum(
        math_ops.cast(mask, dtypes.int32), axis=1)

    # A right padded row with k Trues is exactly k leading Trues, so rebuild
    # that ideal mask from the counts and compare it with the real one.
    ideal_mask = array_ops.sequence_mask(true_counts, maxlen=num_steps)
    matches = math_ops.equal(mask, ideal_mask)
    return math_ops.reduce_all(matches)
def cbhg(inputs, training=True, k=16, bank_filters=128,
         projection_filters=(128, 128), highway_layers=4, highway_units=128,
         bi_gru_units=128, sequence_length=None, name='cbhg', reuse=False):
    """Run the conv bank, pooling, projection, highway and bi-GRU pipeline.

    When `sequence_length` is given, a time mask is applied to the input and
    after every intermediate stage so padded steps stay zero.

    Args:
      inputs: Tensor with time on axis 1.
      training: Forwarded to `conv1d_bank` and `conv1d_projections`.
      k: Forwarded to `conv1d_bank`.
      bank_filters: Forwarded to `conv1d_bank`.
      projection_filters: Forwarded to `conv1d_projections`; if its last entry
        differs from `highway_units`, a dense layer adapts the width.
      highway_layers: Number of layers passed to `highway_net`.
      highway_units: Width expected by the highway network.
      bi_gru_units: Units passed to `bi_gru`.
      sequence_length: Optional valid lengths; `None` disables all masking.
      name: Variable scope name.
      reuse: Variable scope `reuse` flag, also forwarded to the helpers.

    Returns:
      The output of `bi_gru`.
    """
    with tf.variable_scope(name, reuse=reuse):
        mask = None
        if sequence_length is not None:
            mask = tf.expand_dims(
                array_ops.sequence_mask(
                    sequence_length, tf.shape(inputs)[1], tf.float32),
                -1)

        def _masked(tensor):
            # Zero padded steps; no-op when no lengths were supplied.
            return tensor if mask is None else tensor * mask

        inputs = _masked(inputs)

        conv_bank_out = conv1d_bank(
            inputs,
            training=training,
            k=k,
            bank_filters=bank_filters,
            reuse=reuse)
        conv_bank_out = _masked(conv_bank_out)

        pooled_conv = tf.layers.max_pooling1d(
            conv_bank_out, pool_size=2, strides=1, padding='same')
        pooled_conv = _masked(pooled_conv)

        conv_proj_out = conv1d_projections(
            pooled_conv,
            training=training,
            projection_filters=projection_filters,
            reuse=reuse)

        # Residual connection around the convolutional stages.
        highway_inputs = conv_proj_out + inputs
        if projection_filters[-1] != highway_units:
            # linear transform for highway.
            highway_inputs = tf.layers.dense(highway_inputs, highway_units)
        highway_inputs = _masked(highway_inputs)

        highway_outputs = highway_net(
            highway_inputs, layers=highway_layers, reuse=reuse)
        highway_outputs = _masked(highway_outputs)

        bi_gru_out = bi_gru(
            highway_outputs,
            units=bi_gru_units,
            sequence_length=sequence_length,
            reuse=reuse)
        return bi_gru_out
def dense_labels_to_sparse(dense, length):
    """Turn padded dense labels into a sparse tensor of the valid entries.

    Args:
      dense: tensor of shape [batch, max_length]
      length: int tensor of shape [batch]. The length of each sequence in
        dense.

    Returns:
      tf.SparseTensor with values only for the valid elements of sequences;
      its dense shape is [batch, max(length)].
    """
    values_1d = array_ops.reshape(dense, [-1])
    element_count = array_ops.shape(values_1d, out_type=dtypes.int64)[0]
    positions_1d = math_ops.range(element_count)

    # Mark the entries that fall within each row's sequence length.
    in_sequence = array_ops.sequence_mask(
        length, maxlen=array_ops.shape(dense)[1])
    in_sequence_1d = array_ops.reshape(in_sequence, [-1])

    sparse_indices = array_ops.expand_dims(
        array_ops.boolean_mask(positions_1d, in_sequence_1d), 1)
    sparse_values = array_ops.boolean_mask(values_1d, in_sequence_1d)

    # The sparse tensor is created over the flattened layout and reshaped
    # afterwards to obtain [row, column] indices.
    sparse_1d = sparse_tensor.SparseTensor(
        indices=sparse_indices,
        values=math_ops.cast(sparse_values, dtypes.int32),
        dense_shape=array_ops.shape(values_1d, out_type=dtypes.int64))
    sparse_2d = sparse_ops.sparse_reshape(sparse_1d, array_ops.shape(dense))

    # Report the longest real sequence as the number of columns.
    longest = math_ops.reduce_max(length)
    return sparse_tensor.SparseTensor(
        indices=sparse_2d.indices,
        values=sparse_2d.values,
        dense_shape=[
            math_ops.cast(sparse_2d.dense_shape[0], dtypes.int64),
            math_ops.cast(longest, dtypes.int64),
        ])
def attention(self):
    """Build `self.context_vector` by attending over `self.rnn_inputs`.

    Reads `self.rnn_inputs`, `self.sequence_length`, `self.h` and
    `self.embed_dim`; creates the variable `attention/attention_v`; writes
    `self.context_vector`.
    """
    with tf.variable_scope("attention"):
        time_steps = tf.shape(self.rnn_inputs)[1]
        seq_len_mask = array_ops.sequence_mask(
            self.sequence_length, time_steps, dtype=tf.float32)

        # Use the static rank when known, otherwise the dynamic one.
        static_rank = self.rnn_inputs.get_shape().ndims
        if static_rank is not None:
            rank = static_rank
        else:
            rank = array_ops.rank(self.rnn_inputs)

        # Append (rank - 2) singleton axes to the mask so it broadcasts
        # against `rnn_inputs`.
        extra_ones = array_ops.ones(rank - 2, dtype=tf.int32)
        mask_dims = tf.reshape(tf.shape(seq_len_mask), [1, -1])
        singleton_dims = tf.reshape(extra_ones, [1, -1])
        broadcast_shape = tf.reshape(
            tf.concat([mask_dims, singleton_dims], 1), [-1])
        seq_len_mask = tf.reshape(seq_len_mask, broadcast_shape)

        processed_h = array_ops.expand_dims(self.h, 1)
        v = tf.get_variable("attention_v", self.embed_dim, dtype=tf.float32)

        # Additive fusion of inputs and state. A variant that tiled `h` over
        # time and used a linear layer was disabled by the original author.
        activated = tf.tanh(self.rnn_inputs + processed_h)
        x_processed = v * activated

        # NOTE(review): the mask is applied by multiplication before the
        # softmax, so padded steps get a logit of 0 rather than being
        # excluded -- confirm this is intended.
        x_processed_mask = x_processed * seq_len_mask
        step_logits = tf.reduce_sum(x_processed_mask, [2])
        score = array_ops.expand_dims(tf.nn.softmax(step_logits), 2)

        self.context_vector = tf.reduce_sum(score * self.rnn_inputs, [1])
def crf_binary_score(tag_indices, sequence_lengths, transitions):
    """Sum the pairwise transition scores along each tag sequence.

    Args:
      tag_indices: A [batch_size, max_seq_len] matrix of tag indices.
      sequence_lengths: A [batch_size] vector of true sequence lengths.
      transitions: An object implementing transitions; its
        `get_pairwise_at_indices(start, end)` method supplies the score of
        each transition.

    Returns:
      binary_scores: A [batch_size] vector of binary scores.
    """
    # One transition fewer than there are columns.
    num_transitions = array_ops.shape(tag_indices)[1] - 1

    # Pair column i (start) with column i + 1 (end) for every transition.
    origin = [0, 0]
    shifted_origin = [0, 1]
    pair_extent = [-1, num_transitions]
    start_tags = array_ops.slice(tag_indices, origin, pair_extent)
    end_tags = array_ops.slice(tag_indices, shifted_origin, pair_extent)

    pair_scores = transitions.get_pairwise_at_indices(start_tags, end_tags)

    # A transition counts only when its end step lies inside the sequence,
    # which is the step mask without its first column.
    step_mask = array_ops.sequence_mask(
        sequence_lengths,
        maxlen=array_ops.shape(tag_indices)[1],
        dtype=dtypes.float32)
    pair_mask = array_ops.slice(step_mask, [0, 1], [-1, -1])

    return math_ops.reduce_sum(pair_scores * pair_mask, 1)
def mask_activations_and_labels(activations, labels, sequence_lengths):
    """Drop entries outside `sequence_lengths` and return flattened results.

    Args:
      activations: Output of the RNN, shape `[batch_size, padded_length, k]`.
      labels: Label values, shape `[batch_size, padded_length]`.
      sequence_lengths: A `Tensor` of shape `[batch_size]` with the unpadded
        length of each sequence. If `None`, then each sequence is unpadded.

    Returns:
      activations_masked: `logit` values with those beyond `sequence_lengths`
        removed for each batch. Batches are then concatenated. Shape
        `[tf.sum(sequence_lengths), k]` if `sequence_lengths` is not `None`
        and shape `[batch_size * padded_length, k]` otherwise.
      labels_masked: Label values after removing unneeded entries. Shape
        `[tf.sum(sequence_lengths)]` if `sequence_lengths` is not `None` and
        shape `[batch_size * padded_length]` otherwise.
    """
    with ops.name_scope('mask_activations_and_labels',
                        values=[activations, labels, sequence_lengths]):
        labels_shape = array_ops.shape(labels)
        batch_size = labels_shape[0]
        padded_length = labels_shape[1]

        if sequence_lengths is not None:
            # Keep only the steps inside each sequence; boolean_mask also
            # flattens the leading two axes.
            keep = array_ops.sequence_mask(sequence_lengths, padded_length)
            activations_masked = array_ops.boolean_mask(activations, keep)
            labels_masked = array_ops.boolean_mask(labels, keep)
        else:
            # Nothing to remove: just merge the batch and time axes.
            total_steps = padded_length * batch_size
            activations_masked = array_ops.reshape(
                activations, [total_steps, -1])
            labels_masked = array_ops.reshape(labels, [total_steps])

        return activations_masked, labels_masked
def gather_tree_from_array(t, parent_ids, sequence_length):
  """Reorders a stacked `TensorArray` so that each beam holds its full path.

  Args:
    t: A stacked `TensorArray` of size `max_time` that contains `Tensor`s of
      shape `[batch_size, beam_width, s]` or `[batch_size * beam_width, s]`
      where `s` is the depth shape.
    parent_ids: The parent ids of shape `[max_time, batch_size, beam_width]`.
    sequence_length: The sequence length of shape `[batch_size, beam_width]`.

  Returns:
    A `Tensor` which is a stacked `TensorArray` of the same size and type as
    `t` and where beams are sorted in each `Tensor` according to
    `parent_ids`.
  """
  # Prefer static dimensions, falling back to dynamic ones when unknown.
  static_dims = parent_ids.shape
  max_time = static_dims[0].value or array_ops.shape(parent_ids)[0]
  batch_size = static_dims[1].value or array_ops.shape(parent_ids)[1]
  beam_width = static_dims[2].value or array_ops.shape(parent_ids)[2]

  # Identity beam ids [max_time, batch_size, beam_width]; gather_tree will
  # reorder them.
  beam_row = array_ops.expand_dims(math_ops.range(beam_width), 0)
  beam_ids = array_ops.expand_dims(beam_row, 0)
  beam_ids = array_ops.tile(beam_ids, [max_time, batch_size, 1])

  # sequence_mask appends the time axis last; move it to the front so the
  # mask lines up with beam_ids.
  in_range = array_ops.sequence_mask(
      sequence_length, maxlen=max_time, dtype=dtypes.int32)
  in_range = array_ops.transpose(in_range, perm=[2, 0, 1])

  # Steps past the sequence end are tagged with beam_width + 1, which is
  # passed to gather_tree as the end token.
  step_ids = (beam_ids * in_range) + (1 - in_range) * (beam_width + 1)
  longest = math_ops.to_int32(math_ops.reduce_max(sequence_length, axis=1))
  reordered_ids = beam_search_ops.gather_tree(
      step_ids=step_ids,
      parent_ids=parent_ids,
      max_sequence_lengths=longest,
      end_token=beam_width + 1)

  # Out-of-range steps simply keep their own beam.
  reordered_ids = array_ops.where(
      math_ops.cast(in_range, dtypes.bool), x=reordered_ids, y=beam_ids)

  # Build (time, batch, beam) index triples for gather_nd.
  time_col = array_ops.reshape(math_ops.range(max_time), [-1, 1, 1])
  time_ind = array_ops.tile(time_col, [1, batch_size, beam_width])
  batch_col = array_ops.reshape(math_ops.range(batch_size), [-1, 1, 1])
  batch_ind = array_ops.tile(batch_col, [1, max_time, beam_width])
  batch_ind = array_ops.transpose(batch_ind, perm=[1, 0, 2])
  gather_indices = array_ops.stack([time_ind, batch_ind, reordered_ids], -1)

  # Collapse any trailing dimensions for the gather and restore the original
  # shape afterwards.
  source = t
  original_shape = array_ops.shape(source)
  source = array_ops.reshape(source, [max_time, batch_size, beam_width, -1])
  gathered = array_ops.gather_nd(source, gather_indices)
  return array_ops.reshape(gathered, original_shape)
def _prepare_memory(memory, memory_sequence_length, check_inner_dims_defined):
  """Converts `memory` to tensors and zeroes entries past each sequence end.

  Args:
    memory: `Tensor`, shaped `[batch_size, max_time, ...]`.
    memory_sequence_length: `int32` `Tensor`, shaped `[batch_size]`.
    check_inner_dims_defined: Python boolean.  If `True`, the `memory`
      argument's shape is checked to ensure all but the two outermost
      dimensions are fully defined.

  Returns:
    A (possibly masked), checked, new `memory`.

  Raises:
    ValueError: If `check_inner_dims_defined` is `True` and not
      `memory.shape[2:].is_fully_defined()`.
  """
  def _as_tensor(item):
    return ops.convert_to_tensor(item, name="memory")

  memory = nest.map_structure(_as_tensor, memory)
  if memory_sequence_length is not None:
    memory_sequence_length = ops.convert_to_tensor(
        memory_sequence_length, name="memory_sequence_length")

  if check_inner_dims_defined:
    def _check_dims(item):
      if item.get_shape()[2:].is_fully_defined():
        return
      raise ValueError("Expected memory %s to have fully defined inner dims, "
                       "but saw shape: %s" % (item.name, item.get_shape()))
    nest.map_structure(_check_dims, memory)

  if memory_sequence_length is None:
    seq_len_mask = None
  else:
    # The mask takes its time extent and dtype from the first memory tensor.
    first_memory = nest.flatten(memory)[0]
    seq_len_mask = array_ops.sequence_mask(
        memory_sequence_length,
        maxlen=array_ops.shape(first_memory)[1],
        dtype=first_memory.dtype)
    seq_len_batch_size = (
        memory_sequence_length.shape[0].value
        or array_ops.shape(memory_sequence_length)[0])

  def _maybe_mask(item, mask):
    static_rank = item.get_shape().ndims
    item_rank = static_rank if static_rank is not None else array_ops.rank(item)
    trailing_ones = array_ops.ones(item_rank - 2, dtype=dtypes.int32)
    item_batch_size = item.shape[0].value or array_ops.shape(item)[0]
    if memory_sequence_length is None:
      return item
    message = ("memory_sequence_length and memory tensor batch sizes do not "
               "match.")
    batch_check = check_ops.assert_equal(
        seq_len_batch_size, item_batch_size, message=message)
    with ops.control_dependencies([batch_check]):
      # Pad the mask shape with ones so it broadcasts over the inner dims.
      broadcast_shape = array_ops.concat(
          (array_ops.shape(mask), trailing_ones), 0)
      mask = array_ops.reshape(mask, broadcast_shape)
      return item * mask

  return nest.map_structure(lambda m: _maybe_mask(m, seq_len_mask), memory)
def testCtcLossDenseIsSameAsCtcLoss(self):
  """Compares `ctc_loss_dense` with `ctc_loss` for varying logit lengths."""
  device = "/GPU:0" if test.is_gpu_available() else "/CPU:0"
  with ops.device(device):
    random_seed.set_random_seed(5)

    batch_size = 8
    num_labels = 6
    label_length = 5
    minimum_logits_length = 10
    num_frames = minimum_logits_length + batch_size

    logits = random_ops.random_uniform([num_frames, batch_size, num_labels])
    labels = random_ops.random_uniform(
        [batch_size, label_length],
        minval=1, maxval=num_labels, dtype=dtypes.int64)
    label_lengths = random_ops.random_uniform(
        [batch_size], minval=2, maxval=label_length, dtype=dtypes.int64)

    # Zero out label positions beyond each example's label length.
    valid_labels = array_ops.sequence_mask(
        label_lengths, maxlen=label_length, dtype=label_lengths.dtype)
    labels = labels * valid_labels

    # Example i uses minimum_logits_length + i frames.
    logit_lengths = math_ops.range(batch_size) + minimum_logits_length

    dense_loss = ctc_ops.ctc_loss_dense(
        labels=labels,
        logits=logits,
        label_length=label_lengths,
        logit_length=logit_lengths)
    dense_grads = gradients_impl.gradients(dense_loss, [logits])[0]

    # Shift labels down by one (move blank from 0 to num_labels -1)
    ref_labels = math_ops.cast(labels, dtypes.int32) - 1
    ref_logits = array_ops.concat(
        [logits[:, :, 1:], logits[:, :, 0:1]], axis=2)
    ref_labels = ctc_ops.dense_labels_to_sparse(ref_labels, label_lengths)

    ref_loss = ctc_ops.ctc_loss(
        labels=ref_labels,
        inputs=ref_logits,
        sequence_length=logit_lengths,
        time_major=True)
    ref_grads = gradients_impl.gradients(ref_loss, [logits])[0]

    with self.cached_session():
      for _ in range(32):
        got_loss, want_loss = self.evaluate([dense_loss, ref_loss])
        self.assertAllClose(got_loss, want_loss)
        got_grads, want_grads = self.evaluate([dense_grads, ref_grads])
        self.assertAllClose(got_grads, want_grads, rtol=4e-06, atol=4e-06)
def check_dtypes(lengths_dtype, maxlen_dtype):
  """Checks `sequence_mask` for the given lengths / maxlen dtype combination."""
  lengths = constant_op.constant([1, 3, 2], dtype=lengths_dtype)
  maxlen = constant_op.constant(5, dtype=maxlen_dtype)
  res = array_ops.sequence_mask(lengths, maxlen)

  expected = [
      [True, False, False, False, False],
      [True, True, True, False, False],
      [True, True, False, False, False],
  ]
  self.assertAllEqual(res.get_shape(), [3, 5])
  self.assertAllEqual(res.eval(), expected)
def testOneDimensionalWithMaxlen(self):
  """A 1-D lengths vector with an explicit maxlen yields a [3, 5] mask."""
  expected = [
      [True, False, False, False, False],
      [True, True, True, False, False],
      [True, True, False, False, False],
  ]
  with self.test_session():
    lengths = constant_op.constant([1, 3, 2])
    res = array_ops.sequence_mask(lengths, 5)
    self.assertAllEqual(res.get_shape(), [3, 5])
    self.assertAllEqual(res.eval(), expected)
def __call__(self, inputs, sequence_length=None, is_training=True, time_major=None):
    """Applies conv bank -> projection -> residual -> highway -> bi-GRU.

    Args:
        inputs: Input tensor. It is transposed with perm (1, 0, 2) when
            `time_major` is true, so the body works on batch-major data.
            Its last dimension must equal `self.proj_unit[1]`.
        sequence_length: Optional lengths used to zero out padded steps and
            passed on to both recurrent cells.
        is_training: Forwarded to the convolutional sub-modules.
        time_major: Must be given explicitly; tells whether `inputs` (and
            therefore the returned tensor) is time-major.

    Returns:
        Forward and backward GRU outputs concatenated on the last axis, in
        the same major-ness as `inputs`.
    """
    assert time_major is not None, "[*] You must specify whether is time_major or not!"
    if time_major:
        # Use batch major data.
        inputs = tf.transpose(inputs, perm=(1, 0, 2))
    assert inputs.get_shape()[-1] == self.proj_unit[
        1], "[!] input's shape is not the same as ConvProj's output!"

    has_lengths = sequence_length is not None

    # Zero the padded steps so they cannot leak into the convolutions.
    if has_lengths:
        step_mask = array_ops.sequence_mask(
            sequence_length, tf.shape(inputs)[1], tf.float32)
        mask = tf.expand_dims(step_mask, -1)
        inputs = inputs * mask

    ConvBankWithPool = Conv1dBankWithMaxPool(self.bank_K)
    ConvProj = Conv1dProjection(self.proj_unit)
    Highway = FCHighwayNet(self.highway_layers)
    cell = GRUCell(self.proj_unit[1])
    fw_cell = FusedRNNCellAdaptor(cell)
    bw_cell = TimeReversedFusedRNN(fw_cell)

    # Convolution bank followed by projection; re-mask in between.
    bank_out = ConvBankWithPool(inputs, is_training)
    if has_lengths:
        bank_out = bank_out * mask
    proj_out = ConvProj(bank_out, is_training)

    # Residual connection, then the highway network.
    residual = tf.identity(inputs) + proj_out
    highway_out = Highway(residual)

    # The fused recurrent cells consume time-major input.
    rnn_in = tf.transpose(highway_out, perm=(1, 0, 2))
    fw_out, _ = fw_cell(rnn_in, sequence_length=sequence_length,
                        scope="fw", dtype=tf.float32)
    bw_out, _ = bw_cell(rnn_in, sequence_length=sequence_length,
                        scope="bw", dtype=tf.float32)
    final_output = tf.concat([fw_out, bw_out], axis=-1)

    if time_major:
        return final_output
    return tf.transpose(final_output, perm=(1, 0, 2))
def testOneDimensionalDtypeWithoutMaxlen(self):
  """With no maxlen the width is max(lengths); dtype float32 gives 0/1 rows."""
  expected = [
      [0.0, 0.0, 0.0, 0.0],
      [1.0, 0.0, 0.0, 0.0],
      [1.0, 1.0, 1.0, 1.0],
  ]
  with self.test_session():
    # test dtype and default maxlen:
    lengths = constant_op.constant([0, 1, 4])
    res = array_ops.sequence_mask(lengths, dtype=dtypes.float32)
    self.assertAllEqual(res.get_shape().as_list(), [3, 4])
    self.assertAllEqual(res.eval(), expected)
def testOneDimensionalDtypeWithoutMaxlen(self):
  """With no maxlen the width is max(lengths); dtype float32 gives 0/1 rows."""
  expected = [
      [0.0, 0.0, 0.0, 0.0],
      [1.0, 0.0, 0.0, 0.0],
      [1.0, 1.0, 1.0, 1.0],
  ]
  with self.cached_session():
    # test dtype and default maxlen:
    lengths = constant_op.constant([0, 1, 4])
    res = array_ops.sequence_mask(lengths, dtype=dtypes.float32)
    self.assertAllEqual(res.get_shape().as_list(), [3, 4])
    self.assertAllEqual(res.eval(), expected)
def testCtcLossDenseUniqueFastPathIsSameAsCtcLoss(self):
  """Compares `ctc_loss_dense` (with `unique=` supplied) against `ctc_loss`."""
  random_seed.set_random_seed(5)

  batch_size = 8
  num_labels = 6
  label_length = 5
  num_frames = 12

  logits = random_ops.random_uniform([num_frames, batch_size, num_labels])
  labels = random_ops.random_uniform(
      [batch_size, label_length],
      minval=1, maxval=num_labels, dtype=dtypes.int64)
  label_lengths = random_ops.random_uniform(
      [batch_size], minval=2, maxval=label_length, dtype=dtypes.int64)

  # Zero out label positions beyond each example's label length.
  valid_labels = array_ops.sequence_mask(
      label_lengths, maxlen=label_length, dtype=label_lengths.dtype)
  labels = labels * valid_labels

  logit_lengths = [num_frames] * batch_size

  dense_loss = ctc_ops.ctc_loss_dense(
      labels=labels,
      logits=logits,
      label_length=label_lengths,
      logit_length=logit_lengths,
      unique=ctc_ops.ctc_unique_labels(labels))
  dense_grads = gradients_impl.gradients(dense_loss, [logits])[0]

  # Shift labels down by one (move blank from 0 to num_labels -1)
  ref_labels = math_ops.cast(labels, dtypes.int32) - 1
  ref_logits = array_ops.concat(
      [logits[:, :, 1:], logits[:, :, 0:1]], axis=2)
  ref_labels = ctc_ops.dense_labels_to_sparse(ref_labels, label_lengths)

  ref_loss = ctc_ops.ctc_loss(
      labels=ref_labels,
      inputs=ref_logits,
      sequence_length=logit_lengths,
      time_major=True)
  ref_grads = gradients_impl.gradients(ref_loss, [logits])[0]

  with self.cached_session() as sess:
    for _ in range(32):
      got_loss, want_loss = sess.run([dense_loss, ref_loss])
      self.assertAllClose(got_loss, want_loss)
      got_grads, want_grads = sess.run([dense_grads, ref_grads])
      self.assertAllClose(got_grads, want_grads, rtol=2e-06, atol=2e-06)
def _prepare_memory(memory, memory_sequence_length, check_inner_dims_defined):
  """Converts `memory` to tensors and zeroes entries past each sequence end.

  Args:
    memory: `Tensor`, shaped `[batch_size, max_time, ...]`.
    memory_sequence_length: `int32` `Tensor`, shaped `[batch_size]`.
    check_inner_dims_defined: Python boolean.  If `True`, the `memory`
      argument's shape is checked to ensure all but the two outermost
      dimensions are fully defined.

  Returns:
    A (possibly masked), checked, new `memory`.

  Raises:
    ValueError: If `check_inner_dims_defined` is `True` and not
      `memory.shape[2:].is_fully_defined()`.
  """
  def _as_tensor(entry):
    return ops.convert_to_tensor(entry, name="memory")

  memory = nest.map_structure(_as_tensor, memory)
  if memory_sequence_length is not None:
    memory_sequence_length = ops.convert_to_tensor(
        memory_sequence_length, name="memory_sequence_length")

  if check_inner_dims_defined:
    def _check_dims(entry):
      if entry.get_shape()[2:].is_fully_defined():
        return
      raise ValueError("Expected memory %s to have fully defined inner dims, "
                       "but saw shape: %s" % (entry.name, entry.get_shape()))
    nest.map_structure(_check_dims, memory)

  if memory_sequence_length is None:
    seq_len_mask = None
  else:
    # The mask takes its time extent and dtype from the first memory tensor.
    reference = nest.flatten(memory)[0]
    seq_len_mask = array_ops.sequence_mask(
        memory_sequence_length,
        maxlen=array_ops.shape(reference)[1],
        dtype=reference.dtype)
    seq_len_batch_size = (
        memory_sequence_length.shape[0].value
        or array_ops.shape(memory_sequence_length)[0])

  def _maybe_mask(entry, entry_mask):
    known_rank = entry.get_shape().ndims
    entry_rank = known_rank if known_rank is not None else array_ops.rank(entry)
    pad_ones = array_ops.ones(entry_rank - 2, dtype=dtypes.int32)
    entry_batch_size = entry.shape[0].value or array_ops.shape(entry)[0]
    if memory_sequence_length is None:
      return entry
    message = ("memory_sequence_length and memory tensor batch sizes do not "
               "match.")
    sizes_agree = check_ops.assert_equal(
        seq_len_batch_size, entry_batch_size, message=message)
    with ops.control_dependencies([sizes_agree]):
      # Pad the mask shape with ones so it broadcasts over the inner dims.
      target_shape = array_ops.concat(
          (array_ops.shape(entry_mask), pad_ones), 0)
      entry_mask = array_ops.reshape(entry_mask, target_shape)
      return entry * entry_mask

  return nest.map_structure(lambda m: _maybe_mask(m, seq_len_mask), memory)
def _maybe_mask_score(score, memory_sequence_length, score_mask_value):
  """Replaces scores past each memory length with `score_mask_value`.

  Args:
    score: Score tensor; its dimension 1 is used as the mask width.
    memory_sequence_length: Per-example lengths, or `None` to skip masking.
      All values are asserted to be positive.
    score_mask_value: Value written at the masked-out positions.

  Returns:
    `score` unchanged when `memory_sequence_length` is `None`; otherwise
    `score` with out-of-range positions set to `score_mask_value`.
  """
  if memory_sequence_length is None:
    return score

  message = ("All values in memory_sequence_length must greater than zero.")
  lengths_positive = check_ops.assert_positive(
      memory_sequence_length, message=message)
  with ops.control_dependencies([lengths_positive]):
    width = array_ops.shape(score)[1]
    in_range = array_ops.sequence_mask(memory_sequence_length, maxlen=width)
    fill = score_mask_value * array_ops.ones_like(score)
    return array_ops.where(in_range, score, fill)
def testOneDimensionalWithoutMaxlen(self):
  """With no maxlen the mask width defaults to the largest length (4)."""
  expected = [
      [False, False, False, False],
      [True, False, False, False],
      [True, True, True, True],
  ]
  with self.test_session():
    lengths = constant_op.constant([0, 1, 4])
    res = array_ops.sequence_mask(lengths)
    self.assertAllEqual(res.get_shape().as_list(), [3, 4])
    self.assertAllEqual(res.eval(), expected)
def testTwoDimensional(self):
  """`sequence_mask` on 2-D lengths appends the mask axis as the last dim."""
  with self.test_session():
    res = array_ops.sequence_mask(constant_op.constant([[1, 3, 2]]), 5)
    expected_bool = [[
        [True, False, False, False, False],
        [True, True, True, False, False],
        [True, True, False, False, False],
    ]]
    self.assertAllEqual(res.get_shape(), [1, 3, 5])
    self.assertAllEqual(res.eval(), expected_bool)

    # test dtype and default maxlen:
    lengths = constant_op.constant([[0, 1, 4], [1, 2, 3]])
    res = array_ops.sequence_mask(lengths, dtype=dtypes.float32)
    # The last static dimension is only expected to be known when the C API
    # flag is set.
    last_dim = 4 if ops._USE_C_API else None
    self.assertAllEqual(res.get_shape().as_list(), [2, 3, last_dim])
    expected_float = [
        [[0.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 1.0, 1.0]],
        [[1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 0.0, 0.0], [1.0, 1.0, 1.0, 0.0]],
    ]
    self.assertAllEqual(res.eval(), expected_float)
def testCtcLossDenseWithBlankIndexIsSameAsCtcLoss(self):
  """Compares `ctc_loss_dense` using a mid-range blank index with `ctc_loss`."""
  random_seed.set_random_seed(5)

  batch_size = 8
  num_labels = 6
  label_length = 5
  num_frames = 12

  logits = random_ops.random_uniform([num_frames, batch_size, num_labels])
  labels = random_ops.random_uniform(
      [batch_size, label_length],
      minval=0, maxval=num_labels-1, dtype=dtypes.int64)
  label_lengths = random_ops.random_uniform(
      [batch_size], minval=2, maxval=label_length, dtype=dtypes.int64)

  # Zero out label positions beyond each example's label length.
  valid_labels = array_ops.sequence_mask(
      label_lengths, maxlen=label_length, dtype=label_lengths.dtype)
  labels = labels * valid_labels

  logit_lengths = [num_frames] * batch_size

  # Reference: the sparse-label ctc_loss on the unshifted logits and labels.
  ref_labels = math_ops.cast(labels, dtypes.int32)
  ref_labels = ctc_ops.dense_labels_to_sparse(ref_labels, label_lengths)
  ref_loss = ctc_ops.ctc_loss(
      labels=ref_labels,
      inputs=logits,
      sequence_length=logit_lengths,
      time_major=True)
  ref_grads = gradients_impl.gradients(ref_loss, [logits])[0]

  # Shift the blank logits/labels to be somewhere in the middle.
  blank_index = 2
  before_blank = logits[:, :, :blank_index]
  blank_column = logits[:, :, -1:]
  after_blank = logits[:, :, blank_index:-1]
  shifted_logits = array_ops.concat(
      [before_blank, blank_column, after_blank], axis=2)
  shifted_labels = array_ops.where(labels < blank_index, labels, labels + 1)

  dense_loss = ctc_ops.ctc_loss_dense(
      labels=shifted_labels,
      logits=shifted_logits,
      label_length=label_lengths,
      logit_length=logit_lengths,
      blank_index=blank_index)
  dense_grads = gradients_impl.gradients(dense_loss, [logits])[0]

  with self.cached_session():
    for _ in range(32):
      got_loss, want_loss = self.evaluate([dense_loss, ref_loss])
      self.assertAllClose(got_loss, want_loss)
      got_grads, want_grads = self.evaluate([dense_grads, ref_grads])
      self.assertAllClose(got_grads, want_grads, rtol=2e-06, atol=2e-06)
def _reset_padding(self, memory, memory_sequence_length, check_inner_dims_defined=True):
    """Zeroes the padded part of the encoder inputs.

    This function comes from tensorflow's `_prepare_memory` function.

    Args:
        memory: A tensor or nested structure of tensors. The time extent of
            the mask is read from dimension 1 of the first flattened entry.
        memory_sequence_length: Per-example lengths, or `None` to return the
            converted `memory` unmasked.
        check_inner_dims_defined: If true, every entry must have fully
            defined dimensions from index 2 onwards.

    Returns:
        `memory` converted to tensors, multiplied by the sequence mask when
        `memory_sequence_length` is given.

    Raises:
        ValueError: If `check_inner_dims_defined` is true and an entry's
            inner dimensions are not fully defined.
    """
    def _as_tensor(entry):
        return ops.convert_to_tensor(entry, name="memory")

    memory = nest.map_structure(_as_tensor, memory)
    if memory_sequence_length is not None:
        memory_sequence_length = ops.convert_to_tensor(
            memory_sequence_length, name="memory_sequence_length")

    if check_inner_dims_defined:
        def _check_dims(entry):
            if entry.get_shape()[2:].is_fully_defined():
                return
            raise ValueError(
                "Expected memory %s to have fully defined inner dims, "
                "but saw shape: %s" % (entry.name, entry.get_shape()))
        nest.map_structure(_check_dims, memory)

    if memory_sequence_length is None:
        seq_len_mask = None
    else:
        reference = nest.flatten(memory)[0]
        seq_len_mask = array_ops.sequence_mask(
            memory_sequence_length,
            maxlen=array_ops.shape(reference)[1],
            dtype=reference.dtype)
        seq_len_batch_size = (memory_sequence_length.shape[0].value
                              or array_ops.shape(memory_sequence_length)[0])

    def _maybe_mask(entry, entry_mask):
        known_rank = entry.get_shape().ndims
        entry_rank = (known_rank if known_rank is not None
                      else array_ops.rank(entry))
        pad_ones = array_ops.ones(entry_rank - 2, dtype=dtypes.int32)
        entry_batch_size = entry.shape[0].value or array_ops.shape(entry)[0]
        if memory_sequence_length is None:
            return entry
        message = ("memory_sequence_length and memory tensor "
                   "batch sizes do not match.")
        sizes_agree = check_ops.assert_equal(
            seq_len_batch_size, entry_batch_size, message=message)
        with ops.control_dependencies([sizes_agree]):
            # Pad the mask shape with ones so it broadcasts over inner dims.
            target_shape = array_ops.concat(
                (array_ops.shape(entry_mask), pad_ones), 0)
            entry_mask = array_ops.reshape(entry_mask, target_shape)
            return entry * entry_mask

    return nest.map_structure(lambda m: _maybe_mask(m, seq_len_mask), memory)
def testCtcLossDenseUniqueFastPathIsSameAsCtcLoss(self):
  """Compares `ctc_loss_dense` (with `unique=` supplied) against `ctc_loss`."""
  random_seed.set_random_seed(5)

  batch_size = 8
  num_labels = 6
  label_length = 5
  num_frames = 12

  logits = random_ops.random_uniform([num_frames, batch_size, num_labels])
  labels = random_ops.random_uniform(
      [batch_size, label_length],
      minval=1, maxval=num_labels, dtype=dtypes.int64)
  label_lengths = random_ops.random_uniform(
      [batch_size], minval=2, maxval=label_length, dtype=dtypes.int64)

  # Zero out label positions beyond each example's label length.
  valid_labels = array_ops.sequence_mask(
      label_lengths, maxlen=label_length, dtype=label_lengths.dtype)
  labels = labels * valid_labels

  logit_lengths = [num_frames] * batch_size

  dense_loss = ctc_ops.ctc_loss_dense(
      labels=labels,
      logits=logits,
      label_length=label_lengths,
      logit_length=logit_lengths,
      unique=ctc_ops.ctc_unique_labels(labels))
  dense_grads = gradients_impl.gradients(dense_loss, [logits])[0]

  # Shift labels down by one (move blank from 0 to num_labels -1)
  ref_labels = math_ops.cast(labels, dtypes.int32) - 1
  ref_logits = array_ops.concat(
      [logits[:, :, 1:], logits[:, :, 0:1]], axis=2)
  ref_labels = ctc_ops.dense_labels_to_sparse(ref_labels, label_lengths)

  ref_loss = ctc_ops.ctc_loss(
      labels=ref_labels,
      inputs=ref_logits,
      sequence_length=logit_lengths,
      time_major=True)
  ref_grads = gradients_impl.gradients(ref_loss, [logits])[0]

  with self.cached_session():
    for _ in range(32):
      got_loss, want_loss = self.evaluate([dense_loss, ref_loss])
      self.assertAllClose(got_loss, want_loss)
      got_grads, want_grads = self.evaluate([dense_grads, ref_grads])
      self.assertAllClose(got_grads, want_grads, rtol=2e-06, atol=2e-06)
def _cdf(self, k):
  """Sums, per batch member, the probabilities selected by a length-`k` mask.

  `sequence_mask(k, event_size)` selects the first `k` classes of each row,
  so the result adds up the probabilities at class indices below `k`.

  Args:
    k: Value(s) at which to evaluate; converted to a tensor and flattened.

  Returns:
    The per-batch sums reshaped to `self._batch_shape()`.
  """
  k = ops.convert_to_tensor(k, name="k")

  # If there are multiple batch dimension, flatten them into one.
  flat_probs = array_ops.reshape(self._probs, [-1, self._event_size])
  flat_k = array_ops.reshape(k, (-1,))

  # Keep the selected probabilities and replace the rest with zeros.
  selected = array_ops.sequence_mask(flat_k, self._event_size)
  zeros = array_ops.zeros_like(flat_probs)
  contributions = array_ops.where(selected, flat_probs, zeros)

  flat_cdf = math_ops.reduce_sum(contributions, axis=-1)
  return array_ops.reshape(flat_cdf, self._batch_shape())
def testCtcLossDenseWithNegativeBlankIndexIsSameAsCtcLoss(self):
  """Compares `ctc_loss_dense` using `blank_index=-1` against `ctc_loss`."""
  device = "/GPU:0" if test.is_gpu_available() else "/CPU:0"
  with ops.device(device):
    random_seed.set_random_seed(5)

    batch_size = 8
    num_labels = 6
    label_length = 5
    num_frames = 12

    logits = random_ops.random_uniform([num_frames, batch_size, num_labels])
    labels = random_ops.random_uniform(
        [batch_size, label_length],
        minval=0, maxval=num_labels-1, dtype=dtypes.int64)
    label_lengths = random_ops.random_uniform(
        [batch_size], minval=2, maxval=label_length, dtype=dtypes.int64)

    # Zero out label positions beyond each example's label length.
    valid_labels = array_ops.sequence_mask(
        label_lengths, maxlen=label_length, dtype=label_lengths.dtype)
    labels = labels * valid_labels

    logit_lengths = [num_frames] * batch_size

    dense_loss = ctc_ops.ctc_loss_dense(
        labels=labels,
        logits=logits,
        label_length=label_lengths,
        logit_length=logit_lengths,
        blank_index=-1)
    dense_grads = gradients_impl.gradients(dense_loss, [logits])[0]

    # Reference: the sparse-label ctc_loss on the same logits and labels.
    ref_labels = math_ops.cast(labels, dtypes.int32)
    ref_labels = ctc_ops.dense_labels_to_sparse(ref_labels, label_lengths)
    ref_loss = ctc_ops.ctc_loss(
        labels=ref_labels,
        inputs=logits,
        sequence_length=logit_lengths,
        time_major=True)
    ref_grads = gradients_impl.gradients(ref_loss, [logits])[0]

    with self.cached_session():
      for _ in range(32):
        got_loss, want_loss = self.evaluate([dense_loss, ref_loss])
        self.assertAllClose(got_loss, want_loss)
        got_grads, want_grads = self.evaluate([dense_grads, ref_grads])
        self.assertAllClose(got_grads, want_grads, rtol=2e-06, atol=2e-06)
def testCtcLossV2(self):
  """`ctc_loss_v2` with sparse labels must match the dense-label result."""
  random_seed.set_random_seed(5)

  batch_size = 8
  num_labels = 6
  max_label_length = 5
  num_frames = 12

  labels = random_ops.random_uniform(
      [batch_size, max_label_length],
      minval=1, maxval=num_labels, dtype=dtypes.int64)
  logits = random_ops.random_uniform([num_frames, batch_size, num_labels])
  label_length = random_ops.random_uniform(
      [batch_size], minval=2, maxval=max_label_length, dtype=dtypes.int64)

  # Zero out label positions beyond each example's label length.
  valid_labels = array_ops.sequence_mask(
      label_length, maxlen=max_label_length, dtype=label_length.dtype)
  labels = labels * valid_labels

  logit_length = [num_frames] * batch_size

  # Reference loss and gradient, computed from the dense labels.
  with backprop.GradientTape() as tape:
    tape.watch(logits)
    ref_loss = ctc_ops.ctc_loss_v2(
        labels=labels,
        logits=logits,
        label_length=label_length,
        logit_length=logit_length)
  ref_grad = tape.gradient(ref_loss, [logits])

  sparse_labels = ctc_ops.dense_labels_to_sparse(labels, label_length)

  def assert_same_loss_and_grads(loss):
    # The comparison below is graph-mode only; eager runs are skipped.
    if not context.executing_eagerly():
      with self.cached_session():
        got_loss, want_loss = self.evaluate([loss, ref_loss])
        self.assertAllClose(got_loss, want_loss)
        grad = gradients_impl.gradients(loss, [logits])
        got_grad, want_grad = self.evaluate([grad, ref_grad])
        self.assertAllClose(got_grad, want_grad, rtol=2e-06, atol=2e-06)

  sparse_loss = ctc_ops.ctc_loss_v2(
      labels=sparse_labels,
      logits=logits,
      label_length=label_length,
      logit_length=logit_length,
      blank_index=0)
  assert_same_loss_and_grads(sparse_loss)
def advanced_reduce_sum(values, values_length, axis):
    """Reduces sum at `axis`, ignoring entries beyond each sequence length.

    Args:
        values: A tensor with shape [batch, time, dim] or [time, batch, dim]
        values_length: A tensor with shape [batch,]
        axis: The axis indicating time, 0/1.

    Returns:
        The reduced tensor with shape [batch, dim]
    """
    n_steps = array_ops.shape(values)[axis]
    # [batch_size, time]
    step_mask = array_ops.sequence_mask(
        lengths=values_length, maxlen=n_steps, dtype=dtypes.float32)
    if axis == 0:
        # Time-major input: flip the mask to [time, batch_size].
        step_mask = array_ops.transpose(step_mask, perm=[1, 0])
    # A trailing unit axis lets the mask broadcast over `dim`.
    weights = array_ops.expand_dims(step_mask, axis=2)
    return math_ops.reduce_sum(values * weights, axis=axis)
def _cdf(self, k):
  """Sums, per batch member, the probabilities selected by a length-`k` mask.

  `sequence_mask(k, event_size)` selects the first `k` classes of each row,
  so the result adds up the probabilities at class indices below `k`.

  Args:
    k: Value(s) at which to evaluate. When `self.validate_args` is set it is
      passed through `embed_check_integer_casting_closed` first.

  Returns:
    The per-batch sums reshaped to `self._batch_shape()`.
  """
  k = ops.convert_to_tensor(k, name="k")
  if self.validate_args:
    k = distribution_util.embed_check_integer_casting_closed(
        k, target_dtype=dtypes.int32)

  # If there are multiple batch dimension, flatten them into one.
  flat_probs = array_ops.reshape(self._probs, [-1, self._event_size])
  flat_k = array_ops.reshape(k, [-1])

  # We don't need to cast k to integer since `sequence_mask` does this for us.
  selected = array_ops.sequence_mask(flat_k, self._event_size)
  zeros = array_ops.zeros_like(flat_probs)
  contributions = array_ops.where(selected, flat_probs, zeros)

  flat_cdf = math_ops.reduce_sum(contributions, axis=-1)
  return array_ops.reshape(flat_cdf, self._batch_shape())
def _cdf(self, k):
  """Sums, per element of `k`, the probabilities selected by a length-`k` mask.

  `k` and `self.probs` are first broadcast against each other;
  `sequence_mask(k, event_size)` then selects the first `k` classes of each
  row, so the result adds up the probabilities at class indices below `k`.

  Args:
    k: Value(s) at which to evaluate. When `self.validate_args` is set it is
      passed through `embed_check_integer_casting_closed` first.

  Returns:
    A tensor with the shape of the broadcast `k`.
  """
  k = ops.convert_to_tensor(k, name="k")
  if self.validate_args:
    k = distribution_util.embed_check_integer_casting_closed(
        k, target_dtype=dtypes.int32)

  k, probs = _broadcast_cat_event_and_params(
      k, self.probs, base_dtype=self.dtype.base_dtype)

  # batch-flatten everything in order to use `sequence_mask()`.
  flat_probs = array_ops.reshape(probs, (-1, self._event_size))
  flat_k = array_ops.reshape(k, [-1])

  selected = array_ops.sequence_mask(flat_k, self._event_size)
  zeros = array_ops.zeros_like(flat_probs)
  contributions = array_ops.where(selected, flat_probs, zeros)
  flat_cdf = math_ops.reduce_sum(contributions, axis=-1)

  # Reshape back to the shape of the argument.
  return array_ops.reshape(flat_cdf, array_ops.shape(k))
def repeat(data, repeats, axis, name=None):
  """Repeats elements of `data` along `axis`.

  Args:
    data: An `N`-dimensional tensor.
    repeats: A 1-D integer tensor specifying how many times each element in
      `axis` should be repeated.  `len(repeats)` must equal
      `data.shape[axis]`.  Supports broadcasting from a scalar value.
    axis: `int`.  The axis along which to repeat values.  Must be less than
      `max(N, 1)`.
    name: A name for the operation.

  Returns:
    A tensor with `max(N, 1)` dimensions.  Has the same shape as `data`,
    except that dimension `axis` has size `sum(repeats)`.

  Raises:
    TypeError: If `axis` is not a Python `int`.

  #### Examples:
    ```python
    >>> repeat(['a', 'b', 'c'], repeats=[3, 0, 2], axis=0)
    ['a', 'a', 'a', 'c', 'c']
    >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=0)
    [[1, 2], [1, 2], [3, 4], [3, 4], [3, 4]]
    >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=1)
    [[1, 1, 2, 2, 2], [3, 3, 4, 4, 4]]
    ```
  """
  if not isinstance(axis, int):
    raise TypeError("axis must be an int; got %s" % type(axis).__name__)

  with ops.name_scope(name, "Repeat", [data, repeats]):
    data = ops.convert_to_tensor(data, name="data")
    repeats = convert_to_int_tensor(repeats, name="repeats")
    repeats.shape.with_rank_at_most(1)

    # If `data` is a scalar, then upgrade it to a vector.
    data = _with_nonzero_rank(data)
    data_shape = array_ops.shape(data)

    # If `axis` is negative, then convert it to a positive value.
    axis = get_positive_axis(axis, data.shape.ndims)
    tile_axis = axis + 1

    def _shape_with_free_axis():
      # The input shape with dimension `axis` left for reshape to infer.
      return array_ops.concat(
          [data_shape[:axis], [-1], data_shape[axis + 1:]], axis=0)

    repeats_rank = repeats.shape.ndims

    # Check data Tensor shapes.
    if repeats_rank == 1:
      data.shape.dims[axis].assert_is_compatible_with(repeats.shape[0])

    # If we know that `repeats` is a scalar, then we can just tile & reshape.
    if repeats_rank == 0:
      widened = array_ops.expand_dims(data, tile_axis)
      copies = tile_one_dimension(widened, tile_axis, repeats)
      return array_ops.reshape(copies, _shape_with_free_axis())

    # Broadcast the `repeats` tensor so rank(repeats) == axis + 1.
    if repeats_rank != tile_axis:
      repeats_shape = array_ops.shape(repeats)
      repeats_ndims = array_ops.rank(repeats)
      broadcast_shape = array_ops.concat(
          [data_shape[:tile_axis - repeats_ndims], repeats_shape], axis=0)
      repeats = array_ops.broadcast_to(repeats, broadcast_shape)
      repeats.set_shape([None] * tile_axis)

    # Create a "sequence mask" based on `repeats`, where slices across `axis`
    # contain one `True` value for each repetition.  E.g., if
    # `repeats = [3, 1, 2]`, then `mask = [[1, 1, 1], [1, 0, 0], [1, 1, 0]]`.
    max_repeat = math_ops.maximum(0, math_ops.reduce_max(repeats))
    keep = array_ops.sequence_mask(repeats, max_repeat)

    # Add a new dimension around each value that needs to be repeated, and
    # then tile that new dimension to match the maximum number of
    # repetitions.
    widened = array_ops.expand_dims(data, tile_axis)
    copies = tile_one_dimension(widened, tile_axis, max_repeat)

    # Use `boolean_mask` to discard the extra repeated values.  This also
    # flattens all dimensions up through `axis`.
    kept = array_ops.boolean_mask(copies, keep)

    # Reshape the output tensor to add the outer dimensions back.
    if axis == 0:
      result = kept
    else:
      result = array_ops.reshape(kept, _shape_with_free_axis())

    # Preserve shape information.
    if data.shape.ndims is not None:
      new_axis_size = 0 if repeats.shape[0] == 0 else None
      leading = data.shape[:axis]
      trailing = data.shape[axis + 1:]
      result.set_shape(
          leading.concatenate([new_axis_size]).concatenate(trailing))

    return result
def from_tensor(tensor, lengths=None, padding=None, ragged_rank=1, name=None):
  """Builds a `RaggedTensor` from a dense `Tensor`.

  Which values count as absent can be given either as a vector of row
  lengths or as a padding value, but not both.  With `lengths`, the result
  satisfies `output[row] = tensor[row][:lengths[row]]`.  With `padding`, any
  row *suffix* made up entirely of `padding` is dropped.  With neither, the
  result has no absent values.

  Examples:

  ```python
  >>> dt = tf.constant([[5, 7, 0], [0, 3, 0], [6, 0, 0]])
  >>> ragged.from_tensor(dt).eval().tolist()
  [[5, 7, 0], [0, 3, 0], [6, 0, 0]]
  >>> ragged.from_tensor(dt, lengths=[2, 0, 3]).eval().tolist()
  [[5, 7], [], [6, 0, 0]]
  >>> ragged.from_tensor(dt, padding=0).eval().tolist()
  [[5, 7], [0, 3], [6]]
  ```

  Args:
    tensor: The `Tensor` to convert.  Must have rank `ragged_rank + 1` or
      higher.
    lengths: An optional set of row lengths, specified using a 1-D integer
      `Tensor` whose length is equal to `tensor.shape[0]` (the number of rows
      in `tensor`).  If specified, then `output[row]` will contain
      `tensor[row][:lengths[row]]`.  Negative lengths are treated as zero.
    padding: An optional padding value.  If specified, then any row suffix
      consisting entirely of `padding` will be excluded from the returned
      RaggedTensor.  `padding` is a `Tensor` with the same dtype as `tensor`
      and with `shape=tensor.shape[ragged_rank + 1:]`.
    ragged_rank: Integer specifying the ragged rank for the returned
      `RaggedTensor`.  Must be greater than zero.
    name: A name prefix for the returned tensors (optional).

  Returns:
    A `RaggedTensor` with the specified `ragged_rank`.  The shape of the
    returned ragged tensor is compatible with the shape of `tensor`.

  Raises:
    ValueError: If both `lengths` and `padding` are specified, or if
      `ragged_rank` is not greater than zero.
    TypeError: If `ragged_rank` is not an `int`.
  """
  if lengths is not None and padding is not None:
    raise ValueError('Specify lengths or padding, but not both')
  if not isinstance(ragged_rank, int):
    raise TypeError('ragged_rank expected int, got %r' % ragged_rank)
  if ragged_rank <= 0:
    raise ValueError('ragged_rank must be greater than 0; got %s' % ragged_rank)

  with ops.name_scope(name, 'RaggedFromTensor', [tensor, lengths, padding]):
    tensor = ops.convert_to_tensor(tensor, name='tensor')
    tensor.shape.with_rank_at_least(ragged_rank + 1)
    input_shape = array_ops.shape(tensor, out_type=dtypes.int64)
    ncols = input_shape[1]

    # Handle ragged_rank>1 via recursion:
    # If the output should have multiple ragged dimensions, then first
    # flatten the tensor to eliminate all but the last ragged dimension,
    # and recursively convert that flattened tensor.  Then add on the splits
    # for the dimensions that we flattened out.
    if ragged_rank > 1:
      # Flatten `tensor` to eliminate all but the last ragged dimension.
      minus_one = constant_op.constant([-1], dtypes.int64)
      flat_shape = array_ops.concat(
          [minus_one, input_shape[ragged_rank:]], axis=0)
      flattened = array_ops.reshape(tensor, flat_shape)

      # Recursively convert the flattened tensor.
      inner_values = from_tensor(flattened, lengths, padding)

      # The total number of elements in each dimension.  E.g., if
      # input_shape=[3, 4, 5, 6], then dim[2] has 3*4*5 elements in total.
      dim_size = math_ops.cumprod(input_shape)

      # Construct splits tensors for the dimensions that were flattened.
      outer_splits = []
      for dim in range(1, ragged_rank):
        row_ids = math_ops.range(0, dim_size[dim - 1] + 1)
        outer_splits.append(row_ids * input_shape[dim])
      return ragged_factory_ops.from_nested_row_splits(
          inner_values, outer_splits)

    # If padding was specified, then use it to find row lengths.
    if padding is not None:
      padding = ops.convert_to_tensor(
          padding, name='padding', dtype=tensor.dtype)
      padding.shape.assert_is_compatible_with(tensor.shape[2:])

      # Find places where the padding is equal to the tensor.  (This will
      # broadcast `padding` across the outermost 2 dimensions of `tensor`,
      # so `has_default_value.shape = tensor.shape`.)
      has_default_value = math_ops.equal(padding, tensor)

      # If the padding isn't a scalar, then require that all values in the
      # padding match each item in the tensor.  After this block of code,
      # `has_default.shape = tensor.shape[:2]`.  (Unfortunately, we can't
      # just use reduce_all for both cases, because when you pass an empty
      # `axis` list to reduce_all, it reduces all axes; but we want it to
      # reduce no axes -- i.e., to be a no-op.)
      tensor_rank = array_ops.rank(tensor)
      reduce_axis = math_ops.range(2, tensor_rank)

      def _reduce_inner_dims():
        return math_ops.reduce_all(has_default_value, axis=reduce_axis)

      def _keep_as_is():
        return has_default_value

      has_default = control_flow_ops.cond(
          tensor_rank > 2, _reduce_inner_dims, _keep_as_is)
      has_default.set_shape(tensor_shape.TensorShape([None, None]))
      has_default.set_shape(tensor.shape[:2])

      # Use has_default to find the length of each row: for each
      # non-default item in a row, calculate the length that the row needs
      # to have to include that item; and then take the max of those values
      # (across each row).
      has_nondefault = math_ops.logical_not(has_default)
      has_nondefault = math_ops.cast(has_nondefault, dtypes.int64)
      one_based_cols = array_ops.expand_dims(math_ops.range(1, ncols + 1), 0)
      needed_length = has_nondefault * one_based_cols
      lengths = math_ops.reduce_max(needed_length, axis=1)

    # If we have lengths (either directly supplied, or computed from
    # paddings), then use those to construct splits; and then use masking to
    # get the corresponding values.
    if lengths is not None:
      lengths = ragged_util.convert_to_int_tensor(lengths, 'lengths',
                                                  dtypes.int64)
      lengths.shape.assert_has_rank(1)
      # Clamp every length into [0, ncols].
      lengths = math_ops.minimum(lengths, ncols)
      lengths = math_ops.maximum(lengths, 0)
      row_ends = math_ops.cumsum(lengths)
      row_splits = array_ops.concat(
          [array_ops.zeros([1], dtypes.int64), row_ends], axis=0)
      keep = array_ops.sequence_mask(lengths, maxlen=ncols)
      kept_values = array_ops.boolean_mask(tensor, keep)
      return ragged_factory_ops.from_row_splits(kept_values, row_splits)

    # If neither padding nor lengths were specified, then create a splits
    # vector that contains no default values, and reshape the input tensor
    # to form the values for the RaggedTensor.
    nrows = input_shape[0]
    nvals = nrows * ncols
    uniform_splits = math_ops.range(nrows + 1) * ncols
    values_shape = array_ops.concat([[nvals], input_shape[2:]], axis=0)
    all_values = array_ops.reshape(tensor, values_shape)
    return ragged_factory_ops.from_row_splits(all_values, uniform_splits)
def __call__(self,
             inputs,
             initial_state=None,
             dtype=None,
             sequence_length=None,
             scope=None):
  """Apply the LSTM to a full input sequence from a given starting state.

  Args:
    inputs: Either a `3-D` tensor shaped `[time_len, batch_size, input_size]`
      or a list of `time_len` tensors, each `[batch_size, input_size]`. A
      list is stacked into a single tensor before processing.
    initial_state: Optional pair `(initial_cell_state, initial_output)` of
      tensors shaped `[batch_size, self._num_units]`. When omitted, an
      all-zero state of type `dtype` is created.
    dtype: Data type of the initial state and expected output. Must be given
      when `initial_state` is omitted; otherwise it falls back to the dtype
      of `initial_state[0]`.
    sequence_length: Optional `int32` or `int64` vector (tensor) of size
      `[batch_size]` giving the valid length of each sequence, with values
      in `[0, time_len]`. When omitted, every sequence is treated as
      `time_len` steps long.
    scope: `VariableScope` for the created subgraph; defaults to
      "lstm_block_wrapper".

  Returns:
    A pair `(outputs, final_state)`:

    - outputs: a `3-D` tensor of shape `[time_len, batch_size, output_size]`,
      or a list of `time_len` tensors of shape `[batch_size, output_size]`
      when `inputs` was a list. If `sequence_length` is given, steps past
      each element's length are multiplied by zero.
    - final_state: an `LSTMStateTuple` `(cell_state, output)` matching
      `initial_state`.

  Raises:
    ValueError: if `inputs` is not rank 3, if its last dimension is not
      statically known, if neither `initial_state` nor `dtype` is given, or
      if `initial_state` does not have length 2.
  """
  with vs.variable_scope(scope or "lstm_block_wrapper"):
    got_list = isinstance(inputs, list)
    if got_list:
      inputs = array_ops.stack(inputs)

    static_shape = inputs.get_shape().with_rank(3)
    if not static_shape[2]:
      raise ValueError("Expecting inputs_shape[2] to be set: %s" %
                       static_shape)

    def _static_or_dynamic_dim(axis):
      # Use the statically known size when available; otherwise read the
      # size from the runtime shape of `inputs`.
      known = static_shape[axis].value
      if known is None:
        return array_ops.shape(inputs)[axis]
      return known

    batch_size = _static_or_dynamic_dim(1)
    time_len = _static_or_dynamic_dim(0)

    # Fill in whichever of `initial_state` / `dtype` the caller left out.
    if initial_state is None:
      if dtype is None:
        raise ValueError(
            "Either initial_state or dtype needs to be specified")
      state_shape = array_ops.stack([batch_size, self.num_units])
      zero_state = array_ops.zeros(state_shape, dtype=dtype)
      initial_state = (zero_state, zero_state)
    elif len(initial_state) != 2:
      raise ValueError(
          "Expecting initial_state to be a tuple with length 2 or None")
    elif dtype is None:
      dtype = initial_state[0].dtype

    # Run the underlying cell over the whole sequence.
    use_lengths = sequence_length is not None
    if use_lengths:
      sequence_length = ops.convert_to_tensor(sequence_length)
    initial_cell_state, initial_output = initial_state  # pylint: disable=unpacking-non-sequence
    cell_states, outputs = self._call_cell(
        inputs, initial_cell_state, initial_output, dtype, sequence_length)

    if use_lengths:
      # Zero every output step that lies beyond its sequence's length.
      # sequence_mask yields [batch, time]; transpose to time-major, then
      # repeat the mask across the unit axis.
      batch_major_mask = array_ops.sequence_mask(
          sequence_length, time_len, dtype=dtype)
      time_major_mask = array_ops.transpose(batch_major_mask, [1, 0])
      unit_mask = array_ops.tile(
          array_ops.expand_dims(time_major_mask, [-1]),
          [1, 1, self.num_units])
      outputs *= unit_mask

      # Put the initial state in front of the per-step states so that the
      # last valid state (step `sequence_length - 1`, which may be -1, i.e.
      # the initial state itself) can be addressed with `sequence_length`.
      shifted_cell_states = array_ops.concat(
          [array_ops.expand_dims(initial_cell_state, [0]), cell_states], 0)
      shifted_outputs = array_ops.concat(
          [array_ops.expand_dims(initial_output, [0]), outputs], 0)
      final_cell_state = self._gather_states(
          shifted_cell_states, sequence_length, batch_size)
      final_output = self._gather_states(
          shifted_outputs, sequence_length, batch_size)
    else:
      # Without sequence lengths the final state is simply the last step.
      final_cell_state = cell_states[-1]
      final_output = outputs[-1]

    if got_list:
      # Mirror the input container type: list in, list out.
      outputs = array_ops.unstack(outputs)

    return outputs, rnn_cell_impl.LSTMStateTuple(final_cell_state,
                                                 final_output)
def _forward_backward_log(state_trans_log_probs, initial_state_log_probs,
                          final_state_log_probs, observed_log_probs,
                          sequence_length):
  """Forward-backward algorithm computed in log domain.

  Args:
    state_trans_log_probs: tensor of shape [states, states] or
      if different transition matrix per batch [batch_size, states, states]
    initial_state_log_probs: tensor of shape [batch_size, states]
    final_state_log_probs: tensor of shape [batch_size, states]
    observed_log_probs: tensor of shape [frames, batch_size, states]
    sequence_length: tensor of shape [batch_size]

  Returns:
    forward backward log probabilities: tensor of shape
      [frames, batch, states], normalized over the states axis. Frames at or
      beyond an element's `sequence_length` have log(0) added to them.
    log_likelihood: tensor of shape [batch_size]

  Raises:
    ValueError: If state_trans_log_probs has unknown or incorrect rank.
  """
  # The backward pass uses the transition matrix with its last two axes
  # swapped; pick the permutation matching the (static) rank.
  if state_trans_log_probs.shape.ndims == 2:
    perm = [1, 0]
  elif state_trans_log_probs.shape.ndims == 3:
    perm = [0, 2, 1]
  else:
    raise ValueError(
        "state_trans_log_probs rank must be known and == 2 or 3, is: %s" %
        state_trans_log_probs.shape.ndims)

  bwd_state_trans_log_probs = array_ops.transpose(state_trans_log_probs, perm)
  batch_size = _get_dim(observed_log_probs, 1)

  def _forward(state_log_prob, obs_log_prob):
    """One forward step: apply transitions, add observation, renormalize."""
    state_log_prob = array_ops.expand_dims(state_log_prob, axis=1)  # Broadcast.
    state_log_prob += state_trans_log_probs
    state_log_prob = math_ops.reduce_logsumexp(state_log_prob, axis=-1)
    state_log_prob += obs_log_prob
    # Renormalize over states at every step.
    log_prob_sum = math_ops.reduce_logsumexp(
        state_log_prob, axis=-1, keepdims=True)
    state_log_prob -= log_prob_sum
    return state_log_prob

  # NOTE(review): the `fwd[1:]` / `bwd[0]` indexing below suggests that
  # `inclusive=True` makes `_scan` also return the initial accumulator --
  # confirm against `_scan`.
  fwd = _scan(_forward, observed_log_probs, initial_state_log_probs,
              inclusive=True)

  def _backward(accs, elems):
    """Calculate log probs and cumulative sum masked for sequence length."""
    state_log_prob, cum_log_sum = accs
    obs_log_prob, mask = elems
    state_log_prob += obs_log_prob
    state_log_prob = array_ops.expand_dims(state_log_prob, axis=1)  # Broadcast.
    state_log_prob += bwd_state_trans_log_probs
    state_log_prob = math_ops.reduce_logsumexp(state_log_prob, axis=-1)

    log_prob_sum = math_ops.reduce_logsumexp(
        state_log_prob, axis=-1, keepdims=True)
    state_log_prob -= log_prob_sum

    # Accumulate the per-step normalizer for valid frames only. Squeeze just
    # the trailing `keepdims` axis: an axis-less squeeze would also drop a
    # batch dimension of size 1 and gives an unknown static rank when the
    # batch dimension is not statically known.
    cum_log_sum += array_ops.squeeze(log_prob_sum, axis=[-1]) * mask
    batched_mask = array_ops.expand_dims(mask, axis=1)
    # For frames past the end of a sequence, reset the accumulator to the
    # final-state log probs so the recursion effectively starts at each
    # sequence's own last frame.
    out = state_log_prob * batched_mask
    out += final_state_log_probs * (1.0 - batched_mask)
    return out, cum_log_sum

  zero_log_sum = array_ops.zeros([batch_size])
  maxlen = _get_dim(observed_log_probs, 0)
  # NOTE(review): the mask is built as float32, so the log-prob inputs are
  # presumably float32 as well -- confirm before using other dtypes.
  mask = array_ops.sequence_mask(sequence_length, maxlen, dtypes.float32)
  # sequence_mask is [batch, frames]; the scan iterates over frames first.
  mask = array_ops.transpose(mask, perm=[1, 0])

  bwd, cum_log_sum = _scan(_backward, (observed_log_probs, mask),
                           (final_state_log_probs, zero_log_sum),
                           reverse=True, inclusive=True)

  # Combine forward and backward messages and normalize over states.
  fwd_bwd_log_probs = fwd[1:] + bwd[1:]
  fwd_bwd_log_probs_sum = math_ops.reduce_logsumexp(
      fwd_bwd_log_probs, axis=2, keepdims=True)
  fwd_bwd_log_probs -= fwd_bwd_log_probs_sum
  # log(1) = 0 leaves valid frames untouched; log(0) = -inf marks padding.
  fwd_bwd_log_probs += math_ops.log(array_ops.expand_dims(mask, axis=2))

  log_likelihood = bwd[0, :, 0] + cum_log_sum[0]

  return fwd_bwd_log_probs, log_likelihood
def testExceptions(self):
  """sequence_mask rejects non-1D `lengths` and non-scalar `maxlen`."""
  # (expected error regex, lengths argument, maxlen argument)
  invalid_calls = (
      ("lengths must be 1D", [[10, 20]], [10, 20]),
      ("maxlen must be scalar", [10, 20], [10, 20]),
  )
  with self.test_session():
    for expected_error, lengths, maxlen in invalid_calls:
      with self.assertRaisesRegexp(ValueError, expected_error):
        array_ops.sequence_mask(lengths, maxlen)
def testUnknownShape(self):
  """A placeholder with no declared shape yields a mask of unknown shape."""
  shapeless_lengths = array_ops.placeholder(dtype=dtypes.int32)
  mask = array_ops.sequence_mask(shapeless_lengths)
  self.assertEqual(mask.shape, None)