def crf_unary_score(tag_indices, sequence_lengths, inputs):
  """Sums the unary potentials selected by each tag sequence.

  Args:
    tag_indices: A [batch_size, max_seq_len] matrix of tag indices.
    sequence_lengths: A [batch_size] vector of true sequence lengths.
    inputs: A [batch_size, max_seq_len, num_tags] tensor of unary potentials.

  Returns:
    A [batch_size] vector with, per example, the sum of the potentials of the
    chosen tags after the length mask has been applied.
  """
  n_batch = array_ops.shape(inputs)[0]
  n_steps = array_ops.shape(inputs)[1]
  n_tags = array_ops.shape(inputs)[2]

  flat_potentials = array_ops.reshape(inputs, [-1])

  # Index of "tag 0" of every (example, step) cell inside the flat tensor.
  example_offsets = array_ops.expand_dims(
      math_ops.range(n_batch) * n_steps * n_tags, 1)
  step_offsets = array_ops.expand_dims(math_ops.range(n_steps) * n_tags, 0)
  cell_offsets = example_offsets + step_offsets

  # Adding the tag id to the cell offset addresses the chosen potential.
  flat_positions = array_ops.reshape(cell_offsets + tag_indices, [-1])
  picked = array_ops.gather(flat_potentials, flat_positions)
  per_step_scores = array_ops.reshape(picked, [n_batch, n_steps])

  masks = _lengths_to_masks(sequence_lengths, array_ops.shape(tag_indices)[1])
  return math_ops.reduce_sum(per_step_scores * masks, 1)
def testCrfLogNorm(self):
  """Checks crf_log_norm against a brute-force log-sum-exp over all taggings."""
  inputs = np.array(
      [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
  transition_params = np.array(
      [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
  num_words = inputs.shape[0]
  num_tags = inputs.shape[1]
  sequence_lengths = np.array(3, dtype=np.int32)

  def as_batch(value):
    # Every CRF op expects a leading batch dimension.
    return array_ops.expand_dims(value, 0)

  with self.test_session() as sess:
    # Score every possible tagging of the unpadded prefix.
    scores = []
    for candidate in itertools.product(
        range(num_tags), repeat=sequence_lengths):
      padded_tags = list(candidate)
      padded_tags.extend([0] * (num_words - sequence_lengths))
      scores.append(
          crf.crf_sequence_score(
              inputs=as_batch(inputs),
              tag_indices=as_batch(padded_tags),
              sequence_lengths=as_batch(sequence_lengths),
              transition_params=constant_op.constant(transition_params)))

    expected = math_ops.reduce_logsumexp(scores)
    actual = crf.crf_log_norm(
        inputs=as_batch(inputs),
        sequence_lengths=as_batch(sequence_lengths),
        transition_params=constant_op.constant(transition_params))
    actual = array_ops.squeeze(actual, [0])

    expected_value, actual_value = sess.run([expected, actual])
    self.assertAllClose(actual_value, expected_value)
def _cross_squared_distance_matrix(x, y):
  """Pairwise squared Euclidean distances between the rows of x and of y.

  Args:
    x: [batch_size, n, d] float `Tensor`
    y: [batch_size, m, d] float `Tensor`

  Returns:
    squared_dists: [batch_size, n, m] float `Tensor`, where
    squared_dists[b,i,j] = ||x[b,i,:] - y[b,j,:]||^2
  """
  x_sq_norms = math_ops.reduce_sum(math_ops.square(x), 2)
  y_sq_norms = math_ops.reduce_sum(math_ops.square(y), 2)

  # Shape the norms as a column (x) and a row (y) so they broadcast to [n, m].
  x_sq_column = array_ops.expand_dims(x_sq_norms, 2)
  y_sq_row = array_ops.expand_dims(y_sq_norms, 1)

  cross_terms = math_ops.matmul(x, y, adjoint_b=True)

  # ||x_bi - y_bj||^2 = x_bi'x_bi - 2 x_bi'y_bj + y_bj'y_bj
  return x_sq_column - 2 * cross_terms + y_sq_row
def _smart_select(pred, fn_then, fn_else):
  """Selects fn_then() or fn_else() based on the value of pred.

  The purpose of this function is the same as `utils.smart_cond`. However, at
  the moment there is a bug (b/36297356) that seems to kick in only when
  `smart_cond` delegates to `tf.cond`, which sometimes results in the training
  hanging when using parameter servers. This function will output the result
  of `fn_then` or `fn_else` if `pred` is known at graph construction time.
  Otherwise, it will use `tf.where` which will result in some redundant work
  (both branches will be computed but only one selected). However, the tensors
  involved will usually be small (means and variances in batchnorm), so the
  cost will be small and will not be incurred at all if `pred` is a constant.

  Args:
    pred: A boolean scalar `Tensor`.
    fn_then: A callable to use when pred==True.
    fn_else: A callable to use when pred==False.

  Returns:
    A `Tensor` whose value is fn_then() or fn_else() based on the value of pred.
  """
  pred_value = utils.constant_value(pred)
  if pred_value is not None:
    # The static value of a constant tensor comes back as a NumPy boolean, for
    # which an identity test against Python's `False` never succeeds. Use
    # truthiness so a statically-false predicate also short-circuits instead
    # of building both branches.
    return fn_then() if pred_value else fn_else()

  # Dynamic predicate: evaluate both branches and pick one with `where`. The
  # leading dimension of size 1 lets the length-1 `pred` vector select the
  # whole tensor.
  t_then = array_ops.expand_dims(fn_then(), 0)
  t_else = array_ops.expand_dims(fn_else(), 0)
  pred = array_ops.reshape(pred, [1])
  result = array_ops.where(pred, t_then, t_else)
  return array_ops.squeeze(result, [0])
def sample(self, n, seed=None, name="sample"):
  """Draws `n` observations from the Uniform distribution(s).

  Args:
    n: `Scalar`, type int32, the number of observations to sample.
    seed: Python integer, the random seed.
    name: The name to give this op.

  Returns:
    samples: a `Tensor` of shape `(n,) + self.batch_shape + self.event_shape`
        with values of type `self.dtype`.
  """
  with ops.name_scope(self.name), ops.op_scope([self.a, self.b, n], name):
    n = ops.convert_to_tensor(n, name="n")
    static_n = tensor_util.constant_value(n)

    draw_shape = array_ops.concat(
        0, [array_ops.pack([n]), self.batch_shape()])
    unit_draws = random_ops.random_uniform(
        shape=draw_shape, dtype=self.dtype, seed=seed)

    # Give shape inference whatever is statically known about the result.
    static_shape = tensor_shape.vector(static_n).concatenate(
        self.get_batch_shape())
    unit_draws.set_shape(static_shape)

    # Rescale the unit draws: a + range * u, broadcast over the sample axis.
    low = array_ops.expand_dims(self.a, 0)
    width = array_ops.expand_dims(self.range(), 0)
    return low + width * unit_draws
def _variance(self):
  """Builds the matrix with -mean_i * p_j off the diagonal and mean - mean * p on it."""
  # Multiply by ones shaped like `n` so `p` carries n's leading dimensions.
  ones_like_n = array_ops.expand_dims(array_ops.ones_like(self.n), -1)
  probs = self.p * ones_like_n

  mean_column = array_ops.expand_dims(self._mean_val, -1)
  probs_row = array_ops.expand_dims(probs, -2)
  mean_outer_probs = math_ops.batch_matmul(mean_column, probs_row)

  return array_ops.batch_matrix_set_diag(
      -mean_outer_probs, self._mean_val - self._mean_val * probs)
def power_sums_tensor(array_size, power_matrix, multiplier):
  r"""Computes \sum_{i=0}^{N-1} A^i B (A^i)^T for N=0..(array_size + 1).

  Args:
    array_size: The number of non-trivial sums to pre-compute.
    power_matrix: The "A" matrix above.
    multiplier: The "B" matrix above

  Returns:
    A Tensor with S[N] = \sum_{i=0}^{N-1} A^i B (A^i)^T
      S[0] is the zero matrix
      S[1] is B
      S[2] is A B A^T + B
      ...and so on
  """
  array_size = math_ops.cast(array_size, dtypes.int32)
  power_matrix = ops.convert_to_tensor(power_matrix)

  # Identity with the same (static) shape and dtype as A; seeds the scan.
  identity = linalg_ops.eye(
      array_ops.shape(power_matrix)[0], dtype=power_matrix.dtype)
  identity.set_shape(ops.convert_to_tensor(power_matrix).get_shape())

  def _next_power(previous_power, _):
    # One scan step: multiply the running power by A once more.
    return math_ops.matmul(previous_power, power_matrix)

  powers = functional_ops.scan(
      _next_power,
      math_ops.range(array_size - 1),
      initializer=identity)

  # Individual terms: B first, then A^i B (A^i)^T for each scanned power.
  leading_term = array_ops.expand_dims(multiplier, 0)
  remaining_terms = math_ops.matmul(
      batch_times_matrix(powers, multiplier), powers, adjoint_b=True)
  running_sums = math_ops.cumsum(
      array_ops.concat([leading_term, remaining_terms], 0))

  # Prepend the empty sum, S[0] = 0.
  empty_sum = array_ops.expand_dims(array_ops.zeros_like(multiplier), 0)
  return array_ops.concat([empty_sum, running_sums], 0)
def __call__(self, shape, dtype=None, partition_info=None):
  """Returns a kernel that is zero except for an orthogonal block at its center.

  Args:
    shape: Shape of the kernel to create; must have 3, 4 or 5 dimensions, and
      its second-to-last entry may not exceed its last entry.
    dtype: Type of the generated values; defaults to `self.dtype`.
    partition_info: Unused by this method.

  Returns:
    A tensor of the requested shape.

  Raises:
    ValueError: If `shape` has an unsupported rank or if its second-to-last
      entry is greater than its last entry.
  """
  if dtype is None:
    dtype = self.dtype

  rank = len(shape)
  if not 3 <= rank <= 5:
    raise ValueError("The tensor to initialize must be at least "
                     "three-dimensional and at most five-dimensional")
  if shape[-2] > shape[-1]:
    raise ValueError("In_filters cannot be greater than out_filters.")

  # QR-factorize a random square matrix.
  gaussian = random_ops.random_normal([shape[-1], shape[-1]],
                                      dtype=dtype, seed=self.seed)
  q, r = linalg_ops.qr(gaussian, full_matrices=False)
  # Make Q uniform by multiplying with the signs of R's diagonal.
  r_diag = array_ops.diag_part(r)
  q = q * math_ops.sign(r_diag)
  q = q[:shape[-2], :]
  q = q * math_ops.sqrt(math_ops.cast(self.gain, dtype=dtype))

  # Place the block at the middle index of every leading dimension.
  center = [(extent - 1) // 2 for extent in shape[:-2]]
  return array_ops.scatter_nd([center], array_ops.expand_dims(q, 0), shape)
def _orthogonal_kernel(self, ksize, cin, cout):
  """Construct orthogonal kernel for convolution.

  Args:
    ksize: kernel size
    cin: number of input channels
    cout: number of output channels

  Returns:
    an [ksize, ksize, cin, cout] orthogonal kernel.

  Raises:
    ValueError: if cin > cout.
  """
  if cin > cout:
    raise ValueError("The number of input channels cannot exceed "
                     "the number of output channels.")

  def _new_block():
    # A block built from two fresh symmetric projections.
    return self._block_orth(self._symmetric_projection(cout),
                            self._symmetric_projection(cout))

  orth = self._orthogonal_matrix(cout)[0:cin, :]
  if ksize == 1:
    # A 1x1 kernel is the matrix itself with two leading unit dimensions.
    return array_ops.expand_dims(array_ops.expand_dims(orth, 0), 0)

  # Start from one block and convolve in ksize - 2 more.
  blocks = _new_block()
  for _ in range(ksize - 2):
    blocks = self._matrix_conv(blocks, _new_block())

  # Left-multiply every spatial entry by the cin x cout matrix.
  positions = [(row, col) for row in range(ksize) for col in range(ksize)]
  for position in positions:
    blocks[position] = math_ops.matmul(orth, blocks[position])

  return self._dict_to_tensor(blocks, ksize, ksize)
def _NthElementGrad(op, grad):
  """Return the gradients for NthElement.

  Args:
    op: The NthElementOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the NthElementOp

  Returns:
    A list of two tensors, the first being the gradient w.r.t. the input,
    the second being the gradient w.r.t. the N (None).
  """
  values = op.inputs[0]
  selected = op.outputs[0]

  # 1 where an input entry equals the selected value of its row, else 0.
  is_selected = math_ops.cast(
      math_ops.equal(array_ops.expand_dims(selected, -1), values), grad.dtype)

  upstream = array_ops.expand_dims(grad, -1)

  # Ties share the gradient equally, so divide by the number of matches.
  match_counts = array_ops.expand_dims(
      math_ops.reduce_sum(is_selected, -1), -1)

  values_grad = math_ops.div(is_selected, match_counts) * upstream
  return [values_grad, None]
def _SoftmaxCrossEntropyWithLogitsGrad(op, grad_loss, grad_grad):
  """Gradient function for SoftmaxCrossEntropyWithLogits.

  Args:
    op: The SoftmaxCrossEntropyWithLogits op being differentiated.
    grad_loss: Backprop for the cost output; it is multiplied with the
      gradients the op already produced (its second output).
    grad_grad: Backprop for the softmax-gradient output.

  Returns:
    A pair: the gradient for the first input (the logits) and the gradient
    for the second input.
  """

  def _is_known_zero(g):
    # Some introspection to check if the gradient is feeding zeros.
    if context.executing_eagerly():
      # TODO(apassos) add an efficient way to detect eager zeros here.
      return False
    if g.op.type in ("ZerosLike", "Zeros"):
      return True
    static_value = tensor_util.constant_value(g)
    if static_value is None:
      return False
    return (static_value == 0).all()

  # Second derivative is just softmax derivative w.r.t. logits.
  softmax_grad = op.outputs[1]
  grad = _BroadcastMul(grad_loss, softmax_grad)

  logits = op.inputs[0]
  if grad_grad is not None and not _is_known_zero(grad_grad):
    softmax = nn_ops.softmax(logits)
    # Per-row inner product of grad_grad and softmax, as a [batch, 1] tensor.
    inner = math_ops.matmul(
        array_ops.expand_dims(grad_grad, 1),
        array_ops.expand_dims(softmax, 2))
    correction = (grad_grad - array_ops.squeeze(inner, axis=1)) * softmax
    grad = grad + correction

  return grad, _BroadcastMul(grad_loss, -nn_ops.log_softmax(logits))
def cudnn_lstm(inputs, input_h, input_c, kernel, recurrent_kernel, bias,
               units):
  """Runs an LSTM through the CuDNN RNN op.

  Args:
    inputs: Input tensor; its first two axes are swapped before and after the
      CuDNN call.
    input_h: Initial hidden state; a leading axis of size 1 is added.
    input_c: Initial cell state; a leading axis of size 1 is added.
    kernel: Input kernel, sliced column-wise into four `units`-wide pieces.
    recurrent_kernel: Recurrent kernel, sliced the same way as `kernel`.
    bias: Bias vector, sliced into eight `units`-wide pieces.
    units: Width of each slice.

  Returns:
    A tuple `(outputs, [h, c], runtime)` where `runtime` is the string
    constant 'cudnn'.
  """
  inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
  input_h = array_ops.expand_dims(input_h, axis=0)
  input_c = array_ops.expand_dims(input_c, axis=0)

  # Four column slices of each kernel, input kernel first.
  weight_slices = [
      kernel[:, :units],
      kernel[:, units:units * 2],
      kernel[:, units * 2:units * 3],
      kernel[:, units * 3:],
      recurrent_kernel[:, :units],
      recurrent_kernel[:, units:units * 2],
      recurrent_kernel[:, units * 2:units * 3],
      recurrent_kernel[:, units * 3:],
  ]
  # Eight consecutive slices of the bias vector.
  bias_slices = [
      bias[:units],
      bias[units:units * 2],
      bias[units * 2:units * 3],
      bias[units * 3:units * 4],
      bias[units * 4:units * 5],
      bias[units * 5:units * 6],
      bias[units * 6:units * 7],
      bias[units * 7:],
  ]
  params = _canonical_to_params(
      weights=weight_slices,
      biases=bias_slices,
      shape=constant_op.constant([-1]))

  outputs, final_h, final_c, _ = gen_cudnn_rnn_ops.cudnn_rnn(
      inputs, input_h=input_h, input_c=input_c, params=params)
  outputs = array_ops.transpose(outputs, perm=[1, 0, 2])

  # Drop the leading axis that was added to the states above.
  final_h = final_h[0]
  final_c = final_c[0]
  runtime = constant_op.constant('cudnn', dtype=dtypes.string, name='runtime')
  return outputs, [final_h, final_c], runtime
def _operator_and_matrix(self, build_info, dtype, use_placeholder):
  """Builds a scaled-identity operator and the dense matrix it should equal.

  Args:
    build_info: Object whose `shape` attribute gives the (batch) matrix shape.
    dtype: Type of the multiplier and of the dense matrix.
    use_placeholder: If true, the operator receives the multiplier through a
      `placeholder_with_default` of unknown shape.

  Returns:
    A pair `(operator, matrix)`.
  """
  full_shape = list(build_info.shape)
  assert full_shape[-1] == full_shape[-2]
  batch_shape = full_shape[:-2]
  num_rows = full_shape[-1]

  # Uniform values that are at least length 1 from the origin.  Allows the
  # operator to be well conditioned.
  # Shape batch_shape
  multiplier = linear_operator_test_util.random_sign_uniform(
      shape=batch_shape, minval=1., maxval=2., dtype=dtype)

  # Nothing to feed since LinearOperatorScaledIdentity takes no Tensor args.
  if use_placeholder:
    operator_multiplier = array_ops.placeholder_with_default(
        multiplier, shape=None)
  else:
    operator_multiplier = multiplier

  operator = linalg_lib.LinearOperatorScaledIdentity(
      num_rows, operator_multiplier)

  # Two trailing unit dimensions let the multiplier broadcast against eye().
  scale = array_ops.expand_dims(array_ops.expand_dims(multiplier, -1), -1)
  matrix = scale * linalg_ops.eye(
      num_rows, batch_shape=batch_shape, dtype=dtype)

  return operator, matrix
def call(self, inputs, mask=None):
  """Applies attention to `inputs`, honoring optional masks and causality.

  Args:
    inputs: List whose first entry is the query, second the value and
      optional third the key (the value is used as key when it is absent).
    mask: Optional list whose first entry is the query mask and second the
      value mask.

  Returns:
    The attention result, multiplied by the query mask when one is given.
  """
  self._validate_call_args(inputs=inputs, mask=mask)
  query = inputs[0]
  value = inputs[1]
  key = inputs[2] if len(inputs) > 2 else value
  query_mask = mask[0] if mask else None
  value_mask = mask[1] if mask else None

  scores = self._calculate_scores(query=query, key=key)

  if value_mask is not None:
    # Mask of shape [batch_size, 1, Tv].
    value_mask = array_ops.expand_dims(value_mask, axis=-2)

  causal_mask = None
  if self.causal:
    # Creates a lower triangular mask, so position i cannot attend to
    # positions j>i. This prevents the flow of information from the future
    # into the past.
    scores_shape = array_ops.shape(scores)
    # causal_mask_shape = [1, Tq, Tv].
    leading_ones = array_ops.ones_like(scores_shape[:-2])
    causal_mask_shape = array_ops.concat(
        [leading_ones, scores_shape[-2:]], axis=0)
    causal_mask = _lower_triangular_mask(causal_mask_shape)

  combined_mask = _merge_masks(value_mask, causal_mask)
  attended = self._apply_scores(
      scores=scores, value=value, scores_mask=combined_mask)

  if query_mask is None:
    return attended

  # Mask of shape [batch_size, Tq, 1].
  query_mask = array_ops.expand_dims(query_mask, axis=-1)
  return attended * math_ops.cast(query_mask, dtype=attended.dtype)
def _ExtractImagePatchesGrad(op, grad):
  """Computes the gradient of an extract-image-patches op w.r.t. its input.

  The op is re-run on a tensor of input position indices to find out which
  input position every output element was copied from. That mapping becomes a
  sparse matrix with ones as values, which is then multiplied with the
  flattened incoming gradient.

  Args:
    op: The op being differentiated. Its first input and first output must
      have 4 dimensions, and it must provide the "ksizes", "strides", "rates"
      and "padding" attributes.
    grad: Gradient with respect to the op's output.

  Returns:
    A one-element list holding the gradient with respect to the op's input.
  """
  # NOTE(review): rows_in / cols_in (and rows_out / cols_out below) are static
  # dimension values used in Python arithmetic, so they presumably have to be
  # known at graph construction time -- confirm.
  batch_size, rows_in, cols_in, channels = [
      dim.value for dim in op.inputs[0].shape.dims
  ]
  # Batch size and channel count are re-read from the dynamic shape, replacing
  # the static values unpacked above.
  input_bhwc = array_ops.shape(op.inputs[0])
  batch_size = input_bhwc[0]
  channels = input_bhwc[3]

  # Create indices matrix for input tensor.
  # Note that 0 is preserved for padding location,
  # so indices for input start from 1 to 1 + rows_in * cols_in.
  input_indices_num = 1 + rows_in * cols_in
  input_idx = array_ops.reshape(math_ops.range(1, input_indices_num,
                                               dtype=ops.dtypes.int64),
                                (1, rows_in, cols_in, 1))
  # Running the forward op on the index image tells us, for every output
  # element, the index of the input position it came from.
  input_idx_patched = gen_array_ops.extract_image_patches(
      input_idx,
      op.get_attr("ksizes"),
      op.get_attr("strides"),
      op.get_attr("rates"),
      op.get_attr("padding"))

  # Create indices matrix for output tensor.
  _, rows_out, cols_out, _ = [dim.value for dim in op.outputs[0].shape.dims]
  _, ksize_r, ksize_c, _ = op.get_attr("ksizes")
  # Indices for output start from 0.
  output_indices_num = rows_out * cols_out * ksize_r * ksize_c
  output_idx = array_ops.reshape(math_ops.range(output_indices_num,
                                                dtype=ops.dtypes.int64),
                                 (1, rows_out, cols_out, ksize_r * ksize_c))

  # Construct mapping table for indices: (input -> output).
  idx_matrix = array_ops.concat(
      [array_ops.expand_dims(input_idx_patched, axis=-1),
       array_ops.expand_dims(output_idx, axis=-1)],
      axis=-1)
  idx_map = array_ops.reshape(idx_matrix, (-1, 2))

  # Sparse matrix with a 1 at every (input index, output index) pair.
  sp_shape = (input_indices_num, output_indices_num)
  sp_mat_full = sparse_tensor.SparseTensor(
      idx_map,
      array_ops.ones([output_indices_num], dtype=grad.dtype),
      sp_shape)
  # Remove all padding locations [0, :].
  sp_mat = sparse_ops.sparse_slice(sp_mat_full,
                                   (1, 0),
                                   (input_indices_num - 1, output_indices_num))

  # Move batch and channels to the end so that each row of the flattened
  # gradient corresponds to one output index.
  grad_expanded = array_ops.transpose(
      array_ops.reshape(
          grad, (batch_size, rows_out, cols_out, ksize_r, ksize_c, channels)),
      (1, 2, 3, 4, 0, 5))
  grad_flat = array_ops.reshape(grad_expanded, (-1, batch_size * channels))

  # For every input position, sum the gradients of the outputs copied from it.
  jac = sparse_ops.sparse_tensor_dense_matmul(sp_mat, grad_flat)

  # Restore the (batch, rows, cols, channels) layout.
  grad_out = array_ops.reshape(jac, (rows_in, cols_in, batch_size, channels))
  grad_out = array_ops.transpose(grad_out, (2, 0, 1, 3))

  return [grad_out]
def testCrfLogLikelihood(self):
  """Checks that the likelihoods of all possible taggings sum to one."""
  inputs = np.array(
      [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
  transition_params = np.array(
      [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
  sequence_lengths = np.array(3, dtype=np.int32)
  num_words = inputs.shape[0]
  num_tags = inputs.shape[1]

  def as_batch(value):
    # Every CRF op expects a leading batch dimension.
    return array_ops.expand_dims(value, 0)

  with self.test_session() as sess:
    log_likelihoods = []

    # Enumerate every tagging of the unpadded prefix.
    for candidate in itertools.product(
        range(num_tags), repeat=sequence_lengths):
      padded_tags = list(candidate)
      padded_tags.extend([0] * (num_words - sequence_lengths))
      log_likelihood, _ = crf.crf_log_likelihood(
          inputs=as_batch(inputs),
          tag_indices=as_batch(padded_tags),
          sequence_lengths=as_batch(sequence_lengths),
          transition_params=constant_op.constant(transition_params))
      log_likelihoods.append(log_likelihood)

    # Probabilities summing to 1 means their log-sum-exp is 0.
    total = math_ops.reduce_logsumexp(log_likelihoods)
    total_value = sess.run(total)
    self.assertAllClose(total_value, 0.0)
def _testDrawBoundingBoxColorCycling(self, img):
  """Tests if cycling works appropriately.

  Args:
    img: 3-D numpy image on which to draw.
  """
  # THIS TABLE MUST MATCH draw_bounding_box_op.cc
  color_table = np.asarray([[1, 1, 0, 1], [0, 0, 1, 1], [1, 0, 0, 1],
                            [0, 1, 0, 1], [0.5, 0, 0.5, 1], [0.5, 0.5, 0, 1],
                            [0.5, 0, 0, 1], [0, 0, 0.5, 1], [0, 1, 1, 1],
                            [1, 0, 1, 1]])
  assert len(img.shape) == 3
  depth = img.shape[2]
  assert depth <= color_table.shape[1]
  assert depth in (1, 3, 4)
  # Set red channel to 1 if image is GRY.
  if depth == 1:
    color_table[:, 0] = 1

  num_colors = color_table.shape[0]
  # Going one past the table length exercises the wrap-around.
  for num_boxes in range(1, num_colors + 2):
    # Expected image: the border filled with the color of the last box.
    image = np.copy(img)
    expected_color = color_table[(num_boxes - 1) % num_colors, 0:depth]
    expected_image = self._fillBorder(image, expected_color)

    # num_boxes identical boxes, each spanning the whole image.
    full_box = np.asarray([0, 0, 1, 1])
    bboxes = np.vstack([full_box] * num_boxes)
    bboxes = math_ops.to_float(bboxes)
    bboxes = array_ops.expand_dims(bboxes, 0)

    image = ops.convert_to_tensor(image)
    image = image_ops_impl.convert_image_dtype(image, dtypes.float32)
    image = array_ops.expand_dims(image, 0)
    image = image_ops.draw_bounding_boxes(image, bboxes)

    with self.test_session(use_gpu=False) as sess:
      drawn_image = np.squeeze(sess.run(image), 0)
      self.assertAllEqual(expected_image, drawn_image)
def center_bias(self, center_bias_var, gradients, hessians):
  """Centers the bias using the batch means of the gradients and hessians.

  Args:
    center_bias_var: Passed through unchanged to `self._center_bias_fn`.
    gradients: Gradients for the batch; averaged over axis 0.
    hessians: Hessians for the batch; averaged over axis 0.

  Returns:
    Whatever `self._center_bias_fn` returns.
  """

  def _batch_mean(values):
    # Mean over axis 0, keeping a leading axis of size 1.
    return array_ops.expand_dims(math_ops.reduce_mean(values, 0), 0)

  # For in memory, we already have a full batch of gradients and hessians,
  # so just take a mean and proceed with centering.
  mean_gradients = _batch_mean(gradients)
  mean_hessians = _batch_mean(hessians)
  return self._center_bias_fn(center_bias_var, mean_gradients, mean_hessians)
def _operator_and_mat_and_feed_dict(self, shape, dtype, use_placeholder):
  """Builds a scaled-identity operator, its dense matrix and a feed dict.

  Args:
    shape: Shape of the (batch) matrix; its last two entries must be equal.
    dtype: Type of the multiplier and of the dense matrix.
    use_placeholder: If true, the operator is built on a placeholder and the
      evaluated multiplier is returned in the feed dict.

  Returns:
    A triple `(operator, mat, feed_dict)`; `feed_dict` is `None` when no
    placeholder is used.
  """
  full_shape = list(shape)
  assert full_shape[-1] == full_shape[-2]
  batch_shape = full_shape[:-2]
  num_rows = full_shape[-1]

  # Uniform values that are at least length 1 from the origin.  Allows the
  # operator to be well conditioned.
  # Shape batch_shape
  multiplier = linear_operator_test_util.random_sign_uniform(
      shape=batch_shape, minval=1., maxval=2., dtype=dtype)

  operator = linalg_lib.LinearOperatorScaledIdentity(num_rows, multiplier)

  # Nothing to feed since LinearOperatorScaledIdentity takes no Tensor args.
  feed_dict = None
  if use_placeholder:
    multiplier_ph = array_ops.placeholder(dtype=dtype)
    # Evaluate once so the fed value and the dense matrix use the same numbers.
    multiplier = multiplier.eval()
    operator = linalg_lib.LinearOperatorScaledIdentity(
        num_rows, multiplier_ph)
    feed_dict = {multiplier_ph: multiplier}

  # Two trailing unit dimensions let the multiplier broadcast against eye().
  scale = array_ops.expand_dims(array_ops.expand_dims(multiplier, -1), -1)
  mat = scale * linalg_ops.eye(num_rows, batch_shape=batch_shape, dtype=dtype)

  return operator, mat, feed_dict
def call(self, inputs):
  """Pools 1D inputs by adding a dummy axis and using the 2D pooling op.

  Args:
    inputs: Input in NWC layout when `self.data_format` is 'channels_last',
      otherwise in NCW layout.

  Returns:
    The pooled tensor with the dummy axis removed again.
  """
  # There is no TF op for 1D pooling, hence we make the inputs 4D.
  if self.data_format == 'channels_last':
    # input is NWC, make it NHWC
    dummy_axis = 1
    # pool on the W dim
    pool_shape = (1, 1) + self.pool_size + (1,)
    strides = (1, 1) + self.strides + (1,)
    data_format = 'NHWC'
  else:
    # input is NCW, make it NCHW
    dummy_axis = 2
    # pool on the W dim
    pool_shape = (1, 1, 1) + self.pool_size
    strides = (1, 1, 1) + self.strides
    data_format = 'NCHW'

  expanded = array_ops.expand_dims(inputs, dummy_axis)
  pooled = self.pool_function(
      expanded,
      ksize=pool_shape,
      strides=strides,
      padding=self.padding.upper(),
      data_format=data_format)
  return array_ops.squeeze(pooled, dummy_axis)
def frames(signal, frame_length, frame_step, name=None):
  """Frame a signal into overlapping frames.

  May be used in front of spectral functions.

  For example:

  ```python
  pcm = tf.placeholder(tf.float32, [None, 9152])
  frames = tf.contrib.signal.frames(pcm, 512, 180)
  magspec = tf.abs(tf.spectral.rfft(frames, [512]))
  image = tf.expand_dims(magspec, 3)
  ```

  Args:
    signal: A `Tensor` of shape `[batch_size, signal_length]`.
    frame_length: An `int32` or `int64` `Tensor`. The length of each frame.
    frame_step: An `int32` or `int64` `Tensor`. The step between frames.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of frames with shape `[batch_size, num_frames, frame_length]`.

  Raises:
    ValueError: if signal does not have rank 2.
  """
  with ops.name_scope(name, "frames", [signal, frame_length, frame_step]):
    signal = ops.convert_to_tensor(signal, name="signal")
    frame_length = ops.convert_to_tensor(frame_length, name="frame_length")
    frame_step = ops.convert_to_tensor(frame_step, name="frame_step")

    signal_rank = signal.shape.ndims

    if signal_rank != 2:
      # Format the rank instead of concatenating it: the rank is an int (or
      # None when unknown), and "str + int" raises TypeError, which would hide
      # the documented ValueError.
      raise ValueError(
          "expected signal to have rank 2 but was %s" % signal_rank)

    signal_length = array_ops.shape(signal)[1]

    # Number of frames needed to cover the signal; the last frame is padded.
    num_frames = math_ops.ceil((signal_length - frame_length) / frame_step)
    num_frames = 1 + math_ops.cast(num_frames, dtypes.int32)

    # Zero-pad the tail so that the last frame is complete.
    pad_length = (num_frames - 1) * frame_step + frame_length
    pad_signal = array_ops.pad(signal, [[0, 0], [0,
                                                 pad_length - signal_length]])

    # indices[f, k] = f * frame_step + k: sample k of frame f.
    indices_frame = array_ops.expand_dims(math_ops.range(frame_length), 0)
    indices_frames = array_ops.tile(indices_frame, [num_frames, 1])

    indices_step = array_ops.expand_dims(
        math_ops.range(num_frames) * frame_step, 1)
    indices_steps = array_ops.tile(indices_step, [1, frame_length])

    indices = indices_frames + indices_steps

    # TODO(androbin): remove `transpose` when `gather` gets `axis` support
    pad_signal = array_ops.transpose(pad_signal)
    signal_frames = array_ops.gather(pad_signal, indices)
    signal_frames = array_ops.transpose(signal_frames, perm=[2, 0, 1])

    return signal_frames
def _sample_n(self, n, seed=None):
  """Draws `n` uniform samples for every batch member.

  Args:
    n: Number of samples; becomes the leading dimension of the result.
    seed: Seed forwarded to the random op.

  Returns:
    `a + range() * u`, with `u` drawn by `random_uniform`.
  """
  draw_shape = array_ops.concat(([n], self.batch_shape()), 0)
  unit_draws = random_ops.random_uniform(
      shape=draw_shape, dtype=self.dtype, seed=seed)

  # A leading unit axis makes both parameters broadcast over the samples.
  low = array_ops.expand_dims(self.a, 0)
  width = array_ops.expand_dims(self.range(), 0)
  return low + width * unit_draws
def _mask_probs(probs, eos_token, finished):
  """Masks log probabilities.

  The result is that finished beams allocate all probability mass to eos and
  unfinished beams remain unchanged.

  Args:
    probs: Log probabilities of shape `[batch_size, beam_width, vocab_size]`
    eos_token: An int32 id corresponding to the EOS token to allocate
      probability to.
    finished: A boolean tensor of shape `[batch_size, beam_width]` that
      specifies which elements in the beam are finished already.

  Returns:
    A tensor of shape `[batch_size, beam_width, vocab_size]`, where unfinished
    beams stay unchanged and finished beams are replaced with a tensor with all
    probability on the EOS token.
  """
  vocab_size = array_ops.shape(probs)[2]

  # All finished examples are replaced with a vector that has all
  # probability on EOS
  finished_row = array_ops.one_hot(
      eos_token,
      vocab_size,
      dtype=probs.dtype,
      on_value=0.,
      off_value=probs.dtype.min)

  # Select with `where` rather than multiplying by 0/1 masks: a product
  # turns any -inf log probability into NaN (0 * -inf), even for entries that
  # are supposed to be discarded. Condition and both branches are expanded to
  # the full [batch_size, beam_width, vocab_size] shape.
  finished_probs = array_ops.tile(
      array_ops.reshape(finished_row, [1, 1, -1]),
      array_ops.concat([array_ops.shape(finished), [1]], 0))
  finished_mask = array_ops.tile(
      array_ops.expand_dims(finished, 2), [1, 1, vocab_size])

  return array_ops.where(finished_mask, finished_probs, probs)
def _build_multilabel_adjacency(sparse_labels):
  """Builds multilabel adjacency matrix.

  As of March 14th, 2017, there's no op for the dot product between
  two sparse tensors in TF. However, there is `sparse_minimum` op which is
  equivalent to an AND op between two sparse boolean tensors.
  This computes the dot product between two sparse boolean inputs.

  Args:
    sparse_labels: List of 1-D boolean sparse tensors.

  Returns:
    adjacency_matrix: 2-D dense `Tensor`.
  """
  num_pairs = len(sparse_labels)
  adjacency_matrix = array_ops.zeros([num_pairs, num_pairs])

  for row, row_labels in enumerate(sparse_labels):
    for col, col_labels in enumerate(sparse_labels):
      # Number of labels the two examples have in common.
      shared = sparse_ops.sparse_minimum(row_labels, col_labels)
      overlap = math_ops.to_float(sparse_ops.sparse_reduce_sum(shared))

      # Turn the scalar into a 1x1 matrix and zero-pad it into place so that
      # it lands at position (row, col) of a num_pairs x num_pairs matrix.
      overlap = array_ops.expand_dims(overlap, 0)
      overlap = array_ops.expand_dims(overlap, 1)
      paddings = [[row, num_pairs - row - 1], [col, num_pairs - col - 1]]
      placed = array_ops.pad(overlap, paddings, 'CONSTANT')

      adjacency_matrix = adjacency_matrix + placed

  return adjacency_matrix
def set_model(self, model):
  """Attaches the model, registers its summaries and opens the writer.

  Args:
    model: The model to monitor; its `layers` and their `weights` are read
      when histograms are enabled.
  """
  self.model = model
  self.sess = K.get_session()

  # Summaries are only registered once, and only when histograms are enabled.
  if self.histogram_freq and self.merged is None:
    for layer in self.model.layers:
      for weight in layer.weights:
        tf_summary.histogram(weight.name, weight)
        if not self.write_images:
          continue

        # Image summaries need a 4-D tensor; the squeezed weight gets a
        # leading and a trailing unit dimension below.
        w_img = array_ops.squeeze(weight)
        shape = w_img.get_shape()
        if len(shape) > 1 and shape[0] > shape[1]:
          w_img = array_ops.transpose(w_img)
        elif len(shape) == 1:
          w_img = array_ops.expand_dims(w_img, 0)
        w_img = array_ops.expand_dims(array_ops.expand_dims(w_img, 0), -1)
        tf_summary.image(weight.name, w_img)

      if hasattr(layer, 'output'):
        tf_summary.histogram('{}_out'.format(layer.name), layer.output)
  self.merged = tf_summary.merge_all()

  # The graph is only handed to the writer when graph writing is requested.
  if self.write_graph:
    writer_args = (self.log_dir, self.sess.graph)
  else:
    writer_args = (self.log_dir,)
  self.writer = tf_summary.FileWriter(*writer_args)
def testCrfSequenceScore(self):
  """Compares crf_sequence_score with the reference implementation."""
  transition_params = np.array(
      [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)

  # Test both the length-1 and regular cases.
  # Each case is (sequence_lengths, inputs, tag_indices).
  cases = [
      (np.array(3, dtype=np.int32),
       np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]],
                dtype=np.float32),
       np.array([1, 2, 1, 0], dtype=np.int32)),
      (np.array(1, dtype=np.int32),
       np.array([[4, 5, -3]], dtype=np.float32),
       np.array([1], dtype=np.int32)),
  ]

  for sequence_lengths, inputs, tag_indices in cases:
    with self.test_session() as sess:
      # The op works on batches, so add and later remove a batch dimension.
      score = crf.crf_sequence_score(
          inputs=array_ops.expand_dims(inputs, 0),
          tag_indices=array_ops.expand_dims(tag_indices, 0),
          sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
          transition_params=constant_op.constant(transition_params))
      score = array_ops.squeeze(score, [0])
      actual = sess.run(score)
      expected = self.calculateSequenceScore(
          inputs, transition_params, tag_indices, sequence_lengths)
      self.assertAllClose(actual, expected)
def _variance(self):
  """Builds the matrix from `alpha` scaled by alpha_sum * sqrt(1 + alpha_sum)."""
  scale = self.alpha_sum * math_ops.sqrt(1.0 + self.alpha_sum)
  scaled_alpha = self.alpha / scale

  # Off-diagonal entries: minus the outer product of the scaled alphas.
  as_column = array_ops.expand_dims(scaled_alpha, dim=-1)
  as_row = array_ops.expand_dims(scaled_alpha, dim=-2)
  off_diagonal = -math_ops.batch_matmul(as_column, as_row)

  # The diagonal is overwritten with its own expression.
  diagonal = scaled_alpha * (self.alpha_sum / scale - scaled_alpha)
  return array_ops.batch_matrix_set_diag(off_diagonal, diagonal)
def loop_fn(i):
  """Runs the model on entry `i` of every input and initial-state tensor."""

  def _entry_as_batch(tensor):
    # Pick entry i and restore a leading dimension of size 1.
    return array_ops.expand_dims(array_ops.gather(tensor, i), 0)

  loop_inputs = [_entry_as_batch(x) for x in inputs]
  state_parts = [_entry_as_batch(x) for x in init_state]
  loop_init_state = rnn_cell.LSTMStateTuple(*state_parts)
  return model_fn(loop_inputs, loop_init_state)
def __init__(self,
             num_rows,
             multiplier,
             is_non_singular=None,
             is_self_adjoint=None,
             is_positive_definite=None,
             assert_proper_shapes=False,
             name="LinearOperatorScaledIdentity"):
  """Initialize a `LinearOperatorScaledIdentity`.

  The operator is described by `num_rows`, the size of each identity matrix,
  and by `multiplier`, which fixes the `dtype`, the batch shape and the scale
  of each matrix.

  This operator is able to broadcast the leading (batch) dimensions.

  Args:
    num_rows:  Scalar non-negative integer `Tensor`.  Number of rows in the
      corresponding identity matrix.
    multiplier:  `Tensor` of shape `[B1,...,Bb]`, or `[]` (a scalar).
    is_non_singular:  Expect that this operator is non-singular.
    is_self_adjoint:  Expect that this operator is equal to its hermitian
      transpose.
    is_positive_definite:  Expect that this operator is positive definite.
    assert_proper_shapes:  Python `bool`.  If `False`, only perform static
      checks that initialization and method arguments have proper shape.
      If `True`, and static checks are inconclusive, add asserts to the graph.
    name: A name for this `LinearOperator`

  Raises:
    ValueError:  If `num_rows` is determined statically to be non-scalar, or
      negative.
  """
  self._assert_proper_shapes = assert_proper_shapes

  with ops.name_scope(name, values=[multiplier, num_rows]):
    self._multiplier = ops.convert_to_tensor(multiplier, name="multiplier")

    # The operator takes its dtype from the multiplier.
    super(LinearOperatorScaledIdentity, self).__init__(
        dtype=self._multiplier.dtype,
        is_non_singular=is_non_singular,
        is_self_adjoint=is_self_adjoint,
        is_positive_definite=is_positive_definite,
        name=name)

    # Cached variants of the multiplier.
    # Shape [B1,...Bb, 1, 1]
    multiplier_column = array_ops.expand_dims(self.multiplier, -1)
    self._multiplier_matrix = array_ops.expand_dims(multiplier_column, -1)
    self._multiplier_matrix_conj = math_ops.conj(self._multiplier_matrix)
    self._abs_multiplier = math_ops.abs(self.multiplier)

    # Row count as a shape tensor, statically (when known) and cast.
    self._num_rows = linear_operator_util.shape_tensor(
        num_rows, name="num_rows")
    self._num_rows_static = tensor_util.constant_value(self._num_rows)
    self._check_num_rows_possibly_add_asserts()
    self._num_rows_cast_to_dtype = math_ops.cast(self._num_rows, self.dtype)
    self._num_rows_cast_to_real_dtype = math_ops.cast(
        self._num_rows, self.dtype.real_dtype)
def per_example_maxent_loss(labels, weights, logits, num_classes, eps=1e-15):
  """Maximum entropy loss for multiclass problems.

  Maximum entropy is a generalization of logistic loss for the case when more
  than 2 classes are present.

  Args:
    labels: Rank 2 (N, 1) or Rank 1 (N) tensor of per-example labels.
    weights: Rank 2 (N, 1) tensor of per-example weights.
    logits: Rank 2 (N, K) tensor of per-example predictions, K - num of
    classes.
    num_classes: number of classes in classification task. Used to expand label
    indices into one-hot encodings.
    eps: tolerance, used as a minimum possible value.

  Returns:
    loss: A Rank 2 (N, 1) tensor of per-example maxent loss
    update_op: An update operation to update the loss's internal state.
  """
  labels = math_ops.to_int64(labels)
  # If labels are of rank 1, make them rank 2.
  if len(labels.get_shape()) != 2:
    labels = array_ops.expand_dims(labels, 1)

  # Labels are indices of classes, convert them to one hot encodings.
  one_hot_labels = array_ops.one_hot(indices=labels, depth=num_classes)
  labels = math_ops.reduce_sum(
      input_tensor=one_hot_labels, reduction_indices=[1])
  labels = math_ops.to_float(labels)

  # Calculate softmax probabilities for each class.
  exp_logits = math_ops.exp(logits)
  normalizers = math_ops.reduce_sum(exp_logits, 1, keepdims=True)
  softmax_predictions = math_ops.divide(exp_logits,
                                        math_ops.add(normalizers, eps))

  # Pull out the probabilities for real label.
  true_class_probs = math_ops.reduce_sum(labels * softmax_predictions, 1)

  # Add handling for values near 0 and 1.
  floor_values = array_ops.zeros_like(
      true_class_probs, dtype=logits.dtype) + eps
  ceiling_values = array_ops.ones_like(
      true_class_probs, dtype=logits.dtype) - eps

  # Take maximum(eps, pred)
  above_floor = (true_class_probs >= eps)
  true_class_probs = array_ops.where(above_floor, true_class_probs,
                                     floor_values)

  # Take minimum(1-eps, pred)
  below_ceiling = (true_class_probs <= 1 - eps)
  true_class_probs = array_ops.where(below_ceiling, true_class_probs,
                                     ceiling_values)

  unweighted_loss = array_ops.expand_dims(-math_ops.log(true_class_probs), 1)
  if weights is None:
    return unweighted_loss, control_flow_ops.no_op()
  return unweighted_loss * weights, control_flow_ops.no_op()
def __init__(self,
             cell,
             embedding,
             start_tokens,
             end_token,
             initial_state,
             beam_width,
             output_layer=None,
             emo_output_layer=None,
             emo_choice_layer=None,
             length_penalty_weight=0.0):
  """Initialize the ECMBeamSearchDecoder.

  Args:
    cell: An `RNNCell` instance.
    embedding: A callable that takes a vector tensor of `ids` (argmax ids),
      or the `params` argument for `embedding_lookup`.
    start_tokens: `int32` vector shaped `[batch_size]`, the start tokens.
    end_token: `int32` scalar, the token that marks end of decoding.
    initial_state: A (possibly nested tuple of...) tensors and TensorArrays.
    beam_width: Python integer, the number of beams.
    output_layer: (Optional) An instance of `tf.layers.Layer`, i.e.,
      `tf.layers.Dense`. Optional layer to apply to the RNN output prior
      to storing the result or sampling. Per the original comment, this is the
      projection onto the generic vocabulary.
    emo_output_layer: (Optional) Projection onto the emotion vocabulary. It is
      stored as given; no type check is performed here.
    emo_choice_layer: (Optional) Projection producing the probability of
      choosing an emotion word, a value in (0, 1). It is stored as given; no
      type check is performed here.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.

  Raises:
    TypeError: if `cell` is not an instance of `RNNCell`,
      or `output_layer` is not an instance of `tf.layers.Layer`.
    ValueError: If `start_tokens` is not a vector or
      `end_token` is not a scalar.
  """
  # pylint: disable=protected-access
  cell_is_rnn_like = rnn_cell_impl._like_rnncell(cell)
  # pylint: enable=protected-access
  if not cell_is_rnn_like:
    raise TypeError("cell must be an RNNCell, received: %s" % type(cell))
  if output_layer is not None:
    if not isinstance(output_layer, layers_base.Layer):
      raise TypeError("output_layer must be a Layer, received: %s" %
                      type(output_layer))

  self._cell = cell
  # Projection onto the generic vocabulary.
  self._output_layer = output_layer

  # ECM output layers.
  # Projection onto the emotion vocabulary.
  self._emo_output_layer = emo_output_layer
  # Projection for the probability of choosing an emotion word; outputs a
  # probability in (0, 1).
  self._emo_choice_layer = emo_choice_layer

  if callable(embedding):
    embedding_fn = embedding
  else:

    def embedding_fn(ids):
      return embedding_ops.embedding_lookup(embedding, ids)

  self._embedding_fn = embedding_fn

  self._start_tokens = ops.convert_to_tensor(
      start_tokens, dtype=dtypes.int32, name="start_tokens")
  start_tokens_rank = self._start_tokens.get_shape().ndims
  if start_tokens_rank != 1:
    raise ValueError("start_tokens must be a vector")

  self._end_token = ops.convert_to_tensor(
      end_token, dtype=dtypes.int32, name="end_token")
  end_token_rank = self._end_token.get_shape().ndims
  if end_token_rank != 0:
    raise ValueError("end_token must be a scalar")

  self._batch_size = array_ops.size(start_tokens)
  self._beam_width = beam_width
  self._length_penalty_weight = length_penalty_weight

  self._initial_cell_state = nest.map_structure(
      self._maybe_split_batch_beams, initial_state, self._cell.state_size)

  # Replicate every start token across the beam dimension before embedding.
  start_tokens_column = array_ops.expand_dims(self._start_tokens, 1)
  self._start_tokens = array_ops.tile(start_tokens_column,
                                      [1, self._beam_width])
  self._start_inputs = self._embedding_fn(self._start_tokens)

  # No beam starts out finished.
  finished_shape = [self._batch_size, self._beam_width]
  self._finished = array_ops.zeros(finished_shape, dtype=dtypes.bool)
def __init__(self,
             cell,
             embedding,
             start_tokens,
             end_token,
             initial_state,
             beam_width,
             output_layer=None,
             length_penalty_weight=0.0,
             reorder_tensor_arrays=True):
  """Initialize the BeamSearchDecoder.

  Args:
    cell: An `RNNCell` instance.
    embedding: A callable that takes a vector tensor of `ids` (argmax ids),
      or the `params` argument for `embedding_lookup`.
    start_tokens: `int32` vector shaped `[batch_size]`, the start tokens.
    end_token: `int32` scalar, the token that marks end of decoding.
    initial_state: A (possibly nested tuple of...) tensors and TensorArrays.
    beam_width: Python integer, the number of beams.
    output_layer: (Optional) An instance of `tf.layers.Layer`, i.e.,
      `tf.layers.Dense`. Optional layer to apply to the RNN output prior
      to storing the result or sampling.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.
    reorder_tensor_arrays: If `True`, `TensorArray`s' elements within the cell
      state will be reordered according to the beam search path. If the
      `TensorArray` can be reordered, the stacked form will be returned.
      Otherwise, the `TensorArray` will be returned as is. Set this flag to
      `False` if the cell state contains `TensorArray`s that are not amenable
      to reordering.

  Raises:
    TypeError: if `cell` is not an instance of `RNNCell`,
      or `output_layer` is not an instance of `tf.layers.Layer`.
    ValueError: If `start_tokens` is not a vector or
      `end_token` is not a scalar.
  """
  rnn_cell_impl.assert_like_rnncell("cell", cell)  # pylint: disable=protected-access
  if output_layer is not None:
    if not isinstance(output_layer, layers_base.Layer):
      raise TypeError(
          "output_layer must be a Layer, received: %s" % type(output_layer))

  self._cell = cell
  self._output_layer = output_layer
  self._reorder_tensor_arrays = reorder_tensor_arrays

  if callable(embedding):
    embedding_fn = embedding
  else:

    def embedding_fn(ids):
      return embedding_ops.embedding_lookup(embedding, ids)

  self._embedding_fn = embedding_fn

  self._start_tokens = ops.convert_to_tensor(
      start_tokens, dtype=dtypes.int32, name="start_tokens")
  start_tokens_rank = self._start_tokens.get_shape().ndims
  if start_tokens_rank != 1:
    raise ValueError("start_tokens must be a vector")

  self._end_token = ops.convert_to_tensor(
      end_token, dtype=dtypes.int32, name="end_token")
  end_token_rank = self._end_token.get_shape().ndims
  if end_token_rank != 0:
    raise ValueError("end_token must be a scalar")

  self._batch_size = array_ops.size(start_tokens)
  self._beam_width = beam_width
  self._length_penalty_weight = length_penalty_weight

  self._initial_cell_state = nest.map_structure(
      self._maybe_split_batch_beams, initial_state, self._cell.state_size)

  # Replicate every start token across the beam dimension before embedding.
  start_tokens_column = array_ops.expand_dims(self._start_tokens, 1)
  self._start_tokens = array_ops.tile(start_tokens_column,
                                      [1, self._beam_width])
  self._start_inputs = self._embedding_fn(self._start_tokens)

  # One-hot on index 0 with on_value=False / off_value=True: in every batch
  # entry only beam 0 starts unfinished, all other beams start finished.
  first_beam_index = array_ops.zeros([self._batch_size], dtype=dtypes.int32)
  self._finished = array_ops.one_hot(
      first_beam_index,
      depth=self._beam_width,
      on_value=False,
      off_value=True,
      dtype=dtypes.bool)
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight):
  """Performs a single step of Beam Search Decoding.

  Args:
    time: Beam search time step, should start at 0. At time 0 we assume
      that all beams are equal and consider only the first beam for
      continuations.
    logits: Logits at the current time step. A tensor of shape
      `[batch_size, beam_width, vocab_size]`
    next_cell_state: The next state from the cell, e.g. an instance of
      AttentionWrapperState if the cell is attentional.
    beam_state: Current state of the beam search.
      An instance of `BeamSearchDecoderState`.
    batch_size: The batch size for this input.
    beam_width: Python int.  The size of the beams.
    end_token: The int32 end token.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.

  Returns:
    A pair `(output, next_state)`: a `BeamSearchDecoderOutput` holding the
    selected scores, predicted ids and parent beam ids, and the new
    `BeamSearchDecoderState`.
  """
  static_batch = tensor_util.constant_value(batch_size)

  # Lengths and finished flags of the hypotheses entering this step.
  lengths_in = beam_state.lengths
  finished_in = beam_state.finished

  # Total log probs of every (beam, next word) continuation.
  # Final Shape: [batch_size, beam_width, vocab_size]
  masked_step_log_probs = _mask_probs(
      nn_ops.log_softmax(logits), end_token, finished_in)
  carried_log_probs = array_ops.expand_dims(beam_state.log_probs, 2)
  candidate_log_probs = carried_log_probs + masked_step_log_probs

  # Length of every continuation: add one everywhere except on the end_token
  # entry, and add nothing at all to beams that were already finished.
  vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1]
  end_token_grid = array_ops.fill([batch_size, beam_width], end_token)
  length_increment = array_ops.one_hot(
      indices=end_token_grid,
      depth=vocab_size,
      on_value=np.int64(0),
      off_value=np.int64(1),
      dtype=dtypes.int64)
  still_running = math_ops.to_int64(math_ops.logical_not(finished_in))
  length_increment = length_increment * array_ops.expand_dims(still_running, 2)
  candidate_lengths = (
      length_increment + array_ops.expand_dims(lengths_in, 2))

  # Score every continuation.
  scores = _get_scores(
      log_probs=candidate_log_probs,
      sequence_lengths=candidate_lengths,
      length_penalty_weight=length_penalty_weight)

  # The converted value is not read again in this function; the conversion is
  # kept so the same ops are created as before.
  time = ops.convert_to_tensor(time, name="time")
  # During the first time step we only consider the initial beam
  flat_scores = array_ops.reshape(scores, [batch_size, -1])

  # Keep the `beam_width` best continuations per batch entry.
  top_k_size = ops.convert_to_tensor(
      beam_width, dtype=dtypes.int32, name="beam_width")
  next_beam_scores, flat_indices = nn_ops.top_k(flat_scores, k=top_k_size)

  next_beam_scores.set_shape([static_batch, beam_width])
  flat_indices.set_shape([static_batch, beam_width])

  # Pick out the probs, beam_ids, and states according to the chosen
  # predictions.
  next_beam_probs = _tensor_gather_helper(
      gather_indices=flat_indices,
      gather_from=candidate_log_probs,
      batch_size=batch_size,
      range_size=beam_width * vocab_size,
      gather_shape=[-1],
      name="next_beam_probs")
  # Note: just doing the following
  #   math_ops.to_int32(word_indices % vocab_size,
  #       name="next_beam_word_ids")
  # would be a lot cleaner but for reasons unclear, that hides the results of
  # the op which prevents capturing it with tfdbg debug ops.
  raw_word_ids = math_ops.mod(
      flat_indices, vocab_size, name="next_beam_word_ids")
  next_word_ids = math_ops.to_int32(raw_word_ids)
  next_beam_ids = math_ops.to_int32(
      flat_indices / vocab_size, name="next_beam_parent_ids")

  # Finished flags of the parent beams of the selected continuations.
  parent_finished = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=finished_in,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  emitted_end_token = math_ops.equal(next_word_ids, end_token)
  next_finished = math_ops.logical_or(
      parent_finished, emitted_end_token, name="next_beam_finished")

  # Calculate the length of the next predictions.
  # 1. Finished beams remain unchanged.
  # 2. Beams that are now finished (EOS predicted) have their length
  #    increased by 1.
  # 3. Beams that are not yet finished have their length increased by 1.
  selected_increment = math_ops.to_int64(math_ops.logical_not(parent_finished))
  next_prediction_len = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=beam_state.lengths,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  next_prediction_len = next_prediction_len + selected_increment

  # Pick out the cell_states according to the next_beam_ids. We use a
  # different gather_shape here because the cell_state tensors, i.e.
  # the tensors that would be gathered from, all have dimension
  # greater than two and we need to preserve those dimensions.
  def _gather_cell_state(gather_from):
    return _maybe_tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=gather_from,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[batch_size * beam_width, -1])

  next_cell_state = nest.map_structure(_gather_cell_state, next_cell_state)

  next_state = BeamSearchDecoderState(
      cell_state=next_cell_state,
      log_probs=next_beam_probs,
      lengths=next_prediction_len,
      finished=next_finished)

  output = BeamSearchDecoderOutput(
      scores=next_beam_scores,
      predicted_ids=next_word_ids,
      parent_ids=next_beam_ids)

  return output, next_state
def multiply_fisher_factor_replicated_one_hot(self, index):
  """Returns the `sqrt(p * (1 - p))` column for one class, padded with zeros.

  Args:
    index: A length-1 sequence holding the class (column) index to use.

  Returns:
    The result of `insert_slice_in_zeros` applied to the
    `sqrt(p * (1 - p))` column of the selected class, along axis 1, with
    total size `self._logits.shape[1]` and the column placed at `index[0]`.
  """
  assert len(index) == 1, "Length of index was {}".format(len(index))
  class_id = index[0]
  # Probabilities of the selected class, kept as a column.
  class_probs = array_ops.expand_dims(self._probs[:, class_id], -1)
  factor_column = math_ops.sqrt(class_probs * (1 - class_probs))
  num_classes = int(self._logits.shape[1])
  return insert_slice_in_zeros(factor_column, 1, num_classes, class_id)
def check_dense_labels_match_logits_and_reshape(labels, logits,
                                                expected_labels_dimension):
  """Checks labels shape matches logits, and reshapes if needed.

  Consider logits of shape [D0, D1, ... DN, logits_dimension]. Then labels
  shape must be [D0, D1, ... DN, expected_labels_dimension].
  If expected_labels_dimension=1, labels could be [D0, D1, ... DN] and this
  method reshapes them to [D0, D1, ... DN, 1].

  Args:
    labels: labels Tensor.
    logits: logits Tensor.
    expected_labels_dimension: Integer.

  Returns:
    Validated and reshaped labels Tensor.

  Raises:
    ValueError: If labels is `None` or a SparseTensor.
    ValueError: If labels shape is statically defined and fails validation.
    OpError: If labels shape is not statically defined and fails validation.
  """
  if labels is None:
    raise ValueError(_LABEL_NONE_ERR_MSG)
  with ops.name_scope('labels', values=(labels, logits)) as scope:
    labels = sparse_tensor.convert_to_tensor_or_sparse_tensor(labels)
    if isinstance(labels, sparse_tensor.SparseTensor):
      # The message template takes the expected dimension three times.
      raise ValueError(
          _SPARSE_LABEL_ERR_MSG.format(*([expected_labels_dimension] * 3)))

    # Eager mode: shapes are concrete, so validate them in Python.
    if context.executing_eagerly():
      labels_rank = labels._rank()  # pylint: disable=protected-access
      logits_rank = logits._rank()  # pylint: disable=protected-access
      ranks_known = labels_rank is not None and logits_rank is not None
      if ranks_known and labels_rank == logits_rank - 1:
        # Labels lack the trailing dimension; add it.
        labels = array_ops.expand_dims(labels, -1)
        labels_rank += 1

      labels_shape = labels._shape_tuple()  # pylint: disable=protected-access
      if labels_rank < 2:
        raise ValueError(
            'labels must have rank at least 2. Received rank {}, '
            'shape {}'.format(labels_rank, labels_shape))

      actual_last_dim = labels_shape[-1]
      if actual_last_dim != expected_labels_dimension:
        raise ValueError(
            _MISMATCHED_LABEL_DIM_ERR_MSG.format(expected_labels_dimension,
                                                 actual_last_dim))

      logits_shape = logits._shape_tuple()  # pylint: disable=protected-access
      expected_labels_shape = logits_shape[:-1] + (expected_labels_dimension,)
      if expected_labels_shape != labels_shape:
        shape_err_msg = _LABEL_SHAPE_ERR_MSG.format(expected_labels_dimension)
        raise ValueError(
            '{}, expected_labels_shape: {}. labels_shape: {}.'.format(
                shape_err_msg, expected_labels_shape, labels_shape))
      return labels

    # Graph mode: check what is statically known now, and attach assertion ops
    # for the rest.
    if (labels.shape.ndims is not None and logits.shape.ndims is not None and
        labels.shape.ndims == logits.shape.ndims - 1):
      labels = array_ops.expand_dims(labels, -1)

    assert_rank = check_ops.assert_rank_at_least(
        labels,
        2,
        message=_LABEL_SHAPE_ERR_MSG.format(expected_labels_dimension))
    with ops.control_dependencies([assert_rank]):
      static_shape = labels.shape
      if static_shape.ndims is not None:
        final_dim = static_shape[-1]
        if (final_dim is not None) and (final_dim != expected_labels_dimension):
          raise ValueError(
              _MISMATCHED_LABEL_DIM_ERR_MSG.format(expected_labels_dimension,
                                                   final_dim))

      logits_shape = array_ops.shape(logits)
      expected_labels_shape = array_ops.concat(
          [logits_shape[:-1], [expected_labels_dimension]], axis=0)
      labels_shape = array_ops.shape(labels)
      assert_dimension = check_ops.assert_equal(
          expected_labels_shape,
          labels_shape,
          message=_LABEL_SHAPE_ERR_MSG.format(expected_labels_dimension),
          data=[
              'expected_labels_shape: ', expected_labels_shape,
              'labels_shape: ', labels_shape
          ])
      with ops.control_dependencies([assert_dimension]):
        return array_ops.identity(labels, name=scope)
def ragged_reduce_aggregate(reduce_op,
                            unsorted_segment_op,
                            rt_input,
                            axis,
                            keepdims,
                            separator=None,
                            name=None):
  """Aggregates across axes of a RaggedTensor using the given `Tensor` ops.

  Reduces `rt_input` along the dimensions given in `axis`.  The rank of the
  tensor is reduced by 1 for each entry in `axis`.  If `axis` is not specified,
  then all dimensions are reduced, and a scalar value is returned.

  This op assumes that `reduce_op` and `unsorted_segment_op` are associative;
  if not, then reducing multiple axes will return incorrect results.  (In
  particular, reducing multiple axes is currently implemented by reducing the
  axes one at a time.)

  Args:
    reduce_op: The tensorflow `op` that should be used to reduce values in
      uniform dimensions.  Must have the same signature and basic behavior as
      `reduce_sum`, `reduce_max`, etc.
    unsorted_segment_op: The tensorflow `op` that should be used to combine
      values in ragged dimensions.  Must have the same signature and basic
      behavior as `unsorted_segment_sum`, `unsorted_segment_max`, etc.
    rt_input: A `Tensor` or `RaggedTensor` containing the values to be reduced.
    axis: The axis or axes to reduce.  May be `None` (to reduce all axes), an
      `int` (to reduce a single axis), a `list` or `tuple` of `int` (to reduce
      a given set of axes), or a `Tensor` with a constant value.  Must be in
      the range `[0, rt_input.rank)`.
    keepdims: If true, retains reduced dimensions with length 1.
    separator: An optional string. Defaults to None. The separator to use when
      joining. The separator must not be set for non-string data types. (i.e.
      if separator is not None then it uses string ops)
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor` containing the reduced values.  The returned tensor
    has the same dtype as `rt_input`, and its shape is given by removing the
    dimensions specified in `axis` from `rt_input.shape`.  The `ragged_rank`
    of the returned tensor is given by subtracting any ragged dimensions
    specified in `axis` from `rt_input.ragged_rank`.

  Raises:
    ValueError: If `axis` contains a `Tensor` whose value is not constant.
  """
  # Dense input: delegate straight to the uniform reduction op.
  if not ragged_tensor.is_ragged(rt_input):
    if separator is None:
      return reduce_op(rt_input, axis, keepdims=keepdims, name=name)
    # When separator is not None, We infer that dtype is string and
    # reduce_join will be called.
    return reduce_op(
        rt_input, axis, keepdims=keepdims, name=name, separator=separator)

  # A tensor-valued axis must be resolvable to a constant right now.
  if isinstance(axis, ops.Tensor):
    axis = tensor_util.constant_value(axis)
    if axis is None:
      raise ValueError('axis must be known at graph construction time.')
    if isinstance(axis, np.ndarray):
      axis = axis.tolist()

  # When reducing all axes, just ignore splits & reduce the inner values.
  if axis is None:
    result = reduce_op(rt_input.flat_values, None, keepdims=keepdims, name=name)
    if keepdims:
      # Expand the result to the input number of dimensions.
      for _ in rt_input.shape[1:]:
        result = array_ops.expand_dims(result, axis=0)
    return result

  with ops.name_scope(name, 'RaggedReduce', [rt_input, axis]):
    if isinstance(axis, (tuple, list)):
      if not axis:
        return rt_input
      if len(axis) == 1:
        axis = axis[0]
      else:
        # When reducing multiple axes, as we reduce one at a time (see below),
        # the negative axis has to be converted to positive at the first run
        # as the sort with negative axis will have different orders.
        # See GitHub issue 27497.
        input_rank = rt_input.shape.ndims
        positive_axes = [
            array_ops.get_positive_axis(a, input_rank, 'axis[%s]' % i,
                                        'rank(input_tensor)')
            for i, a in enumerate(axis)
        ]
        # When reducing multiple axes, just reduce one at a time.  This is less
        # efficient, and only works for associative ops.  (In particular, it
        # does not work for reduce_mean.)  However, reducing multiple axes at
        # once will probably require a nontrivial c++ op.
        ordered_axes = sorted(positive_axes)
        # Reduce the largest axis first, then recurse on the remaining ones.
        inner_reduced = ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                                rt_input, ordered_axes[-1],
                                                keepdims, separator)
        return ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                       inner_reduced, ordered_axes[:-1],
                                       keepdims, separator)

    rt_input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        rt_input, name='rt_input')

    axis = array_ops.get_positive_axis(
        axis, rt_input.shape.ndims, ndims_name='rank(input_tensor)')

    if axis != 0 and axis != 1:
      # out[i_0, ..., i_[axis-1], i_axis+1], ..., i_N] =
      #     sum_{j} rt_input [i_0, ..., i_[axis-1], j, i_axis+1], ..., i_N]
      # Recurse into the values with the axis shifted down by one.
      return rt_input.with_values(
          ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                  rt_input.values, axis - 1, keepdims,
                                  separator))

    if axis == 0:
      # out[i_1, i_2, ..., i_N] = sum_{j} rt_input[j, i_1, i_2, ..., i_N]
      splits = rt_input.row_splits
      row_lengths = splits[1:] - splits[:-1]
      num_segments = math_ops.maximum(math_ops.reduce_max(row_lengths), 0)
      # NOTE(review): `range` must resolve to a ragged range op visible in this
      # module (the Python builtin returns no `.values`) -- confirm.
      segment_ids = range(row_lengths).values
    else:
      # out[i_0, i_1, i_2, ..., i_N] = sum_{j} rt_input[i_0, j, i_2, ..., i_N]
      num_segments = array_ops.shape(rt_input.row_splits)[0] - 1
      segment_ids = segment_id_ops.row_splits_to_segment_ids(
          rt_input.row_splits)

    result = _ragged_segment_aggregate(unsorted_segment_op, rt_input.values,
                                       segment_ids, num_segments, separator)
    if keepdims:
      result = array_ops.expand_dims(result, axis=axis)
    return result
def get_weights_and_check_match_logits(features,
                                       weight_column,
                                       logits,
                                       allow_per_logit_weights=False):
  """Fetches weights from features and checks that the shape matches logits.

  Consider logits of shape [D0, D1, ... DN, logits_dimension]. Weights shape
  can be either:
  * [D0, D1, ... DN, logits_dimension] if `allow_per_logit_weights=True`.
  * [D0, D1, ... DN, 1]
  * [D0, D1, ... DN]: In this case, weights is reshaped into
    [D0, D1, ... DN, 1] to work with weight broadcasting rules.

  Args:
    features: The features dict that contains weights.
    weight_column: The weight column. If not given, this method returns 1.
    logits: logits Tensor.
    allow_per_logit_weights: Boolean. Whether we allow weights along the logits
      dimension, namely shape `[D0, D1, ... DN, logits_dimension]`.

  Returns:
    Validated and reshaped weights Tensor, or the Python float `1.` when
    `weight_column` is `None`.

  Raises:
    TypeError: If `weight_column` is neither a string nor a numeric column.
    ValueError: If the weights `Tensor` cannot be cast into float, or (in
      eager mode) if its shape does not match `logits`.
  """
  if allow_per_logit_weights:
    err_msg = (
        'weights shape must be [D0, D1, ... DN], [D0, D1, ... DN, 1] or '
        '[D0, D1, ... DN, logits_dimension]')
  else:
    err_msg = (
        'weights shape must be [D0, D1, ... DN] or [D0, D1, ... DN, 1]')

  scope_values = tuple(six.itervalues(features)) + (logits,)
  with ops.name_scope('weights', values=scope_values) as scope:
    # Fetch the weights.
    if weight_column is None:
      return 1.
    # TODO(b/117839674): update feature_column
    if isinstance(weight_column, six.string_types):
      weight_column = feature_column_lib.numeric_column(
          key=weight_column, shape=(1,))
    numeric_column_types = (feature_column_lib.NumericColumn, _NumericColumn)
    if not isinstance(weight_column, numeric_column_types):
      raise TypeError(
          'Weight column must be either a string or NumericColumn.'
          ' Given type: {}.'.format(type(weight_column)))
    weights = weight_column._get_dense_tensor(  # pylint: disable=protected-access
        _LazyBuilder(features))
    if not weights.dtype.is_floating and not weights.dtype.is_integer:
      raise ValueError('Weight column should be castable to float. '
                       'Given dtype: {}'.format(weights.dtype))
    weights = math_ops.to_float(weights, name='weights')

    # Validate the weights shape.
    # Eager mode: shapes are concrete, so compare them in Python.
    if context.executing_eagerly():
      weights_shape = weights._shape_tuple()  # pylint: disable=protected-access
      logits_shape = logits._shape_tuple()  # pylint: disable=protected-access
      weights_rank = weights._rank()  # pylint: disable=protected-access
      logits_rank = logits._rank()  # pylint: disable=protected-access

      def _shape_mismatch_error():
        return ValueError('{}, logits_shape: {}. weights_shape: {}.'.format(
            err_msg, logits_shape, weights_shape))

      ranks_known = weights_rank is not None and logits_rank is not None
      if ranks_known and weights_rank == logits_rank - 1:
        # Weights lack the trailing dimension; validate, then add it.
        if logits_shape[:-1] != weights_shape:
          raise _shape_mismatch_error()
        return array_ops.expand_dims(weights, -1, name=scope)

      supported_weights_shape = logits_shape[:-1] + (1,)
      shape_is_valid = (
          supported_weights_shape == weights_shape or
          (allow_per_logit_weights and logits_shape == weights_shape))
      if not shape_is_valid:
        raise _shape_mismatch_error()
      return weights

    # Graph mode: attach assertion ops that run with the returned tensor.
    weights_shape = array_ops.shape(weights, name='weights_shape')
    logits_shape = array_ops.shape(logits, name='logits_shape')
    shape_report = [
        'logits_shape: ', logits_shape, 'weights_shape: ', weights_shape
    ]
    if (weights.shape.ndims is not None and logits.shape.ndims is not None and
        weights.shape.ndims == logits.shape.ndims - 1):
      assert_dimension = check_ops.assert_equal(
          logits_shape[:-1],
          weights_shape,
          message=err_msg,
          data=shape_report)
      with ops.control_dependencies([assert_dimension]):
        return array_ops.expand_dims(weights, -1, name=scope)

    supported_weights_shape = array_ops.concat([logits_shape[:-1], [1]],
                                               axis=0)
    if allow_per_logit_weights:
      # Either the full logits shape or the [..., 1] shape is acceptable.
      matches_logits = math_ops.reduce_all(
          math_ops.equal(logits_shape, weights_shape))
      matches_supported = math_ops.reduce_all(
          math_ops.equal(supported_weights_shape, weights_shape))
      condition = math_ops.reduce_any([matches_logits, matches_supported])
      assert_dimension = control_flow_ops.Assert(
          condition=condition, data=[err_msg] + shape_report)
    else:
      assert_dimension = check_ops.assert_equal(
          supported_weights_shape,
          weights_shape,
          message=err_msg,
          data=shape_report)
    with ops.control_dependencies([assert_dimension]):
      return array_ops.identity(weights, name=scope)
def _sampled_scattered_embedding_lookup(params,
                                        values,
                                        dimension=None,
                                        sampled_candidates=None,
                                        hash_key=None,
                                        name=None):
  """Looks up embeddings using parameter hashing for each value in `values`.

  This method looks up selected embedding dimensions if `sampled_candidates` is
  given, otherwise looks up all dimensions.

  The i-th embedding component of a value v in `values` is found by retrieving
  the weight whose index is a fingerprint of the pair (v,i).
  The concept is explored as "feature hashing" for model compression in this
  paper: http://arxiv.org/pdf/1504.04788.pdf

  Feature hashing has the pleasant effect of allowing us to compute an
  embedding without needing a pre-determined vocabulary, relieving some amount
  of process complexity. It also allows for us to maintain embeddings for
  possibly trillions of features with a fixed amount of memory.

  Note that this is superior to out-of-vocabulary shared "hash buckets" in that
  the embedding is extremely likely to be unique for each token as opposed to
  being shared across probably-colliding tokens. The price is that we must
  compute a hash once for each scalar in the token's embedding as opposed to
  once per token.

  If `params` is a list, it represents a partition of the embedding parameters.
  Each tensor in the list should have the same length, except for the first
  ones which may have an additional element. For instance 10 parameters can be
  partitioned in 4 tensors with length `[3, 3, 2, 2]`.

  Args:
    params: A `Tensor`, `list` of `Tensors`, or `PartitionedVariable`.
      Each tensor must be of rank 1 with fully-defined shape.
    values: `Tensor` of values to be embedded with shape `[d0, ..., dn]`.
    dimension: Embedding dimension. The user must specify either `dimension` or
      `sampled_candidates`.
    sampled_candidates: An optional `Tensor` of slice indices to keep along the
      final dimension with shape `[d0, ..., dn, N]`. If given, `dimension` is
      ignored. If `None`, looks up all candidates.
    hash_key: Specify the hash_key that will be used by the `FingerprintCat64`
      function to combine the crosses fingerprints on SparseFeatureCrossOp
      (optional).
    name: An optional name for this op.

  Returns:
    A `Tensor` with shape `[d0, ..., dn, dimension]`.
    If `sampled_candidates` is given, the output shape is `[d0, ..., dn, N]`

  Raises:
    ValueError: if dimension is not positive or the partition size is invalid.
  """
  # Normalize `params` to a plain list of partitions.
  if isinstance(params, variables.PartitionedVariable):
    params = list(params)
  if not isinstance(params, list):
    params = [params]

  with ops.name_scope(name, "scattered_embedding_lookup",
                      params + [dimension, values]):
    # Flatten the values
    values_shape = array_ops.shape(values)
    values = array_ops.reshape(values, [-1, 1])

    if sampled_candidates is not None:
      # The embedding dimension is the size of the candidates' last axis.
      dimension = array_ops.shape(sampled_candidates)[math_ops.sub(
          array_ops.rank(sampled_candidates), 1)]
      sampled_candidates_shape = array_ops.shape(sampled_candidates)
      dimension_tensor = array_ops.reshape(dimension, shape=[1,])
      expected_shape = array_ops.concat_v2(
          [values_shape, dimension_tensor], 0)
      shapes_agree = math_ops.reduce_all(
          math_ops.equal(sampled_candidates_shape, expected_shape))
      shape_assertion = control_flow_ops.Assert(shapes_agree, [
          "The shape of sampled_candidates: ", sampled_candidates_shape,
          " does not match the shape of values: ", values_shape
      ])
      with ops.control_dependencies([shape_assertion]):
        # Flatten sampled_candidates, same way as values are flattened.
        sampled_candidates = array_ops.reshape(sampled_candidates,
                                               [-1, dimension])
    else:
      if dimension is None:
        raise ValueError(
            "You must specify either dimension or sampled_candidates.")
      if dimension <= 0:
        raise ValueError("Dimension must be >0. Given is %d" % dimension)
      # Look up every component: one row [0, ..., dimension - 1] per value.
      all_components = array_ops.expand_dims(math_ops.range(0, dimension), 0)
      sampled_candidates = array_ops.tile(all_components,
                                          array_ops.shape(values))

    num_partitions = len(params)
    partition_sizes = []
    for partition in params:
      partition_shape = partition.get_shape()
      partition_shape.assert_has_rank(1)
      partition_shape.assert_is_fully_defined()
      partition_sizes.append(partition_shape[0].value)
    num_params = sum(partition_sizes)  # Total number of parameters.

    # Assert the size of each partition.
    for p, actual_size in enumerate(partition_sizes):
      expected_size = (num_params - p - 1) // num_partitions + 1
      if actual_size != expected_size:
        raise ValueError("Tensor %d in params has size %d, expected %d." %
                         (p, actual_size, expected_size))

    # With two values v1 and v2 and 3 dimensions, we will cross
    # [[0, 1, 2], [0, 1, 2]] with [[v1], [v2]].
    tensors_to_cross = [sampled_candidates, values]
    hashed_ids = sparse_feature_cross_op.sparse_feature_cross(
        tensors_to_cross,
        hashed_output=True,
        num_buckets=num_params,
        hash_key=hash_key)
    dense_ids = sparse_ops.sparse_tensor_to_dense(hashed_ids)

    # No need to validate the indices since we have checked the params
    # dimensions and we know the largest id.
    flat_embeddings = embedding_ops.embedding_lookup(
        params, dense_ids, partition_strategy="div", validate_indices=False)

    # Restore the leading dimensions of `values`.
    output_shape = array_ops.concat_v2([values_shape, [dimension]], 0)
    return array_ops.reshape(flat_embeddings, output_shape)
def _tile(feature):
  """Repeats `feature` `num_unroll` times along a new axis 1.

  An axis is inserted at position 1 and tiled `num_unroll` times; the original
  trailing axis (now axis 2) is then squeezed away, so it must have size 1.

  Args:
    feature: A rank-2 tensor whose last dimension has size 1.

  Returns:
    A tensor of shape `[feature.shape[0], num_unroll]`.
  """
  with_unroll_axis = array_ops.expand_dims(feature, 1)
  repeated = array_ops.tile(with_unroll_axis, [1, num_unroll, 1])
  return array_ops.squeeze(repeated, axis=2)
def unit(hidden_state):
  """Computes the output word distribution for one decoder step.

  The result mixes three distributions over the output vocabulary:

    final = p_gens * generate
            + (1 - p_gens) * (p_db * kb_copy + (1 - p_db) * context_copy)

  where `generate` is a softmax over the generation vocabulary (zero-padded to
  `out_vocab_size`), `context_copy` scatters the encoder attention weights
  onto the ids in `self.inp_utt`, and `kb_copy` scatters the KB attention
  weights onto the ids in `self.kb`.

  Args:
    hidden_state: Rank-2 decoder state (it is expanded once and tiled with
      three multiples) -- presumably `[batch_size, hidden_size]`; TODO confirm.

  Returns:
    A `[batch_size, out_vocab_size]` tensor with the mixed distribution.
  """
  # ---- Attention over the encoder states ----
  # Broadcast the decoder state over axis 1 of the encoder states.
  hidden_state_expanded_attn = tf.tile(
      array_ops.expand_dims(hidden_state, 1),
      [1, tf.shape(self.encoder_states)[1], 1])
  attn_rep = tf.concat(
      [self.encoder_states, hidden_state_expanded_attn], axis=2)
  # Two tanh layers (W1, W2) followed by a projection (w) to one logit per
  # position.
  attn_rep = tf.nn.tanh(
      tf.einsum('ijk,kl->ijl',
                tf.nn.tanh(tf.einsum("ijk,kl->ijl", attn_rep, self.W1)),
                self.W2))
  u_i = tf.squeeze(tf.einsum('ijk,kl->ijl', attn_rep, self.w), 2)
  inp_len_mask = tf.sequence_mask(
      self.inp_len, tf.shape(self.inp_utt)[2], dtype=tf.float32)
  attn_mask = tf.reshape(inp_len_mask, shape=[self.batch_size, -1])
  # Masked softmax, evaluated in float64: exponentiate, zero out the masked
  # positions, then divide by the per-row sum.
  exp_u_i_masked = tf.multiply(
      tf.cast(attn_mask, dtype=tf.float64),
      tf.exp(tf.cast(u_i, dtype=tf.float64)))
  a = tf.cast(
      tf.einsum('i,ij->ij', tf.pow(tf.reduce_sum(exp_u_i_masked, 1), -1),
                exp_u_i_masked),
      dtype=tf.float32)
  # Attention-weighted sum of the encoder states.
  inp_attn = tf.reduce_sum(
      tf.einsum('ij,ijk->ijk', a, self.encoder_states), 1)

  # ---- Generation distribution ----
  generate_dist = tf.nn.softmax(
      math_ops.matmul(tf.concat([hidden_state, inp_attn], axis=1), self.U) +
      self.b1)
  # Pad with zeros so it spans the full output vocabulary.
  extra_zeros = tf.zeros(
      [self.batch_size, self.out_vocab_size - self.generate_size])
  extended_generate_dist = tf.concat([generate_dist, extra_zeros], axis=1)

  # ---- Attention over KB results (beta) ----
  hidden_state_expanded_result = tf.tile(
      array_ops.expand_dims(hidden_state, 1), [1, tf.shape(self.kb)[1], 1])
  inp_attn_expanded_result = tf.tile(
      array_ops.expand_dims(inp_attn, 1), [1, tf.shape(self.kb)[1], 1])
  result_attn_rep = tf.concat([
      self.result_rep, hidden_state_expanded_result, inp_attn_expanded_result
  ],
                              axis=2)
  result_attn_rep = tf.nn.tanh(
      tf.einsum(
          "ijk,kl->ijl",
          tf.nn.tanh(tf.einsum("ijk,kl->ijl", result_attn_rep, self.W_1)),
          self.W_12))
  beta_logits = tf.squeeze(
      tf.einsum('ijk,kl->ijl', result_attn_rep, self.r_1), 2)
  beta_masked = tf.multiply(
      tf.cast(self.kb_mask, dtype=tf.float64),
      tf.exp(tf.cast(beta_logits, dtype=tf.float64)))
  beta = tf.cast(
      tf.einsum('i,ij->ij', tf.pow(tf.reduce_sum(beta_masked, 1), -1),
                beta_masked),
      dtype=tf.float32)

  # ---- Attention over the keys of each KB result (gamma) ----
  hidden_state_expanded_keys = tf.tile(
      array_ops.expand_dims(array_ops.expand_dims(hidden_state, 1), 1),
      [1, tf.shape(self.kb)[1], tf.shape(self.kb)[2], 1])
  inp_attn_expanded_keys = tf.tile(
      array_ops.expand_dims(array_ops.expand_dims(inp_attn, 1), 1),
      [1, tf.shape(self.kb)[1], tf.shape(self.kb)[2], 1])
  result_key_rep = tf.concat([
      self.keys_emb, hidden_state_expanded_keys, inp_attn_expanded_keys
  ],
                             axis=3)
  result_key_rep = tf.nn.tanh(
      tf.einsum(
          'ijkl,lm->ijkm',
          tf.nn.tanh(tf.einsum('ijkl,lm->ijkm', result_key_rep, self.W_2)),
          self.W_22))
  gamma_logits = tf.squeeze(
      tf.einsum('ijkl,lm->ijkm', result_key_rep, self.r_2), 3)
  gamma_masked = tf.multiply(
      tf.cast(self.keys_mask, dtype=tf.float64),
      tf.exp(tf.cast(gamma_logits, dtype=tf.float64)))
  # Key-level weights normalized within each result, scaled by that result's
  # beta weight.
  gamma = tf.einsum(
      'ij,ijk->ijk', beta,
      tf.cast(
          tf.einsum('ij,ijk->ijk', tf.pow(tf.reduce_sum(gamma_masked, 2), -1),
                    gamma_masked),
          dtype=tf.float32))

  # ---- Copy distribution over the input utterance ----
  # Scatter each attention weight onto the vocabulary id found at that input
  # position; scatter_nd sums the weights of repeated ids.
  batch_nums_context = array_ops.expand_dims(
      tf.range(0, limit=self.batch_size, dtype=tf.int64), 1)
  batch_nums_tiled_context = tf.tile(
      batch_nums_context, [1, tf.shape(self.encoder_states)[1]])
  flat_inp_utt = tf.reshape(self.inp_utt, shape=[self.batch_size, -1])
  indices_context = tf.stack(
      [batch_nums_tiled_context, flat_inp_utt], axis=2)
  shape = [self.batch_size, self.out_vocab_size]
  context_copy_dist = tf.scatter_nd(indices_context, a, shape)

  # ---- Gates ----
  db_rep = tf.reduce_sum(
      tf.einsum('ij,ijk->ijk', beta, self.result_rep), 1)
  gate_input = [hidden_state, inp_attn, db_rep]

  # p_db: gate between copying from the KB and copying from the context.
  p_db = tf.nn.sigmoid(
      tf.matmul(tf.concat(gate_input, axis=1), self.W4) + self.b3)
  p_db = tf.tile(p_db, [1, self.out_vocab_size])
  # Plain elementwise arithmetic; the previous tf.map_fn built a sequential
  # loop over the batch to compute the same values.
  one_minus_pdb = 1 - p_db

  # p_gens: gate between generating and copying.
  p_gens = tf.nn.sigmoid(
      tf.matmul(tf.concat(gate_input, axis=1), self.W3) + self.b2)
  p_gens = tf.tile(p_gens, [1, self.out_vocab_size])
  one_minus_pgens = 1 - p_gens

  # ---- Copy distribution over the KB ----
  batch_nums = array_ops.expand_dims(
      tf.range(0, limit=self.batch_size, dtype=tf.int64), 1)
  kb_ids = tf.reshape(self.kb, shape=[self.batch_size, -1])
  num_kb_ids = tf.shape(kb_ids)[1]
  batch_nums_tiled = tf.tile(batch_nums, [1, num_kb_ids])
  indices = tf.stack([batch_nums_tiled, kb_ids], axis=2)
  updates = tf.reshape(gamma, shape=[self.batch_size, -1])
  shape = [self.batch_size, self.out_vocab_size]
  kb_dist = tf.scatter_nd(indices, updates, shape)
  # Scale each row by `self.db_empty` -- presumably 0 for examples with an
  # empty KB; TODO confirm.
  kb_dist = tf.einsum('i,ij->ij', self.db_empty, kb_dist)

  # ---- Final mixture ----
  copy_dist = tf.multiply(p_db, kb_dist) + tf.multiply(
      one_minus_pdb, context_copy_dist)
  final_dist = tf.multiply(p_gens, extended_generate_dist) + tf.multiply(
      one_minus_pgens, copy_dist)
  return final_dist
def _process_input_helper(self,
                          update_row_factors,
                          sp_input=None,
                          transpose_input=False,
                          row_weights=None):
  """Creates the graph for processing a sparse slice of input.

  The rows and columns that appear in `sp_input` are re-indexed to contiguous
  ids, the normal equations for the factors being updated are assembled and
  solved, an op that writes the solution back is created, and the loss terms
  for the slice are computed.

  Args:
    update_row_factors: if True, update or project the row_factors, else update
      or project the column factors.
    sp_input: Please refer to comments for update_row_factors,
      update_col_factors, project_row_factors, and project_col_factors for
      restrictions. Must be a `SparseTensor` (checked with an `assert`), even
      though the parameter defaults to None.
    transpose_input: If True, the input is logically transposed and then the
      corresponding rows/columns of the transposed input are updated.
    row_weights: If not None, this is the row/column weights to be used for the
      update or projection. If None, use the corresponding weights from the
      model. Note that the feature (column/row) weights will be determined by
      the model. When not None, it can either be a scalar or a rank-1 tensor
      with the same number of elements as the number of rows or columns to be
      updated/projected.

  Returns:
    A tuple consisting of the following elements:
    new_values: New values for the row/column factors.
    update_op: An op that assigns the newly computed values to the row/column
      factors.
    unregularized_loss: A tensor (scalar) that contains the normalized
      minibatch loss corresponding to sp_input, without the regularization
      term. Add the regularization term below to yield the loss.
    regularization: A tensor (scalar) that contains the normalized
      regularization term for the minibatch loss corresponding to sp_input.
    sum_weights: The sum of the weights corresponding to sp_input. This
      can be used with unregularized loss to calculate the root weighted
      squared error.
  """
  assert isinstance(sp_input, sparse_tensor.SparseTensor)

  # Select which side is solved for ("left") and which cached side is held
  # fixed ("right"), together with the matching weights, sizes, sharding
  # function and gramian.
  if update_row_factors:
    left = self._row_factors
    right_factors = self._col_factors_cache
    row_wt = self._row_wt_cache
    col_wt = self._col_wt_cache
    total_rows = self._input_rows
    total_cols = self._input_cols
    sharding_func = WALSModel._get_sharding_func(
        self._input_rows, self._num_row_shards)
    gramian = self._col_gramian_cache
  else:
    left = self._col_factors
    right_factors = self._row_factors_cache
    row_wt = self._col_wt_cache
    col_wt = self._row_wt_cache
    total_rows = self._input_cols
    total_cols = self._input_rows
    sharding_func = WALSModel._get_sharding_func(
        self._input_cols, self._num_col_shards)
    gramian = self._row_gramian_cache
    # The column update reuses the same code path with the roles of rows and
    # columns swapped, so the caller's transpose flag is flipped.
    transpose_input = not transpose_input

  # Note that the row indices of sp_input are based on the original full input
  # Here we reindex the rows and give them contiguous ids starting at 0.
  # We use tf.unique to achieve this reindexing. Note that this is done so
  # that the downstream kernel can assume that the input is "dense" along the
  # row dimension.
  row_ids, col_ids = array_ops.split(value=sp_input.indices,
                                     num_or_size_splits=2,
                                     axis=1)
  update_row_indices, all_row_ids = array_ops.unique(row_ids[:, 0])
  update_col_indices, all_col_ids = array_ops.unique(col_ids[:, 0])
  col_ids = array_ops.expand_dims(
      math_ops.cast(all_col_ids, dtypes.int64), 1)
  row_ids = array_ops.expand_dims(
      math_ops.cast(all_row_ids, dtypes.int64), 1)

  # `update_indices` are the original ids of the factors being solved for;
  # `gather_indices` are the original ids looked up on the fixed side.
  if transpose_input:
    update_indices = update_col_indices
    row_shape = [
        math_ops.cast(
            array_ops.shape(update_row_indices)[0], dtypes.int64)
    ]
    gather_indices = update_row_indices
  else:
    update_indices = update_row_indices
    row_shape = [
        math_ops.cast(
            array_ops.shape(update_col_indices)[0], dtypes.int64)
    ]
    gather_indices = update_col_indices

  num_rows = math_ops.cast(
      array_ops.shape(update_indices)[0], dtypes.int64)
  col_shape = [num_rows]
  right = embedding_ops.embedding_lookup(right_factors,
                                         gather_indices,
                                         partition_strategy="div")
  # Rebuild the sparse input over the compact (re-indexed) id space.
  new_sp_indices = array_ops.concat([row_ids, col_ids], 1)
  new_sp_shape = (array_ops.concat([row_shape, col_shape], 0)
                  if transpose_input else array_ops.concat(
                      [col_shape, row_shape], 0))
  new_sp_input = sparse_tensor.SparseTensor(indices=new_sp_indices,
                                            values=sp_input.values,
                                            dense_shape=new_sp_shape)

  # Compute lhs and rhs of the normal equations
  total_lhs = (self._unobserved_weight * gramian)
  if self._regularization_matrix is not None:
    total_lhs += self._regularization_matrix
  if self._row_weights is None:
    # Special case of ALS. Use a much simpler update rule.
    total_rhs = (self._unobserved_weight *
                 sparse_ops.sparse_tensor_dense_matmul(
                     new_sp_input, right, adjoint_a=transpose_input))
    # TODO(rmlarsen): handle transposing in tf.matrix_solve instead of
    # transposing explicitly.
    # TODO(rmlarsen): multi-thread tf.matrix_solve.
    new_left_values = array_ops.transpose(
        linalg_ops.matrix_solve(total_lhs, array_ops.transpose(total_rhs)))
  else:
    if row_weights is None:
      # TODO(yifanchen): Add special handling for single shard without using
      # embedding_lookup and perform benchmarks for those cases. Same for
      # col_weights lookup below.
      row_weights_slice = embedding_ops.embedding_lookup(
          row_wt, update_indices, partition_strategy="div")
    else:
      num_indices = array_ops.shape(update_indices)[0]
      # Caller-supplied weights must be a scalar or a vector; a scalar is
      # expanded to one weight per updated index.
      with ops.control_dependencies([
          check_ops.assert_less_equal(
              array_ops.rank(row_weights), 1)
      ]):
        row_weights_slice = control_flow_ops.cond(
            math_ops.equal(array_ops.rank(row_weights), 0),
            lambda: (array_ops.ones([num_indices]) * row_weights),
            lambda: math_ops.cast(row_weights, dtypes.float32))

    col_weights = embedding_ops.embedding_lookup(
        col_wt, gather_indices, partition_strategy="div")
    partial_lhs, total_rhs = (
        gen_factorization_ops.wals_compute_partial_lhs_and_rhs(
            right,
            col_weights,
            self._unobserved_weight,
            row_weights_slice,
            new_sp_input.indices,
            new_sp_input.values,
            num_rows,
            transpose_input,
            name="wals_compute_partial_lhs_rhs"))
    # The shared lhs gets a leading axis so it can be added to the per-row
    # partial lhs, and the rhs gets a trailing axis for the batched solve;
    # that trailing axis is squeezed away again afterwards.
    total_lhs = array_ops.expand_dims(total_lhs, 0) + partial_lhs
    total_rhs = array_ops.expand_dims(total_rhs, -1)
    new_left_values = array_ops.squeeze(
        linalg_ops.matrix_solve(total_lhs, total_rhs), [2])

  update_op_name = "row_update" if update_row_factors else "col_update"
  update_op = self.scatter_update(left,
                                  update_indices,
                                  new_left_values,
                                  sharding_func,
                                  name=update_op_name)

  # Create the loss subgraph
  loss_sp_input = (sparse_ops.sparse_transpose(new_sp_input)
                   if transpose_input else new_sp_input)
  # sp_approx is the low rank estimate of the input matrix, formed by
  # computing the product <\\(u_i, v_j\\)> for (i, j) in loss_sp_input.indices.
  sp_approx_vals = gen_factorization_ops.masked_matmul(
      new_left_values,
      right,
      loss_sp_input.indices,
      transpose_a=False,
      transpose_b=True)
  sp_approx = sparse_tensor.SparseTensor(loss_sp_input.indices,
                                         sp_approx_vals,
                                         loss_sp_input.dense_shape)

  sp_approx_sq = math_ops.square(sp_approx)
  sp_residual = sparse_ops.sparse_add(loss_sp_input, sp_approx * (-1))
  sp_residual_sq = math_ops.square(sp_residual)
  # NOTE(review): `row_weights_slice` and `col_weights` are only bound in the
  # weighted branch above (`self._row_weights is not None`). This presumably
  # relies on `_row_weights` and `_col_weights` being either both None or both
  # set -- confirm against the constructor.
  row_wt_mat = (constant_op.constant(0.)
                if self._row_weights is None else array_ops.expand_dims(
                    row_weights_slice, 1))
  col_wt_mat = (constant_op.constant(0.)
                if self._col_weights is None else array_ops.expand_dims(
                    col_weights, 0))

  # We return the normalized loss
  partial_row_gramian = math_ops.matmul(new_left_values,
                                        new_left_values,
                                        transpose_a=True)
  # Scales minibatch quantities by (total rows) / (rows in this slice).
  normalization_factor = total_rows / math_ops.cast(
      num_rows, dtypes.float32)

  unregularized_loss = (
      self._unobserved_weight * (  # pyformat line break
          sparse_ops.sparse_reduce_sum(sp_residual_sq) -  # pyformat break
          sparse_ops.sparse_reduce_sum(sp_approx_sq) +  # pyformat break
          math_ops.trace(math_ops.matmul(partial_row_gramian, gramian))) +
      sparse_ops.sparse_reduce_sum(
          row_wt_mat * (sp_residual_sq * col_wt_mat))) * normalization_factor

  if self._regularization is not None:
    regularization = self._regularization * (
        math_ops.trace(partial_row_gramian) * normalization_factor +
        math_ops.trace(gramian))
  else:
    regularization = constant_op.constant(0.)

  sum_weights = self._unobserved_weight * math_ops.cast(
      total_rows * total_cols, dtypes.float32)
  if self._row_weights is not None and self._col_weights is not None:
    # Add the weights of the observed entries: an all-ones sparse tensor with
    # the input's sparsity pattern, scaled by the row and column weights.
    ones = sparse_tensor.SparseTensor(
        indices=loss_sp_input.indices,
        values=array_ops.ones(array_ops.shape(loss_sp_input.values)),
        dense_shape=loss_sp_input.dense_shape)
    sum_weights += sparse_ops.sparse_reduce_sum(
        row_wt_mat * (ones * col_wt_mat)) * normalization_factor

  return (new_left_values, update_op, unregularized_loss, regularization,
          sum_weights)
def expand_dims(input: ragged_tensor.Ragged, axis, name=None):  # pylint: disable=redefined-builtin
  """Inserts a dimension with shape 1 into a potentially ragged tensor's shape.

  Given a potentially ragged tensor `input`, this operation inserts a
  dimension with size 1 at the dimension `axis` of `input`'s shape.

  The following table gives some examples showing how `ragged.expand_dims`
  impacts the shapes of different input tensors.  Ragged dimensions are
  indicated by enclosing them in parentheses.

  input.shape             | axis | result.shape
  ----------------------- | ---- | -----------------------------
  `[D1, D2]`              |  `0` | `[1, D1, D2]`
  `[D1, D2]`              |  `1` | `[D1, 1, D2]`
  `[D1, D2]`              |  `2` | `[D1, D2, 1]`
  `[D1, (D2), (D3), D4]`  |  `0` | `[1, D1, (D2), (D3), D4]`
  `[D1, (D2), (D3), D4]`  |  `1` | `[D1, 1, (D2), (D3), D4]`
  `[D1, (D2), (D3), D4]`  |  `2` | `[D1, (D2), 1, (D3), D4]`
  `[D1, (D2), (D3), D4]`  |  `3` | `[D1, (D2), (D3), 1, D4]`
  `[D1, (D2), (D3), D4]`  |  `4` | `[D1, (D2), (D3), D4, 1]`

  Args:
    input: The potentially ragged tensor that should be expanded with a new
      dimension.
    axis: An integer constant indicating where the new dimension should be
      inserted.
    name: A name for the operation (optional).

  Returns:
    A tensor with the same values as `input`, with an added dimension of
    size 1 at `axis`.

  #### Examples:

  >>> rt = tf.ragged.constant([[1, 2], [3]])
  >>> print(rt.shape)
  (2, None)

  >>> expanded = tf.expand_dims(rt, axis=0)
  >>> print(expanded.shape, expanded)
  (1, 2, None) <tf.RaggedTensor [[[1, 2], [3]]]>

  >>> expanded = tf.expand_dims(rt, axis=1)
  >>> print(expanded.shape, expanded)
  (2, 1, None) <tf.RaggedTensor [[[1, 2]], [[3]]]>

  >>> expanded = tf.expand_dims(rt, axis=2)
  >>> print(expanded.shape, expanded)
  (2, None, 1) <tf.RaggedTensor [[[1], [2]], [[3]]]>
  """
  with ops.name_scope(name, 'RaggedExpandDims', [input]):
    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        input, name='input')

    # Dense tensors are handled entirely by the regular op.
    if not ragged_tensor.is_ragged(input):
      return array_ops.expand_dims(input, axis)

    # `axis` is resolved against the rank of the *result*, which has one more
    # dimension than the input (when the input rank is statically known).
    input_rank = input.shape.ndims
    result_rank = input_rank + 1 if input_rank is not None else None
    axis = array_ops.get_positive_axis(
        axis, result_rank, ndims_name='rank(input)')

    if axis == 0:
      # Wrap every existing row into a single new outer row.
      return ragged_tensor.RaggedTensor.from_uniform_row_length(
          input, uniform_row_length=input.nrows(), nrows=1, validate=False)

    if axis == 1:
      # Give each existing row its own new row of length one.
      return ragged_tensor.RaggedTensor.from_uniform_row_length(
          input, uniform_row_length=1, nrows=input.nrows(), validate=False)

    # For deeper axes, expand the values one level down and keep this level's
    # row partitioning unchanged.
    inner_values = input.values
    inner_axis = axis - 1
    if ragged_tensor.is_ragged(inner_values):
      expanded_values = expand_dims(inner_values, inner_axis)
    else:
      expanded_values = array_ops.expand_dims(inner_values, inner_axis)
    return input.with_values(expanded_values)
def combine_segments(segments, start_of_sequence_id, end_of_segment_id):
  """Combine one or more input segments for a model's input sequence.

  `combine_segments` combines the tokens of one or more input segments to a
  single sequence of token values and generates matching segment ids.
  `combine_segments` can follow a `Trimmer`, which limits segment lengths and
  emits `RaggedTensor` outputs, and can be followed up by `ModelInputPacker`.

  See `Detailed Experimental Setup` in `BERT: Pre-training of Deep
  Bidirectional Transformers for Language Understanding`
  (https://arxiv.org/pdf/1810.04805.pdf) for more examples of combined
  segments.

  The segments are concatenated along axis 1 with a start-of-sequence token
  placed in front and an end-of-segment token placed after every segment. A
  second tensor of the same layout is produced that holds, for every position,
  the index of the segment it belongs to. The start token is given id 0 and
  each end-of-segment token is given the id of the segment it closes.

  Example usage:

  ```
  segment_a = [[1, 2],
               [3, 4,],
               [5, 6, 7, 8, 9]]

  segment_b = [[10, 20,],
               [30, 40, 50, 60,],
               [70, 80]]
  expected_combined, expected_ids = combine_segments(
      [segment_a, segment_b],
      start_of_sequence_id=101,
      end_of_segment_id=102)

  # segment_a and segment_b have been combined w/ special tokens describing
  # the beginning of a sequence and end of a sequence inserted.
  expected_combined=[
   [101, 1, 2, 102, 10, 20, 102],
   [101, 3, 4, 102, 30, 40, 50, 60, 102],
   [101, 5, 6, 7, 8, 9, 102, 70, 80, 102],
  ]

  # ids describing which items belong to which segment.
  expected_ids=[
   [0, 0, 0, 0, 1, 1, 1],
   [0, 0, 0, 0, 1, 1, 1, 1, 1],
   [0, 0, 0, 0, 0, 0, 0, 1, 1, 1]]
  ```

  Args:
    segments: A list of `RaggedTensor`s with the tokens of the input segments.
      All elements must have the same dtype (int32 or int64), same rank, and
      same dimension 0 (namely batch size). Slice `segments[i][j, ...]`
      contains the tokens of the i-th input segment to the j-th example in the
      batch. Only the dtype of the first element is checked here.
    start_of_sequence_id: a python int or scalar Tensor containing the id used
      to denote the start of a sequence (e.g. `[CLS]` token in BERT
      terminology).
    end_of_segment_id: a python int or scalar Tensor containing the id used to
      denote end of a segment (e.g. the `[SEP]` token in BERT terminology).

  Returns:
    a tuple of (combined_segments, segment_ids), where:

    combined_segments: A `RaggedTensor` with segments combined and special
      tokens inserted.
    segment_ids: A `RaggedTensor` w/ the same shape as `combined_segments`
      and containing int ids for each item detailing the segment that they
      correspond to.

  Raises:
    ValueError: If `segments` is empty, or its first element's dtype is
      neither int32 nor int64.
  """
  if len(segments) < 1:
    raise ValueError("`segments` must be a nonempty list.")

  token_dtype = segments[0].dtype
  if token_dtype not in (dtypes.int32, dtypes.int64):
    raise ValueError(
        "`segments` must have elements with dtype of int32 or int64")

  # Build the special tokens ([CLS] and [SEP]) in the segments' dtype.
  start_of_sequence_id = ops.convert_to_tensor(
      start_of_sequence_id, dtype=token_dtype)
  end_of_segment_id = ops.convert_to_tensor(
      end_of_segment_id, dtype=token_dtype)
  cls_id = math_ops.cast(start_of_sequence_id, token_dtype)
  sep_id = math_ops.cast(end_of_segment_id, token_dtype)

  # One special token per batch row ...
  start_seq_tokens = array_ops.tile([cls_id], [segments[0].nrows()])
  end_segment_tokens = array_ops.tile([sep_id], [segments[0].nrows()])
  # ... with one extra axis per ragged dimension so they can be concatenated
  # with the segments.
  for _ in range(segments[0].ragged_rank):
    start_seq_tokens = array_ops.expand_dims(start_seq_tokens, 1)
    end_segment_tokens = array_ops.expand_dims(end_segment_tokens, 1)
  special_token_segment_template = array_ops.ones_like(start_seq_tokens)

  # Token layout: [CLS] seg_0 [SEP] seg_1 [SEP] ...
  token_pieces = [start_seq_tokens]
  for segment in segments:
    token_pieces.append(segment)
    token_pieces.append(end_segment_tokens)
  segments_combined = array_ops.concat(token_pieces, 1)

  # Segment ids follow the same layout; the leading [CLS] gets id 0 and each
  # [SEP] gets the id of the segment it terminates.
  id_pieces = [special_token_segment_template * 0]
  for segment_index, segment in enumerate(segments):
    id_pieces.append(array_ops.ones_like(segment) * segment_index)
    id_pieces.append(special_token_segment_template * segment_index)
  segment_ids = array_ops.concat(id_pieces, 1)

  return segments_combined, segment_ids
def __init__(self,
             num_rows,
             multiplier,
             is_non_singular=None,
             is_self_adjoint=None,
             is_positive_definite=None,
             is_square=True,
             assert_proper_shapes=False,
             name="LinearOperatorScaledIdentity"):
  r"""Initialize a `LinearOperatorScaledIdentity`.

  The `LinearOperatorScaledIdentity` is initialized with `num_rows`, which
  determines the size of each identity matrix, and a `multiplier`,
  which defines `dtype`, batch shape, and scale of each matrix.

  This operator is able to broadcast the leading (batch) dimensions.

  Args:
    num_rows:  Scalar non-negative integer `Tensor`.  Number of rows in the
      corresponding identity matrix.
    multiplier:  `Tensor` of shape `[B1,...,Bb]`, or `[]` (a scalar).
    is_non_singular:  Expect that this operator is non-singular.
    is_self_adjoint:  Expect that this operator is equal to its hermitian
      transpose.
    is_positive_definite:  Expect that this operator is positive definite,
      meaning the quadratic form `x^H A x` has positive real part for all
      nonzero `x`.  Note that we do not require the operator to be
      self-adjoint to be positive-definite.  See:
      https://en.wikipedia.org/wiki/Positive-definite_matrix#Extension_for_non_symmetric_matrices
    is_square:  Expect that this operator acts like square [batch] matrices.
      Passing a falsy value raises `ValueError`.
    assert_proper_shapes:  Python `bool`.  If `False`, only perform static
      checks that initialization and method arguments have proper shape.
      If `True`, and static checks are inconclusive, add asserts to the graph.
    name: A name for this `LinearOperator`

  Raises:
    ValueError:  If `num_rows` is determined statically to be non-scalar, or
      negative.
    ValueError:  If `is_square` is falsy.
  """
  self._assert_proper_shapes = assert_proper_shapes

  if not is_square:
    raise ValueError("A ScaledIdentity operator is always square.")

  with ops.name_scope(name, values=[multiplier, num_rows]):
    self._multiplier = ops.convert_to_tensor(multiplier, name="multiplier")

    # The operator's dtype is taken from the multiplier.
    super(LinearOperatorScaledIdentity, self).__init__(
        dtype=self._multiplier.dtype,
        is_non_singular=is_non_singular,
        is_self_adjoint=is_self_adjoint,
        is_positive_definite=is_positive_definite,
        is_square=is_square,
        name=name)

    # Shape [B1,...Bb, 1, 1]
    multiplier_with_trailing_axis = array_ops.expand_dims(
        self.multiplier, -1)
    self._multiplier_matrix = array_ops.expand_dims(
        multiplier_with_trailing_axis, -1)
    self._multiplier_matrix_conj = math_ops.conj(self._multiplier_matrix)
    self._abs_multiplier = math_ops.abs(self.multiplier)

    # Keep the row count as a shape tensor, as a static value (when it can be
    # determined), and pre-cast to the operator's dtypes.
    self._num_rows = linear_operator_util.shape_tensor(
        num_rows, name="num_rows")
    self._num_rows_static = tensor_util.constant_value(self._num_rows)
    self._check_num_rows_possibly_add_asserts()
    self._num_rows_cast_to_dtype = math_ops.cast(self._num_rows, self.dtype)
    self._num_rows_cast_to_real_dtype = math_ops.cast(
        self._num_rows, self.dtype.real_dtype)
def jac_mul(tangent):
  """Multiplies `jac_fwd` by `tangent` and restores the tangent's shape.

  `jac_fwd` is taken from the enclosing scope.

  Args:
    tangent: Tensor to multiply; it is flattened to a column vector first.

  Returns:
    The product `jac_fwd @ flatten(tangent)`, reshaped to `tangent.shape`.
  """
  # Flatten to a [N, 1] column so a plain matmul can be used.
  tangent_column = array_ops.expand_dims(
      array_ops.reshape(tangent, shape=[-1]), 1)
  product = math_ops.matmul(jac_fwd, tangent_column)
  return array_ops.reshape(product, tangent.shape)
def update_confusion_matrix_variables(variables_to_update,
                                      y_true,
                                      y_pred,
                                      thresholds,
                                      top_k=None,
                                      class_id=None,
                                      sample_weight=None):
  """Returns op to update the given confusion matrix variables.

  For every pair of values in y_true and y_pred:

  true_positive: y_true == True and y_pred > thresholds
  false_negatives: y_true == True and y_pred <= thresholds
  true_negatives: y_true == False and y_pred <= thresholds
  false_positive: y_true == False and y_pred > thresholds

  The results will be weighted and added together. When multiple thresholds
  are provided, we will repeat the same for every threshold.

  For estimation of these metrics over a stream of data, the function creates
  an `update_op` operation that updates the given variables.

  If `sample_weight` is `None`, weights default to 1.
  Use weights of 0 to mask values.

  Args:
    variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys
      and corresponding variables to update as values. If `None`, the function
      returns `None` without creating any ops.
    y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`.
    y_pred: A floating point `Tensor` of arbitrary shape and whose values are
      in the range `[0, 1]`.
    thresholds: A float value or a python list or tuple of float thresholds in
      `[0, 1]`, or NEG_INF (used when top_k is set).
    top_k: Optional int, indicates that the positive labels should be limited
      to the top k predictions.
    class_id: Optional int, limits the prediction and labels to the class
      specified by this argument.
    sample_weight: Optional `Tensor` whose rank is either 0, or the same rank
      as `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `y_true`
      dimension).

  Returns:
    Update op.

  Raises:
    ValueError: If `y_pred` and `y_true` have mismatched shapes, or if
      `sample_weight` is not `None` and its shape doesn't match `y_pred`, or
      if `variables_to_update` contains invalid keys.
  """
  if variables_to_update is None:
    return

  y_true = math_ops.cast(y_true, dtype=dtypes.float32)
  y_pred = math_ops.cast(y_pred, dtype=dtypes.float32)
  y_pred.shape.assert_is_compatible_with(y_true.shape)

  # Validate the requested keys: at least one must be valid, none invalid.
  valid_keys = list(ConfusionMatrix)
  if not any(key for key in variables_to_update if key in valid_keys):
    raise ValueError(
        'Please provide at least one valid confusion matrix '
        'variable to update. Valid variable key options are: "{}". '
        'Received: "{}"'.format(valid_keys, variables_to_update.keys()))

  unknown_keys = [key for key in variables_to_update if key not in valid_keys]
  if unknown_keys:
    raise ValueError(
        'Invalid keys: {}. Valid variable key options are: "{}"'.format(
            unknown_keys, valid_keys))

  # Range checks on the predictions, attached as control dependencies to
  # whatever ops the rank adjustment below creates.
  lower_bound_check = check_ops.assert_greater_equal(
      y_pred,
      math_ops.cast(0.0, dtype=y_pred.dtype),
      message='predictions must be >= 0')
  upper_bound_check = check_ops.assert_less_equal(
      y_pred,
      math_ops.cast(1.0, dtype=y_pred.dtype),
      message='predictions must be <= 1')
  with ops.control_dependencies([lower_bound_check, upper_bound_check]):
    y_pred, y_true, sample_weight = squeeze_or_expand_dimensions(
        y_pred, y_true, sample_weight)

  if top_k is not None:
    y_pred = _filter_top_k(y_pred, top_k)
  if class_id is not None:
    y_true = y_true[..., class_id]
    y_pred = y_pred[..., class_id]

  threshold_list = to_list(thresholds)
  num_thresholds = len(threshold_list)
  num_predictions = array_ops.size(y_pred)

  # Flatten predictions and labels to a single row each.
  predictions_2d = array_ops.reshape(y_pred, [1, -1])
  labels_2d = array_ops.reshape(
      math_ops.cast(y_true, dtype=dtypes.bool), [1, -1])

  # Build [num_thresholds, num_predictions] grids: thresholds repeated along
  # the columns, predictions repeated along the rows.
  threshold_column = array_ops.expand_dims(
      array_ops.constant(threshold_list), 1)
  thresh_tiled = array_ops.tile(
      threshold_column, array_ops.stack([1, num_predictions]))
  preds_tiled = array_ops.tile(predictions_2d, [num_thresholds, 1])

  # A prediction counts as positive when strictly above the threshold.
  pred_is_pos = math_ops.greater(preds_tiled, thresh_tiled)

  # Labels are repeated once per threshold.
  label_is_pos = array_ops.tile(labels_2d, [num_thresholds, 1])

  weights_tiled = None
  if sample_weight is not None:
    weights = weights_broadcast_ops.broadcast_weights(
        math_ops.cast(sample_weight, dtype=dtypes.float32), y_pred)
    weights_tiled = array_ops.tile(
        array_ops.reshape(weights, [1, -1]), [num_thresholds, 1])

  def weighted_assign_add(label, pred, weights, var):
    """Adds the (weighted) per-threshold count of `label & pred` to `var`."""
    hits = math_ops.cast(
        math_ops.logical_and(label, pred), dtype=dtypes.float32)
    if weights is not None:
      hits *= weights
    return var.assign_add(math_ops.reduce_sum(hits, 1))

  # Only build the negated tensors that a requested variable actually needs.
  update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
  update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update
  update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update

  loop_vars = {
      ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos),
  }
  if update_fn or update_tn:
    pred_is_neg = math_ops.logical_not(pred_is_pos)
    loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos, pred_is_neg)
  if update_fp or update_tn:
    label_is_neg = math_ops.logical_not(label_is_pos)
    loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg, pred_is_pos)
  if update_tn:
    loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = (label_is_neg, pred_is_neg)

  update_ops = [
      weighted_assign_add(label, pred, weights_tiled,
                          variables_to_update[matrix_cond])
      for matrix_cond, (label, pred) in loop_vars.items()
      if matrix_cond in variables_to_update
  ]
  return control_flow_ops.group(update_ops)
def squeeze_or_expand_dimensions(y_pred, y_true=None, sample_weight=None):
  """Squeeze or expand last dimension if needed.

  1. Squeezes last dim of `y_pred` or `y_true` if their rank differs by 1
  (using `remove_squeezable_dimensions`).
  2. Squeezes or expands last dim of `sample_weight` if its rank differs by 1
  from the rank of `y_pred`.
  If `sample_weight` is scalar, it is kept scalar.

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    y_pred: Predicted values, a `Tensor` of arbitrary dimensions.
    y_true: Optional label `Tensor` whose dimensions match `y_pred`.
    sample_weight: Optional weight scalar or `Tensor` whose dimensions match
      `y_pred`.

  Returns:
    Tuple of `y_pred`, `y_true` and `sample_weight`. Each of them possibly has
    the last dimension squeezed,
    `sample_weight` could be extended by one dimension.
    If `sample_weight` is None, (y_pred, y_true) is returned.
  """
  pred_static_shape = y_pred.shape
  # NOTE(review): this static rank is read before any squeezing of `y_pred`
  # below and is not refreshed afterwards; the weight adjustment further down
  # compares against this value.
  pred_rank = pred_static_shape.ndims

  if y_true is not None:
    # If sparse matrix is provided as `y_true`, the last dimension in `y_pred`
    # may be > 1. Eg: y_true = [0, 1, 2] (shape=(3,)),
    # y_pred = [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]] (shape=(3, 3))
    # In this case, we should not try to remove squeezable dimension.
    true_rank = y_true.shape.ndims
    if true_rank is None or pred_rank is None:
      # At least one rank is unknown statically: decide inside the graph.
      rank_diff = array_ops.rank(y_pred) - array_ops.rank(y_true)

      def _squeeze_both():
        return remove_squeezable_dimensions(y_true, y_pred)

      is_last_dim_1 = math_ops.equal(1, array_ops.shape(y_pred)[-1])

      def _squeeze_only_if_last_dim_is_1():
        return control_flow_ops.cond(
            is_last_dim_1, _squeeze_both, lambda: (y_true, y_pred))

      y_true, y_pred = control_flow_ops.cond(
          math_ops.equal(1, rank_diff),
          _squeeze_only_if_last_dim_is_1,
          _squeeze_both)
    elif (pred_rank - true_rank != 1) or pred_static_shape[-1] == 1:
      # Both ranks are known statically.
      y_true, y_pred = remove_squeezable_dimensions(y_true, y_pred)

  if sample_weight is None:
    return y_pred, y_true

  weights_rank = sample_weight.shape.ndims
  if weights_rank == 0:
    # Scalar weights are passed through untouched.
    return y_pred, y_true, sample_weight

  if pred_rank is not None and weights_rank is not None:
    # Static ranks: adjust the weights directly.
    rank_gap = weights_rank - pred_rank
    if rank_gap == 1:
      sample_weight = array_ops.squeeze(sample_weight, [-1])
    elif rank_gap == -1:
      sample_weight = array_ops.expand_dims(sample_weight, [-1])
    return y_pred, y_true, sample_weight

  # Dynamic ranks: express the same decision with graph conditionals.
  weights_rank_tensor = array_ops.rank(sample_weight)
  rank_diff = weights_rank_tensor - array_ops.rank(y_pred)

  def _squeeze_weights():
    return array_ops.squeeze(sample_weight, [-1])

  def _maybe_expand_weights():
    return control_flow_ops.cond(
        math_ops.equal(rank_diff, -1),
        lambda: array_ops.expand_dims(sample_weight, [-1]),
        lambda: sample_weight)

  def _maybe_adjust_weights():
    return control_flow_ops.cond(
        math_ops.equal(rank_diff, 1), _squeeze_weights, _maybe_expand_weights)

  # A rank-0 weight is kept as is; otherwise squeeze or expand its last dim
  # when its rank differs by 1 from that of `y_pred`.
  sample_weight = control_flow_ops.cond(
      math_ops.equal(weights_rank_tensor, 0),
      lambda: sample_weight,
      _maybe_adjust_weights)
  return y_pred, y_true, sample_weight
def call(self, inputs, initial_state=None, dtype=None, sequence_length=None):
  """Run this LSTM on inputs, starting from the given state.

  Args:
    inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`, or a
      Python list of `time_len` tensors, which is stacked into such a tensor.
    initial_state: a tuple `(initial_cell_state, initial_output)` with tensors
      of shape `[batch_size, self._num_units]`. If this is not provided, a
      zero initial state of type `dtype` is created.
    dtype: The data type for the initial state and expected output. Required
      if `initial_state` is not provided or RNN state has a heterogeneous
      dtype. When omitted, the dtype of `initial_state[0]` is used.
    sequence_length: Specifies the length of each sequence in inputs. An
      `int32` or `int64` vector (tensor) size `[batch_size]`. Values are
      presumably expected in `[0, time_len]` (a length of 0 selects the
      initial state) -- confirm against `_gather_states`. When `None`, every
      sequence is treated as having length `time_len`.

  Returns:
    A pair containing:

    - Output: A `3-D` tensor of shape `[time_len, batch_size, output_size]`
      or a list of time_len tensors of shape `[batch_size, output_size]`,
      to match the type of the `inputs`.
    - Final state: a tuple `(cell_state, output)` matching `initial_state`.

  Raises:
    ValueError: in case of shape mismatches, if neither `initial_state` nor
      `dtype` is given, or if `initial_state` does not have length 2.
  """
  inputs_were_list = isinstance(inputs, list)
  if inputs_were_list:
    inputs = array_ops.stack(inputs)

  static_shape = inputs.get_shape().with_rank(3)
  if not static_shape[2]:
    raise ValueError("Expecting inputs_shape[2] to be set: %s" % static_shape)

  # Prefer static sizes; fall back to dynamic shape tensors when unknown.
  batch_size = static_shape.dims[1].value
  if batch_size is None:
    batch_size = array_ops.shape(inputs)[1]
  time_len = static_shape.dims[0].value
  if time_len is None:
    time_len = array_ops.shape(inputs)[0]

  # Provide default values for initial_state and dtype
  if initial_state is not None:
    if len(initial_state) != 2:
      raise ValueError(
          "Expecting initial_state to be a tuple with length 2 or None")
    if dtype is None:
      dtype = initial_state[0].dtype
  elif dtype is None:
    raise ValueError("Either initial_state or dtype needs to be specified")
  else:
    zero_state = array_ops.zeros(
        array_ops.stack([batch_size, self.num_units]), dtype=dtype)
    initial_state = zero_state, zero_state

  # create the actual cell
  has_sequence_length = sequence_length is not None
  if has_sequence_length:
    sequence_length = ops.convert_to_tensor(sequence_length)
  initial_cell_state, initial_output = initial_state  # pylint: disable=unpacking-non-sequence
  cell_states, outputs = self._call_cell(
      inputs, initial_cell_state, initial_output, dtype, sequence_length)

  if not has_sequence_length:
    # No sequence_lengths used: final state is the last state
    final_cell_state = cell_states[-1]
    final_output = outputs[-1]
  else:
    # Mask out the part beyond sequence_length
    batch_major_mask = array_ops.sequence_mask(
        sequence_length, time_len, dtype=dtype)
    time_major_mask = array_ops.transpose(batch_major_mask, [1, 0])
    mask = array_ops.tile(
        array_ops.expand_dims(time_major_mask, [-1]),
        [1, 1, self.num_units])
    outputs *= mask

    # Prepend initial states to cell_states and outputs for indexing to work
    # correctly, since we want to access the last valid state at
    # sequence_length - 1, which can even be -1, corresponding to the
    # initial state.
    mod_cell_states = array_ops.concat(
        [array_ops.expand_dims(initial_cell_state, [0]), cell_states], 0)
    mod_outputs = array_ops.concat(
        [array_ops.expand_dims(initial_output, [0]), outputs], 0)
    final_cell_state = self._gather_states(
        mod_cell_states, sequence_length, batch_size)
    final_output = self._gather_states(
        mod_outputs, sequence_length, batch_size)

  if inputs_were_list:
    # Input was a list, so return a list
    outputs = array_ops.unstack(outputs)

  final_state = rnn_cell_impl.LSTMStateTuple(final_cell_state, final_output)
  return outputs, final_state
def squeeze_or_expand_dimensions(y_pred, y_true, sample_weight):
  """Squeeze or expand last dimension if needed.

  1. Squeezes last dim of `y_pred` or `y_true` if their rank differs by 1
  (using `confusion_matrix.remove_squeezable_dimensions`).
  2. Squeezes or expands last dim of `sample_weight` if its rank differs by 1
  from the new rank of `y_pred`.
  If `sample_weight` is scalar, it is kept scalar.

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    y_pred: Predicted values, a `Tensor` of arbitrary dimensions.
    y_true: Optional label `Tensor` whose dimensions match `y_pred`.
    sample_weight: Optional weight scalar or `Tensor` whose dimensions match
      `y_pred`.

  Returns:
    Tuple of `y_pred`, `y_true` and `sample_weight`. Each of them possibly has
    the last dimension squeezed,
    `sample_weight` could be extended by one dimension.
    When `sample_weight` is None, the third element is None.
  """
  if y_true is not None:
    # squeeze last dim of `y_pred` or `y_true` if their rank differs by 1
    y_true, y_pred = confusion_matrix.remove_squeezable_dimensions(
        y_true, y_pred)

  if sample_weight is None:
    return y_pred, y_true, None

  sample_weight = ops.convert_to_tensor(sample_weight)
  weights_rank = sample_weight.get_shape().ndims
  if weights_rank == 0:
    # Scalar weights are passed through untouched.
    return y_pred, y_true, sample_weight

  pred_rank = y_pred.get_shape().ndims
  if pred_rank is not None and weights_rank is not None:
    # Static ranks: adjust the weights directly.
    rank_gap = weights_rank - pred_rank
    if rank_gap == 1:
      sample_weight = array_ops.squeeze(sample_weight, [-1])
    elif rank_gap == -1:
      sample_weight = array_ops.expand_dims(sample_weight, [-1])
    return y_pred, y_true, sample_weight

  # Dynamic ranks: express the same decision with graph conditionals.
  weights_rank_tensor = array_ops.rank(sample_weight)
  rank_diff = weights_rank_tensor - array_ops.rank(y_pred)

  def _squeeze_weights():
    return array_ops.squeeze(sample_weight, [-1])

  def _maybe_expand_weights():
    return control_flow_ops.cond(
        math_ops.equal(rank_diff, -1),
        lambda: array_ops.expand_dims(sample_weight, [-1]),
        lambda: sample_weight)

  def _maybe_adjust_weights():
    return control_flow_ops.cond(
        math_ops.equal(rank_diff, 1), _squeeze_weights, _maybe_expand_weights)

  # A rank-0 weight is kept as is; otherwise squeeze or expand its last dim
  # when its rank differs by 1 from the new rank of `y_pred`.
  sample_weight = control_flow_ops.cond(
      math_ops.equal(weights_rank_tensor, 0),
      lambda: sample_weight,
      _maybe_adjust_weights)
  return y_pred, y_true, sample_weight
def _maybe_expand_weights():
  """Adds a trailing dimension to `sample_weight` when `rank_diff` is -1.

  Both `sample_weight` and `rank_diff` come from the enclosing scope. For any
  other `rank_diff` the weights are returned unchanged.
  """
  def _expanded():
    return array_ops.expand_dims(sample_weight, [-1])

  def _unchanged():
    return sample_weight

  is_one_short = math_ops.equal(rank_diff, -1)
  return control_flow_ops.cond(is_one_short, _expanded, _unchanged)
def _beam_search_step(time, logits, beam_state, batch_size, beam_width,
                      end_token, length_penalty_weight):
  """Performs a single step of Beam Search Decoding.

  Args:
    time: Beam search time step, should start at 0. At time 0 we assume
      that all beams are equal and consider only the first beam for
      continuations.
    logits: Logits at the current time step. The code below adds them to
      `beam_state.log_probs` expanded on axis 2, so they are presumably shaped
      `[batch_size, beam_width, vocab_size]` -- TODO confirm against callers.
      The vocabulary dimension may be statically unknown.
    beam_state: Current state of the beam search. Its `lengths`, `finished`,
      `log_probs` and `cell_state` fields are read.
    batch_size: The batch size for this input.
    beam_width: The size of the beams.
    end_token: The int32 end token.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.

  Returns:
    A tuple `(output, next_state)` where `output` is a
    `BeamSearchDecoderOutput` (scores, predicted ids, parent beam ids) and
    `next_state` is a `BeamSearchDecoderState` for the following step.
  """
  static_batch_size = tensor_util.constant_value(batch_size)

  # Calculate the current lengths of the predictions
  prediction_lengths = beam_state.lengths
  previously_finished = beam_state.finished

  # Calculate the total log probs for the new hypotheses
  # Final Shape: [batch_size, beam_width, vocab_size]
  probs = nn_ops.log_softmax(logits)
  probs = _mask_probs(probs, end_token, previously_finished)
  total_probs = array_ops.expand_dims(beam_state.log_probs, 2) + probs

  # Prefer the static vocabulary size; fall back to the dynamic shape when the
  # last dimension (or the whole rank) is unknown at graph-construction time,
  # instead of propagating `None` into `one_hot` and the index arithmetic.
  logits_static_shape = logits.get_shape()
  vocab_size = None
  if logits_static_shape.ndims is not None:
    vocab_size = logits_static_shape.as_list()[-1]
  if vocab_size is None:
    vocab_size = array_ops.shape(logits)[-1]

  # Calculate the continuation lengths by adding to all continuing beams.
  lengths_to_add = array_ops.one_hot(
      array_ops.tile(
          array_ops.reshape(end_token, [1, 1]), [batch_size, beam_width]),
      vocab_size, 0, 1)
  add_mask = (1 - math_ops.to_int32(previously_finished))
  lengths_to_add = array_ops.expand_dims(add_mask, 2) * lengths_to_add
  new_prediction_lengths = (
      array_ops.expand_dims(prediction_lengths, 2) + lengths_to_add)

  # Calculate the scores for each beam
  scores = _get_scores(
      log_probs=total_probs,
      sequence_lengths=new_prediction_lengths,
      length_penalty_weight=length_penalty_weight)

  all_beam_scores = array_ops.reshape(scores, [batch_size, -1])
  # During the first time step we only consider the initial beam
  scores_flat = control_flow_ops.cond(
      ops.convert_to_tensor(time) > 0,
      lambda: all_beam_scores,
      lambda: scores[:, 0])

  # Pick the next beams according to the specified successors function
  next_beam_scores, word_indices = nn_ops.top_k(scores_flat, k=beam_width)
  next_beam_scores.set_shape([static_batch_size, beam_width])
  word_indices.set_shape([static_batch_size, beam_width])

  # Pick out the probs, beam_ids, and states according to the chosen
  # predictions
  next_beam_probs = _tensor_gather_helper(
      gather_indices=word_indices,
      gather_from=total_probs,
      range_input=batch_size,
      range_size=beam_width * vocab_size,
      final_shape=[static_batch_size, beam_width])

  # `word_indices` index the flattened [beam_width * vocab_size] axis: the
  # remainder is the word id and the integer quotient is the parent beam.
  # Floor division keeps the computation in integers rather than going through
  # a float quotient that is cast back.
  next_word_ids = math_ops.to_int32(word_indices % vocab_size)
  next_beam_ids = math_ops.to_int32(word_indices // vocab_size)

  # Append new ids to current predictions
  previously_finished = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=previously_finished,
      range_input=batch_size,
      range_size=beam_width,
      final_shape=[static_batch_size, beam_width])
  next_finished = math_ops.logical_or(
      previously_finished, math_ops.equal(next_word_ids, end_token))

  # Calculate the length of the next predictions.
  # 1. Finished beams remain unchanged
  # 2. Beams that are now finished (EOS predicted) remain unchanged
  # 3. Beams that are not yet finished have their length increased by 1
  lengths_to_add = math_ops.to_int32(
      math_ops.not_equal(next_word_ids, end_token))
  lengths_to_add = (1 - math_ops.to_int32(next_finished)) * lengths_to_add
  next_prediction_len = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=beam_state.lengths,
      range_input=batch_size,
      range_size=beam_width,
      final_shape=[static_batch_size, beam_width])
  next_prediction_len += lengths_to_add

  next_state = BeamSearchDecoderState(
      cell_state=beam_state.cell_state,
      log_probs=next_beam_probs,
      lengths=next_prediction_len,
      finished=next_finished)

  output = BeamSearchDecoderOutput(
      scores=next_beam_scores,
      predicted_ids=next_word_ids,
      parent_ids=next_beam_ids)

  return output, next_state
def posterior_from_prior_state(self, prior_state, prior_state_var,
                               observation, observation_model,
                               predicted_observations,
                               observation_noise):
  """Compute a posterior over states given an observation.

  The covariance is updated either with the simplified form or with the
  Joseph form, depending on
  `self._simplified_posterior_covariance_computation`.

  Args:
    prior_state: Prior state mean [batch size x state dimension]
    prior_state_var: Prior state covariance [batch size x state dimension x
        state dimension]
    observation: The observed value corresponding to the predictions given
        [batch size x observation dimension]
    observation_model: The [batch size x observation dimension x model state
        dimension] Tensor indicating how a particular state is mapped to
        (pre-noise) observations for each part of the batch.
    predicted_observations: An (observation mean, observation variance) tuple
        computed based on the current state, usually the output of
        observed_from_state.
    observation_noise: A [batch size x observation dimension x observation
        dimension] or [observation dimension x observation dimension] Tensor
        with covariance matrices to use for each part of the batch (a
        two-dimensional input will be broadcast). Only read when the Joseph
        form update is used.
  Returns:
    Posterior mean and covariance (dimensions matching the first two
    arguments).

  """
  observed_mean, observed_var = predicted_observations
  # Difference between what was observed and what was predicted.
  residual = observation - observed_mean
  # TODO(allenl): Can more of this be done using matrix_solve_ls?
  kalman_solve_rhs = math_ops.matmul(
      observation_model, prior_state_var, adjoint_b=True)
  # This matrix_solve adjoint doesn't make a difference symbolically (since
  # observed_var is a covariance matrix, and should be symmetric), but
  # filtering on multivariate series is unstable without it. See
  # test_multivariate_symmetric_covariance_float64 in kalman_filter_test.py
  # for an example of the instability (fails with adjoint=False).
  kalman_gain_transposed = linalg_ops.matrix_solve(
      matrix=observed_var, rhs=kalman_solve_rhs, adjoint=True)
  # The gain is kept transposed; `adjoint_a=True` un-transposes it at each
  # use. The residual gets a trailing axis so it can be matmul'd as a column
  # vector, and that axis is squeezed away again afterwards.
  posterior_state = prior_state + array_ops.squeeze(
      math_ops.matmul(
          kalman_gain_transposed,
          array_ops.expand_dims(residual, -1),
          adjoint_a=True),
      axis=[-1])
  gain_obs = math_ops.matmul(
      kalman_gain_transposed, observation_model, adjoint_a=True)
  # Identity sized from dimension 1 of gain_obs; `[None]` prepends an axis so
  # the subtraction below broadcasts over the leading (presumably batch)
  # dimension of gain_obs.
  identity_extradim = linalg_ops.eye(
      array_ops.shape(gain_obs)[1], dtype=gain_obs.dtype)[None]
  identity_minus_factor = identity_extradim - gain_obs
  if self._simplified_posterior_covariance_computation:
    # posterior covariance =
    #   (I - kalman_gain * observation_model) * prior_state_var
    posterior_state_var = math_ops.matmul(identity_minus_factor,
                                          prior_state_var)
  else:
    observation_noise = ops.convert_to_tensor(observation_noise)
    # A Joseph form update, which provides better numeric stability than the
    # simplified optimal Kalman gain update, at the cost of a few extra
    # operations. Joseph form updates are valid for any gain (not just the
    # optimal Kalman gain), and so are more forgiving of numerical errors in
    # computing the optimal Kalman gain.
    #
    # posterior covariance =
    #   (I - kalman_gain * observation_model) * prior_state_var
    #     * (I - kalman_gain * observation_model)^T
    #   + kalman_gain * observation_noise * kalman_gain^T
    left_multiplied_state_var = math_ops.matmul(
        identity_minus_factor, prior_state_var)
    multiplied_state_var = math_ops.matmul(
        identity_minus_factor, left_multiplied_state_var, adjoint_b=True)
    # The two helpers below differ only in how `observation_noise` is
    # multiplied in: a plain matmul for a per-batch noise tensor, or
    # `math_utils.batch_times_matrix` for a single shared matrix.
    def _batch_observation_noise_update():
      return (multiplied_state_var + math_ops.matmul(
          math_ops.matmul(
              kalman_gain_transposed, observation_noise, adjoint_a=True),
          kalman_gain_transposed))
    def _matrix_observation_noise_update():
      return (multiplied_state_var + math_ops.matmul(
          math_utils.batch_times_matrix(
              kalman_gain_transposed, observation_noise, adj_x=True),
          kalman_gain_transposed))
    if observation_noise.get_shape().ndims is None:
      # Rank unknown statically: choose the update inside the graph.
      posterior_state_var = control_flow_ops.cond(
          math_ops.equal(array_ops.rank(observation_noise), 2),
          _matrix_observation_noise_update,
          _batch_observation_noise_update)
    else:
      # If static shape information exists, it gets checked in each cond()
      # branch, so we need a special case to avoid graph-build-time
      # exceptions.
      if observation_noise.get_shape().ndims == 2:
        posterior_state_var = _matrix_observation_noise_update()
      else:
        posterior_state_var = _batch_observation_noise_update()
  return posterior_state, posterior_state_var
def _mean(self):
  """Returns `alpha` divided by `alpha_sum` broadcast over the last axis."""
  concentration = self.alpha
  # A trailing axis lets the per-distribution total divide every component.
  total = array_ops.expand_dims(self.alpha_sum, -1)
  return concentration / total
def embedding_lookup_sparse(params, sp_ids, sp_weights,
                            partition_strategy="mod",
                            name=None,
                            combiner=None,
                            max_norm=None):
  """Looks up embeddings for sparse ids and combines them per row.

  The op assumes every row of the dense tensor represented by `sp_ids` has at
  least one id, that the indices of `sp_ids` are in canonical row-major order,
  and that all ids lie in `[0, p0)` where `p0` is the total size of `params`
  along dimension 0.

  Args:
    params: A single embedding tensor, a list of P tensors that share every
      dimension but the first (sharded embeddings), or a `PartitionedVariable`
      partitioned along dimension 0. Each element must be sized appropriately
      for `partition_strategy`.
    sp_ids: N x M `SparseTensor` of int64 ids, where N is typically the batch
      size and M is arbitrary.
    sp_weights: Either a `SparseTensor` of float / double weights with exactly
      the same shape and indices as `sp_ids`, or `None` to treat every weight
      as 1.
    partition_strategy: `"div"` or `"mod"` (default). Only relevant when
      `len(params) > 1`. See `tf.nn.embedding_lookup` for details.
    name: Optional name for the op.
    combiner: Reduction applied within each row: `"sum"` is the weighted sum,
      `"mean"` is the weighted sum divided by the total weight, and `"sqrtn"`
      is the weighted sum divided by the square root of the sum of squared
      weights. `None` logs a warning and is treated as `"mean"`.
    max_norm: Forwarded unchanged to `embedding_lookup`.

  Returns:
    A dense tensor of combined embeddings. If
      shape(combined params) = [p0, p1, ..., pm]
    and
      shape(sp_ids) = shape(sp_weights) = [d0, d1, ..., dn]
    then
      shape(output) = [d0, d1, ..., dn-1, p1, ..., pm].

    For example, with a 10x20 `params` matrix and sp_ids / sp_weights

      [0, 0]: id 1, weight 2.0
      [0, 1]: id 3, weight 0.5
      [1, 0]: id 0, weight 1.0
      [2, 3]: id 1, weight 3.0

    and `combiner`="mean", the output is a 3x20 matrix where

      output[0, :] = (params[1, :] * 2.0 + params[3, :] * 0.5) / (2.0 + 0.5)
      output[1, :] = (params[0, :] * 1.0) / 1.0
      output[2, :] = (params[1, :] * 3.0) / 3.0

  Raises:
    TypeError: If `sp_ids` is not a `SparseTensor`, or if `sp_weights` is
      neither `None` nor a `SparseTensor`.
    ValueError: If `combiner` is not one of {"mean", "sqrtn", "sum"}.
  """
  if combiner is None:
    logging.warn("The default value of combiner will change from \"mean\" "
                 "to \"sqrtn\" after 2016/11/01.")
    combiner = "mean"
  if combiner not in ("mean", "sqrtn", "sum"):
    raise ValueError("combiner must be one of 'mean', 'sqrtn' or 'sum'")

  # Normalize `params` to a plain list of tensors / variables.
  if isinstance(params, variables.PartitionedVariable):
    params = list(params)  # Iterate to get the underlying Variables.
  if not isinstance(params, list):
    params = [params]

  if not isinstance(sp_ids, sparse_tensor.SparseTensor):
    raise TypeError("sp_ids must be SparseTensor")

  has_weights = sp_weights is not None
  if has_weights:
    if not isinstance(sp_weights, sparse_tensor.SparseTensor):
      raise TypeError("sp_weights must be either None or SparseTensor")
    # Static compatibility checks only.
    # TODO(yleon): Add enhanced node assertions to verify that sp_ids and
    # sp_weights have equal indices and shapes.
    sp_ids.values.get_shape().assert_is_compatible_with(
        sp_weights.values.get_shape())
    sp_ids.indices.get_shape().assert_is_compatible_with(
        sp_weights.indices.get_shape())
    sp_ids.dense_shape.get_shape().assert_is_compatible_with(
        sp_weights.dense_shape.get_shape())

  with ops.name_scope(name, "embedding_lookup_sparse",
                      params + [sp_ids]) as name:
    # The row index of each sparse entry identifies its output segment.
    row_ids = sp_ids.indices[:, 0]
    if row_ids.dtype != dtypes.int32:
      row_ids = math_ops.cast(row_ids, dtypes.int32)

    if has_weights:
      lookup_ids = sp_ids.values
      positions = None
    else:
      # Without weights each distinct id is looked up once; `positions` maps
      # every original entry back to its row in the deduplicated lookup.
      lookup_ids, positions = array_ops.unique(sp_ids.values)

    looked_up = embedding_lookup(
        params, lookup_ids,
        partition_strategy=partition_strategy,
        max_norm=max_norm)

    if not has_weights:
      assert positions is not None
      sparse_reducers = {
          "sum": math_ops.sparse_segment_sum,
          "mean": math_ops.sparse_segment_mean,
          "sqrtn": math_ops.sparse_segment_sqrt_n,
      }
      reducer = sparse_reducers.get(combiner)
      assert reducer is not None, "Unrecognized combiner"
      return reducer(looked_up, positions, row_ids, name=name)

    weights = sp_weights.values
    if weights.dtype != looked_up.dtype:
      weights = math_ops.cast(weights, looked_up.dtype)

    # Append size-1 axes so the weights broadcast against the embeddings.
    trailing_ones = array_ops.fill(
        array_ops.expand_dims(array_ops.rank(looked_up) - 1, 0), 1)
    broadcast_shape = array_ops.concat(
        [array_ops.shape(weights), trailing_ones], 0)
    static_weights_shape = weights.get_shape()
    weights = array_ops.reshape(weights, broadcast_shape)

    # The reshape drops static shape information; restore it when the
    # embedding rank is known.
    if looked_up.get_shape().ndims is not None:
      extra_axes = [1] * (looked_up.get_shape().ndims - 1)
      weights.set_shape(static_weights_shape.concatenate(extra_axes))

    scaled = looked_up * weights

    if combiner == "sum":
      return math_ops.segment_sum(scaled, row_ids, name=name)
    if combiner == "mean":
      summed = math_ops.segment_sum(scaled, row_ids)
      total_weight = math_ops.segment_sum(weights, row_ids)
      return math_ops.div(summed, total_weight, name=name)
    if combiner == "sqrtn":
      summed = math_ops.segment_sum(scaled, row_ids)
      squared_weights = math_ops.pow(weights, 2)
      total_squared = math_ops.segment_sum(squared_weights, row_ids)
      norm = math_ops.sqrt(total_squared)
      return math_ops.div(summed, norm, name=name)
    assert False, "Unrecognized combiner"
def resize_images(images,
                  new_height,
                  new_width,
                  method=ResizeMethod.BILINEAR,
                  align_corners=False):
  """Resize `images` to `new_width`, `new_height` using the specified `method`.

  The result is distorted when the original aspect ratio differs from that of
  `new_width`, `new_height`. To avoid distortions see
  [`resize_image_with_crop_or_pad`](#resize_image_with_crop_or_pad).

  Supported values of `method`:

  *   `ResizeMethod.BILINEAR`: [Bilinear interpolation.]
      (https://en.wikipedia.org/wiki/Bilinear_interpolation)
  *   `ResizeMethod.NEAREST_NEIGHBOR`: [Nearest neighbor interpolation.]
      (https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
  *   `ResizeMethod.BICUBIC`: [Bicubic interpolation.]
      (https://en.wikipedia.org/wiki/Bicubic_interpolation)
  *   `ResizeMethod.AREA`: Area interpolation.

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]` or
            3-D Tensor of shape `[height, width, channels]`.
    new_height: integer (a Python int or a scalar integer tensor).
    new_width: integer (a Python int or a scalar integer tensor).
    method: ResizeMethod.  Defaults to `ResizeMethod.BILINEAR`.
    align_corners: bool. If true, exactly align all 4 corners of the input and
                   output. Defaults to `false`.

  Raises:
    ValueError: if `images` has no static rank, or if `new_height` /
      `new_width` cannot be converted to a scalar int32 tensor.
    ValueError: if an unsupported resize method is specified.

  Returns:
    If `images` was 4-D, a 4-D Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `images` was 3-D, a 3-D Tensor of shape
    `[new_height, new_width, channels]`.
    When the static size already equals the requested size the input is
    returned without going through a resize op.
  """
  def _scalar_int32(value, arg_name):
    """Converts `value` to a rank-0 int32 tensor named `arg_name`."""
    try:
      tensor = ops.convert_to_tensor(value, dtypes.int32, name=arg_name)
      tensor.get_shape().assert_has_rank(0)
    except (TypeError, ValueError):
      raise ValueError('%s must be a scalar integer' % arg_name)
    return tensor

  if images.get_shape().ndims is None:
    raise ValueError('\'images\' contains no shape.')

  # TODO(shlens): Migrate this functionality to the underlying Op's.
  # A single image is temporarily given a batch dimension of 1.
  is_batch = len(images.get_shape()) != 3
  if not is_batch:
    images = array_ops.expand_dims(images, 0)

  _, height, width, _ = _ImageDimensions(images)

  # Handle tensor-valued sizes as well as Python integers.
  new_width = _scalar_int32(new_width, 'new_width')
  new_height = _scalar_int32(new_height, 'new_height')

  new_width_const = tensor_util.constant_value(new_width)
  new_height_const = tensor_util.constant_value(new_height)

  # Nothing to do when the image already has the requested size.
  if width == new_width_const and height == new_height_const:
    if is_batch:
      return images
    return array_ops.squeeze(images, squeeze_dims=[0])

  new_size = array_ops.pack([new_height, new_width])

  if method == ResizeMethod.BILINEAR:
    resize_op = gen_image_ops.resize_bilinear
  elif method == ResizeMethod.NEAREST_NEIGHBOR:
    resize_op = gen_image_ops.resize_nearest_neighbor
  elif method == ResizeMethod.BICUBIC:
    resize_op = gen_image_ops.resize_bicubic
  elif method == ResizeMethod.AREA:
    resize_op = gen_image_ops.resize_area
  else:
    raise ValueError('Resize method is not implemented.')
  images = resize_op(images, new_size, align_corners=align_corners)

  # NOTE(mrry): The shape functions for the resize ops cannot unpack
  # the packed values in `new_size`, so set the shape here.
  images.set_shape([None, new_height_const, new_width_const, None])

  if is_batch:
    return images
  return array_ops.squeeze(images, squeeze_dims=[0])
def linear_to_mel_weight_matrix(num_mel_bins=20,
                                num_spectrogram_bins=129,
                                sample_rate=8000,
                                lower_edge_hertz=125.0,
                                upper_edge_hertz=3800.0,
                                dtype=dtypes.float32,
                                name=None):
  """Returns a matrix to warp linear scale spectrograms to the [mel scale][mel].

  The returned weight matrix re-weights a `Tensor` holding
  `num_spectrogram_bins` linearly sampled frequency bins covering
  `[0, sample_rate / 2]` into `num_mel_bins` bands covering
  `[lower_edge_hertz, upper_edge_hertz]` on the [mel scale][mel].

  For example, the returned matrix `A` can right-multiply a spectrogram `S`
  of shape `[frames, num_spectrogram_bins]` of linear scale spectrum values
  (e.g. STFT magnitudes) to produce a "mel spectrogram" `M` of shape
  `[frames, num_mel_bins]`.

      # `S` has shape [frames, num_spectrogram_bins]
      # `M` has shape [frames, num_mel_bins]
      M = tf.matmul(S, A)

  With `tf.tensordot` the matrix converts an arbitrary rank `Tensor` of
  linear-scale spectral bins into the mel scale.

      # S has shape [..., num_spectrogram_bins].
      # M has shape [..., num_mel_bins].
      M = tf.tensordot(S, A, 1)
      # tf.tensordot does not support shape inference for this case yet.
      M.set_shape(S.shape[:-1].concatenate(A.shape[-1:]))

  Args:
    num_mel_bins: Python int. How many bands in the resulting mel spectrum.
    num_spectrogram_bins: An integer `Tensor`. How many bins there are in the
      source spectrogram data, which is understood to be `fft_size // 2 + 1`,
      i.e. the spectrogram only contains the nonredundant FFT bins.
    sample_rate: Python float. Samples per second of the input signal used to
      create the spectrogram. It determines the frequency of each spectrogram
      bin, and therefore how the bins map onto the mel scale.
    lower_edge_hertz: Python float. Lower bound on the frequencies to be
      included in the mel spectrum; the lower edge of the lowest triangular
      band.
    upper_edge_hertz: Python float. The desired top edge of the highest
      frequency band.
    dtype: The `DType` of the result matrix. Must be a floating point type.
    name: An optional name for the operation.

  Returns:
    A `Tensor` of shape `[num_spectrogram_bins, num_mel_bins]`.

  Raises:
    ValueError: If num_mel_bins/num_spectrogram_bins/sample_rate are not
      positive, lower_edge_hertz is negative, frequency edges are incorrectly
      ordered, or upper_edge_hertz is larger than the Nyquist frequency.

  [mel]: https://en.wikipedia.org/wiki/Mel_scale
  """
  with ops.name_scope(name, 'linear_to_mel_weight_matrix') as name:
    # Note: `num_spectrogram_bins` is handed to `math_ops.linspace`, which
    # already validates it (both in the shape function and in the kernel), so
    # it is not validated again here.
    _validate_arguments(num_mel_bins, sample_rate,
                        lower_edge_hertz, upper_edge_hertz, dtype)

    # This function can be constant folded by graph optimization since there
    # are no Tensor inputs.
    rate = ops.convert_to_tensor(sample_rate, dtype, name='sample_rate')
    low_hz = ops.convert_to_tensor(
        lower_edge_hertz, dtype, name='lower_edge_hertz')
    high_hz = ops.convert_to_tensor(
        upper_edge_hertz, dtype, name='upper_edge_hertz')
    zero = ops.convert_to_tensor(0.0, dtype)

    # HTK excludes the spectrogram DC bin.
    dropped_bins = 1
    nyquist_hz = rate / 2.0
    all_bin_hz = math_ops.linspace(zero, nyquist_hz, num_spectrogram_bins)
    bin_hz = all_bin_hz[dropped_bins:]
    # Column vector of bin frequencies in mel, ready to broadcast against the
    # [1, num_mel_bins] band edges below.
    bin_mel = array_ops.expand_dims(_hertz_to_mel(bin_hz), 1)

    # Compute num_mel_bins triples of (lower_edge, center, upper_edge). The
    # center of each band is the lower and upper edge of the adjacent bands.
    # Accordingly, [lower_edge_hertz, upper_edge_hertz] is divided into
    # num_mel_bins + 2 pieces.
    edge_points_mel = math_ops.linspace(
        _hertz_to_mel(low_hz), _hertz_to_mel(high_hz), num_mel_bins + 2)
    band_triples_mel = shape_ops.frame(
        edge_points_mel, frame_length=3, frame_step=1)

    # Split the triples up and reshape them into [1, num_mel_bins] tensors.
    band_low, band_center, band_high = [
        array_ops.reshape(part, [1, num_mel_bins])
        for part in array_ops.split(band_triples_mel, 3, axis=1)]

    # Calculate lower and upper slopes for every spectrogram bin.
    # Line segments are linear in the mel domain, not Hertz.
    rising = (bin_mel - band_low) / (band_center - band_low)
    falling = (band_high - bin_mel) / (band_high - band_center)

    # Intersect the line segments with each other and zero.
    triangles = math_ops.maximum(zero, math_ops.minimum(rising, falling))

    # Re-add the zeroed lower bins we sliced out above.
    return array_ops.pad(triangles, [[dropped_bins, 0], [0, 0]], name=name)
def loop_fn(i):
  """Applies `model_fn` to element `i` of `inp` with a leading axis of 1.

  `inp` and `model_fn` are taken from the enclosing scope.
  """
  element = array_ops.gather(inp, i)
  single_example_batch = array_ops.expand_dims(element, 0)
  return model_fn(single_example_batch)
def _SvdGrad(op, grad_s, grad_u, grad_v):
  """Gradient for the singular value decomposition.

  Args:
    op: The SVD op being differentiated. Its first input is the matrix `a`;
      its `compute_uv` and `full_matrices` attributes and its three outputs
      (read here as `s`, `u`, `v`) are used.
    grad_s: Gradient with respect to the singular values output.
    grad_u: Gradient with respect to the second output (`u`). Not used when
      `compute_uv` is False.
    grad_v: Gradient with respect to the third output (`v`). Not used when
      `compute_uv` is False.

  Returns:
    The gradient with respect to `a`, with its static shape set to the shape
    of `a` (merged with the static shapes of `grad_u` / `grad_v` when
    `compute_uv` is True).

  Raises:
    NotImplementedError: When `compute_uv` is True and any of the following
      holds: `a` is complex, the inner matrix dimensions of `a` are not
      statically known, or `full_matrices` is True and the two inner
      dimensions differ by more than 1.
  """

  # The derivation for the compute_uv=False case, and most of
  # the derivation for the full_matrices=True case, are in
  # Giles' paper (see reference at top of file).  A derivation for
  # the full_matrices=False case is available at
  # https://j-towns.github.io/papers/svd-derivative.pdf
  a = op.inputs[0]
  a_shape = a.get_shape().with_rank_at_least(2)
  grad_s_mat = array_ops.matrix_diag(grad_s)

  if not op.get_attr("compute_uv"):
    # Only singular values were produced by the forward op, so the SVD is
    # recomputed with compute_uv=True to obtain u and v.
    s, u, v = linalg_ops.svd(a, compute_uv=True)
    grad_a = math_ops.matmul(
        u, math_ops.matmul(grad_s_mat, v, adjoint_b=True))
    grad_a.set_shape(a_shape)
    return grad_a

  full_matrices = op.get_attr("full_matrices")

  # TODO(rmlarsen): Make this work with complex types.
  if a.dtype.is_complex:
    raise NotImplementedError(
        "SVD gradient is not implemented for complex types and "
        "compute_uv=True.")
  grad_u_shape = grad_u.get_shape().with_rank_at_least(2)
  grad_v_shape = grad_v.get_shape().with_rank_at_least(2)
  # Merge static shape information from `a` and the incoming gradients so
  # that as many dimensions as possible are known.
  m = a_shape.dims[-2].merge_with(grad_u_shape[-2])
  n = a_shape.dims[-1].merge_with(grad_v_shape[-2])
  batch_shape = a_shape[:-2].merge_with(grad_u_shape[:-2]).merge_with(
      grad_v_shape[:-2])
  a_shape = batch_shape.concatenate([m, n])

  m = a_shape.dims[-2].value
  n = a_shape.dims[-1].value
  # TODO(rmlarsen): Make this work with placeholders.
  if m is None or n is None:
    raise NotImplementedError(
        "SVD gradient has not been implemented for input with unknown "
        "inner matrix shape.")

  s = op.outputs[0]
  u = op.outputs[1]
  v = op.outputs[2]

  use_adjoint = False
  if m > n:
    # Compute the gradient for A^H = V * S^T * U^H, and (implicitly) take the
    # Hermitian transpose of the gradient at the end.
    use_adjoint = True
    m, n = n, m
    u, v = v, u
    grad_u, grad_v = grad_v, grad_u

  # From here on m <= n holds (the roles were swapped above if needed).
  with ops.control_dependencies([grad_s, grad_u, grad_v]):
    if full_matrices and abs(m - n) > 1:
      raise NotImplementedError(
          "svd gradient is not implemented for abs(m - n) > 1 "
          "when full_matrices is True")
    s_mat = array_ops.matrix_diag(s)
    s2 = math_ops.square(s)

    # NOTICE: Because of the term involving f, the gradient becomes
    # infinite (or NaN in practice) when singular values are not unique.
    # Mathematically this should not be surprising, since for (k-fold)
    # degenerate singular values, the corresponding singular vectors are
    # only defined up a (k-dimensional) subspace. In practice, this can
    # lead to numerical instability when singular values are close but not
    # exactly equal.
    # Also, even with distinct singular values, the diagonal of f can have Inf
    # values before setting to zero, which hurt when differentiating through
    # this op. To avoid that, we add eye to the matrix before taking
    # the reciprocal.
    s_shape = array_ops.shape(s)
    eye = _linalg.eye(s_shape[-1], batch_shape=s_shape[:-1], dtype=s.dtype)
    # f[..., i, j] = 1 / (s_j^2 - s_i^2) off the diagonal, 0 on the diagonal.
    f = array_ops.matrix_set_diag(
        math_ops.reciprocal(
            array_ops.expand_dims(s2, -2) - array_ops.expand_dims(s2, -1) +
            eye), array_ops.zeros_like(s))
    s_inv_mat = array_ops.matrix_diag(math_ops.reciprocal(s))

    # First m columns of v and of its gradient.
    v1 = v[..., :, :m]
    grad_v1 = grad_v[..., :, :m]

    u_gu = math_ops.matmul(u, grad_u, adjoint_a=True)
    v_gv = math_ops.matmul(v1, grad_v1, adjoint_a=True)

    f_u = f * u_gu
    f_v = f * v_gv

    term1_nouv = (
        grad_s_mat + math_ops.matmul(f_u + _linalg.adjoint(f_u), s_mat) +
        math_ops.matmul(s_mat, f_v + _linalg.adjoint(f_v)))

    term1 = math_ops.matmul(
        u, math_ops.matmul(term1_nouv, v1, adjoint_b=True))

    if m == n:
      grad_a_before_transpose = term1
    else:
      # Non-square case: an additional term is added to term1.
      gv1t = array_ops.matrix_transpose(grad_v1)
      gv1t_v1 = math_ops.matmul(gv1t, v1)
      term2_nous = gv1t - math_ops.matmul(gv1t_v1, v1, adjoint_b=True)

      if full_matrices:
        # Remaining columns m..n of v, present only with full_matrices=True.
        v2 = v[..., :, m:n]
        grad_v2 = grad_v[..., :, m:n]

        v1t_gv2 = math_ops.matmul(v1, grad_v2, adjoint_a=True)
        term2_nous -= math_ops.matmul(v1t_gv2, v2, adjoint_b=True)

      u_s_inv = math_ops.matmul(u, s_inv_mat)
      term2 = math_ops.matmul(u_s_inv, term2_nous)

      grad_a_before_transpose = term1 + term2

    if use_adjoint:
      # Undo the role swap performed for m > n.
      grad_a = array_ops.matrix_transpose(grad_a_before_transpose)
    else:
      grad_a = grad_a_before_transpose

    grad_a.set_shape(a_shape)
    return grad_a
def _sparse(i):
  """Builds a 1-D `SparseTensorValue` of `i` entries, each with value `i`.

  The indices are `[[0], [1], ..., [i - 1]]` and the dense shape is `[i]`.
  """
  indices = array_ops.expand_dims(math_ops.range(i, dtype=dtypes.int64), 1)
  values = array_ops.fill([math_ops.to_int32(i)], i)
  return sparse_tensor.SparseTensorValue(
      indices=indices, values=values, dense_shape=[i])
def _parse_single_example_raw(serialized,
                              names=None,
                              sparse_keys=None,
                              sparse_types=None,
                              dense_keys=None,
                              dense_types=None,
                              dense_defaults=None,
                              dense_shapes=None,
                              name=None):
  """Parses a single `Example` proto.

  The scalar inputs are wrapped into a batch of one, handed to
  `_parse_example_raw`, and the batch dimension is stripped from every
  requested dense and sparse output.

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
      See `_parse_example_raw` documentation for more details.
    names: (Optional) A scalar string Tensor, the associated name.
      See `_parse_example_raw` documentation for more details.
    sparse_keys: See `_parse_example_raw` documentation for more details.
    sparse_types: See `_parse_example_raw` documentation for more details.
    dense_keys: See `_parse_example_raw` documentation for more details.
    dense_types: See `_parse_example_raw` documentation for more details.
    dense_defaults: See `_parse_example_raw` documentation for more details.
    dense_shapes: See `_parse_example_raw` documentation for more details.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if any feature is invalid, or if `serialized` / `names` has a
      statically known rank other than 0.
  """
  def _checked_scalar(value, message, assert_name, deps_name):
    """Converts `value` to a tensor and enforces that it is rank 0.

    A known static rank is checked immediately; an unknown one is guarded by
    a runtime assertion attached as a control dependency.
    """
    tensor = ops.convert_to_tensor(value)
    static_rank = tensor.get_shape().ndims
    if static_rank is None:
      is_scalar = math_ops.equal(array_ops.rank(tensor), 0)
      check = control_flow_ops.Assert(is_scalar, [message], name=assert_name)
      return control_flow_ops.with_dependencies(
          [check], tensor, name=deps_name)
    if static_rank != 0:
      raise ValueError(message)
    return tensor

  def _op_safe(key):
    """Replaces characters that are not allowed in op names with `_`."""
    return re.sub("[^A-Za-z0-9_.\\-/]", "_", key)

  with ops.name_scope(name, "ParseSingleExample", [serialized, names]):
    serialized = _checked_scalar(
        serialized, "Input serialized must be a scalar",
        "SerializedIsScalar", "SerializedDependencies")
    # Batch of one for the batched parser.
    serialized = array_ops.expand_dims(serialized, 0)

    if names is not None:
      names = _checked_scalar(
          names, "Input names must be a scalar",
          "NamesIsScalar", "NamesDependencies")
      names = array_ops.expand_dims(names, 0)

    outputs = _parse_example_raw(
        serialized,
        names=names,
        sparse_keys=sparse_keys,
        sparse_types=sparse_types,
        dense_keys=dense_keys,
        dense_types=dense_types,
        dense_defaults=dense_defaults,
        dense_shapes=dense_shapes,
        name=name)

    if dense_keys is not None:
      # Drop the batch dimension from each dense feature.
      for dense_key in dense_keys:
        outputs[dense_key] = array_ops.squeeze(
            outputs[dense_key], [0],
            name="Squeeze_%s" % _op_safe(dense_key))

    if sparse_keys is not None:
      # Drop the batch column from the indices and the batch entry from the
      # shape of each sparse feature; the values are kept as they are.
      for sparse_key in sparse_keys:
        safe_name = _op_safe(sparse_key)
        batched = outputs[sparse_key]
        unbatched_indices = array_ops.slice(
            batched.indices, [0, 1], [-1, -1],
            name="Slice_Indices_%s" % safe_name)
        unbatched_values = outputs[sparse_key].values
        unbatched_shape = array_ops.slice(
            outputs[sparse_key].shape, [1], [-1],
            name="Squeeze_Shape_%s" % safe_name)
        outputs[sparse_key] = sparse_tensor.SparseTensor(
            unbatched_indices, unbatched_values, unbatched_shape)

    return outputs