def testShapeInferenceKnownShape(self):
    """Check the static shape of `sparse_to_dense` for list and placeholder shapes."""
    with self.session(use_gpu=False):
        sparse_indices = array_ops.placeholder(dtypes.int64)

        # Output shape given as a Python list: the static shape must match it.
        static_dims = [4, 5, 6]
        dense = sparse_ops.sparse_to_dense(sparse_indices, static_dims, 1, 0)
        self.assertEqual(dense.get_shape(), [4, 5, 6])

        # Output shape given as a 3-element placeholder: the test expects
        # three dimensions, all of them unknown.
        dynamic_dims = array_ops.placeholder(dtypes.int64, shape=(3,))
        dense = sparse_ops.sparse_to_dense(sparse_indices, dynamic_dims, 1, 0)
        self.assertEqual(dense.get_shape().as_list(), [None, None, None])
def one_hot_mask(labels, num_classes, scope=None):
    """Build a one-hot encoding for every pixel of a label image.

    Args:
      labels: tensor of labels whose last dimension must have size 1.
        NOTE(review): earlier documentation described this as
        (batch_size, width, height, 1), but the body unpacks exactly three
        dimensions from `_shape(labels)` -- confirm the expected rank.
      num_classes: number of classes, i.e. the size of the one-hot axis.
      scope: optional scope name.

    Returns:
      The one-hot tensor, reshaped to `[height, width, num_classes]` where
      `height` and `width` are the first two values returned by
      `_shape(labels)`.
    """
    with ops.name_scope(scope, "OneHotMask", [labels]):
        height, width, depth = _shape(labels)
        assert depth == 1

        # One row per pixel, holding that pixel's class id.
        flat_labels = math_ops.to_int32(array_ops.reshape(labels, [-1, 1]))
        num_pixels, _ = _shape(flat_labels)

        # Pair every pixel row number with its class id: these are the
        # (row, column) coordinates that get set to 1.0.
        row_ids = array_ops.reshape(math_ops.range(0, num_pixels, 1), [-1, 1])
        coords = array_ops.concat([row_ids, flat_labels], 1)
        one_hot = sparse_ops.sparse_to_dense(
            coords, [num_pixels, num_classes], 1.0, 0.0)

        return array_ops.reshape(one_hot, [height, width, num_classes])
def testSparseExpandDims(self):
    """Compare `sparse_expand_dims` against `np.expand_dims` for ranks 1-3."""
    for rank in range(1, 4):
        # Dummy dense input. When rank=3, its shape is [2, 4, 6].
        dims = np.arange(1, rank + 1) * 2
        dense_before = np.arange(np.prod(dims)).reshape(dims)
        # Randomly zero entries so that the input is actually sparse.
        dense_before *= np.random.binomial(1, .2, dense_before.shape)

        dense_shape = dense_before.shape
        indices = np.array(np.where(dense_before)).T
        values = dense_before[dense_before != 0]

        # Try every possible valid value of axis.
        for axis in range(-rank - 1, rank):
            expected_after = np.expand_dims(dense_before, axis)

            # Exercise the axis both as a Python int and as a constant tensor.
            for axis_as_tensor in [False, True]:
                dense_shape_t = constant_op.constant(
                    dense_shape, dtype=dtypes.int64)
                indices_t = constant_op.constant(indices)
                values_t = constant_op.constant(values)
                sparse_before = sparse_tensor.SparseTensor(
                    indices=indices_t,
                    values=values_t,
                    dense_shape=dense_shape_t)

                axis_arg = constant_op.constant(axis) if axis_as_tensor else axis
                expanded = sparse_ops.sparse_expand_dims(sparse_before, axis_arg)
                densified = sparse_ops.sparse_to_dense(
                    expanded.indices, expanded.dense_shape, expanded.values)
                self.assertAllEqual(self.evaluate(densified), expected_after)
def _TopKGrad(op, grad, _):
    """Return the gradients for TopK.

    Args:
      op: The TopKOp for which we need to generate gradients.
      grad: Tensor. The gradients passed to the TopKOp.
      _: Ignored third argument.

    Returns:
      A list of two tensors: the gradient w.r.t. the input, shaped like
      `op.inputs[0]`, and a scalar int32 zero as the second gradient.
    """
    input_shape = array_ops.shape(op.inputs[0])
    indices_shape = array_ops.shape(op.outputs[1])
    indices_last_dim = array_ops.gather(
        indices_shape, array_ops.size(indices_shape) - 1)

    # Flatten the indices output to 2D: one row per slice along the last axis.
    indices_2d = array_ops.reshape(
        op.outputs[1], array_ops.stack([-1, indices_last_dim]))

    input_last_dim = array_ops.gather(
        input_shape, array_ops.size(input_shape) - 1)
    num_rows = array_ops.shape(indices_2d)[0]

    # Turn per-row indices into linear indices into the flattened input by
    # adding each row's starting offset.
    row_offsets = array_ops.expand_dims(
        math_ops.range(0, num_rows * input_last_dim, input_last_dim), -1)
    linear_indices = array_ops.reshape(indices_2d + row_offsets, [-1])

    # Scatter grad into a zero-filled flat tensor, then restore the input shape.
    flat_size = array_ops.reshape(math_ops.reduce_prod(input_shape), [1])
    flat_grad = sparse_ops.sparse_to_dense(
        linear_indices,
        flat_size,
        array_ops.reshape(grad, [-1]),
        validate_indices=False)
    input_grad = array_ops.reshape(flat_grad, input_shape)

    return [input_grad, array_ops.zeros([], dtype=dtypes.int32)]
def _check(self, result_tensor, result_np, input_sp_t):
    """Assert that `result_tensor` keeps the input's sparsity and matches `result_np`.

    Args:
      result_tensor: sparse result under test; its `indices`, `shape` and
        `values` are read.
      result_np: expected dense values.
      input_sp_t: sparse input whose `indices` and `shape` the result must
        reproduce.
    """
    # The sparsity pattern and the dense shape must be unchanged.
    expected_indices = input_sp_t.indices.eval()
    actual_indices = result_tensor.indices.eval()
    self.assertAllEqual(expected_indices, actual_indices)

    expected_shape = input_sp_t.shape.eval()
    actual_shape = result_tensor.shape.eval()
    self.assertAllEqual(expected_shape, actual_shape)

    # Densify the result and compare it with the expected values.
    dense_op = sparse_ops.sparse_to_dense(
        result_tensor.indices, result_tensor.shape, result_tensor.values)
    res_densified = dense_op.eval()
    self.assertAllEqual(res_densified, result_np)
def test_one(n, m, as_tensors):
    """Densify `sparse_eye(n, m)` and compare it with `np.eye(n, m)`.

    Args:
      n: first size argument, passed to both `np.eye` and `sparse_eye`.
      m: second size argument, passed to both `np.eye` and `sparse_eye`.
      as_tensors: if true, both sizes are wrapped in constant tensors before
        being handed to `sparse_eye`.
    """
    # The reference is always built from the plain Python values.
    expected = np.eye(n, m)

    rows_arg, cols_arg = n, m
    if as_tensors:
        cols_arg = constant_op.constant(m)
        rows_arg = constant_op.constant(n)

    eye = sparse_ops.sparse_eye(rows_arg, cols_arg)
    densified = sparse_ops.sparse_to_dense(
        eye.indices, eye.dense_shape, eye.values)
    self.assertAllEqual(self.evaluate(densified), expected)
def _check(self, result_tensor, result_np, input_sp_t):
    """Assert that `result_tensor` keeps the input's sparsity and matches `result_np`.

    Args:
      result_tensor: result under test; must be a `SparseTensor`.
      result_np: expected dense values.
      input_sp_t: input; must be a `SparseTensor`. The result must reproduce
        its `indices` and `dense_shape`.
    """
    # assertIsInstance reports the offending type on failure, unlike
    # assertTrue(isinstance(...)), which only says "False is not true".
    self.assertIsInstance(result_tensor, sparse_tensor.SparseTensor)
    self.assertIsInstance(input_sp_t, sparse_tensor.SparseTensor)

    # The sparsity pattern and the dense shape must be unchanged.
    self.assertAllEqual(input_sp_t.indices, result_tensor.indices)
    self.assertAllEqual(input_sp_t.dense_shape, result_tensor.dense_shape)

    # Densify the result and compare it with the expected values.
    res_densified = sparse_ops.sparse_to_dense(
        result_tensor.indices,
        result_tensor.dense_shape,
        result_tensor.values)
    self.assertAllEqual(result_np, res_densified)
def sufficient_statistics(x, axes, shift=None, keep_dims=False, name=None):
    """Calculate the sufficient statistics for the mean and variance of `x`.

    These sufficient statistics are computed using the one pass algorithm on
    an input that's optionally shifted. See:
    https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Computing_shifted_data

    Args:
      x: A `Tensor`.
      axes: Array of ints. Axes along which to compute mean and variance.
      shift: A `Tensor` containing the value by which to shift the data for
        numerical stability, or `None` if no shift is to be performed. A shift
        close to the true mean provides the most numerically stable results.
      keep_dims: produce statistics with the same dimensionality as the input.
      name: Name used to scope the operations that compute the sufficient
        stats.

    Returns:
      Four `Tensor` objects of the same type as `x`:
      * the count (number of elements to average over).
      * the (possibly shifted) sum of the elements in the array.
      * the (possibly shifted) sum of squares of the elements in the array.
      * the shift by which the mean must be corrected or None if `shift` is
        None.
    """
    with ops.op_scope([x, axes, shift], name, "sufficient_statistics"):
        x = ops.convert_to_tensor(x, name="x")
        x_shape = x.get_shape()
        if x_shape.is_fully_defined():
            # Static shape: the count is the product of the reduced dimensions.
            # The set is built once instead of once per dimension.
            reduced_axes = set(axes)
            counts = 1
            for d in xrange(x_shape.ndims):
                if d in reduced_axes:
                    counts *= x_shape[d].value
            counts = constant_op.constant(counts, dtype=x.dtype)
        else:  # shape needs to be inferred at runtime.
            x_shape = array_ops.shape(x)
            # Boolean vector over the dimensions: True where the axis is reduced.
            select_axes = sparse_ops.sparse_to_dense(
                axes, array_ops.shape(x_shape), True, False)
            # Shape of x with every reduced dimension replaced by 1.
            m_shape = math_ops.select(
                select_axes, array_ops.ones_like(x_shape), x_shape)
            counts = math_ops.cast(
                math_ops.reduce_prod(x_shape / m_shape), x.dtype, name="count")
        if shift is not None:
            shift = ops.convert_to_tensor(shift, name="shift")
            m_ss = math_ops.sub(x, shift)
            v_ss = math_ops.squared_difference(x, shift)
        else:  # no shift.
            m_ss = x
            v_ss = math_ops.square(x)
        m_ss = math_ops.reduce_sum(
            m_ss, axes, keep_dims=keep_dims, name="mean_ss")
        v_ss = math_ops.reduce_sum(
            v_ss, axes, keep_dims=keep_dims, name="var_ss")
    return counts, m_ss, v_ss, shift
def sequence_loss_by_example(logits, targets, weights, num_decoder_symbols,
                             average_across_timesteps=True,
                             softmax_loss_function=None, name=None):
    """Weighted cross-entropy loss for a sequence of logits (per example).

    Args:
      logits: list of 2D Tensors of shape [batch_size x num_decoder_symbols].
      targets: list of 1D batch-sized int32 Tensors of the same length as
        logits.
      weights: list of 1D batch-sized float-Tensors of the same length as
        logits.
      num_decoder_symbols: integer, number of decoder symbols (output classes).
      average_across_timesteps: If set, divide the returned cost by the total
        label weight.
      softmax_loss_function: function (inputs-batch, labels-batch) -> loss-batch
        to be used instead of the standard softmax (the default if this is
        None).
      name: optional name for this operation, default:
        "sequence_loss_by_example".

    Returns:
      1D batch-sized float Tensor: the log-perplexity for each sequence.

    Raises:
      ValueError: if len(logits) is different from len(targets) or
        len(weights).
    """
    num_steps = len(logits)
    if len(targets) != num_steps or len(weights) != num_steps:
        raise ValueError("Lengths of logits, weights, and targets must be the same "
                         "%d, %d, %d." % (len(logits), len(weights), len(targets)))

    with ops.op_scope(logits + targets + weights, name,
                      "sequence_loss_by_example"):
        batch_size = array_ops.shape(targets[0])[0]
        length = batch_size * num_decoder_symbols
        log_perp_list = []

        steps = zip(logits, targets, weights)
        for i, (step_logits, step_targets, step_weights) in enumerate(steps):
            if softmax_loss_function is not None:
                crossent = softmax_loss_function(step_logits, step_targets)
            else:
                # TODO(lukaszkaiser): There is no SparseCrossEntropy in
                # TensorFlow, so we need to first cast targets into a dense
                # representation, and as SparseToDense does not accept batched
                # inputs, we need to do this by re-indexing and re-sizing.
                # When TensorFlow adds SparseCrossEntropy, rewrite this method.
                indices = (step_targets +
                           num_decoder_symbols * math_ops.range(batch_size))
                # Sparse-to-dense must be on CPU for now.
                with ops.device("/cpu:0"):
                    dense = sparse_ops.sparse_to_dense(
                        indices, array_ops.expand_dims(length, 0), 1.0, 0.0)
                target = array_ops.reshape(dense, [-1, num_decoder_symbols])
                crossent = nn_ops.softmax_cross_entropy_with_logits(
                    step_logits, target,
                    name="SequenceLoss/CrossEntropy{0}".format(i))
            log_perp_list.append(crossent * step_weights)

        log_perps = math_ops.add_n(log_perp_list)
        if average_across_timesteps:
            # The 1e-12 just avoids division by 0 for all-0 weights.
            total_size = math_ops.add_n(weights) + 1e-12
            log_perps = log_perps / total_size
    return log_perps
def _SparseToDense(sparse_indices, output_size, sparse_values,
                   default_value, validate_indices=True):
    """Forward all arguments to `sparse_ops.sparse_to_dense`.

    Args:
      sparse_indices: indices argument, passed through unchanged.
      output_size: output shape argument, passed through unchanged.
      sparse_values: values argument, passed through unchanged.
      default_value: forwarded as the `default_value` keyword.
      validate_indices: forwarded as the `validate_indices` keyword.

    Returns:
      Whatever `sparse_ops.sparse_to_dense` returns for these arguments.
    """
    dense = sparse_ops.sparse_to_dense(
        sparse_indices,
        output_size,
        sparse_values,
        default_value=default_value,
        validate_indices=validate_indices)
    return dense
def _SparseToDense(sparse_indices, output_size, sparse_values,
                   default_value, validate_indices=True):
    """Run `sparse_to_dense` with the indices fed through a placeholder.

    The indices go through an int32 placeholder created without a shape
    argument, and the resulting op is evaluated immediately.

    Args:
      sparse_indices: value fed to the indices placeholder.
      output_size: output shape argument, passed through unchanged.
      sparse_values: values argument, passed through unchanged.
      default_value: forwarded as the `default_value` keyword.
      validate_indices: forwarded as the `validate_indices` keyword.

    Returns:
      The result of calling `.eval()` on the dense op.
    """
    indices_placeholder = array_ops.placeholder(dtypes.int32)
    dense_op = sparse_ops.sparse_to_dense(
        indices_placeholder,
        output_size,
        sparse_values,
        default_value=default_value,
        validate_indices=validate_indices)
    return dense_op.eval(feed_dict={indices_placeholder: sparse_indices})
def _apply_transform(self, input_tensors):
    """Densify the first input tensor and wrap it in `self.return_type`.

    Args:
      input_tensors: a list of Tensors representing the input to the
        Transform. Only the first element is used; its `indices`, `shape`
        and `values` attributes are read.

    Returns:
      The result of calling `self.return_type` on the dense tensor (described
      upstream as a namedtuple of Tensors representing the transformed
      output).
    """
    sparse_input = input_tensors[0]
    # Positions not listed in the sparse input take self.default_value.
    dense = sparse_ops.sparse_to_dense(
        sparse_input.indices,
        sparse_input.shape,
        sparse_input.values,
        default_value=self.default_value)
    # pylint: disable=not-callable
    return self.return_type(dense)
def _sparse_vs_dense_xent_benchmark_dense(labels, logits):
    """Build the dense-label softmax cross-entropy sum and its logits gradient.

    The labels are turned into dense targets via `sparse_to_dense`, then fed
    to `tf.nn.softmax_cross_entropy_with_logits`.

    Args:
      labels: labels tensor; one entry per row of `logits` is assumed --
        TODO confirm against callers.
      logits: logits tensor; its first two dimensions are read as the batch
        size and the number of entries per row.

    Returns:
      A `(crossent_sum, grads)` tuple: the summed cross-entropy and its
      gradient with respect to `logits`.
    """
    labels = tf.identity(labels)
    logits = tf.identity(logits)

    with tf.device("/cpu:0"):  # Sparse-to-dense must be on CPU
        logits_shape_rows = tf.shape(logits)[0]
        logits_shape_cols = tf.shape(logits)[1]
        flat_length = logits_shape_rows * logits_shape_cols
        # Offset each label by its row start to index the flattened target.
        labels = labels + logits_shape_cols * tf.range(logits_shape_rows)
        target = sparse_ops.sparse_to_dense(
            labels, tf.pack([flat_length]), 1.0, 0.0)

    target = tf.reshape(target, tf.pack([-1, logits_shape_cols]))
    crossent = tf.nn.softmax_cross_entropy_with_logits(
        logits, target, name="SequenceLoss/CrossEntropy")
    crossent_sum = tf.reduce_sum(crossent)
    grads = tf.gradients([crossent_sum], [logits])[0]

    return (crossent_sum, grads)
def _zero_out_float_grad(op, grad):
    """The gradients for `zero_out_float`.

    Only the first element of the flattened incoming gradient is propagated;
    it lands at the all-zeros index of the input and every other position
    gets 0.

    Args:
      op: The `zero_out_float` `Operation` that we are differentiating, which
        we can use to find the inputs and outputs of the original op.
      grad: Gradient with respect to the output of the `zero_out_float` op.

    Returns:
      Gradients with respect to the input of `zero_out_float`, as a list of
      one Tensor since the op has one input.
    """
    input_shape = array_ops.shape(op.inputs[0])
    # One zero per dimension, i.e. the coordinates of the first element.
    origin = array_ops.zeros_like(input_shape)
    leading_grad = array_ops.reshape(grad, [-1])[0]
    input_grad = sparse_ops.sparse_to_dense(
        [origin], input_shape, leading_grad, 0)
    return [input_grad]
def _sparse_vs_dense_xent_benchmark_dense(labels, logits):
    """Build the dense-label softmax cross-entropy sum and its logits gradient.

    The labels are turned into dense targets via `sparse_to_dense`, then fed
    to `tf.nn.softmax_cross_entropy_with_logits`.

    Args:
      labels: labels tensor; one entry per row of `logits` is assumed --
        TODO confirm against callers.
      logits: logits tensor; its first two dimensions are read as the batch
        size and the number of entries per row.

    Returns:
      A `(crossent_sum, grads)` tuple: the summed cross-entropy and its
      gradient with respect to `logits`.
    """
    labels = tf.identity(labels)
    logits = tf.identity(logits)

    with tf.device("/cpu:0"):  # Sparse-to-dense must be on CPU
        num_rows = tf.shape(logits)[0]
        row_width = tf.shape(logits)[1]
        total_entries = num_rows * row_width
        # Shift every label by the start of its row in the flattened target.
        labels = labels + row_width * tf.range(num_rows)
        dense_shape = tf.pack([total_entries])
        target = sparse_ops.sparse_to_dense(labels, dense_shape, 1.0, 0.0)

    target = tf.reshape(target, tf.pack([-1, row_width]))
    crossent = tf.nn.softmax_cross_entropy_with_logits(
        logits, target, name="SequenceLoss/CrossEntropy")
    crossent_sum = tf.reduce_sum(crossent)
    grads = tf.gradients([crossent_sum], [logits])[0]

    return (crossent_sum, grads)
def __call__(self, query, previous_alignments):
    """Score the query, keep selected positions, and softmax the result.

    Args:
      query: Tensor of dtype matching `self.values` and shape
        `[batch_size, query_depth]`.
      previous_alignments: Tensor of dtype matching `self.values` and shape
        `[batch_size, alignments_size]` (`alignments_size` is memory's
        `max_time`). Not read by this implementation.

    Returns:
      alignments: Tensor of dtype matching `self.values` and shape
        `[batch_size, alignments_size]` (`alignments_size` is memory's
        `max_time`).
    """
    with variable_scope.variable_scope(None, "bahdanau_attention", [query]):
        if self.query_layer:
            processed_query = self.query_layer(query)
        else:
            processed_query = query
        score = _bahdanau_score(processed_query, self._keys, self._normalize)

    # mask with memory_sequence_length
    masked_score = _maybe_mask_score(
        score, self._memory_sequence_length, self._score_mask_value)

    # Boolean mask over the ranked slots, built from _top_alignment_number.
    keep_slot = array_ops.sequence_mask(
        self._top_alignment_number, maxlen=self.alignments_size)

    # Indices returned by top_k over the full width (third positional
    # argument False).
    ranked_columns = nn_ops.top_k(
        masked_score, self.alignments_size, False).indices

    # Slots rejected by keep_slot fall back to column index 0.
    # NOTE(review): this means column 0 of a row is marked as kept whenever
    # any of its slots is rejected -- confirm that is intended.
    ranked_as_float = math_ops.cast(ranked_columns, dtypes.float32)
    fallback_zeros = array_ops.zeros_like(masked_score)
    chosen_columns = array_ops.where(
        keep_slot, ranked_as_float, fallback_zeros)
    column_ids = array_ops.reshape(
        math_ops.cast(chosen_columns, dtypes.int32), [-1, 1])

    # Row id for every slot: alignments_size copies of each batch index.
    row_ids = array_ops.reshape(
        [i * array_ops.ones([self.alignments_size], dtypes.int32)
         for i in range(self.batch_size)],
        [-1, 1])

    keep_coords = array_ops.concat([row_ids, column_ids], axis=-1)
    keep_mask = sparse_ops.sparse_to_dense(
        sparse_indices=keep_coords,
        output_shape=[self.batch_size, self.alignments_size],
        sparse_values=True,
        default_value=False,
        validate_indices=False)

    # Positions that are not kept are replaced by the score mask value.
    masked_out = self._score_mask_value * array_ops.ones_like(masked_score)
    keywords_score = array_ops.where(keep_mask, masked_score, masked_out)
    return nn_ops.softmax(keywords_score)
def __call__(self, query, previous_alignments):
    """Score the query, keep the top-scoring positions, and softmax the result.

    Args:
      query: Tensor of dtype matching `self.values` and shape
        `[batch_size, query_depth]`.
      previous_alignments: Tensor of dtype matching `self.values` and shape
        `[batch_size, alignments_size]` (`alignments_size` is memory's
        `max_time`). Not read by this implementation.

    Returns:
      alignments: Tensor of dtype matching `self.values` and shape
        `[batch_size, alignments_size]` (`alignments_size` is memory's
        `max_time`).
    """
    with variable_scope.variable_scope(None, "bahdanau_attention", [query]):
        if self.query_layer:
            processed_query = self.query_layer(query)
        else:
            processed_query = query
        score = _bahdanau_score(processed_query, self._keys, self._normalize)

    # mask with memory_sequence_length
    masked_score = _maybe_mask_score(
        score, self._memory_sequence_length, self._score_mask_value)

    # choose top_k alignments among dimension 1. replace others with the
    # score mask value. The number kept is the smaller of alignments_size
    # and _top_alignment_number.
    too_few_slots = gen_math_ops.less(
        self.alignments_size, self._top_alignment_number)
    num_kept = control_flow_ops.cond(
        too_few_slots,
        lambda: self.alignments_size,
        lambda: self._top_alignment_number)
    kept_columns = nn_ops.top_k(masked_score, num_kept).indices

    # (row, column) coordinates of every kept score.
    row_ids = array_ops.reshape(
        [i * array_ops.ones([num_kept], dtypes.int32)
         for i in range(self.batch_size)],
        [-1, 1])
    column_ids = array_ops.reshape(kept_columns, [-1, 1])
    keep_coords = array_ops.concat([row_ids, column_ids], axis=-1)

    keep_mask = sparse_ops.sparse_to_dense(
        sparse_indices=keep_coords,
        output_shape=[self.batch_size, self.alignments_size],
        sparse_values=True,
        default_value=False,
        validate_indices=False)

    # Positions that are not kept are replaced by the score mask value.
    masked_out = self._score_mask_value * array_ops.ones_like(masked_score)
    keywords_score = array_ops.where(keep_mask, masked_score, masked_out)
    return nn_ops.softmax(keywords_score)
def testDenseSparseTensorMatMul(self):
    """Check `sparse_tensor_dense_matmul` against a dense `matmul` reference.

    All four combinations of `adjoint_a` and `adjoint_b` are covered, in the
    order (False, False), (False, True), (True, False), (True, True).
    """
    np.random.seed(42)
    dense_numpy_array = np.random.rand(3, 3)
    independent_dense_tf = constant_op.constant(
        dense_numpy_array, dtype='float32')

    sp = sparse_tensor.SparseTensor(
        indices=[[0, 0], [1, 2]], values=[4., 8.], dense_shape=[3, 3])
    # Use dense_shape, the attribute the tensor was constructed with, rather
    # than the legacy / static `shape` attribute.
    dense_of_sparse = sparse_ops.sparse_to_dense(
        sp.indices, sp.dense_shape, sp.values)

    for adjoint_a in (False, True):
        for adjoint_b in (False, True):
            result = sparse_ops.sparse_tensor_dense_matmul(
                independent_dense_tf, sp,
                adjoint_a=adjoint_a, adjoint_b=adjoint_b)

            # Dense reference: transpose whichever operand is flagged adjoint.
            lhs = (array_ops.transpose(independent_dense_tf)
                   if adjoint_a else independent_dense_tf)
            rhs = (array_ops.transpose(dense_of_sparse)
                   if adjoint_b else dense_of_sparse)
            expected = math_ops.matmul(lhs, rhs)

            self.assertAllEqual(expected, result)
def _apply_transform(self, input_tensors, **kwargs):
    """Densify the first input tensor and wrap it in `self.return_type`.

    Args:
      input_tensors: a list of Tensors representing the input to the
        Transform. Only the first element is used; its `indices`,
        `dense_shape` and `values` attributes are read.
      **kwargs: Additional keyword arguments, unused here.

    Returns:
      The result of calling `self.return_type` on the dense tensor (described
      upstream as a namedtuple of Tensors representing the transformed
      output).
    """
    sparse_input = input_tensors[0]
    # Positions not listed in the sparse input take self.default_value.
    dense = sparse_ops.sparse_to_dense(
        sparse_input.indices,
        sparse_input.dense_shape,
        sparse_input.values,
        default_value=self.default_value)
    # pylint: disable=not-callable
    return self.return_type(dense)
def _compute_sampled_logits(outfile, weights, biases, inputs, labels,
                            num_sampled, num_classes, num_true=1,
                            sampled_values=None, subtract_log_q=True,
                            remove_accidental_hits=False,
                            partition_strategy="mod", name=None):
    """Compute sampled training logits and labels for true and sampled classes.

    Args:
      outfile: passed as the first argument to every
        `embedding_ops.embedding_lookup` call; its meaning is defined by that
        (apparently project-modified) function -- TODO confirm.
      weights: class embeddings; a single value is wrapped into a list.
      biases: class biases, looked up with the same ids as `weights`.
      inputs: forward activations; multiplied against the looked-up weights.
      labels: target classes; cast to int64 if needed.
      num_sampled: number of classes to sample.
      num_classes: number of possible classes (the sampler's `range_max`).
      num_true: number of target classes per example.
      sampled_values: optional tuple (sampled, true_expected_count,
        sampled_expected_count); when None, `log_uniform_candidate_sampler`
        is used.
      subtract_log_q: if true, subtract the log expected counts from the
        logits.
      remove_accidental_hits: if true, add the weights returned by
        `compute_accidental_hits` to the sampled logits at the hit positions.
      partition_strategy: forwarded to the weights lookup.
      name: optional name for the operation scope.

    Returns:
      out_logits, out_labels: true columns first, sampled columns after.
      Labels are `1 / num_true` for true columns and 0 for sampled ones.
    """
    if not isinstance(weights, list):
        weights = [weights]

    with ops.name_scope(name, "compute_sampled_logits",
                        weights + [biases, inputs, labels]):
        if labels.dtype != dtypes.int64:
            labels = math_ops.cast(labels, dtypes.int64)
        labels_flat = array_ops.reshape(labels, [-1])

        # Draw the negative classes unless the caller supplied them.
        if sampled_values is None:
            sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler(
                true_classes=labels,
                num_true=num_true,
                num_sampled=num_sampled,
                unique=True,
                range_max=num_classes)
        sampled, true_expected_count, sampled_expected_count = sampled_values

        # One lookup covers the true ids followed by the sampled ids.
        all_ids = array_ops.concat(0, [labels_flat, sampled])
        looked_up_w = embedding_ops.embedding_lookup(
            outfile, weights, all_ids, partition_strategy=partition_strategy)
        looked_up_b = embedding_ops.embedding_lookup(outfile, biases, all_ids)

        # Leading rows belong to the true classes.
        true_w_size = array_ops.pack([array_ops.shape(labels_flat)[0], -1])
        true_w = array_ops.slice(looked_up_w, [0, 0], true_w_size)
        true_b = array_ops.slice(looked_up_b, [0], array_ops.shape(labels_flat))

        # Row-wise dot product between inputs and each true-class weight row.
        dim = array_ops.shape(true_w)[1:2]
        true_w_3d_shape = array_ops.concat(0, [[-1, num_true], dim])
        inputs_3d = array_ops.expand_dims(inputs, 1)
        true_w_3d = array_ops.reshape(true_w, true_w_3d_shape)
        row_wise_dots = math_ops.mul(inputs_3d, true_w_3d)
        dots_as_matrix = array_ops.reshape(
            row_wise_dots, array_ops.concat(0, [[-1], dim]))
        true_logits = array_ops.reshape(
            _sum_rows(dots_as_matrix), [-1, num_true])
        true_b = array_ops.reshape(true_b, [-1, num_true])
        true_logits = true_logits + true_b

        # Remaining rows belong to the sampled classes.
        sampled_w_begin = array_ops.pack([array_ops.shape(labels_flat)[0], 0])
        sampled_w = array_ops.slice(looked_up_w, sampled_w_begin, [-1, -1])
        sampled_b = array_ops.slice(
            looked_up_b, array_ops.shape(labels_flat), [-1])
        sampled_logits = math_ops.matmul(
            inputs, sampled_w, transpose_b=True) + sampled_b

        if remove_accidental_hits:
            acc_indices, acc_ids, acc_weights = (
                candidate_sampling_ops.compute_accidental_hits(
                    labels, sampled, num_true=num_true))

            # Assemble (row, column) pairs in the layout sparse_to_dense takes.
            acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1])
            acc_ids_2d_int32 = array_ops.reshape(
                math_ops.cast(acc_ids, dtypes.int32), [-1, 1])
            sparse_indices = array_ops.concat(
                1, [acc_indices_2d, acc_ids_2d_int32], "sparse_indices")
            sampled_logits_shape = array_ops.concat(
                0,
                [array_ops.shape(labels)[:1],
                 array_ops.expand_dims(num_sampled, 0)])
            if sampled_logits.dtype != acc_weights.dtype:
                acc_weights = math_ops.cast(acc_weights, sampled_logits.dtype)
            sampled_logits = sampled_logits + sparse_ops.sparse_to_dense(
                sparse_indices,
                sampled_logits_shape,
                acc_weights,
                default_value=0.0,
                validate_indices=False)

        if subtract_log_q:
            true_logits = true_logits - math_ops.log(true_expected_count)
            sampled_logits = sampled_logits - math_ops.log(
                sampled_expected_count)

        out_logits = array_ops.concat(1, [true_logits, sampled_logits])
        out_labels = array_ops.concat(
            1,
            [array_ops.ones_like(true_logits) / num_true,
             array_ops.zeros_like(sampled_logits)])

    return out_logits, out_labels
def ctc_decode(y_pred, input_length, max_output_length):
    """Greedy (best path) CTC decoding of a softmax output, padded with -1.

    Cut down from
    https://github.com/keras-team/keras/blob/master/keras/backend/tensorflow_backend.py#L4170

    # Arguments
        y_pred: tensor `(samples, time_steps, num_categories)` containing the
            prediction, or output of the softmax.
        input_length: tensor containing the sequence length for each batch
            item in `y_pred`. Its last axis is squeezed away before use, so a
            trailing dimension of size 1 is presumably expected -- TODO
            confirm against callers.
        max_output_length: int giving the max output sequence length.

    # Returns
        Dense tensor of decoded labels with -1 as the fill value. If it has
        fewer than `max_output_length + 2` columns it is right-padded with -1
        up to that width; otherwise it is returned unchanged.
    """
    # The decoder is given time-major log values; epsilon avoids log(0).
    time_major = tf.transpose(y_pred, perm=[1, 0, 2])
    log_probs = tf.math.log(time_major + K.epsilon())
    sequence_lengths = tf.cast(tf.squeeze(input_length, axis=-1), tf.int32)

    decoded, _ = ctc_ops.ctc_greedy_decoder(
        inputs=log_probs, sequence_length=sequence_lengths)
    best_path = decoded[0]
    decoded_dense = sparse_ops.sparse_to_dense(
        best_path.indices,
        best_path.dense_shape,
        best_path.values,
        default_value=-1)

    # Unfortunately, decoded_dense will have a different number of columns
    # depending on the decodings. For use in `predict()` it must come out in
    # one standard shape, so pad if necessary.
    target_width = max_output_length + 2  # 2 extra characters for CTC leeway
    current_width = tf.shape(decoded_dense)[-1]

    def _pad_to_target():
        padding = [[0, 0], [0, target_width - current_width]]
        return tf.pad(decoded_dense, padding, constant_values=-1)

    needs_padding = tf.less(current_width, target_width)
    return tf.cond(needs_padding, _pad_to_target, lambda: decoded_dense)
def _TopKGrad(op, grad, _):
    """Return the gradients for TopK.

    Args:
      op: The TopKOp for which we need to generate gradients.
      grad: Tensor. The gradients passed to the TopKOp.
      _: Ignored third argument.

    Returns:
      A list of two tensors: the gradient w.r.t. the input, shaped like
      `op.inputs[0]`, and a scalar int32 zero as the second gradient.
    """
    in_shape = array_ops.shape(op.inputs[0])
    idx_shape = array_ops.shape(op.outputs[1])
    idx_cols = array_ops.gather(idx_shape, array_ops.size(idx_shape) - 1)

    # View the indices output as a 2D matrix.
    idx_matrix = array_ops.reshape(
        op.outputs[1], array_ops.stack([-1, idx_cols]))

    in_cols = array_ops.gather(in_shape, array_ops.size(in_shape) - 1)
    row_count = array_ops.shape(idx_matrix)[0]

    # Each row's indices are shifted by that row's start in the flattened
    # input, which gives linear (1D) positions.
    row_starts = math_ops.range(0, row_count * in_cols, in_cols)
    flat_positions = array_ops.reshape(
        idx_matrix + array_ops.expand_dims(row_starts, -1), [-1])

    # Place grad at those positions, zeros elsewhere, then reshape back to
    # the shape of the original input.
    total_size = array_ops.reshape(math_ops.reduce_prod(in_shape), [1])
    scattered = sparse_ops.sparse_to_dense(
        flat_positions,
        total_size,
        array_ops.reshape(grad, [-1]),
        validate_indices=False)

    return [
        array_ops.reshape(scattered, in_shape),
        array_ops.zeros([], dtype=dtypes.int32),
    ]
def _compute_sampled_logits(weights, biases, inputs, labels, num_sampled,
                            num_classes, num_true=1,
                            sampled_values=None,
                            subtract_log_q=True,
                            remove_accidental_hits=False,
                            partition_strategy="mod",
                            name=None):
    """Helper function for nce_loss and sampled_softmax_loss functions.

    Computes sampled output training logits and labels suitable for
    implementing e.g. noise-contrastive estimation (see nce_loss) or sampled
    softmax (see sampled_softmax_loss).

    Note: In the case where num_true > 1, we assign to each target class the
    target probability 1 / num_true so that the target probabilities sum to 1
    per-example.

    Args:
      weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor`
        objects whose concatenation along dimension 0 has shape
        `[num_classes, dim]`. The (possibly-partitioned) class embeddings.
      biases: A `Tensor` of shape `[num_classes]`. The class biases.
      inputs: A `Tensor` of shape `[batch_size, dim]`. The forward activations
        of the input network.
      labels: A `Tensor` of type `int64` and shape `[batch_size, num_true]`.
        The target classes. Note that this format differs from the `labels`
        argument of `nn.softmax_cross_entropy_with_logits`.
      num_sampled: An `int`. The number of classes to randomly sample per
        batch.
      num_classes: An `int`. The number of possible classes.
      num_true: An `int`. The number of target classes per training example.
      sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`,
        `sampled_expected_count`) returned by a `*_candidate_sampler`
        function. (if None, we default to `log_uniform_candidate_sampler`)
      subtract_log_q: A `bool`. whether to subtract the log expected count of
        the labels in the sample to get the logits of the true labels.
        Default is True. Turn off for Negative Sampling.
      remove_accidental_hits: A `bool`. whether to remove "accidental hits"
        where a sampled class equals one of the target classes. Default is
        False.
      partition_strategy: A string specifying the partitioning strategy,
        relevant if `len(weights) > 1`. Currently `"div"` and `"mod"` are
        supported. Default is `"mod"`. See `tf.nn.embedding_lookup` for more
        details.
      name: A name for the operation (optional).

    Returns:
      out_logits, out_labels: `Tensor` objects each with shape
        `[batch_size, num_true + num_sampled]`, for passing to either
        `nn.sigmoid_cross_entropy_with_logits` (NCE) or
        `nn.softmax_cross_entropy_with_logits` (sampled softmax).
    """
    if not isinstance(weights, list):
        weights = [weights]

    with ops.op_scope(
            weights + [biases, inputs, labels], name, "compute_sampled_logits"):
        if labels.dtype != dtypes.int64:
            labels = math_ops.cast(labels, dtypes.int64)
        labels_flat = array_ops.reshape(labels, [-1])

        # Sample the negative labels unless the caller supplied them.
        #   sampled:                [num_sampled] tensor
        #   true_expected_count:    [batch_size, 1] tensor
        #   sampled_expected_count: [num_sampled] tensor
        if sampled_values is None:
            sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler(
                true_classes=labels,
                num_true=num_true,
                num_sampled=num_sampled,
                unique=True,
                range_max=num_classes)
        # NOTE: pylint cannot tell that 'sampled_values' is a sequence
        # pylint: disable=unpacking-non-sequence
        sampled, true_expected_count, sampled_expected_count = sampled_values
        # pylint: enable=unpacking-non-sequence

        # One lookup covers both groups of ids:
        #   labels_flat is a [batch_size * num_true] tensor
        #   sampled is a [num_sampled] int tensor
        all_ids = array_ops.concat(0, [labels_flat, sampled])

        # weights shape is [num_classes, dim]
        looked_up_w = embedding_ops.embedding_lookup(
            weights, all_ids, partition_strategy=partition_strategy)
        looked_up_b = embedding_ops.embedding_lookup(biases, all_ids)

        # Leading rows belong to the true classes:
        #   true_w shape is [batch_size * num_true, dim]
        #   true_b is a [batch_size * num_true] tensor
        true_w_size = array_ops.pack([array_ops.shape(labels_flat)[0], -1])
        true_w = array_ops.slice(looked_up_w, [0, 0], true_w_size)
        true_b = array_ops.slice(looked_up_b, [0], array_ops.shape(labels_flat))

        # inputs shape is [batch_size, dim]
        # true_w shape is [batch_size * num_true, dim]
        # row_wise_dots is [batch_size, num_true, dim]
        dim = array_ops.shape(true_w)[1:2]
        true_w_3d_shape = array_ops.concat(0, [[-1, num_true], dim])
        inputs_3d = array_ops.expand_dims(inputs, 1)
        true_w_3d = array_ops.reshape(true_w, true_w_3d_shape)
        row_wise_dots = math_ops.mul(inputs_3d, true_w_3d)

        # We want the row-wise dot plus biases which yields a
        # [batch_size, num_true] tensor of true_logits.
        dots_as_matrix = array_ops.reshape(
            row_wise_dots, array_ops.concat(0, [[-1], dim]))
        true_logits = array_ops.reshape(
            _sum_rows(dots_as_matrix), [-1, num_true])
        true_b = array_ops.reshape(true_b, [-1, num_true])
        true_logits = true_logits + true_b

        # Remaining rows belong to the sampled labels:
        #   sampled_w shape is [num_sampled, dim]
        #   sampled_b is a [num_sampled] float tensor
        sampled_w_begin = array_ops.pack([array_ops.shape(labels_flat)[0], 0])
        sampled_w = array_ops.slice(looked_up_w, sampled_w_begin, [-1, -1])
        sampled_b = array_ops.slice(
            looked_up_b, array_ops.shape(labels_flat), [-1])

        # inputs has shape [batch_size, dim]
        # sampled_w has shape [num_sampled, dim]
        # sampled_b has shape [num_sampled]
        # Apply X*W'+B, which yields [batch_size, num_sampled]
        sampled_logits = math_ops.matmul(
            inputs, sampled_w, transpose_b=True) + sampled_b

        if remove_accidental_hits:
            acc_indices, acc_ids, acc_weights = (
                candidate_sampling_ops.compute_accidental_hits(
                    labels, sampled, num_true=num_true))

            # This is how SparseToDense expects the indices.
            acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1])
            acc_ids_2d_int32 = array_ops.reshape(
                math_ops.cast(acc_ids, dtypes.int32), [-1, 1])
            sparse_indices = array_ops.concat(
                1, [acc_indices_2d, acc_ids_2d_int32], "sparse_indices")

            # Create sampled_logits_shape = [batch_size, num_sampled]
            sampled_logits_shape = array_ops.concat(
                0,
                [array_ops.shape(labels)[:1],
                 array_ops.expand_dims(num_sampled, 0)])
            if sampled_logits.dtype != acc_weights.dtype:
                acc_weights = math_ops.cast(acc_weights, sampled_logits.dtype)
            sampled_logits = sampled_logits + sparse_ops.sparse_to_dense(
                sparse_indices,
                sampled_logits_shape,
                acc_weights,
                default_value=0.0,
                validate_indices=False)

        if subtract_log_q:
            # Subtract log of Q(l), prior probability that l appears in
            # sampled.
            true_logits = true_logits - math_ops.log(true_expected_count)
            sampled_logits = sampled_logits - math_ops.log(
                sampled_expected_count)

        # Construct output logits and labels. The true labels/logits start at
        # col 0.
        out_logits = array_ops.concat(1, [true_logits, sampled_logits])

        # true_logits is a float tensor, ones_like(true_logits) is a float
        # tensor of ones. We then divide by num_true to ensure the per-example
        # labels sum to 1.0, i.e. form a proper probability distribution.
        true_labels = array_ops.ones_like(true_logits) / num_true
        sampled_labels = array_ops.zeros_like(sampled_logits)
        out_labels = array_ops.concat(1, [true_labels, sampled_labels])

    return out_logits, out_labels
def _flat_map_fn(x):
    """Densify the sparse element `x` and return a dataset of its slices.

    Args:
      x: sparse element; its `indices`, `dense_shape` and `values` are read.

    Returns:
      The dataset produced by `Dataset.from_tensor_slices` on the dense
      tensor.
    """
    dense = sparse_ops.sparse_to_dense(x.indices, x.dense_shape, x.values)
    return dataset_ops.Dataset.from_tensor_slices(dense)
def sufficient_statistics(x, axes, shift=False, keep_dims=False, name=None):
    """Calculate the sufficient statistics for the mean and variance of `x`.

    These sufficient statistics are computed using the one pass algorithm on
    an input that's optionally shifted using the value of the 1st element in
    `x`. See:
    https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Computing_shifted_data

    Unfortunately, in some cases using a random individual sample as the shift
    value leads experimentally to very poor numerical stability, so it is
    disabled by default. The one-pass approach might have to be revised
    accordingly.

    Args:
      x: A `Tensor`.
      axes: Array of ints. Axes along which to compute mean and variance.
      shift: If true, shift the data to provide more numerically stable
        results.
      keep_dims: produce statistics with the same dimensionality as the input.
      name: Name used to scope the operations that compute the sufficient
        stats.

    Returns:
      Four `Tensor` objects of the same type as `x`:
      * the count (number of elements to average over).
      * the (possibly shifted) sum of the elements in the array.
      * the (possibly shifted) sum of squares of the elements in the array.
      * the shift by which the mean must be corrected or None if `shift` is
        False.
    """
    with ops.op_scope([x, axes], name, "sufficient_statistics"):
        x = ops.convert_to_tensor(x, name="x")
        x_shape = x.get_shape()
        if x_shape.is_fully_defined():
            # Static shape: count the reduced elements and build the shape of
            # x with every reduced dimension set to 1. The set is built once
            # instead of once per dimension.
            reduced_axes = set(axes)
            counts = 1
            m_shape = []
            for d in xrange(x_shape.ndims):
                dim = x_shape[d].value
                if d in reduced_axes:
                    counts *= dim
                    dim = 1
                m_shape.append(dim)
            counts = constant_op.constant(counts, dtype=x.dtype)
        else:  # shape needs to be inferred at runtime.
            x_shape = array_ops.shape(x)
            # Boolean vector over the dimensions: True where the axis is reduced.
            select_axes = sparse_ops.sparse_to_dense(
                axes, array_ops.shape(x_shape), True, False)
            m_shape = math_ops.select(
                select_axes, array_ops.ones_like(x_shape), x_shape)
            counts = math_ops.cast(
                math_ops.reduce_prod(x_shape / m_shape), x.dtype, name="count")
        if shift:
            # The shift is the slice of x starting at index 0 with size
            # m_shape, i.e. the first entry along every reduced axis.
            shift_value = array_ops.slice(
                x, array_ops.zeros_like(m_shape), m_shape)
            m_ss = math_ops.sub(x, shift_value)
            v_ss = math_ops.squared_difference(x, shift_value)
            if keep_dims:
                shift_value = array_ops.identity(shift_value, name="shift")
            else:
                shift_value = array_ops.squeeze(
                    shift_value, squeeze_dims=axes, name="shift")
        else:  # not shift.
            m_ss = x
            v_ss = math_ops.square(x)
            shift_value = None
        m_ss = math_ops.reduce_sum(
            m_ss, axes, keep_dims=keep_dims, name="mean_ss")
        v_ss = math_ops.reduce_sum(
            v_ss, axes, keep_dims=keep_dims, name="var_ss")
    return counts, m_ss, v_ss, shift_value
def _compute_sampled_logits(ri_tensors,
                            weights,
                            bias,
                            labels,
                            partition_const,
                            inputs,
                            num_sampled,
                            num_classes,
                            num_true=1,
                            sampled_values=None,
                            subtract_log_q=True,
                            remove_accidental_hits=False,
                            partition_strategy="mod",
                            name=None,
                            seed=None):
  """Builds sampled training logits and labels from sparse index vectors.

  True and sampled class ids are first mapped to sparse rows of `ri_tensors`
  (via `tx.gather_sparse`), and those rows are then used as weighted sparse
  lookups into `weights` (and `bias`) with a "sum" combiner.

  Args:
    ri_tensors: sparse tensor passed to `tx.gather_sparse`; presumably one
      sparse row per class -- TODO confirm against `tx`.
    weights: a variable, a `PartitionedVariable`, or a list of tensors used
      as `params` of the sparse embedding lookup.
    bias: `params` for the bias lookup, or None to skip biases entirely.
    labels: int tensor of true classes; cast to int64 if necessary.
    partition_const: currently unused (only referenced by disabled code).
    inputs: tensor multiplied against the looked-up weights.
    num_sampled: number of classes to sample.
    num_classes: number of possible classes (`range_max` of the sampler).
    num_true: number of true classes per example.
    sampled_values: optional tuple (sampled, true_expected_count,
      sampled_expected_count); when None a uniform candidate sampler is used.
    subtract_log_q: whether to subtract the log expected counts from logits.
    remove_accidental_hits: whether to add the accidental-hit weights
      returned by `compute_accidental_hits` to the sampled logits.
    partition_strategy: forwarded to the sparse embedding lookups.
    name: optional name scope.
    seed: seed forwarded to the default candidate sampler.

  Returns:
    out_logits, out_labels: the true logits concatenated with the noise
    logits along axis 1, and matching labels (1 / num_true for the true
    columns, 0 for the sampled columns).
  """
  if isinstance(weights, variables.PartitionedVariable):
    weights = list(weights)
  if not isinstance(weights, list):
    weights = [weights]

  with ops.name_scope(name, "compute_sampled_logits",
                      weights + [inputs, labels]):
    if labels.dtype != dtypes.int64:
      labels = math_ops.cast(labels, dtypes.int64)
    labels_flat = array_ops.reshape(labels, [-1])

    # Sample the negative labels.
    #   sampled shape: [num_sampled] tensor
    #   true_expected_count shape = [batch_size, 1] tensor
    #   sampled_expected_count shape = [num_sampled] tensor
    if sampled_values is None:
      sampled_values = candidate_sampling_ops.uniform_candidate_sampler(
          true_classes=labels,
          num_true=num_true,
          num_sampled=num_sampled,
          unique=True,
          range_max=num_classes,
          seed=seed)
    # NOTE: pylint cannot tell that 'sampled_values' is a sequence
    # pylint: disable=unpacking-non-sequence
    sampled, true_expected_count, sampled_expected_count = (
        array_ops.stop_gradient(s) for s in sampled_values)
    # pylint: enable=unpacking-non-sequence
    sampled = math_ops.cast(sampled, dtypes.int64)

    # labels_flat is a [batch_size * num_true] tensor
    # sampled is a [num_sampled] int tensor
    # Sparse rows for the true and for the sampled classes.
    true_ris = tx.gather_sparse(sp_tensor=ri_tensors, ids=labels_flat)
    sampled_ris = tx.gather_sparse(sp_tensor=ri_tensors, ids=sampled)

    true_w = embedding_lookup_sparse(
        params=weights,
        sp_ids=tx.sparse_indices(true_ris),
        sp_weights=true_ris,
        combiner="sum",
        partition_strategy=partition_strategy)
    noise_w = embedding_lookup_sparse(
        params=weights,
        sp_ids=tx.sparse_indices(sampled_ris),
        sp_weights=sampled_ris,
        combiner="sum",
        partition_strategy=partition_strategy)

    if bias is not None:
      sampled_b = embedding_lookup_sparse(
          params=bias,
          sp_ids=tx.sparse_indices(sampled_ris),
          sp_weights=sampled_ris,
          combiner="sum",
          partition_strategy=partition_strategy)
      true_b = embedding_lookup_sparse(
          params=bias,
          sp_ids=tx.sparse_indices(true_ris),
          sp_weights=true_ris,
          combiner="sum",
          partition_strategy=partition_strategy)

    noise_logits = math_ops.matmul(inputs, noise_w, transpose_b=True)

    dim = array_ops.shape(true_w)[1:2]
    new_true_w_shape = array_ops.concat([[-1, num_true], dim], 0)
    true_w_e = array_ops.reshape(true_w, new_true_w_shape)
    row_wise_dots = math_ops.multiply(array_ops.expand_dims(inputs, 1),
                                      true_w_e)
    # We want the row-wise dot plus biases which yields a
    # [batch_size, num_true] tensor of true_logits.
    dots_as_matrix = array_ops.reshape(row_wise_dots,
                                       array_ops.concat([[-1], dim], 0))
    true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])

    if bias is not None:
      true_b = array_ops.reshape(true_b, [-1, num_true])
      true_logits += true_b
      noise_logits += sampled_b

    # TODO need to review how to do this Z
    # true_logits = true_logits * math_ops.exp(partition_const)

    if remove_accidental_hits:
      acc_hits = candidate_sampling_ops.compute_accidental_hits(
          labels, sampled, num_true=num_true)
      acc_indices, acc_ids, acc_weights = acc_hits

      # This is how SparseToDense expects the indices.
      acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1])
      acc_ids_2d_int32 = array_ops.reshape(
          math_ops.cast(acc_ids, dtypes.int32), [-1, 1])
      sparse_indices = array_ops.concat([acc_indices_2d, acc_ids_2d_int32], 1,
                                        "sparse_indices")
      # Create sampled_logits_shape = [batch_size, num_sampled]
      sampled_logits_shape = array_ops.concat(
          [array_ops.shape(labels)[:1],
           array_ops.expand_dims(num_sampled, 0)], 0)
      if noise_logits.dtype != acc_weights.dtype:
        acc_weights = math_ops.cast(acc_weights, noise_logits.dtype)
      noise_logits += sparse_ops.sparse_to_dense(
          sparse_indices,
          sampled_logits_shape,
          acc_weights,
          default_value=0.0,
          validate_indices=False)

    if subtract_log_q:
      # Subtract log of Q(l), prior probability that l appears in sampled.
      true_logits -= math_ops.log(true_expected_count)
      noise_logits -= math_ops.log(sampled_expected_count)

    # Construct output logits and labels. The true labels/logits start at col 0.
    out_logits = array_ops.concat([true_logits, noise_logits], 1)
    # true_logits is a float tensor, ones_like(true_logits) is a float
    # tensor of ones. We then divide by num_true to ensure the per-example
    # labels sum to 1.0, i.e. form a proper probability distribution.
    out_labels = array_ops.concat([
        array_ops.ones_like(true_logits) / num_true,
        array_ops.zeros_like(noise_logits)
    ], 1)

    # out_logits = math_ops.div(out_logits,math_ops.exp(partition_const))
    # out_logits = out_logits / (partition_const + 1)
  return out_logits, out_labels
def _to_dnn_input_layer(self,
                        transformed_input_tensor,
                        weight_collections=None,
                        trainable=True,
                        output_rank=2):
  """Returns a Tensor as an input to the first layer of neural network.

  The sparse ids are densified (padding with 0) to
  `[batch_size, self.max_sequence_length]`, embedded, and run through an RNN;
  the result of `self._extract_last_relevent` on the RNN outputs is returned.

  Args:
    transformed_input_tensor: A tensor that has undergone the transformations
      in `insert_transformed_feature`. Rank should be >= `output_rank`.
    weight_collections: Collections the embedding variable is added to when
      it is created here.
    trainable: Whether a newly created embedding variable is trainable
      (combined with `self.trainable`).
    output_rank: the desired rank of the output `Tensor`.

  Returns:
    A outputs Tensor of RNN to be fed into the first layer of neural network.

  Raises:
    ValueError: if the shared embedding collection holds more than one
      variable, or if the existing shared embedding has a shape different
      from `[self.length, self.embedding_dimension]`.
  """
  sparse_id_column = self.sparse_id_column.id_tensor(transformed_input_tensor)
  sparse_id_column = layers._inner_flatten(  # pylint: disable=protected-access
      sparse_id_column, output_rank)
  batch_size = sparse_id_column.dense_shape[0]
  dense_id_tensor = sparse_ops.sparse_to_dense(
      sparse_id_column.indices, [batch_size, self.max_sequence_length],
      sparse_id_column.values,
      default_value=0)
  # dense_id_tensor = gen_array_ops.reshape(dense_id_tensor, [-1, self.max_sequence_length])

  if self.shared_embedding_name is not None:
    shared_embedding_collection_name = (
        "SHARED_EMBEDDING_COLLECTION_" + self.shared_embedding_name.upper())
    graph = ops.get_default_graph()
    shared_embedding_collection = (
        graph.get_collection_ref(shared_embedding_collection_name))
    shape = [self.length, self.embedding_dimension]
    if shared_embedding_collection:
      if len(shared_embedding_collection) > 1:
        raise ValueError(
            "Collection %s can only contain one "
            "(partitioned) variable." % shared_embedding_collection_name)
      else:
        embeddings = shared_embedding_collection[0]
        if embeddings.get_shape() != shape:
          # `args` is not defined in this method; the name being reported is
          # this column's own shared embedding name.
          raise ValueError(
              "The embedding variable with name {} already "
              "exists, but its shape does not match required "
              "embedding shape here. Please make sure to use "
              "different shared_embedding_name for different "
              "shared embeddings.".format(self.shared_embedding_name))
    else:
      # First user of this shared name: create the variable and register it
      # so later columns with the same name reuse it.
      embeddings = contrib_variables.model_variable(
          name=self.shared_embedding_name,
          shape=shape,
          dtype=dtypes.float32,
          initializer=self.initializer,
          trainable=(trainable and self.trainable),
          collections=weight_collections)
      graph.add_to_collection(shared_embedding_collection_name, embeddings)
  else:
    embeddings = contrib_variables.model_variable(
        name="weights",
        shape=[self.length, self.embedding_dimension],
        dtype=dtypes.float32,
        initializer=self.initializer,
        trainable=(trainable and self.trainable),
        collections=weight_collections)

  if _is_variable(embeddings):
    embeddings = [embeddings]
  else:
    embeddings = embeddings._get_variable_list()  # pylint: disable=protected-access

  embedding_inputs = embedding_lookup(
      embeddings, dense_id_tensor, max_norm=self.max_norm)

  # Dropout is only applied while training.
  dropout = (self.dropout_keep_probabilities
             if self.mode == model_fn.ModeKeys.TRAIN else None)

  sequence_length = self._sequence_length(dense_id_tensor)
  # NOTE(review): `bidirectional_rnn` is a bare name here, not an attribute
  # of `self` -- confirm it is defined at module level.
  if bidirectional_rnn:
    cell_fw = rnn_common.construct_rnn_cell(self.num_units, self.cell_type,
                                            dropout)
    cell_bw = rnn_common.construct_rnn_cell(self.num_units, self.cell_type,
                                            dropout)
    _rnn_outputs, _ = rnn.bidirectional_dynamic_rnn(
        cell_fw,
        cell_bw,
        embedding_inputs,
        sequence_length=sequence_length,
        dtype=dtypes.float32)
    # Join forward and backward outputs along the feature axis.
    rnn_outputs = array_ops.concat(_rnn_outputs, axis=2)
  else:
    cell = rnn_common.construct_rnn_cell(self.num_units, self.cell_type,
                                         dropout)
    rnn_outputs, _ = rnn.dynamic_rnn(
        cell,
        embedding_inputs,
        sequence_length=sequence_length,
        dtype=dtypes.float32)

  return self._extract_last_relevent(rnn_outputs, sequence_length)
def test2d(self):
  """Scatters 1 at (1, 3) and (2, 0) into a 3x4 output defaulting to -1."""
  expected = np.full((3, 4), -1, dtype=np.int32)
  expected[1, 3] = 1
  expected[2, 0] = 1
  dense = sparse_ops.sparse_to_dense([[1, 3], [2, 0]], [3, 4], 1, -1)
  self.assertAllClose(expected, dense)
def testSetSingleValue(self):
  """A scalar value is written to every listed index; the rest are -1."""
  expected = np.full(5, -1, dtype=np.int32)
  expected[[1, 3]] = 1
  dense = sparse_ops.sparse_to_dense([1, 3], [5], 1, -1)
  self.assertAllClose(expected, dense)
def _test_set_intersection_3d(self, dtype, invalid_indices=False):
  """Checks 3-D set intersection for sparse/sparse, dense/sparse, dense/dense.

  Args:
    dtype: element type of the two input sets.
    invalid_indices: if True, the first operand's indices are given out of
      order and the op is expected to fail with "out of order".
  """
  if invalid_indices:
    # Same entries as the valid case, but the (0, 0) rows come too late.
    a_index_rows = [
        [0, 1, 0],
        [0, 1, 1],  # 0,1
        [1, 0, 0],  # 1,0
        [1, 1, 0],
        [1, 1, 1],
        [1, 1, 2],  # 1,1
        [0, 0, 0],
        [0, 0, 2],  # 0,0
        # 2,0
        [2, 1, 1]  # 2,1
        # 3,*
    ]
  else:
    a_index_rows = [
        [0, 0, 0],
        [0, 0, 2],  # 0,0
        [0, 1, 0],
        [0, 1, 1],  # 0,1
        [1, 0, 0],  # 1,0
        [1, 1, 0],
        [1, 1, 1],
        [1, 1, 2],  # 1,1
        # 2,0
        [2, 1, 1]  # 2,1
        # 3,*
    ]
  a_indices = constant_op.constant(a_index_rows, dtypes.int64)
  a_values = _constant(
      [
          1,
          9,  # 0,0
          3,
          3,  # 0,1
          1,  # 1,0
          9,
          7,
          8,  # 1,1
          # 2,0
          5  # 2,1
          # 3,*
      ],
      dtype)
  a_shape = constant_op.constant([4, 2, 3], dtypes.int64)
  sp_a = sparse_tensor_lib.SparseTensor(a_indices, a_values, a_shape)

  b_indices = constant_op.constant(
      [
          [0, 0, 0],
          [0, 0, 3],  # 0,0
          # 0,1
          [1, 0, 0],  # 1,0
          [1, 1, 0],
          [1, 1, 1],  # 1,1
          [2, 0, 1],  # 2,0
          [2, 1, 1],  # 2,1
          [3, 0, 0],  # 3,0
          [3, 1, 0]  # 3,1
      ],
      dtypes.int64)
  b_values = _constant(
      [
          1,
          3,  # 0,0
          # 0,1
          3,  # 1,0
          7,
          8,  # 1,1
          2,  # 2,0
          5,  # 2,1
          4,  # 3,0
          4  # 3,1
      ],
      dtype)
  b_shape = constant_op.constant([4, 2, 4], dtypes.int64)
  sp_b = sparse_tensor_lib.SparseTensor(b_indices, b_values, b_shape)

  if invalid_indices:
    with self.assertRaisesRegexp(errors_impl.OpError, "out of order"):
      self._set_intersection(sp_a, sp_b)
    return

  expected_indices = [
      [0, 0, 0],  # 0,0
      # 0,1
      # 1,0
      [1, 1, 0],
      [1, 1, 1],  # 1,1
      # 2,0
      [2, 1, 0],  # 2,1
      # 3,*
  ]
  expected_values = _values(
      [
          1,  # 0,0
          # 0,1
          # 1,0
          7,
          8,  # 1,1
          # 2,0
          5,  # 2,1
          # 3,*
      ],
      dtype)
  expected_shape = [4, 2, 2]
  expected_counts = [
      [1, 0],  # 0,0 and 0,1
      [0, 2],  # 1,0 and 1,1
      [0, 1],  # 2,0 and 2,1
      [0, 0],  # 3,0 and 3,1
  ]

  # Sparse to sparse.
  sparse_sparse = self._set_intersection(sp_a, sp_b)
  self._assert_set_operation(expected_indices, expected_values,
                             expected_shape, sparse_sparse, dtype=dtype)
  self.assertAllEqual(expected_counts,
                      self._set_intersection_count(sp_a, sp_b))

  # NOTE: sparse_to_dense doesn't support uint8 and uint16.
  if dtype in [dtypes.uint8, dtypes.uint16]:
    return

  # Dense to sparse.
  a_default = "-1" if dtype == dtypes.string else -1
  dense_a = sparse_ops.sparse_to_dense(
      sp_a.indices, sp_a.dense_shape, sp_a.values, default_value=a_default)
  a = math_ops.cast(dense_a, dtype=dtype)
  dense_sparse = self._set_intersection(a, sp_b)
  self._assert_set_operation(expected_indices, expected_values,
                             expected_shape, dense_sparse, dtype=dtype)
  self.assertAllEqual(expected_counts,
                      self._set_intersection_count(a, sp_b))

  # Dense to dense.
  b_default = "-2" if dtype == dtypes.string else -2
  dense_b = sparse_ops.sparse_to_dense(
      sp_b.indices, sp_b.dense_shape, sp_b.values, default_value=b_default)
  b = math_ops.cast(dense_b, dtype=dtype)
  dense_dense = self._set_intersection(a, b)
  self._assert_set_operation(expected_indices, expected_values,
                             expected_shape, dense_dense, dtype=dtype)
  self.assertAllEqual(expected_counts, self._set_intersection_count(a, b))
def testEmptyNonZeros(self):
  """With no indices and no values the output is all default (0.0)."""
  no_indices = array_ops.constant([], dtype=dtypes.int32)
  no_values = array_ops.constant([], dtype=dtypes.float32)
  dense = sparse_ops.sparse_to_dense(no_indices, [5], no_values, 0.0)
  expected = np.zeros(5, dtype=np.float32)
  self.assertAllClose(expected, dense)
def testFloat(self):
  """Float value 1.0 at indices 1 and 3 over a float default of 0.0."""
  expected = np.array([0.0, 1.0, 0.0, 1.0, 0.0], dtype=np.float32)
  dense = sparse_ops.sparse_to_dense([1, 3], [5], 1.0, 0.0)
  self.assertAllClose(expected, dense)
def testShapeInferenceUnknownShape(self):
  """An output_shape placeholder of unknown shape yields unknown rank."""
  with ops.Graph().as_default():
    indices_ph = array_ops.placeholder(dtypes.int64)
    shape_ph = array_ops.placeholder(dtypes.int64)
    dense = sparse_ops.sparse_to_dense(indices_ph, shape_ph, 1, 0)
    static_shape = dense.get_shape()
    self.assertIsNone(static_shape.ndims)
def testBadDefault(self):
  """A non-scalar default_value must be rejected."""
  expected_errors = (ValueError, errors.InvalidArgumentError)
  with self.assertRaisesRegex(expected_errors,
                              "default_value should be a scalar"):
    dense = sparse_ops.sparse_to_dense([1, 3], [5], [1, 2], [0])
    self.evaluate(dense)
def testZeroDefault(self):
  """Omitting default_value fills the untouched positions with 0."""
  with self.cached_session():
    dense = sparse_ops.sparse_to_dense(2, [4], 7)
    result = dense.eval()
    self.assertAllEqual(result, [0, 0, 7, 0])
def testZeroDefault(self):
  """Omitting default_value fills the untouched positions with 0."""
  expected = [0, 0, 7, 0]
  dense = sparse_ops.sparse_to_dense(2, [4], 7)
  self.assertAllEqual(dense, expected)
def _compute_sampled_logits(weights, biases, inputs, labels, num_sampled,
                            num_classes, num_true=1,
                            sampled_values=None,
                            subtract_log_q=True,
                            remove_accidental_hits=False,
                            name=None):
  """Shared helper behind nce_loss and sampled_softmax_loss.

  Produces the sampled training logits and the matching labels needed for
  e.g. noise-contrastive estimation (see nce_loss) or sampled softmax (see
  sampled_softmax_loss).

  Note: when num_true > 1, every target class receives the target
  probability 1 / num_true, so the targets of one example sum to 1.

  Args:
    weights: tensor of label embeddings with shape = [num_classes, dim]
    biases: tensor of num_classes label biases
    inputs: tensor with shape = [batch_size, dim] corresponding to forward
        activations of the input network
    labels: int tensor with shape [batch_size, num_true]
    num_sampled: number of label classes to sample per batch
    num_classes: number of possible label classes in the data (e.g. vocab
        size)
    num_true: number of target classes per example (default: 1)
    sampled_values: a tuple of (sampled_candidates, true_expected_count,
        sampled_expected_count) returned by a *CandidateSampler function to
        use (if None, we default to LogUniformCandidateSampler)
    subtract_log_q: subtract the log expected count of the labels in the
        sample to get the logits of the true labels (default: True)
        Turn off for Negative Sampling.
    remove_accidental_hits: whether to remove "accidental hits" where a
        sampled label equals the true labels (bool, default: False)
    name: name for this op

  Returns:
    out_logits, out_labels: tensors with shape
        [batch_size, num_true + num_sampled] for passing to either
        SigmoidCrossEntropyWithLogits (NCE) or
        SoftmaxCrossEntropyWithLogits (sampled softmax).
  """
  with ops.op_scope([weights, biases, inputs, labels], name,
                    "compute_sampled_logits"):
    if labels.dtype != types.int64:
      labels = math_ops.cast(labels, types.int64)
    flat_labels = array_ops.reshape(labels, [-1])

    # Draw the negative classes unless the caller supplied them.
    #   sampled: num_sampled vector
    #   true_expected_count: [batch_size, 1]
    #   sampled_expected_count: num_sampled vector
    if sampled_values is None:
      sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler(
          true_classes=labels,
          num_true=num_true,
          num_sampled=num_sampled,
          unique=True,
          range_max=num_classes)
    # NOTE: pylint cannot tell that 'sampled_values' is a sequence
    # pylint: disable=unpacking-non-sequence
    sampled, true_expected_count, sampled_expected_count = sampled_values
    # pylint: enable=unpacking-non-sequence

    # weights is [num_classes, dim]; flat_labels is [batch_size * num_true],
    # so true_w is [batch_size * num_true, dim] and true_b is
    # [batch_size * num_true].
    true_w = embedding_ops.embedding_lookup(weights, flat_labels)
    true_b = embedding_ops.embedding_lookup(biases, flat_labels)

    # Broadcast-multiply inputs ([batch_size, dim], expanded to
    # [batch_size, 1, dim]) with true_w viewed as [batch_size, num_true, dim].
    dim = array_ops.shape(true_w)[1:2]
    true_w_3d_shape = array_ops.concat(0, [[-1, num_true], dim])
    inputs_3d = array_ops.expand_dims(inputs, 1)
    true_w_3d = array_ops.reshape(true_w, true_w_3d_shape)
    row_wise_dots = math_ops.mul(inputs_3d, true_w_3d)
    # Summing each row of the flattened products gives the row-wise dot;
    # adding the biases yields the [batch_size, num_true] true_logits.
    flat_dots_shape = array_ops.concat(0, [[-1], dim])
    dots_as_matrix = array_ops.reshape(row_wise_dots, flat_dots_shape)
    true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])
    true_b = array_ops.reshape(true_b, [-1, num_true])
    true_logits = true_logits + true_b

    # Weights and biases of the sampled classes:
    #   sampled is a num_sampled int vector
    #   sampled_w is [num_sampled, dim], sampled_b is [num_sampled]
    sampled_w = embedding_ops.embedding_lookup(weights, sampled)
    sampled_b = embedding_ops.embedding_lookup(biases, sampled)

    # X*W'+B, which yields [batch_size, num_sampled].
    sampled_products = math_ops.matmul(inputs, sampled_w, transpose_b=True)
    sampled_logits = sampled_products + sampled_b

    if remove_accidental_hits:
      acc_indices, acc_ids, acc_weights = (
          candidate_sampling_ops.compute_accidental_hits(
              labels, sampled, num_true=num_true))

      # This is how SparseToDense expects the indices.
      acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1])
      acc_ids_int32 = math_ops.cast(acc_ids, types.int32)
      acc_ids_2d_int32 = array_ops.reshape(acc_ids_int32, [-1, 1])
      sparse_indices = array_ops.concat(
          1, [acc_indices_2d, acc_ids_2d_int32], "sparse_indices")
      # sampled_logits_shape = [batch_size, num_sampled]
      batch_dim = array_ops.shape(labels)[:1]
      sampled_dim = array_ops.expand_dims(num_sampled, 0)
      sampled_logits_shape = array_ops.concat(0, [batch_dim, sampled_dim])
      sampled_logits = sampled_logits + sparse_ops.sparse_to_dense(
          sparse_indices, sampled_logits_shape, acc_weights, 0.0)

    if subtract_log_q:
      # Subtract log of Q(l), prior probability that l appears in sampled.
      true_logits = true_logits - math_ops.log(true_expected_count)
      sampled_logits = sampled_logits - math_ops.log(sampled_expected_count)

    # Output logits and labels; the true columns come first.
    out_logits = array_ops.concat(1, [true_logits, sampled_logits])
    # ones_like(true_logits) is a float tensor of ones; dividing by num_true
    # makes the per-example labels sum to 1.0, i.e. a proper probability
    # distribution.
    true_labels = array_ops.ones_like(true_logits) / num_true
    sampled_labels = array_ops.zeros_like(sampled_logits)
    out_labels = array_ops.concat(1, [true_labels, sampled_labels])

  return out_logits, out_labels
def testBadShape(self):
  """A rank-2 output_shape must be rejected."""
  expected_errors = (ValueError, errors.InvalidArgumentError)
  bad_output_shape = [[5], [3]]
  with self.assertRaisesRegex(expected_errors, "must be rank 1"):
    sparse_ops.sparse_to_dense([1, 3], bad_output_shape, 1, -1)
def _compute_sampled_logits(weights, biases, inputs, labels, num_sampled,
                            num_classes, num_true=1,
                            sampled_values=None,
                            subtract_log_q=True,
                            remove_accidental_hits=False,
                            partition_strategy="mod",
                            name=None):
  """Builds sampled logits and labels using one lookup for all class ids.

  True and sampled ids are concatenated, looked up once in `weights` and
  `biases`, and the results are sliced back into their true and sampled
  parts.

  Args:
    weights: a tensor or list of tensors of label embeddings.
    biases: tensor of label biases.
    inputs: tensor of forward activations, [batch_size, dim].
    labels: int tensor of true classes; cast to int64 if necessary.
    num_sampled: number of classes to sample.
    num_classes: number of possible classes (`range_max` of the sampler).
    num_true: number of true classes per example.
    sampled_values: optional tuple (sampled, true_expected_count,
      sampled_expected_count); when None a log-uniform sampler is used.
    subtract_log_q: whether to subtract the log expected counts from logits.
    remove_accidental_hits: whether to add the accidental-hit weights
      returned by `compute_accidental_hits` to the sampled logits.
    partition_strategy: forwarded to the weights embedding lookup.
    name: optional name scope.

  Returns:
    out_logits, out_labels: true logits followed by sampled logits along
    axis 1, and labels of 1 / num_true for true columns, 0 for sampled ones.
  """
  if not isinstance(weights, list):
    weights = [weights]

  with ops.name_scope(name, "compute_sampled_logits",
                      weights + [biases, inputs, labels]):
    if labels.dtype != dtypes.int64:
      labels = math_ops.cast(labels, dtypes.int64)
    flat_labels = array_ops.reshape(labels, [-1])

    # Draw the negative classes unless the caller supplied them.
    #   sampled: [num_sampled] tensor
    #   true_expected_count: [batch_size, 1] tensor
    #   sampled_expected_count: [num_sampled] tensor
    if sampled_values is None:
      sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler(
          true_classes=labels,
          num_true=num_true,
          num_sampled=num_sampled,
          unique=True,
          range_max=num_classes)
    # NOTE: pylint cannot tell that 'sampled_values' is a sequence
    # pylint: disable=unpacking-non-sequence
    sampled, true_expected_count, sampled_expected_count = sampled_values
    # pylint: enable=unpacking-non-sequence

    # flat_labels is [batch_size * num_true]; sampled is [num_sampled].
    all_ids = array_ops.concat(0, [flat_labels, sampled])

    # weights shape is [num_classes, dim]
    all_w = embedding_ops.embedding_lookup(
        weights, all_ids, partition_strategy=partition_strategy)
    all_b = embedding_ops.embedding_lookup(biases, all_ids)

    # The leading rows belong to the true labels:
    #   true_w is [batch_size * num_true, dim]
    #   true_b is [batch_size * num_true]
    true_w_size = array_ops.pack([array_ops.shape(flat_labels)[0], -1])
    true_w = array_ops.slice(all_w, [0, 0], true_w_size)
    true_b = array_ops.slice(all_b, [0], array_ops.shape(flat_labels))

    # Broadcast-multiply inputs ([batch_size, dim], expanded to
    # [batch_size, 1, dim]) with true_w viewed as [batch_size, num_true, dim].
    dim = array_ops.shape(true_w)[1:2]
    true_w_3d_shape = array_ops.concat(0, [[-1, num_true], dim])
    inputs_3d = array_ops.expand_dims(inputs, 1)
    true_w_3d = array_ops.reshape(true_w, true_w_3d_shape)
    row_wise_dots = math_ops.mul(inputs_3d, true_w_3d)
    # Summing each row of the flattened products gives the row-wise dot;
    # adding the biases yields the [batch_size, num_true] true_logits.
    flat_dots_shape = array_ops.concat(0, [[-1], dim])
    dots_as_matrix = array_ops.reshape(row_wise_dots, flat_dots_shape)
    true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])
    true_b = array_ops.reshape(true_b, [-1, num_true])
    true_logits = true_logits + true_b

    # The remaining rows belong to the sampled labels:
    #   sampled_w is [num_sampled, dim]
    #   sampled_b is a [num_sampled] float tensor
    sampled_w_begin = array_ops.pack([array_ops.shape(flat_labels)[0], 0])
    sampled_w = array_ops.slice(all_w, sampled_w_begin, [-1, -1])
    sampled_b = array_ops.slice(all_b, array_ops.shape(flat_labels), [-1])

    # X*W'+B, which yields [batch_size, num_sampled].
    sampled_products = math_ops.matmul(inputs, sampled_w, transpose_b=True)
    sampled_logits = sampled_products + sampled_b

    if remove_accidental_hits:
      acc_indices, acc_ids, acc_weights = (
          candidate_sampling_ops.compute_accidental_hits(
              labels, sampled, num_true=num_true))

      # This is how SparseToDense expects the indices.
      acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1])
      acc_ids_int32 = math_ops.cast(acc_ids, dtypes.int32)
      acc_ids_2d_int32 = array_ops.reshape(acc_ids_int32, [-1, 1])
      sparse_indices = array_ops.concat(
          1, [acc_indices_2d, acc_ids_2d_int32], "sparse_indices")
      # sampled_logits_shape = [batch_size, num_sampled]
      batch_dim = array_ops.shape(labels)[:1]
      sampled_dim = array_ops.expand_dims(num_sampled, 0)
      sampled_logits_shape = array_ops.concat(0, [batch_dim, sampled_dim])
      if sampled_logits.dtype != acc_weights.dtype:
        acc_weights = math_ops.cast(acc_weights, sampled_logits.dtype)
      sampled_logits = sampled_logits + sparse_ops.sparse_to_dense(
          sparse_indices,
          sampled_logits_shape,
          acc_weights,
          default_value=0.0,
          validate_indices=False)

    if subtract_log_q:
      # Subtract log of Q(l), prior probability that l appears in sampled.
      true_logits = true_logits - math_ops.log(true_expected_count)
      sampled_logits = sampled_logits - math_ops.log(sampled_expected_count)

    # Output logits and labels; the true columns come first.
    out_logits = array_ops.concat(1, [true_logits, sampled_logits])
    # ones_like(true_logits) is a float tensor of ones; dividing by num_true
    # makes the per-example labels sum to 1.0, i.e. a proper probability
    # distribution.
    true_labels = array_ops.ones_like(true_logits) / num_true
    sampled_labels = array_ops.zeros_like(sampled_logits)
    out_labels = array_ops.concat(1, [true_labels, sampled_labels])

  return out_logits, out_labels
def _test_set_intersection_3d(self, dtype, invalid_indices=False):
  """Exercises 3-D set intersection across sparse and dense operand mixes.

  Args:
    dtype: element type of the two input sets.
    invalid_indices: if True, the first operand's indices are given out of
      order and the op is expected to fail with "out of order".
  """
  if invalid_indices:
    # Same entries as the valid case, but the (0, 0) rows come too late.
    rows_a = [
        [0, 1, 0],
        [0, 1, 1],  # 0,1
        [1, 0, 0],  # 1,0
        [1, 1, 0],
        [1, 1, 1],
        [1, 1, 2],  # 1,1
        [0, 0, 0],
        [0, 0, 2],  # 0,0
        # 2,0
        [2, 1, 1]  # 2,1
        # 3,*
    ]
  else:
    rows_a = [
        [0, 0, 0],
        [0, 0, 2],  # 0,0
        [0, 1, 0],
        [0, 1, 1],  # 0,1
        [1, 0, 0],  # 1,0
        [1, 1, 0],
        [1, 1, 1],
        [1, 1, 2],  # 1,1
        # 2,0
        [2, 1, 1]  # 2,1
        # 3,*
    ]
  indices_a = constant_op.constant(rows_a, dtypes.int64)
  values_a = _constant(
      [
          1,
          9,  # 0,0
          3,
          3,  # 0,1
          1,  # 1,0
          9,
          7,
          8,  # 1,1
          # 2,0
          5  # 2,1
          # 3,*
      ],
      dtype)
  shape_a = constant_op.constant([4, 2, 3], dtypes.int64)
  sp_a = sparse_tensor_lib.SparseTensor(indices_a, values_a, shape_a)

  indices_b = constant_op.constant(
      [
          [0, 0, 0],
          [0, 0, 3],  # 0,0
          # 0,1
          [1, 0, 0],  # 1,0
          [1, 1, 0],
          [1, 1, 1],  # 1,1
          [2, 0, 1],  # 2,0
          [2, 1, 1],  # 2,1
          [3, 0, 0],  # 3,0
          [3, 1, 0]  # 3,1
      ],
      dtypes.int64)
  values_b = _constant(
      [
          1,
          3,  # 0,0
          # 0,1
          3,  # 1,0
          7,
          8,  # 1,1
          2,  # 2,0
          5,  # 2,1
          4,  # 3,0
          4  # 3,1
      ],
      dtype)
  shape_b = constant_op.constant([4, 2, 4], dtypes.int64)
  sp_b = sparse_tensor_lib.SparseTensor(indices_b, values_b, shape_b)

  if invalid_indices:
    with self.assertRaisesRegexp(errors_impl.OpError, "out of order"):
      self._set_intersection(sp_a, sp_b)
    return

  expected_indices = [
      [0, 0, 0],  # 0,0
      # 0,1
      # 1,0
      [1, 1, 0],
      [1, 1, 1],  # 1,1
      # 2,0
      [2, 1, 0],  # 2,1
      # 3,*
  ]
  expected_values = _values(
      [
          1,  # 0,0
          # 0,1
          # 1,0
          7,
          8,  # 1,1
          # 2,0
          5,  # 2,1
          # 3,*
      ],
      dtype)
  expected_shape = [4, 2, 2]
  expected_counts = [
      [1, 0],  # 0,0 and 0,1
      [0, 2],  # 1,0 and 1,1
      [0, 1],  # 2,0 and 2,1
      [0, 0],  # 3,0 and 3,1
  ]

  # Sparse to sparse.
  result = self._set_intersection(sp_a, sp_b)
  self._assert_set_operation(
      expected_indices, expected_values, expected_shape, result, dtype=dtype)
  self.assertAllEqual(expected_counts,
                      self._set_intersection_count(sp_a, sp_b))

  # NOTE: sparse_to_dense doesn't support uint8 and uint16.
  if dtype in [dtypes.uint8, dtypes.uint16]:
    return

  # Dense to sparse.
  fill_a = "-1" if dtype == dtypes.string else -1
  a = math_ops.cast(
      sparse_ops.sparse_to_dense(
          sp_a.indices, sp_a.dense_shape, sp_a.values, default_value=fill_a),
      dtype=dtype)
  result = self._set_intersection(a, sp_b)
  self._assert_set_operation(
      expected_indices, expected_values, expected_shape, result, dtype=dtype)
  self.assertAllEqual(expected_counts,
                      self._set_intersection_count(a, sp_b))

  # Dense to dense.
  fill_b = "-2" if dtype == dtypes.string else -2
  b = math_ops.cast(
      sparse_ops.sparse_to_dense(
          sp_b.indices, sp_b.dense_shape, sp_b.values, default_value=fill_b),
      dtype=dtype)
  result = self._set_intersection(a, b)
  self._assert_set_operation(
      expected_indices, expected_values, expected_shape, result, dtype=dtype)
  self.assertAllEqual(expected_counts, self._set_intersection_count(a, b))
def sequence_loss_by_example(logits, targets, weights, num_decoder_symbols,
                             average_across_timesteps=True,
                             softmax_loss_function=None, name=None):
  """Weighted cross-entropy loss for a sequence of logits (per example).

  Args:
    logits: list of 2D Tensors of shape [batch_size x num_decoder_symbols].
    targets: list of 1D batch-sized int32 Tensors of the same length as
      logits.
    weights: list of 1D batch-sized float-Tensors of the same length as
      logits.
    num_decoder_symbols: integer, number of decoder symbols (output classes).
    average_across_timesteps: If set, divide the returned cost by the total
      label weight.
    softmax_loss_function: function (inputs-batch, labels-batch) -> loss-batch
      to be used instead of the standard softmax (the default if this is
      None).
    name: optional name for this operation, default:
      "sequence_loss_by_example".

  Returns:
    1D batch-sized float Tensor: the log-perplexity for each sequence.

  Raises:
    ValueError: if len(logits) is different from len(targets) or
      len(weights).
  """
  num_steps = len(logits)
  if not (len(targets) == num_steps and len(weights) == num_steps):
    raise ValueError(
        "Lengths of logits, weights, and targets must be the same "
        "%d, %d, %d." % (len(logits), len(weights), len(targets)))

  with ops.op_scope(logits + targets + weights, name,
                    "sequence_loss_by_example"):
    batch_size = array_ops.shape(targets[0])[0]
    weighted_losses = []
    length = batch_size * num_decoder_symbols
    for i in xrange(num_steps):
      step_logits = logits[i]
      step_targets = targets[i]
      if softmax_loss_function is not None:
        crossent = softmax_loss_function(step_logits, step_targets)
      else:
        # TODO(lukaszkaiser): There is no SparseCrossEntropy in TensorFlow, so
        # we need to first cast targets into a dense representation, and as
        # SparseToDense does not accept batched inputs, we need to do this by
        # re-indexing and re-sizing. When TensorFlow adds SparseCrossEntropy,
        # rewrite this method.
        # Each example's target becomes a position in a flat vector of
        # batch_size * num_decoder_symbols entries.
        row_offsets = num_decoder_symbols * math_ops.range(batch_size)
        indices = step_targets + row_offsets
        with ops.device("/cpu:0"):  # Sparse-to-dense must be on CPU for now.
          dense = sparse_ops.sparse_to_dense(
              indices, array_ops.expand_dims(length, 0), 1.0, 0.0)
        target = array_ops.reshape(dense, [-1, num_decoder_symbols])
        crossent = nn_ops.softmax_cross_entropy_with_logits(
            step_logits, target,
            name="SequenceLoss/CrossEntropy{0}".format(i))
      weighted_losses.append(crossent * weights[i])

    log_perps = math_ops.add_n(weighted_losses)
    if average_across_timesteps:
      total_size = math_ops.add_n(weights)
      total_size += 1e-12  # Just to avoid division by 0 for all-0 weights.
      log_perps /= total_size
  return log_perps
def _compute_sampled_logits(weights, biases, inputs, labels, num_sampled,
                            num_classes, num_true=1,
                            sampled_values=None,
                            subtract_log_q=True,
                            remove_accidental_hits=False,
                            name=None):
  """Common helper used by nce_loss and sampled_softmax_loss.

  Computes the sampled output training logits and labels that are suitable
  for e.g. noise-contrastive estimation (see nce_loss) or sampled softmax
  (see sampled_softmax_loss).

  Note: for num_true > 1 each target class is assigned the target
  probability 1 / num_true, so target probabilities sum to 1 per example.

  Args:
    weights: tensor of label embeddings with shape = [num_classes, dim]
    biases: tensor of num_classes label biases
    inputs: tensor with shape = [batch_size, dim] corresponding to forward
        activations of the input network
    labels: int tensor with shape [batch_size, num_true]
    num_sampled: number of label classes to sample per batch
    num_classes: number of possible label classes in the data (e.g. vocab
        size)
    num_true: number of target classes per example (default: 1)
    sampled_values: a tuple of (sampled_candidates, true_expected_count,
        sampled_expected_count) returned by a *CandidateSampler function to
        use (if None, we default to LogUniformCandidateSampler)
    subtract_log_q: subtract the log expected count of the labels in the
        sample to get the logits of the true labels (default: True)
        Turn off for Negative Sampling.
    remove_accidental_hits: whether to remove "accidental hits" where a
        sampled label equals the true labels (bool, default: False)
    name: name for this op

  Returns:
    out_logits, out_labels: tensors with shape
        [batch_size, num_true + num_sampled] for passing to either
        SigmoidCrossEntropyWithLogits (NCE) or
        SoftmaxCrossEntropyWithLogits (sampled softmax).
  """
  scope_inputs = [weights, biases, inputs, labels]
  with ops.op_scope(scope_inputs, name, "compute_sampled_logits"):
    if labels.dtype != types.int64:
      labels = math_ops.cast(labels, types.int64)
    labels_1d = array_ops.reshape(labels, [-1])

    # Negative sampling, skipped when the caller provides sampled_values.
    #   sampled: num_sampled vector
    #   true_expected_count: [batch_size, 1]
    #   sampled_expected_count: num_sampled vector
    if sampled_values is None:
      sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler(
          true_classes=labels,
          num_true=num_true,
          num_sampled=num_sampled,
          unique=True,
          range_max=num_classes)
    # NOTE: pylint cannot tell that 'sampled_values' is a sequence
    # pylint: disable=unpacking-non-sequence
    sampled, true_expected_count, sampled_expected_count = sampled_values
    # pylint: enable=unpacking-non-sequence

    # weights is [num_classes, dim]; labels_1d is [batch_size * num_true],
    # so true_w is [batch_size * num_true, dim] and true_b is
    # [batch_size * num_true].
    true_w = embedding_ops.embedding_lookup(weights, labels_1d)
    true_b = embedding_ops.embedding_lookup(biases, labels_1d)

    # inputs is [batch_size, dim]; after expand_dims it broadcasts against
    # true_w reshaped to [batch_size, num_true, dim].
    dim = array_ops.shape(true_w)[1:2]
    per_example_shape = array_ops.concat(0, [[-1, num_true], dim])
    expanded_inputs = array_ops.expand_dims(inputs, 1)
    per_example_true_w = array_ops.reshape(true_w, per_example_shape)
    row_wise_dots = math_ops.mul(expanded_inputs, per_example_true_w)
    # The row-wise dot plus biases gives the [batch_size, num_true]
    # true_logits tensor.
    matrix_shape = array_ops.concat(0, [[-1], dim])
    dots_as_matrix = array_ops.reshape(row_wise_dots, matrix_shape)
    true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])
    true_b = array_ops.reshape(true_b, [-1, num_true])
    true_logits = true_logits + true_b

    # Parameters of the sampled classes:
    #   sampled is a num_sampled int vector
    #   sampled_w is [num_sampled, dim], sampled_b is [num_sampled]
    sampled_w = embedding_ops.embedding_lookup(weights, sampled)
    sampled_b = embedding_ops.embedding_lookup(biases, sampled)

    # X*W'+B, which yields [batch_size, num_sampled].
    sampled_dots = math_ops.matmul(inputs, sampled_w, transpose_b=True)
    sampled_logits = sampled_dots + sampled_b

    if remove_accidental_hits:
      acc_indices, acc_ids, acc_weights = (
          candidate_sampling_ops.compute_accidental_hits(
              labels, sampled, num_true=num_true))

      # This is how SparseToDense expects the indices.
      acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1])
      acc_ids_as_int32 = math_ops.cast(acc_ids, types.int32)
      acc_ids_2d_int32 = array_ops.reshape(acc_ids_as_int32, [-1, 1])
      sparse_indices = array_ops.concat(
          1, [acc_indices_2d, acc_ids_2d_int32], "sparse_indices")
      # sampled_logits_shape = [batch_size, num_sampled]
      batch_part = array_ops.shape(labels)[:1]
      sampled_part = array_ops.expand_dims(num_sampled, 0)
      sampled_logits_shape = array_ops.concat(0, [batch_part, sampled_part])
      sampled_logits = sampled_logits + sparse_ops.sparse_to_dense(
          sparse_indices, sampled_logits_shape, acc_weights, 0.0)

    if subtract_log_q:
      # Subtract log of Q(l), prior probability that l appears in sampled.
      true_logits = true_logits - math_ops.log(true_expected_count)
      sampled_logits = sampled_logits - math_ops.log(sampled_expected_count)

    # Output logits and labels; the true columns come first.
    out_logits = array_ops.concat(1, [true_logits, sampled_logits])
    # ones_like(true_logits) is a float tensor of ones; dividing by num_true
    # makes the per-example labels sum to 1.0, i.e. a proper probability
    # distribution.
    labels_for_true = array_ops.ones_like(true_logits) / num_true
    labels_for_sampled = array_ops.zeros_like(sampled_logits)
    out_labels = array_ops.concat(1, [labels_for_true, labels_for_sampled])

  return out_logits, out_labels
def sufficient_statistics(x, axes, shift=True, keep_dims=False, name=None):
    """Calculate the sufficient statistics for the mean and variance of `x`.

    These sufficient statistics are computed using the one pass algorithm on
    an input that's optionally shifted using the value of the 1st element in
    `x`. See:
    https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Computing_shifted_data

    Args:
      x: A `Tensor`.
      axes: Array of ints. Axes along which to compute mean and variance.
      shift: If true, shift the data to provide more numerically stable
        results.
      keep_dims: produce statistics with the same dimensionality as the input.
      name: Name used to scope the operations that compute the sufficient
        stats.

    Returns:
      Four `Tensor` objects of the same type as `x`:
      * the count (number of elements to average over).
      * the (possibly shifted) sum of the elements in the array.
      * the (possibly shifted) sum of squares of the elements in the array.
      * the shift by which the mean must be corrected or None if `shift` is
        False.
    """
    with ops.op_scope([x, axes], name, "sufficient_statistics"):
        x = ops.convert_to_tensor(x, name="x")
        x_shape = x.get_shape()
        if x_shape.is_fully_defined():
            # Static shape: compute the element count and the shape of a
            # single "slice" (reduced axes collapsed to 1) in plain Python.
            # The membership set is loop-invariant, so build it exactly once.
            reduced_axes = set(axes)
            counts = 1
            m_shape = []
            for d in range(x_shape.ndims):
                dim = x_shape[d].value
                if d in reduced_axes:
                    counts *= dim
                    dim = 1
                m_shape.append(dim)
            counts = constant_op.constant(counts, dtype=x.dtype)
        else:  # shape needs to be inferred at runtime.
            x_shape = array_ops.shape(x)
            # Boolean vector with one entry per dimension of `x`: True at the
            # positions listed in `axes`, False everywhere else.
            select_axes = sparse_ops.sparse_to_dense(
                axes, array_ops.shape(x_shape), True, False)
            # Same as the static `m_shape`: 1 on reduced axes, the original
            # size on the others.
            m_shape = math_ops.select(
                select_axes, array_ops.ones_like(x_shape), x_shape)
            # x_shape / m_shape is the dimension size on reduced axes and 1
            # elsewhere, so its product is the number of reduced elements.
            counts = math_ops.cast(
                math_ops.reduce_prod(x_shape / m_shape),
                x.dtype,
                name="count")
        if shift:
            # Take the slice of `x` that starts at the origin and has shape
            # `m_shape`; it is used as the shift for both statistics.
            shift_value = array_ops.slice(
                x, array_ops.zeros_like(m_shape), m_shape)
            m_ss = math_ops.sub(x, shift_value)
            v_ss = math_ops.squared_difference(x, shift_value)
            if keep_dims:
                shift_value = array_ops.identity(shift_value, name="shift")
            else:
                shift_value = array_ops.squeeze(
                    shift_value, squeeze_dims=axes, name="shift")
        else:  # not shift.
            m_ss = x
            v_ss = math_ops.square(x)
            shift_value = None
        m_ss = math_ops.reduce_sum(
            m_ss, axes, keep_dims=keep_dims, name="mean_ss")
        v_ss = math_ops.reduce_sum(
            v_ss, axes, keep_dims=keep_dims, name="var_ss")
    return counts, m_ss, v_ss, shift_value
def testShapeInferenceUnknownShape(self):
    """The dense output has unknown rank when the shape input has no static shape."""
    with self.test_session(use_gpu=False):
        # Neither placeholder is given a static shape.
        sparse_indices = array_ops.placeholder(dtypes.int64)
        output_shape = array_ops.placeholder(dtypes.int64)
        dense = sparse_ops.sparse_to_dense(sparse_indices, output_shape, 1, 0)
        inferred_rank = dense.get_shape().ndims
        self.assertIsNone(inferred_rank)
def testShapeInferenceUnknownShape(self):
    """The dense output has unknown rank when the shape input has no static shape."""
    with self.session(use_gpu=False):
        # Neither placeholder is given a static shape.
        sparse_indices = array_ops.placeholder(dtypes.int64)
        output_shape = array_ops.placeholder(dtypes.int64)
        dense = sparse_ops.sparse_to_dense(sparse_indices, output_shape, 1, 0)
        inferred_rank = dense.get_shape().ndims
        self.assertIsNone(inferred_rank)
def testString(self):
    """sparse_to_dense places the string value at the given indices and the default elsewhere."""
    tf_ans = sparse_ops.sparse_to_dense([1, 3], [5], "a", "b")
    # Use np.bytes_ for the byte-string dtype: the np.string_ alias was
    # removed in NumPy 2.0, while np.bytes_ is available in old and new NumPy.
    np_ans = np.array(["b", "a", "b", "a", "b"]).astype(np.bytes_)
    self.assertAllEqual(np_ans, tf_ans)
def _compute_sampled_logits(self,
                            weights,
                            biases,
                            labels,
                            inputs,
                            num_sampled,
                            num_classes,
                            transmissibility,
                            num_true=1,
                            sampled_values=None,
                            subtract_log_q=True,
                            remove_accidental_hits=False,
                            partition_strategy="mod",
                            name=None,
                            seed=None):
    """Build logits for the true and sampled classes, paired with caller-supplied labels.

    The true and sampled class ids are looked up together in `weights` and
    `biases`, the logits for each group are computed against `inputs`, and
    the two groups are concatenated along axis 1 (true logits first).

    Args:
      weights: A tensor/variable, a list of them, or a
        `PartitionedVariable`; anything that is not already a list is
        wrapped into one before the embedding lookup.
      biases: Class biases, looked up with the same ids as `weights`.
      labels: Target class ids. Cast to int64 if needed; a rank-1 input is
        expanded to `[batch_size, 1]`.
      inputs: Forward activations, `[batch_size, dim]` according to the
        inline shape comments.
      num_sampled: Number of classes to sample; forwarded to the candidate
        sampler and used as the second dimension of the sampled logits.
      num_classes: Forwarded to the candidate sampler as `range_max`.
      transmissibility: Tensor returned (through a single-element concat on
        axis 1) as the output labels. Presumably shaped to match
        `out_logits` -- TODO confirm against the caller.
      num_true: Number of target classes per example.
      sampled_values: Optional `(sampled, true_expected_count,
        sampled_expected_count)` triple; when None a log-uniform candidate
        sampler is used.
      subtract_log_q: Whether to subtract the log expected counts from the
        logits.
      remove_accidental_hits: Whether to add the weights returned by
        `compute_accidental_hits` to the sampled logits at the positions
        where a sampled class equals a true class.
      partition_strategy: Passed through to `embedding_lookup`.
      name: Optional name for the enclosing name scope.
      seed: Seed forwarded to the candidate sampler.

    Returns:
      out_logits: `true_logits` and `sampled_logits` concatenated on axis 1.
      out_labels: `transmissibility`, passed through a concat on axis 1.
    """
    if isinstance(weights, variables.PartitionedVariable):
        weights = list(weights)
    if not isinstance(weights, list):
        weights = [weights]

    with ops.name_scope(name, "compute_sampled_logits",
                        weights + [biases, inputs, labels]):
        if labels.dtype != dtypes.int64:
            labels = math_ops.cast(labels, dtypes.int64)
        # A rank-1 labels tensor is treated as one true class per example.
        if labels.shape.ndims == 1:
            labels = array_ops.expand_dims(labels, -1)
        labels_flat = array_ops.reshape(labels, [-1])

        # Sample the negative labels.
        #   sampled shape: [num_sampled] tensor
        #   true_expected_count shape = [batch_size, 1] tensor
        #   sampled_expected_count shape = [num_sampled] tensor
        # NOTE(review): the original author's note here reads
        # "num_sampled: dictionary size"; the code passes num_classes as
        # range_max, so confirm which quantity was meant.
        if sampled_values is None:
            sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler(
                true_classes=labels,
                num_true=num_true,
                num_sampled=num_sampled,
                unique=True,
                range_max=num_classes,
                seed=seed)
        # NOTE: pylint cannot tell that 'sampled_values' is a sequence
        # pylint: disable=unpacking-non-sequence
        sampled, true_expected_count, sampled_expected_count = (
            array_ops.stop_gradient(s) for s in sampled_values)
        # pylint: enable=unpacking-non-sequence
        sampled = math_ops.cast(sampled, dtypes.int64)

        # labels_flat is a [batch_size * num_true] tensor
        # sampled is a [num_sampled] int tensor
        all_ids = array_ops.concat([labels_flat, sampled], 0)

        # Retrieve the true weights and the logits of the sampled weights.

        # weights shape is [num_classes, dim]
        # Author's example sizes: 128 similar node pairs and 5 dissimilar
        # nodes (i.e. 128 * 5 dissimilar node pairs).
        all_w = embedding_ops.embedding_lookup(
            weights, all_ids, partition_strategy=partition_strategy)

        # true_w shape is [batch_size * num_true, dim] (author's example:
        # 128 x 100). It is the leading rows of the combined lookup.
        true_w = array_ops.slice(
            all_w, [0, 0],
            array_ops.stack([array_ops.shape(labels_flat)[0], -1]))

        # sampled_w is the remaining rows (author's example: 5 x 100).
        sampled_w = array_ops.slice(
            all_w, array_ops.stack([array_ops.shape(labels_flat)[0], 0]),
            [-1, -1])
        # inputs has shape [batch_size, dim]
        # sampled_w has shape [num_sampled, dim]
        # Apply X*W', which yields [batch_size, num_sampled]
        # Author's example: each of the 128 input nodes is compared with the
        # 5 dissimilar nodes, giving a 128 x 5 matrix of node-to-node
        # similarities.
        sampled_logits = math_ops.matmul(inputs, sampled_w, transpose_b=True)

        # Retrieve the true and sampled biases, compute the true logits, and
        # add the biases to the true and sampled logits.
        all_b = embedding_ops.embedding_lookup(
            biases, all_ids, partition_strategy=partition_strategy)
        # true_b is a [batch_size * num_true] tensor
        # sampled_b is a [num_sampled] float tensor
        true_b = array_ops.slice(all_b, [0], array_ops.shape(labels_flat))
        sampled_b = array_ops.slice(all_b, array_ops.shape(labels_flat), [-1])

        # inputs shape is [batch_size, dim]
        # true_w shape is [batch_size * num_true, dim]
        # row_wise_dots is [batch_size, num_true, dim]
        dim = array_ops.shape(true_w)[1:2]
        new_true_w_shape = array_ops.concat([[-1, num_true], dim], 0)
        row_wise_dots = math_ops.multiply(
            array_ops.expand_dims(inputs, 1),
            array_ops.reshape(true_w, new_true_w_shape))
        # We want the row-wise dot plus biases which yields a
        # [batch_size, num_true] tensor of true_logits.
        dots_as_matrix = array_ops.reshape(
            row_wise_dots, array_ops.concat([[-1], dim], 0))
        true_logits = array_ops.reshape(self._sum_rows(dots_as_matrix),
                                        [-1, num_true])
        true_b = array_ops.reshape(true_b, [-1, num_true])
        # Author's example: similar node pairs give a 128 x 1 result;
        # dissimilar node pairs give a 128 x 5 result.
        true_logits += true_b
        sampled_logits += sampled_b

        if remove_accidental_hits:
            acc_hits = candidate_sampling_ops.compute_accidental_hits(
                labels, sampled, num_true=num_true)
            acc_indices, acc_ids, acc_weights = acc_hits

            # This is how SparseToDense expects the indices.
            acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1])
            acc_ids_2d_int32 = array_ops.reshape(
                math_ops.cast(acc_ids, dtypes.int32), [-1, 1])
            sparse_indices = array_ops.concat(
                [acc_indices_2d, acc_ids_2d_int32], 1, "sparse_indices")
            # Create sampled_logits_shape = [batch_size, num_sampled]
            sampled_logits_shape = array_ops.concat([
                array_ops.shape(labels)[:1],
                array_ops.expand_dims(num_sampled, 0)
            ], 0)
            # Match the logits dtype so the in-place add below is valid.
            if sampled_logits.dtype != acc_weights.dtype:
                acc_weights = math_ops.cast(acc_weights, sampled_logits.dtype)
            sampled_logits += sparse_ops.sparse_to_dense(
                sparse_indices,
                sampled_logits_shape,
                acc_weights,
                default_value=0.0,
                validate_indices=False)

        if subtract_log_q:
            # Subtract log of Q(l), prior probability that l appears in sampled.
            true_logits -= math_ops.log(true_expected_count)
            sampled_logits -= math_ops.log(sampled_expected_count)

        # Construct output logits and labels. The true labels/logits start at col 0.
        out_logits = array_ops.concat([true_logits, sampled_logits], 1)

        # The labels are the caller-supplied `transmissibility` tensor. The
        # disabled entries recorded in the original comments were
        # ones_like(true_logits) / num_true for the true columns and
        # zeros_like(sampled_logits) for the sampled columns.
        out_labels = array_ops.concat(
            [
                transmissibility,
            ], 1)

        return out_logits, out_labels