def rnn_decoder(decoder_inputs, initial_state, cell, scope=None):
  """RNN Decoder that creates training and sampling sub-graphs.

  Args:
    decoder_inputs: Inputs for decoder, list of tensors.
      This is used only in training sub-graph.
    initial_state: Initial state for the decoder.
    cell: RNN cell to use for decoder.
    scope: Scope to use, if None new will be produced.

  Returns:
    List of tensors for outputs and states for training and sampling
    sub-graphs.
  """
  with vs.variable_scope(scope or "dnn_decoder"):
    states, sampling_states = [initial_state], [initial_state]
    outputs, sampling_outputs = [], []
    with ops.op_scope([decoder_inputs, initial_state], "training"):
      for i, inp in enumerate(decoder_inputs):
        if i > 0:
          vs.get_variable_scope().reuse_variables()
        output, new_state = cell(inp, states[-1])
        outputs.append(output)
        states.append(new_state)
    with ops.op_scope([initial_state], "sampling"):
      for i, _ in enumerate(decoder_inputs):
        if i == 0:
          sampling_outputs.append(outputs[i])
          sampling_states.append(states[i])
        else:
          sampling_output, sampling_state = cell(sampling_outputs[-1],
                                                 sampling_states[-1])
          sampling_outputs.append(sampling_output)
          sampling_states.append(sampling_state)
  return outputs, states, sampling_outputs, sampling_states
def __init__(self, example_indices, feature_indices, feature_values):
  """Creates a `SparseFeatureColumn` representation.

  Args:
    example_indices: A 1-D int64 tensor of shape `[N]`. Also, accepts
      python lists, or numpy arrays.
    feature_indices: A 1-D int64 tensor of shape `[N]`. Also, accepts
      python lists, or numpy arrays.
    feature_values: An optional 1-D float tensor of shape `[N]`. Also,
      accepts python lists, or numpy arrays.

  Returns:
    A `SparseFeatureColumn`
  """
  with op_scope([example_indices, feature_indices], None,
                'SparseFeatureColumn'):
    self._example_indices = convert_to_tensor(example_indices,
                                              name='example_indices',
                                              dtype=dtypes.int64)
    self._feature_indices = convert_to_tensor(feature_indices,
                                              name='feature_indices',
                                              dtype=dtypes.int64)
  self._feature_values = None
  if feature_values is not None:
    with op_scope([feature_values], None, 'SparseFeatureColumn'):
      self._feature_values = convert_to_tensor(feature_values,
                                               name='feature_values',
                                               dtype=dtypes.float32)
def testEmptyScopeName(self):
  g0 = ops.Graph()
  a = g0.create_op("a", [], [dtypes.float32])
  b = g0.create_op("b", [], [dtypes.float32])
  with ops.op_scope([a, b], "") as scope:
    self.assertEqual("", scope)
    self.assertEqual(g0, ops.get_default_graph())
  with ops.op_scope([a, b], "", "my_default_scope") as scope:
    self.assertEqual("", scope)
    self.assertEqual(g0, ops.get_default_graph())
def _testGraphElements(self, graph_elements):
  scope_name = "my_scope"
  with ops.op_scope(graph_elements, scope_name) as scope:
    self.assertEqual("%s/" % scope_name, scope)
    self.assertEqual(graph_elements[0].graph, ops.get_default_graph())
  g1 = ops.Graph()
  c = g1.create_op("c", [], [dtypes.float32])
  with self.assertRaises(ValueError):
    with ops.op_scope(graph_elements + [c], scope_name):
      pass
def testNoScopeName(self):
  g0 = ops.Graph()
  values = [
      g0.create_op("a", [], [dtypes.float32]),
      g0.create_op("b", [], [dtypes.float32])]
  with self.assertRaises(ValueError):
    with ops.op_scope(values, None):
      pass
  with self.assertRaises(ValueError):
    with ops.op_scope(values, None, None):
      pass
def testDefaultScopeName(self):
  g0 = ops.Graph()
  a = g0.create_op("a", [], [dtypes.float32])
  b = g0.create_op("b", [], [dtypes.float32])
  scope_name = "my_scope"
  default_scope_name = "my_default_scope"
  with ops.op_scope([a, b], scope_name, default_scope_name) as scope:
    self.assertEqual("%s/" % scope_name, scope)
    self.assertEqual(g0, ops.get_default_graph())
  with ops.op_scope([a, b], None, default_scope_name) as scope:
    self.assertEqual("%s/" % default_scope_name, scope)
    self.assertEqual(g0, ops.get_default_graph())
def string_input_producer(string_tensor, num_epochs=None, shuffle=True,
                          seed=None, capacity=32, name=None):
  """Output strings (e.g. filenames) to a queue for an input pipeline.

  Args:
    string_tensor: A 1-D string tensor with the strings to produce.
    num_epochs: An integer (optional). If specified, `string_input_producer`
      produces each string from `string_tensor` `num_epochs` times before
      generating an OutOfRange error. If not specified,
      `string_input_producer` can cycle through the strings in
      `string_tensor` an unlimited number of times.
    shuffle: Boolean. If true, the strings are randomly shuffled within each
      epoch.
    seed: An integer (optional). Seed used if shuffle == True.
    capacity: An integer. Sets the queue capacity.
    name: A name for the operations (optional).

  Returns:
    A queue with the output strings.  A `QueueRunner` for the Queue
    is added to the current `Graph`'s `QUEUE_RUNNER` collection.
  """
  with ops.op_scope([string_tensor], name, "input_producer") as name:
    return _input_producer(
        string_tensor, dtypes.string, num_epochs, shuffle, seed, capacity,
        name, "fraction_of_%d_full" % capacity)
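A minimal pipeline sketch, assuming the TF 1.x-era export `tf.train.string_input_producer` and its queue-runner machinery; the filenames are hypothetical:

```python
import tensorflow as tf

# Hypothetical filenames; any 1-D list of strings works.
filenames = ["/tmp/a.txt", "/tmp/b.txt"]
queue = tf.train.string_input_producer(filenames, num_epochs=2, shuffle=True)
next_filename = queue.dequeue()

with tf.Session() as sess:
    # num_epochs is tracked with a local variable, so initialize locals.
    sess.run(tf.local_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    print(sess.run(next_filename))
    coord.request_stop()
    coord.join(threads)
```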
def mn_i(weights, name=None):
  """Applies max-norm regularization to weights.

  Note: `scale` is not a parameter of this function; it is captured from
  the enclosing regularizer factory (see the 'maxnorm_o_regularizer' op
  scope below), so this function is only valid as a closure.
  """
  with ops.op_scope([weights], name, 'maxnorm_o_regularizer') as scope:
    my_scale = ops.convert_to_tensor(scale,
                                     dtype=weights.dtype.base_dtype,
                                     name='scale')
    return standard_ops.mul(
        my_scale,
        standard_ops.reduce_sum(
            standard_ops.reduce_max(standard_ops.abs(weights), 1)),
        name=scope)
def sample(self, n, seed=None, name="sample"):
  """Sample `n` observations from the Uniform Distributions.

  Args:
    n: `Scalar`, type int32, the number of observations to sample.
    seed: Python integer, the random seed.
    name: The name to give this op.

  Returns:
    samples: a `Tensor` of shape `(n,) + self.batch_shape + self.event_shape`
        with values of type `self.dtype`.
  """
  with ops.name_scope(self.name):
    with ops.op_scope([self.a, self.b, n], name):
      n = ops.convert_to_tensor(n, name="n")
      n_val = tensor_util.constant_value(n)
      shape = array_ops.concat(0, [array_ops.pack([n]),
                                   self.batch_shape()])
      samples = random_ops.random_uniform(shape=shape,
                                          dtype=self.dtype,
                                          seed=seed)

      # Provide some hints to shape inference
      inferred_shape = tensor_shape.vector(n_val).concatenate(
          self.get_batch_shape())
      samples.set_shape(inferred_shape)

      return (array_ops.expand_dims(self.a, 0) +
              array_ops.expand_dims(self.range(), 0) * samples)
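The return expression is an affine rescaling of standard uniforms: `a + (b - a) * u`. A minimal NumPy restatement of that transform (the arrays below are illustrative):

```python
import numpy as np

# Batch of two uniform distributions: U(0, 1) and U(2, 5).
a = np.array([0.0, 2.0])
b = np.array([1.0, 5.0])

n = 4
u = np.random.uniform(size=(n,) + a.shape)   # standard uniforms, shape (n, 2)
samples = a[None, :] + (b - a)[None, :] * u  # affine rescale, as in sample()
print(samples.shape)  # (4, 2)
```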
def range_input_producer(limit, num_epochs=None, shuffle=True, seed=None,
                         capacity=32, name=None):
  """Produces the integers from 0 to limit-1 in a queue.

  Args:
    limit: An int32 scalar tensor.
    num_epochs: An integer (optional). If specified, `range_input_producer`
      produces each integer `num_epochs` times before generating an
      OutOfRange error. If not specified, `range_input_producer` can cycle
      through the integers an unlimited number of times.
    shuffle: Boolean. If true, the integers are randomly shuffled within
      each epoch.
    seed: An integer (optional). Seed used if shuffle == True.
    capacity: An integer. Sets the queue capacity.
    name: A name for the operations (optional).

  Returns:
    A Queue with the output integers.  A `QueueRunner` for the Queue
    is added to the current `Graph`'s `QUEUE_RUNNER` collection.
  """
  with ops.op_scope([limit], name, "input_producer") as name:
    range_tensor = math_ops.range(limit)
    return _input_producer(
        range_tensor, dtypes.int32, num_epochs, shuffle, seed, capacity,
        name, "fraction_of_%d_full" % capacity)
def surrogate_loss(sample_losses,
                   stochastic_tensors=None,
                   name="SurrogateLoss"):
  """Surrogate loss for stochastic graphs.

  This function will call `loss_fn` on each `StochasticTensor`
  upstream of `sample_losses`, passing the losses that it influenced.

  Note that currently `surrogate_loss` does not work with
  `StochasticTensor`s instantiated in `while_loop`s or other control
  structures.

  Args:
    sample_losses: a list or tuple of final losses. Each loss should be per
      example in the batch (and possibly per sample); that is, it should
      have dimensionality of 1 or greater. All losses should have the same
      shape.
    stochastic_tensors: a list of `StochasticTensor`s to add loss terms for.
      If None, defaults to all `StochasticTensor`s in the graph upstream of
      the `Tensor`s in `sample_losses`.
    name: the name with which to prepend created ops.

  Returns:
    `Tensor` loss, which is the sum of `sample_losses` and the
    `loss_fn`s returned by the `StochasticTensor`s.

  Raises:
    TypeError: if `sample_losses` is not a list or tuple, or if its elements
      are not `Tensor`s.
    ValueError: if any loss in `sample_losses` does not have dimensionality
      1 or greater.
  """
  with ops.op_scope(sample_losses, name):
    fixed_losses = []
    if not isinstance(sample_losses, (list, tuple)):
      raise TypeError("sample_losses must be a list or tuple")
    for loss in sample_losses:
      if not isinstance(loss, ops.Tensor):
        raise TypeError("loss is not a Tensor: %s" % loss)
      ndims = loss.get_shape().ndims
      if not (ndims is not None and ndims >= 1):
        raise ValueError("loss must have dimensionality 1 or greater: %s" %
                         loss)
      fixed_losses.append(array_ops.stop_gradient(loss))

    stoch_dependencies_map = _stochastic_dependencies_map(
        fixed_losses, stochastic_tensors=stochastic_tensors)
    if not stoch_dependencies_map:
      logging.warn(
          "No collection of Stochastic Tensors found for current graph.")
      return math_ops.add_n(sample_losses)

    # Iterate through all of the stochastic dependencies, adding
    # surrogate terms where necessary.
    sample_losses = [ops.convert_to_tensor(loss) for loss in sample_losses]
    loss_terms = sample_losses
    for (stoch_node, dependent_losses) in stoch_dependencies_map.items():
      loss_term = stoch_node.loss(list(dependent_losses))
      if loss_term is not None:
        loss_terms.append(loss_term)

    return math_ops.add_n(loss_terms)
def pdf(self, x, name="pdf"):
  """The PDF of observations in `x` under these Uniform distribution(s).

  Args:
    x: tensor of dtype `dtype`, must be broadcastable with `a` and `b`.
    name: The name to give this op.

  Returns:
    pdf: tensor of dtype `dtype`, the pdf values of `x`. If `x` is `nan`,
        will return `nan`.
  """
  with ops.name_scope(self.name):
    with ops.op_scope([self.a, self.b, x], name):
      x = ops.convert_to_tensor(x, name="x")
      if x.dtype != self.dtype:
        raise TypeError("Input x dtype does not match dtype: %s vs. %s" %
                        (x.dtype, self.dtype))

      broadcasted_x = x * self._ones()
      return math_ops.select(
          math_ops.is_nan(broadcasted_x), broadcasted_x,
          math_ops.select(
              math_ops.logical_or(broadcasted_x < self.a,
                                  broadcasted_x > self.b),
              array_ops.zeros_like(broadcasted_x),
              (1.0 / self.range()) * array_ops.ones_like(broadcasted_x)))
def enqueue(self, vals, name=None):
  """Enqueues one element to this queue.

  If the queue is full when this operation executes, it will block
  until the element has been enqueued.

  Args:
    vals: The tuple of `Tensor` objects to be enqueued.
    name: A name for the operation (optional).

  Returns:
    The operation that enqueues a new tuple of tensors to the queue.
  """
  if not isinstance(vals, (list, tuple)):
    vals = [vals]

  with ops.op_scope(vals, name, "%s_enqueue" % self._name) as scope:
    vals = self._check_enqueue_dtypes(vals)

    # NOTE(mrry): Not using a shape function because we need access to
    # the `QueueBase` object.
    for val, shape in zip(vals, self._shapes):
      val.get_shape().assert_is_compatible_with(shape)

    return gen_data_flow_ops._queue_enqueue(self._queue_ref, vals,
                                            name=scope)
def enqueue_many(self, vals, name=None):
  """Enqueues zero or more elements to this queue.

  This operation slices each component tensor along the 0th dimension to
  make multiple queue elements. All of the tensors in `vals` must have the
  same size in the 0th dimension.

  If the queue is full when this operation executes, it will block
  until all of the elements have been enqueued.

  Args:
    vals: The tensor or tuple of tensors from which the queue elements
      are taken.
    name: A name for the operation (optional).

  Returns:
    The operation that enqueues a batch of tuples of tensors to the queue.
  """
  if not isinstance(vals, (list, tuple)):
    vals = [vals]

  with ops.op_scope(vals, name, "%s_EnqueueMany" % self._name) as scope:
    vals = self._check_enqueue_dtypes(vals)

    # NOTE(mrry): Not using a shape function because we need access to
    # the `QueueBase` object.
    batch_dim = vals[0].get_shape().with_rank_at_least(1)[0]
    for val, shape in zip(vals, self._shapes):
      batch_dim = batch_dim.merge_with(
          val.get_shape().with_rank_at_least(1)[0])
      val.get_shape()[1:].assert_is_compatible_with(shape)

    return gen_data_flow_ops._queue_enqueue_many(
        self._queue_ref, vals, name=scope)
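A small sketch of both enqueue paths on a public `tf.FIFOQueue` (TF 1.x-era API):

```python
import tensorflow as tf

q = tf.FIFOQueue(3, dtypes=[tf.float32], shapes=[()])
enq_one = q.enqueue([1.0])               # enqueue a single element
enq_many = q.enqueue_many([[2.0, 3.0]])  # slice along dim 0 -> two elements

with tf.Session() as sess:
    sess.run(enq_one)
    sess.run(enq_many)
    print(sess.run(q.dequeue()))         # 1.0
    print(sess.run(q.dequeue_many(2)))   # [2. 3.]
```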
def one_hot_encoding(labels, num_classes, on_value=1.0, off_value=0.0,
                     outputs_collections=None, scope=None):
  """Transform numeric labels into onehot_labels using tf.one_hot.

  Args:
    labels: [batch_size] target labels.
    num_classes: total number of classes.
    on_value: A scalar defining the on-value.
    off_value: A scalar defining the off-value.
    outputs_collections: collection to add the outputs.
    scope: Optional scope for op_scope.

  Returns:
    one hot encoding of the labels.
  """
  with ops.op_scope([labels, num_classes], scope, 'OneHotEncoding') as sc:
    if labels.dtype == dtypes.int32:
      labels = standard_ops.to_int64(labels)
    outputs = standard_ops.one_hot(labels,
                                   num_classes,
                                   on_value=on_value,
                                   off_value=off_value)
    return utils.collect_named_outputs(outputs_collections, sc, outputs)
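The layer defers to `tf.one_hot`; a quick sketch of that underlying call (TF 1.x session API):

```python
import tensorflow as tf

labels = tf.constant([0, 2, 1])
one_hot = tf.one_hot(labels, depth=3, on_value=1.0, off_value=0.0)

with tf.Session() as sess:
    print(sess.run(one_hot))
    # [[1. 0. 0.]
    #  [0. 0. 1.]
    #  [0. 1. 0.]]
```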
def avg_pool2d(inputs,
               kernel_size,
               stride=2,
               padding='VALID',
               outputs_collections=None,
               scope=None):
  """Adds an Avg Pooling op.

  It is assumed by the wrapper that the pooling is only done per image and
  not in depth or batch.

  Args:
    inputs: a tensor of size [batch_size, height, width, depth].
    kernel_size: a list of length 2: [kernel_height, kernel_width] of the
      pooling kernel over which the op is computed. Can be an int if both
      values are the same.
    stride: a list of length 2: [stride_height, stride_width]. Can be an
      int if both strides are the same. Note that presently both strides
      must have the same value.
    padding: the padding method, either 'VALID' or 'SAME'.
    outputs_collections: collection to add the outputs.
    scope: Optional scope for op_scope.

  Returns:
    a tensor representing the results of the pooling operation.
  """
  with ops.op_scope([inputs], scope, 'AvgPool2D') as sc:
    kernel_h, kernel_w = utils.two_element_tuple(kernel_size)
    stride_h, stride_w = utils.two_element_tuple(stride)
    outputs = nn.avg_pool(inputs,
                          ksize=[1, kernel_h, kernel_w, 1],
                          strides=[1, stride_h, stride_w, 1],
                          padding=padding)
    return utils.collect_named_outputs(outputs_collections, sc, outputs)
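A sketch of the wrapped `tf.nn.avg_pool` call on a toy 4x4 image (TF 1.x API):

```python
import tensorflow as tf

# One 4x4 single-channel "image" holding the values 0..15.
x = tf.reshape(tf.range(16, dtype=tf.float32), [1, 4, 4, 1])
pooled = tf.nn.avg_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                        padding='VALID')

with tf.Session() as sess:
    print(sess.run(pooled)[0, :, :, 0])
    # [[ 2.5  4.5]
    #  [10.5 12.5]]
```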
def ones(shape, dtype=dtypes.float32, name=None):
  """Creates a tensor with all elements set to 1.

  This operation returns a tensor of type `dtype` with shape `shape` and
  all elements set to 1.

  For example:

  ```python
  tf.ones([2, 3], tf.int32) ==> [[1, 1, 1], [1, 1, 1]]
  ```

  Args:
    shape: Either a list of integers, or a 1-D `Tensor` of type `int32`.
    dtype: The type of an element in the resulting `Tensor`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` with all elements set to 1.
  """
  with ops.op_scope([shape], name, "ones") as name:
    if isinstance(shape, list):
      output = constant(1, shape=shape, dtype=dtype, name=name)
    else:
      shape = ops.convert_to_tensor(shape, name="shape")
      output = fill(shape, constant(1, dtype=dtype), name=name)
  assert output.dtype.base_dtype == dtypes.as_dtype(dtype).base_dtype
  return output
def dropout(inputs,
            keep_prob=0.5,
            noise_shape=None,
            is_training=True,
            outputs_collections=None,
            scope=None):
  """Returns a dropout op applied to the input.

  With probability `keep_prob`, outputs the input element scaled up by
  `1 / keep_prob`, otherwise outputs `0`. The scaling is so that the
  expected sum is unchanged.

  Args:
    inputs: the tensor to pass to the nn.dropout op.
    keep_prob: A scalar `Tensor` with the same type as x. The probability
      that each element is kept.
    noise_shape: A 1-D `Tensor` of type `int32`, representing the
      shape for randomly generated keep/drop flags.
    is_training: A bool `Tensor` indicating whether or not the model
      is in training mode. If so, dropout is applied and values scaled.
      Otherwise, inputs is returned.
    outputs_collections: collection to add the outputs.
    scope: Optional scope for op_scope.

  Returns:
    a tensor representing the output of the operation.
  """
  with ops.op_scope([inputs], scope, 'Dropout') as sc:
    is_training = ops.convert_to_tensor(is_training)
    outputs = control_flow_ops.cond(
        is_training,
        lambda: nn.dropout(inputs, keep_prob, noise_shape),
        lambda: inputs)
    return utils.collect_named_outputs(outputs_collections, sc, outputs)
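The training branch wraps `tf.nn.dropout`; a toy run showing the `1 / keep_prob` scaling (the output is random, shown indicatively):

```python
import tensorflow as tf

x = tf.ones([4])
dropped = tf.nn.dropout(x, keep_prob=0.5)

with tf.Session() as sess:
    # Kept elements are scaled by 1/keep_prob, so surviving entries are 2.0.
    print(sess.run(dropped))  # e.g. [2. 0. 2. 2.]
```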
def complex(real, imag, name=None):
  """Converts two real numbers to a complex number.

  Given a tensor `real` representing the real part of a complex number, and
  a tensor `imag` representing the imaginary part of a complex number, this
  operation computes complex numbers elementwise of the form
  \\\\(a + bj\\\\), where *a* represents the `real` part and *b* represents
  the `imag` part.

  The input tensors `real` and `imag` must be the same shape.

  For example:

  ```
  # tensor 'real' is [2.25, 3.25]
  # tensor `imag` is [4.75, 5.75]
  tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]]
  ```

  Args:
    real: A `Tensor` of type `float`.
    imag: A `Tensor` of type `float`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `complex64`.
  """
  with ops.op_scope([real, imag], name, "Complex") as name:
    return gen_math_ops._complex(real, imag, name=name)
def log_prob(self, x, name="log_prob"):
  """Log prob of observations in `x` under these Gamma distribution(s).

  Args:
    x: tensor of dtype `dtype`, must be broadcastable with `alpha` and
      `beta`.
    name: The name to give this op.

  Returns:
    log_prob: tensor of dtype `dtype`, the log-PDFs of `x`.

  Raises:
    TypeError: if `x` and `alpha` are different dtypes.
  """
  with ops.name_scope(self.name):
    with ops.op_scope([self._alpha, self._beta, x], name):
      alpha = self._alpha
      beta = self._beta
      x = ops.convert_to_tensor(x)
      x = control_flow_ops.with_dependencies(
          [check_ops.assert_positive(x)] if self.strict else [], x)
      contrib_tensor_util.assert_same_float_dtype(tensors=[x,],
                                                  dtype=self.dtype)

      return (alpha * math_ops.log(beta) + (alpha - 1) * math_ops.log(x) -
              beta * x - math_ops.lgamma(self._alpha))
def ones_like(tensor, dtype=None, name=None):
  """Creates a tensor with all elements set to 1.

  Given a single tensor (`tensor`), this operation returns a tensor of the
  same type and shape as `tensor` with all elements set to 1. Optionally,
  you can specify a new type (`dtype`) for the returned tensor.

  For example:

  ```python
  # 'tensor' is [[1, 2, 3], [4, 5, 6]]
  tf.ones_like(tensor) ==> [[1, 1, 1], [1, 1, 1]]
  ```

  Args:
    tensor: A `Tensor`.
    dtype: A type for the returned `Tensor`. Must be `float32`, `float64`,
      `int8`, `int16`, `int32`, `int64`, `uint8`, or `complex64`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` with all elements set to 1.
  """
  with ops.op_scope([tensor], name, "ones_like") as name:
    tensor = ops.convert_to_tensor(tensor, name="tensor")
    ones_shape = shape(tensor)
    if dtype is None:
      dtype = tensor.dtype
    return ones(ones_shape, dtype=dtype, name=name)
def _strict_conv1d(x, h):
  """Return x * h for rank 1 tensors x and h."""
  with ops.op_scope([x, h], 'strict_conv1d'):
    x = array_ops.reshape(x, (1, -1, 1, 1))
    h = array_ops.reshape(h, (-1, 1, 1, 1))
    result = nn_ops.conv2d(x, h, [1, 1, 1, 1], 'SAME')
    return array_ops.reshape(result, [-1])
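The helper expresses a 1-D convolution through `conv2d` by treating the vector as a 1-pixel-wide image. A standalone sketch of the same reshape trick with the public TF 1.x API (note `conv2d` computes cross-correlation, and 'SAME' zero-pads):

```python
import tensorflow as tf

x = tf.constant([1.0, 2.0, 3.0, 4.0])
h = tf.constant([0.5, 0.5])

# Vector -> [batch=1, height=len(x), width=1, channels=1] image;
# kernel -> [height=len(h), width=1, in=1, out=1] filter.
x4 = tf.reshape(x, (1, -1, 1, 1))
h4 = tf.reshape(h, (-1, 1, 1, 1))
out = tf.reshape(tf.nn.conv2d(x4, h4, [1, 1, 1, 1], 'SAME'), [-1])

with tf.Session() as sess:
    # 2-tap sliding average, zero-padded at the end:
    print(sess.run(out))  # [1.5 2.5 3.5 2. ]
```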
def sequence_loss(logits, targets, weights, num_decoder_symbols,
                  average_across_timesteps=True, average_across_batch=True,
                  softmax_loss_function=None, name=None):
  """Weighted cross-entropy loss for a sequence of logits, batch-collapsed.

  Args:
    logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
    targets: List of 1D batch-sized int32 Tensors of the same length as
      logits.
    weights: List of 1D batch-sized float-Tensors of the same length as
      logits.
    num_decoder_symbols: Integer, number of decoder symbols (output
      classes).
    average_across_timesteps: If set, divide the returned cost by the total
      label weight.
    average_across_batch: If set, divide the returned cost by the batch
      size.
    softmax_loss_function: Function (inputs-batch, labels-batch) ->
      loss-batch to be used instead of the standard softmax (the default if
      this is None).
    name: Optional name for this operation, defaults to "sequence_loss".

  Returns:
    A scalar float Tensor: The average log-perplexity per symbol (weighted).

  Raises:
    ValueError: If len(logits) is different from len(targets) or
      len(weights).
  """
  with ops.op_scope(logits + targets + weights, name, "sequence_loss"):
    cost = math_ops.reduce_sum(sequence_loss_by_example(
        logits, targets, weights, num_decoder_symbols,
        average_across_timesteps=average_across_timesteps,
        softmax_loss_function=softmax_loss_function))
    if average_across_batch:
      batch_size = array_ops.shape(targets[0])[0]
      return cost / math_ops.cast(batch_size, dtypes.float32)
    else:
      return cost
def matmul(a, b,
           transpose_a=False, transpose_b=False,
           a_is_sparse=False, b_is_sparse=False,
           name=None):
  """Multiplies matrix `a` by matrix `b`, producing `a` * `b`.

  The inputs must be two-dimensional matrices, with matching inner
  dimensions, possibly after transposition.

  Both matrices must be of the same type. The supported types are:
  `float`, `double`, `int32`, `complex64`.

  Either matrix can be transposed on the fly by setting the corresponding
  flag to `True`. This is `False` by default.

  If one or both of the matrices contain a lot of zeros, a more efficient
  multiplication algorithm can be used by setting the corresponding
  `a_is_sparse` or `b_is_sparse` flag to `True`. These are `False` by
  default.

  For example:

  ```python
  # 2-D tensor `a`
  a = tf.constant([1, 2, 3, 4, 5, 6], shape=[2, 3]) => [[1. 2. 3.]
                                                        [4. 5. 6.]]
  # 2-D tensor `b`
  b = tf.constant([7, 8, 9, 10, 11, 12], shape=[3, 2]) => [[7. 8.]
                                                           [9. 10.]
                                                           [11. 12.]]
  c = tf.matmul(a, b) => [[58 64]
                          [139 154]]
  ```

  Args:
    a: `Tensor` of type `float`, `double`, `int32` or `complex64`.
    b: `Tensor` with same type as `a`.
    transpose_a: If `True`, `a` is transposed before multiplication.
    transpose_b: If `True`, `b` is transposed before multiplication.
    a_is_sparse: If `True`, `a` is treated as a sparse matrix.
    b_is_sparse: If `True`, `b` is treated as a sparse matrix.
    name: Name for the operation (optional).

  Returns:
    A `Tensor` of the same type as `a`.
  """
  with ops.op_scope([a, b], name, "MatMul") as name:
    a = ops.convert_to_tensor(a, name="a")
    b = ops.convert_to_tensor(b, name="b")
    if a.dtype == dtypes.float32 and (a_is_sparse or b_is_sparse):
      return sparse_matmul(a, b,
                           transpose_a=transpose_a,
                           transpose_b=transpose_b,
                           a_is_sparse=a_is_sparse,
                           b_is_sparse=b_is_sparse,
                           name=name)
    else:
      return gen_math_ops._mat_mul(a, b,
                                   transpose_a=transpose_a,
                                   transpose_b=transpose_b,
                                   name=name)
def rgb_to_grayscale(images, name=None):
  """Converts one or more images from RGB to Grayscale.

  Outputs a tensor of the same `DType` and rank as `images`.  The size of
  the last dimension of the output is 1, containing the Grayscale value of
  the pixels.

  Args:
    images: The RGB tensor to convert. Last dimension must have size 3 and
      should contain RGB values.
    name: A name for the operation (optional).

  Returns:
    The converted grayscale image(s).
  """
  with ops.op_scope([images], name, 'rgb_to_grayscale') as name:
    images = ops.convert_to_tensor(images, name='images')
    # Remember original dtype so we can convert back if needed
    orig_dtype = images.dtype
    flt_image = convert_image_dtype(images, dtypes.float32)

    # Reference for converting between RGB and grayscale.
    # https://en.wikipedia.org/wiki/Luma_%28video%29
    rgb_weights = [0.2989, 0.5870, 0.1140]
    rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0)
    gray_float = math_ops.reduce_sum(flt_image * rgb_weights,
                                     rank_1,
                                     keep_dims=True)
    gray_float.set_shape(images.get_shape()[:-1].concatenate([1]))
    return convert_image_dtype(gray_float, orig_dtype, name=name)
def saturate_cast(value, dtype, name=None):
  """Performs a safe saturating cast of `value` to `dtype`.

  This function casts the input to `dtype` without applying any scaling.
  If there is a danger that values would over or underflow in the cast, this
  op applies the appropriate clamping before the cast.

  Args:
    value: A `Tensor`.
    dtype: The desired output `DType`.
    name: A name for the operation (optional).

  Returns:
    `value` safely cast to `dtype`.
  """
  # When casting to a type with smaller representable range, clamp.
  # Note that this covers casting to unsigned types as well.
  with ops.op_scope([value], name, "saturate_cast") as name:
    value = ops.convert_to_tensor(value, name="value")
    dtype = dtypes.as_dtype(dtype).base_dtype
    if value.dtype.min < dtype.min:
      value = maximum(value, ops.convert_to_tensor(
          dtype.min, dtype=value.dtype, name="min"))
    if value.dtype.max > dtype.max:
      value = minimum(value, ops.convert_to_tensor(
          dtype.max, dtype=value.dtype, name="max"))
    return cast(value, dtype, name=name)
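A quick demonstration, assuming the TF 1.x export `tf.saturate_cast`:

```python
import tensorflow as tf

x = tf.constant([-10.0, 100.0, 300.0])
y = tf.saturate_cast(x, tf.uint8)  # clamps to [0, 255] before casting

with tf.Session() as sess:
    print(sess.run(y))  # [  0 100 255]
```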
def floordiv(x, y, name=None):
  """Divides `x / y` elementwise, rounding down for floating point.

  The same as `tf.div(x,y)` for integers, but uses `tf.floor(tf.div(x,y))`
  for floating point arguments so that the result is always an integer
  (though possibly an integer represented as floating point).  This op is
  generated by `x // y` floor division in Python 3 and in Python 2.7 with
  `from __future__ import division`.

  Note that for efficiency, `floordiv` uses C semantics for negative
  numbers (unlike Python and Numpy).

  `x` and `y` must have the same type, and the result will have the same
  type as well.

  Args:
    x: `Tensor` numerator of real numeric type.
    y: `Tensor` denominator of real numeric type.
    name: A name for the operation (optional).

  Returns:
    `x / y` rounded down (except possibly towards zero for negative
    integers).

  Raises:
    TypeError: If the inputs are complex.
  """
  with ops.op_scope([x, y], name, "floordiv") as name:
    x = ops.convert_to_tensor(x, name="x")
    dtype = x.dtype
    if dtype.is_floating:
      return floor(div(x, y), name=name)
    else:
      if not dtype.is_integer:
        raise TypeError("Expected floating point or integer, got %r" %
                        dtype)
      return div(x, y, name=name)
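A small demonstration (TF 1.x API), with the integer caveat from the docstring noted rather than asserted:

```python
import tensorflow as tf

with tf.Session() as sess:
    # Floating point: floor(x / y), so the result is an integer-valued float.
    print(sess.run(tf.floordiv(7.0, 2.0)))   # 3.0
    print(sess.run(tf.floordiv(-7.0, 2.0)))  # -4.0
    # Per the docstring above, integer inputs in this era use C division
    # semantics, so negative integer results may truncate toward zero
    # rather than floor as Python's // does.
```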
def _check_labels_and_scores(boolean_labels, scores, check_shape):
  """Check the rank of labels/scores, return tensor versions."""
  with ops.op_scope([boolean_labels, scores], '_check_labels_and_scores'):
    boolean_labels = ops.convert_to_tensor(boolean_labels,
                                           name='boolean_labels')
    scores = ops.convert_to_tensor(scores, name='scores')

    if boolean_labels.dtype != dtypes.bool:
      raise ValueError(
          'Argument boolean_labels should have dtype bool. Found: %s' %
          boolean_labels.dtype)

    if check_shape:
      labels_rank_1 = logging_ops.Assert(
          math_ops.equal(1, array_ops.rank(boolean_labels)),
          ['Argument boolean_labels should have rank 1. Found: ',
           boolean_labels.name, array_ops.shape(boolean_labels)])

      scores_rank_1 = logging_ops.Assert(
          math_ops.equal(1, array_ops.rank(scores)),
          ['Argument scores should have rank 1. Found: ', scores.name,
           array_ops.shape(scores)])

      with ops.control_dependencies([labels_rank_1, scores_rank_1]):
        return boolean_labels, scores
    else:
      return boolean_labels, scores
def adjust_brightness(image, delta):
  """Adjust the brightness of RGB or Grayscale images.

  This is a convenience method that converts an RGB image to float
  representation, adjusts its brightness, and then converts it back to the
  original data type. If several adjustments are chained it is advisable
  to minimize the number of redundant conversions.

  The value `delta` is added to all components of the tensor `image`. Both
  `image` and `delta` are converted to `float` before adding (and `image`
  is scaled appropriately if it is in fixed-point representation). For
  regular images, `delta` should be in the range `[0,1)`, as it is added
  to the image in floating point representation, where pixel values are in
  the `[0,1)` range.

  Args:
    image: A tensor.
    delta: A scalar. Amount to add to the pixel values.

  Returns:
    A brightness-adjusted tensor of the same shape and type as `image`.
  """
  with ops.op_scope([image, delta], None, 'adjust_brightness') as name:
    image = ops.convert_to_tensor(image, name='image')
    # Remember original dtype so we can convert back if needed
    orig_dtype = image.dtype
    flt_image = convert_image_dtype(image, dtypes.float32)

    adjusted = math_ops.add(flt_image,
                            math_ops.cast(delta, dtypes.float32),
                            name=name)

    return convert_image_dtype(adjusted, orig_dtype, saturate=True)
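A toy run, assuming the TF 1.x export `tf.image.adjust_brightness`; the outputs are approximate because of the fixed-point round trip:

```python
import tensorflow as tf

# A uint8 image: values scale to [0, 1) internally, so delta=0.1
# brightens by roughly 26 gray levels; results saturate at 255.
image = tf.constant([[[100, 150, 250]]], dtype=tf.uint8)
brighter = tf.image.adjust_brightness(image, delta=0.1)

with tf.Session() as sess:
    print(sess.run(brighter))  # approximately [[[126 176 255]]]
```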
def mode(self, name="mode"):
  """Mode of each batch member.

  The mode of a gamma distribution is `(alpha - 1) / beta` when
  `alpha > 1`, and `NaN` otherwise.  If `self.strict_statistics` is
  `True`, an exception will be raised rather than returning `NaN`.

  Args:
    name: A name to give this op.

  Returns:
    The mode for every batch member, a `Tensor` with same `dtype` as self.
  """
  alpha = self._alpha
  beta = self._beta
  with ops.name_scope(self.name):
    with ops.op_scope([alpha, beta], name):
      mode_if_defined = (alpha - 1.0) / beta
      if self.strict_statistics:
        one = ops.convert_to_tensor(1.0, dtype=self.dtype)
        return control_flow_ops.with_dependencies(
            [check_ops.assert_less(one, alpha)], mode_if_defined)
      else:
        alpha_ge_1 = alpha >= 1.0
        nan = np.nan * self._ones()
        return math_ops.select(alpha_ge_1, mode_if_defined, nan)
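The non-strict branch computes `(alpha - 1) / beta` and masks undefined entries with NaN; in NumPy terms:

```python
import numpy as np

alpha = np.array([0.5, 1.0, 3.0])
beta = np.array([2.0, 2.0, 2.0])

# Mode is (alpha - 1) / beta where defined, NaN elsewhere.
mode = np.where(alpha >= 1.0, (alpha - 1.0) / beta, np.nan)
print(mode)  # [nan 0.  1. ]
```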
def variance(self, name="variance"):
  """Variance of this distribution."""
  with ops.name_scope(self.name):
    with ops.op_scope([], name):
      return math_ops.square(self.std())
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
  """Apply gradients to variables.

  This contains most of the synchronization implementation and also wraps
  the apply_gradients() from the real optimizer.

  Args:
    grads_and_vars: List of (gradient, variable) pairs as returned by
      compute_gradients().
    global_step: Optional Variable to increment by one after the variables
      have been updated.
    name: Optional name for the returned operation.  Default to the name
      passed to the Optimizer constructor.

  Returns:
    train_op: The op to dequeue a token so the replicas can exit this batch
      and start the next one. This is executed by each replica.

  Raises:
    ValueError: If the grads_and_vars is empty.
    ValueError: If global step is not provided, the staleness cannot be
      checked.
  """
  if not grads_and_vars:
    raise ValueError("Must supply at least one variable")

  if global_step is None:
    raise ValueError("Global step is required to check staleness")

  self._global_step = global_step
  train_ops = []
  aggregated_grad = []
  inputs = []
  var_list = []
  for x in grads_and_vars:
    inputs.extend(list(x))

  with ops.device(global_step.device):
    self._local_steps = variables.Variable(
        array_ops.zeros(
            [self._total_num_replicas],
            dtype=global_step.dtype),
        trainable=False,
        name="local_steps")

  # Check staleness. Note that this has to be ref(); otherwise we would
  # read through an identity op and get stale values.
  local_step = array_ops.slice(self._local_steps.ref(),
                               array_ops.reshape(self._replica_id, (1,)),
                               [1],
                               name="get_local_step")
  local_step = array_ops.reshape(local_step, ())
  is_stale = math_ops.less(local_step, global_step)

  with ops.op_scope(inputs, None, self._name):
    for grad, var in grads_and_vars:
      var_list.append(var)
      with ops.device(var.device):
        if isinstance(grad, ops.Tensor):
          gradient_queue = (data_flow_ops.FIFOQueue(
              self._tokens_per_step * 2,
              grad.dtype,
              shapes=var.get_shape(),
              shared_name=var.name))
          self._one_element_queue_list.append((gradient_queue, var.device))
          train_ops.append(gradient_queue.enqueue([grad]))

          # Aggregate all gradients
          gradients = gradient_queue.dequeue_many(
              self._replicas_to_aggregate)
          aggregated_grad.append(math_ops.reduce_sum(gradients, [0]))
        elif grad is None:
          aggregated_grad.append(None)  # pass-through.
        else:
          if not isinstance(grad, ops.IndexedSlices):
            raise ValueError("Unknown grad type!")
          aggregated_grad.append(self._aggregate_sparse_grad(grad, var,
                                                             train_ops))

    aggregated_grads_and_vars = zip(aggregated_grad, var_list)

    # sync_op will be assigned to the same device as the global step.
    with ops.device(global_step.device), ops.name_scope(""):
      update_op = self._opt.apply_gradients(aggregated_grads_and_vars,
                                            global_step)

    # Create token queue.
    with ops.device(global_step.device), ops.name_scope(""):
      sync_token_queue = (
          data_flow_ops.FIFOQueue(-1,
                                  global_step.dtype.base_dtype,
                                  shapes=(),
                                  shared_name="sync_token_q"))
      self._sync_token_queue = sync_token_queue

      # dummy_queue is passed to the queue runner. Don't use the real
      # queues because the queue runner doesn't automatically reopen them
      # once it has closed queues in PS devices.
      dummy_queue = (
          data_flow_ops.FIFOQueue(1,
                                  types_pb2.DT_INT32,
                                  shapes=(),
                                  shared_name="dummy_queue"))

    # Clear all the gradient queues in case there are stale gradients.
    clear_queue_ops = []
    with ops.control_dependencies([update_op]):
      for queue, dev in self._one_element_queue_list:
        with ops.device(dev):
          stale_grads = queue.dequeue_many(queue.size())
          clear_queue_ops.append(stale_grads)

      for queue, dev in self._sparse_grad_queues_and_devs:
        with ops.device(dev):
          _, stale_indices = queue.dequeue_many(queue.size())
          clear_queue_ops.append(stale_indices)

    with ops.device(global_step.device):
      self._clean_up_op = control_flow_ops.abort(
          error_msg="From sync_replicas")

    # According to the staleness, select between the enqueue op (real_grad)
    # or no-op (no_op_grad), effectively dropping all the stale gradients.
    no_op_grad = lambda: [control_flow_ops.no_op(name="no_grad_enqueue")]
    real_grad = lambda: [control_flow_ops.group(*train_ops)]
    final_train_ops = control_flow_ops.cond(is_stale, no_op_grad, real_grad)

    with ops.device(global_step.device), ops.name_scope(""):
      # Replicas have to wait until they can get a token from the token
      # queue.
      with ops.control_dependencies([final_train_ops]):
        token = sync_token_queue.dequeue()
      train_op = state_ops.scatter_update(self._local_steps,
                                          self._replica_id, token)

      with ops.control_dependencies(clear_queue_ops):
        # Sync_op needs to insert tokens to the token queue at the end of
        # the step so the replicas can fetch them to start the next step.
        # Note that ref() is used to avoid reading from the identity with
        # the old step.
        tokens = array_ops.fill([self._tokens_per_step],
                                global_step.ref())
        sync_op = sync_token_queue.enqueue_many((tokens,))

      if self._variable_averages is not None:
        with ops.control_dependencies([sync_op]), ops.name_scope(""):
          sync_op = self._variable_averages.apply(
              self._variables_to_average)

    self._chief_queue_runner = queue_runner.QueueRunner(dummy_queue,
                                                        [sync_op])
    self._gradients_applied = True
    return train_op
def parse_single_example(serialized,  # pylint: disable=invalid-name
                         names=None,
                         sparse_keys=None,
                         sparse_types=None,
                         dense_keys=None,
                         dense_types=None,
                         dense_defaults=None,
                         dense_shapes=None,
                         name="ParseSingleExample"):
  """Parses a single `Example` proto.

  Similar to `parse_example`, except:

  For dense tensors, the returned `Tensor` is identical to the output of
  `parse_example`, except there is no batch dimension, the output shape is
  the same as the shape given in `dense_shape`.

  For `SparseTensor`s, the first (batch) column of the indices matrix is
  removed (the indices matrix is a column vector), the values vector is
  unchanged, and the first (`batch_size`) entry of the shape vector is
  removed (it is now a single element vector).

  See also `parse_example`.

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
      See `parse_example` documentation for more details.
    names: (Optional) A scalar string Tensor, the associated name.
      See `parse_example` documentation for more details.
    sparse_keys: See `parse_example` documentation for more details.
    sparse_types: See `parse_example` documentation for more details.
    dense_keys: See `parse_example` documentation for more details.
    dense_types: See `parse_example` documentation for more details.
    dense_defaults: See `parse_example` documentation for more details.
    dense_shapes: See `parse_example` documentation for more details.
    name: A name for this operation (optional).

  Returns:
    A dictionary mapping keys to Tensors and SparseTensors.

  Raises:
    ValueError: if "serialized" or "names" have known shapes, and are not
      scalars.
  """
  with ops.op_scope([serialized, names], name, "parse_single_example"):
    serialized = ops.convert_to_tensor(serialized)
    serialized_shape = serialized.get_shape()
    if serialized_shape.ndims is not None:
      if serialized_shape.ndims != 0:
        raise ValueError("Input serialized must be a scalar")
    else:
      serialized = control_flow_ops.with_dependencies(
          [logging_ops.Assert(
              math_ops.equal(array_ops.rank(serialized), 0),
              ["Input serialized must be a scalar"],
              name="SerializedIsScalar")],
          serialized,
          name="SerializedDependencies")
    serialized = array_ops.expand_dims(serialized, 0)
    if names is not None:
      names = ops.convert_to_tensor(names)
      names_shape = names.get_shape()
      if names_shape.ndims is not None:
        if names_shape.ndims != 0:
          raise ValueError("Input names must be a scalar")
      else:
        names = control_flow_ops.with_dependencies(
            [logging_ops.Assert(
                math_ops.equal(array_ops.rank(names), 0),
                ["Input names must be a scalar"],
                name="NamesIsScalar")],
            names,
            name="NamesDependencies")
      names = array_ops.expand_dims(names, 0)

    outputs = parse_example(serialized,
                            names=names,
                            sparse_keys=sparse_keys,
                            sparse_types=sparse_types,
                            dense_keys=dense_keys,
                            dense_types=dense_types,
                            dense_defaults=dense_defaults,
                            dense_shapes=dense_shapes,
                            name=name)
    if dense_keys is not None:
      for d in dense_keys:
        outputs[d] = array_ops.squeeze(outputs[d], [0],
                                       name="Squeeze_%s" % d)
    if sparse_keys is not None:
      for s in sparse_keys:
        outputs[s] = ops.SparseTensor(
            array_ops.slice(outputs[s].indices,
                            [0, 1], [-1, -1], name="Slice_Indices_%s" % s),
            outputs[s].values,
            array_ops.slice(outputs[s].shape,
                            [1], [-1], name="Squeeze_Shape_%s" % s))
    return outputs
def read_batch_features(file_pattern, batch_size, features, reader,
                        randomize_input=True, num_epochs=None,
                        queue_capacity=10000, reader_num_threads=1,
                        parser_num_threads=1, name=None):
  """Adds operations to read, queue, batch and parse `Example` protos.

  Given file pattern (or list of files), will setup a queue for file names,
  read `Example` proto using provided `reader`, use batch queue to create
  batches of examples of size `batch_size` and parse example given
  `features` specification.

  All queue runners are added to the queue runners collection, and may be
  started via `start_queue_runners`.

  All ops are added to the default graph.

  Args:
    file_pattern: List of files or pattern of file paths containing
        `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int or scalar `Tensor` specifying the batch size to use.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    reader: A function or class that returns an object with
      `read` method, (filename tensor) -> (example tensor).
    randomize_input: Whether the input should be randomized.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If None, cycles through the dataset forever. NOTE - If
      specified, creates a variable that must be initialized, so call
      tf.initialize_all_variables() as shown in the tests.
    queue_capacity: Capacity for input queue.
    reader_num_threads: The number of threads to read examples.
    parser_num_threads: The number of threads to parse examples.
    name: Name of resulting op.

  Returns:
    A dict of `Tensor` or `SparseTensor` objects for each key in `features`.

  Raises:
    ValueError: for invalid inputs.
  """
  with ops.op_scope([file_pattern], name, 'read_batch_features') as scope:
    examples = read_batch_examples(
        file_pattern, batch_size, reader, randomize_input=randomize_input,
        num_epochs=num_epochs, queue_capacity=queue_capacity,
        num_threads=reader_num_threads, name=scope)

    if parser_num_threads == 1:
      # Avoid queue overhead for single thread
      return parsing_ops.parse_example(examples, features)
    else:
      # Parse features into tensors in many threads and put on the queue.
      features_list = []
      for _ in range(parser_num_threads):
        features_list.append(
            parsing_ops.parse_example(examples, features))
      return input_ops.batch_join(
          features_list,
          batch_size=batch_size,
          capacity=queue_capacity,
          enqueue_many=True,
          name='parse_example_batch_join')
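A hedged usage sketch, assuming the contrib-era export `tf.contrib.learn.io.read_batch_features` and a hypothetical TFRecord file of serialized `Example` protos:

```python
import tensorflow as tf

# "/tmp/data.tfrecord" is a hypothetical file of serialized Example protos.
batch = tf.contrib.learn.io.read_batch_features(
    file_pattern="/tmp/data.tfrecord",
    batch_size=32,
    features={
        "age": tf.FixedLenFeature([1], tf.int64, default_value=[-1]),
        "kw": tf.VarLenFeature(tf.string),
    },
    reader=tf.TFRecordReader)
# batch["age"] is a dense Tensor; batch["kw"] is a SparseTensor.
```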
def _dynamic_rnn_loop(
    cell, inputs, initial_state, parallel_iterations, swap_memory,
    sequence_length=None):
  """Internal implementation of Dynamic RNN.

  Args:
    cell: An instance of RNNCell.
    inputs: A `Tensor` of shape [time, batch_size, input_size].
    initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if
      `cell.state_size` is a tuple, then this should be a tuple of
      tensors having shapes `[batch_size, s] for s in cell.state_size`.
    parallel_iterations: Positive Python int.
    swap_memory: A Python boolean
    sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].

  Returns:
    Tuple `(final_outputs, final_state)`.
    final_outputs:
      A `Tensor` of shape `[time, batch_size, cell.output_size]`.
    final_state:
      A `Tensor` matrix, or tuple of such matrices, matching in length
      and shapes to `initial_state`.

  Raises:
    ValueError: If the input depth cannot be inferred via shape
      inference from the inputs.
  """
  state = initial_state
  assert isinstance(parallel_iterations, int), "parallel_iterations must be int"

  # Construct an initial output
  input_shape = array_ops.shape(inputs)
  (time_steps, batch_size, _) = array_ops.unpack(input_shape, 3)

  inputs_got_shape = inputs.get_shape().with_rank(3)
  (const_time_steps, const_batch_size,
   const_depth) = inputs_got_shape.as_list()

  if const_depth is None:
    raise ValueError(
        "Input size (depth of inputs) must be accessible via shape "
        "inference, but saw value None.")

  # Prepare dynamic conditional copying of state & output
  zero_output = array_ops.zeros(
      array_ops.pack([batch_size, cell.output_size]), inputs.dtype)

  if sequence_length is not None:
    min_sequence_length = math_ops.reduce_min(sequence_length)
    max_sequence_length = math_ops.reduce_max(sequence_length)

  time = array_ops.constant(0, dtype=dtypes.int32, name="time")

  state_size = cell.state_size
  state_is_tuple = _is_sequence(state_size)

  state = _unpacked_state(state) if state_is_tuple else (state,)

  with ops.op_scope([], "dynamic_rnn") as scope:
    base_name = scope

  output_ta = tensor_array_ops.TensorArray(
      dtype=inputs.dtype, size=time_steps,
      tensor_array_name=base_name + "output")
  input_ta = tensor_array_ops.TensorArray(
      dtype=inputs.dtype, size=time_steps,
      tensor_array_name=base_name + "input")

  input_ta = input_ta.unpack(inputs)

  def _time_step(time, output_ta_t, *state):
    """Take a time step of the dynamic RNN.

    Args:
      time: int32 scalar Tensor.
      output_ta_t: `TensorArray`, the output with existing flow.
      *state: List of vector tensors.

    Returns:
      The tuple (time + 1, output_ta_t with updated flow) + new_state.
    """
    input_t = input_ta.read(time)
    # Restore some shape information
    input_t.set_shape([const_batch_size, const_depth])

    # Pack state back up for use by cell
    state = (_packed_state(structure=state_size, state=state)
             if state_is_tuple else state[0])

    call_cell = lambda: cell(input_t, state)

    if sequence_length is not None:
      (output, new_state) = _rnn_step(
          time=time,
          sequence_length=sequence_length,
          min_sequence_length=min_sequence_length,
          max_sequence_length=max_sequence_length,
          zero_output=zero_output,
          state=state,
          call_cell=call_cell,
          state_size=state_size,
          skip_conditionals=True)
    else:
      (output, new_state) = call_cell()

    # Pack state if using state tuples
    new_state = (
        tuple(_unpacked_state(new_state))
        if state_is_tuple else (new_state,))

    output_ta_t = output_ta_t.write(time, output)

    return (time + 1, output_ta_t) + new_state

  final_loop_vars = control_flow_ops.while_loop(
      cond=lambda time, *_: time < time_steps,
      body=_time_step,
      loop_vars=(time, output_ta) + tuple(state),
      parallel_iterations=parallel_iterations,
      swap_memory=swap_memory)

  (output_final_ta, final_state) = (final_loop_vars[1], final_loop_vars[2:])

  final_outputs = output_final_ta.pack()
  # Restore some shape information
  final_outputs.set_shape([
      const_time_steps, const_batch_size, cell.output_size])

  # Unpack final state if not using state tuples.
  final_state = (
      _packed_state(structure=cell.state_size, state=final_state)
      if state_is_tuple else final_state[0])

  return (final_outputs, final_state)
def _dynamic_rnn_loop(cell, inputs, initial_state, parallel_iterations,
                      swap_memory, sequence_length=None):
  """Internal implementation of Dynamic RNN.

  Args:
    cell: An instance of RNNCell.
    inputs: A `Tensor` of shape [time, batch_size, depth].
    initial_state: A `Tensor` of shape [batch_size, depth].
    parallel_iterations: Positive Python int.
    swap_memory: A Python boolean
    sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].

  Returns:
    Tuple (final_outputs, final_state).
    final_outputs:
      A `Tensor` of shape [time, batch_size, depth].
    final_state:
      A `Tensor` of shape [batch_size, depth].

  Raises:
    ValueError: If the input depth cannot be inferred via shape
      inference from the inputs.
  """
  state = initial_state
  assert isinstance(parallel_iterations, int), "parallel_iterations must be int"

  # Construct an initial output
  input_shape = array_ops.shape(inputs)
  (time_steps, batch_size, _) = array_ops.unpack(input_shape, 3)

  inputs_got_shape = inputs.get_shape().with_rank(3)
  (const_time_steps, const_batch_size,
   const_depth) = inputs_got_shape.as_list()

  if const_depth is None:
    raise ValueError(
        "Input size (depth of inputs) must be accessible via shape "
        "inference, but saw value None.")

  # Prepare dynamic conditional copying of state & output
  zero_output = array_ops.zeros(
      array_ops.pack([batch_size, cell.output_size]), inputs.dtype)

  if sequence_length is not None:
    min_sequence_length = math_ops.reduce_min(sequence_length)
    max_sequence_length = math_ops.reduce_max(sequence_length)

  time = array_ops.constant(0, dtype=dtypes.int32, name="time")

  with ops.op_scope([], "dynamic_rnn") as scope:
    base_name = scope

  output_ta = tensor_array_ops.TensorArray(
      dtype=inputs.dtype, size=time_steps,
      tensor_array_name=base_name + "output")
  input_ta = tensor_array_ops.TensorArray(
      dtype=inputs.dtype, size=time_steps,
      tensor_array_name=base_name + "input")

  input_ta = input_ta.unpack(inputs)

  def _time_step(time, state, output_ta_t):
    """Take a time step of the dynamic RNN.

    Args:
      time: int32 scalar Tensor.
      state: Vector.
      output_ta_t: `TensorArray`, the output with existing flow.

    Returns:
      The tuple (time + 1, new_state, output_ta_t with updated flow).
    """
    input_t = input_ta.read(time)
    # Restore some shape information
    input_t.set_shape([const_batch_size, const_depth])

    call_cell = lambda: cell(input_t, state)

    if sequence_length is not None:
      (output, new_state) = _rnn_step(
          time=time,
          sequence_length=sequence_length,
          min_sequence_length=min_sequence_length,
          max_sequence_length=max_sequence_length,
          zero_output=zero_output,
          state=state,
          call_cell=call_cell,
          skip_conditionals=True)
    else:
      (output, new_state) = call_cell()

    output_ta_t = output_ta_t.write(time, output)

    return (time + 1, new_state, output_ta_t)

  (_, final_state, output_final_ta) = control_flow_ops.while_loop(
      cond=lambda time, _1, _2: time < time_steps,
      body=_time_step,
      loop_vars=(time, state, output_ta),
      parallel_iterations=parallel_iterations,
      swap_memory=swap_memory)

  final_outputs = output_final_ta.pack()

  # Restore some shape information
  final_outputs.set_shape(
      [const_time_steps, const_batch_size, cell.output_size])

  return (final_outputs, final_state)
def parse_example(serialized,
                  names=None,
                  sparse_keys=None,
                  sparse_types=None,
                  dense_keys=None,
                  dense_types=None,
                  dense_defaults=None,
                  dense_shapes=None,
                  name="ParseExample"):
  """Parses `Example` protos.

  Parses a number of serialized [`Example`]
  (https://tensorflow.googlesource.com/tensorflow/+/master/tensorflow/core/example/example.proto)
  protos given in `serialized`.

  `names` may contain descriptive names for the corresponding serialized
  protos. These may be useful for debugging purposes, but they have no
  effect on the output. If not `None`, `names` must be the same length as
  `serialized`.

  This op parses serialized examples into a dictionary mapping keys to
  `Tensor` and `SparseTensor` objects respectively, depending on whether
  the keys appear in `sparse_keys` or `dense_keys`.

  The key `dense_keys[j]` is mapped to a `Tensor` of type `dense_types[j]`
  and of shape `(serialized.size(),) + dense_shapes[j]`.

  `dense_defaults` provides defaults for values referenced using
  `dense_keys`. If a key is not present in this dictionary, the
  corresponding dense `Feature` is required in all elements of
  `serialized`.

  `dense_shapes[j]` provides the shape of each `Feature` entry referenced
  by `dense_keys[j]`. The `Feature` corresponding to `dense_key[j]` must
  always have `np.prod(dense_shapes[j])` entries. The returned `Tensor` for
  `dense_key[j]` has shape `[N] + dense_shape[j]`, where `N` is the number
  of `Example`s in `serialized`.

  The key `sparse_keys[j]` is mapped to a `SparseTensor` of type
  `sparse_types[j]`. The `SparseTensor` represents a ragged matrix.
  Its indices are `[batch, index]` where `batch` is the batch entry the
  value is from, and `index` is the value's index in the list of values
  associated with that feature and example.

  Examples:

  For example, if one expects a `tf.float32` sparse feature `ft` and three
  serialized `Example`s are provided:

  ```
  serialized = [
    features
      { feature { key: "ft" value { float_list { value: [1.0, 2.0] } } } },
    features
      { feature {} },
    features
      { feature { key: "ft" value { float_list { value: [3.0] } } } }
  ]
  ```

  then the output will look like:

  ```
  {"ft": SparseTensor(indices=[[0, 0], [0, 1], [2, 0]],
                      values=[1.0, 2.0, 3.0],
                      shape=(3, 2)) }
  ```

  Given two `Example` input protos in `serialized`:

  ```
  [
    features {
      feature { key: "kw" value { bytes_list { value: [ "knit", "big" ] } } }
      feature { key: "gps" value { float_list { value: [] } } }
    },
    features {
      feature { key: "kw" value { bytes_list { value: [ "emmy" ] } } }
      feature { key: "dank" value { int64_list { value: [ 42 ] } } }
      feature { key: "gps" value { } }
    }
  ]
  ```

  And arguments

  ```
  names: ["input0", "input1"],
  sparse_keys: ["kw", "dank", "gps"]
  sparse_types: [DT_STRING, DT_INT64, DT_FLOAT]
  ```

  Then the output is a dictionary:

  ```python
  {
    "kw": SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=["knit", "big", "emmy"]
        shape=[2, 2]),
    "dank": SparseTensor(
        indices=[[1, 0]],
        values=[42],
        shape=[2, 1]),
    "gps": SparseTensor(
        indices=[],
        values=[],
        shape=[2, 0]),
  }
  ```

  For dense results in two serialized `Example`s:

  ```
  [
    features {
      feature { key: "age" value { int64_list { value: [ 0 ] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
    },
    features {
      feature { key: "age" value { int64_list { value: [] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
    }
  ]
  ```

  We can use arguments:

  ```
  names: ["input0", "input1"],
  dense_keys: np.array(["age", "gender"]),
  dense_types: [tf.int64, tf.string],
  dense_defaults: {
    "age": -1  # "age" defaults to -1 if missing
               # "gender" has no specified default so it's required
  }
  dense_shapes: [(1,), (1,)],  # age, gender
  ```

  And the expected output is:

  ```python
  {
    "age": [[0], [-1]],
    "gender": [["f"], ["f"]],
  }
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos.
    sparse_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `SparseTensor`
      objects.
    sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s
    dense_types: A list of DTypes of the same length as `dense_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_defaults: A dict mapping string keys to `Tensor`s.
      The keys of the dict must match the dense_keys of the feature.
    dense_shapes: A list of tuples with the same length as `dense_keys`.
      The shape of the data for each dense feature referenced by
      `dense_keys`.  Required for any input tensors identified by
      `dense_keys` whose shapes are anything other than `[]` or `[1]`.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping keys to `Tensor`s and `SparseTensor`s.

  Raises:
    ValueError: If sparse and dense key sets intersect, or input lengths
      do not match up.
  """
  with ops.op_scope([serialized, names], name, "parse_example"):
    names = [] if names is None else names
    dense_defaults = {} if dense_defaults is None else dense_defaults
    sparse_keys = [] if sparse_keys is None else sparse_keys
    sparse_types = [] if sparse_types is None else sparse_types
    dense_keys = [] if dense_keys is None else dense_keys
    dense_types = [] if dense_types is None else dense_types
    dense_shapes = (
        [[]] * len(dense_keys) if dense_shapes is None else dense_shapes)

    num_dense = len(dense_keys)
    num_sparse = len(sparse_keys)

    if len(dense_shapes) != num_dense:
      raise ValueError("len(dense_shapes) != len(dense_keys): %d vs. %d"
                       % (len(dense_shapes), num_dense))
    if len(dense_types) != num_dense:
      raise ValueError("len(dense_types) != len(dense_keys): %d vs. %d"
                       % (len(dense_types), num_dense))
    if len(sparse_types) != num_sparse:
      raise ValueError("len(sparse_types) != len(sparse_keys): %d vs. %d"
                       % (len(sparse_types), num_sparse))
    if num_dense + num_sparse == 0:
      raise ValueError("Must provide at least one sparse key or dense key")
    if not set(dense_keys).isdisjoint(set(sparse_keys)):
      raise ValueError(
          "Dense and sparse keys must not intersect; intersection: %s" %
          set(dense_keys).intersection(set(sparse_keys)))

    dense_defaults_vec = []
    for i, key in enumerate(dense_keys):
      default_value = dense_defaults.get(key)
      if default_value is None:
        default_value = constant_op.constant([], dtype=dense_types[i])
      elif not isinstance(default_value, ops.Tensor):
        key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
        default_value = ops.convert_to_tensor(
            default_value, dtype=dense_types[i], name=key_name)
        default_value = array_ops.reshape(default_value, dense_shapes[i])

      dense_defaults_vec.append(default_value)

    dense_shapes = [tensor_util.MakeTensorShapeProto(shape)
                    if isinstance(shape, (list, tuple)) else shape
                    for shape in dense_shapes]

    outputs = gen_parsing_ops._parse_example(
        serialized=serialized,
        names=names,
        dense_defaults=dense_defaults_vec,
        sparse_keys=sparse_keys,
        sparse_types=sparse_types,
        dense_keys=dense_keys,
        dense_shapes=dense_shapes,
        name=name)

    (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs

    sparse_tensors = [ops.SparseTensor(ix, val, shape) for (ix, val, shape)
                      in zip(sparse_indices, sparse_values, sparse_shapes)]

    return dict(
        zip(sparse_keys + dense_keys, sparse_tensors + dense_values))
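A usage sketch against the keys-based signature documented above (a pre-1.0 API; later TensorFlow versions take a `features` dict instead). The feature names are hypothetical:

```python
import tensorflow as tf

# A batch of serialized Example protos, fed at run time.
serialized = tf.placeholder(tf.string, shape=[None])

parsed = tf.parse_example(
    serialized,
    sparse_keys=["kw"], sparse_types=[tf.string],
    dense_keys=["age"], dense_types=[tf.int64],
    dense_defaults={"age": [-1]}, dense_shapes=[(1,)])
# parsed["kw"] is a SparseTensor; parsed["age"] has shape [batch, 1].
```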
def parse_single_sequence_example(serialized, # pylint: disable=invalid-name context_sparse_keys=None, context_sparse_types=None, context_dense_keys=None, context_dense_types=None, context_dense_defaults=None, context_dense_shapes=None, feature_list_sparse_keys=None, feature_list_sparse_types=None, feature_list_dense_keys=None, feature_list_dense_types=None, feature_list_dense_shapes=None, feature_list_dense_defaults=None, debug_name=None, name="ParseSingleSequenceExample"): # pylint: disable=line-too-long """Parses a single `SequenceExample` proto. Parses a single serialized [`SequenceExample`] (https://tensorflow.googlesource.com/tensorflow/+/master/tensorflow/core/example/example.proto) proto given in `serialized`. This op parses a serialize sequence example into a tuple of dictionaries mapping keys to `Tensor` and `SparseTensor` objects respectively. The first dictionary contains mappings for keys appearing in `context_sparse_keys` or `context_dense_keys`, and the second dictionary contains mappings for keys appearing in `feature_list_dense_keys`. The `context` keys are associated with a `SequenceExample` as a whole, independent of time / frame. In contrast, the `feature_list` keys provide a way to access variable-length data within the `FeatureList` section of the `SequenceExample` proto. While the shapes of `context` values are fixed with respect to frame, the frame dimension (the first dimension) of `feature_list` values may vary from `SequenceExample` to `SequenceExample` and even between `feature_list` keys within the same `SequenceExample`. The key `context_dense_keys[j]` is mapped to a `Tensor` of type `context_dense_types[j]` and of shape `context_dense_shapes[j]`. `context_dense_defaults` provides defaults for values referenced using `context_dense_keys`. If a key is not present in this dictionary, the corresponding context_dense `Feature` is required in `serialized`. `context_dense_shapes[j]` provides the shape of each context `Feature` entry referenced by `context_dense_keys[j]`. The number of elements in the `Feature` corresponding to `context_dense_key[j]` must always have `np.prod(context_dense_shapes[j])` entries. The returned `Tensor` for `context_dense_key[j]` has shape `context_dense_shape[j]`. The key `context_sparse_keys[j]` is mapped to a `SparseTensor` of type `context_sparse_types[j]`. This `SparseTensor` represents a ragged vector. Its indices are `[index]`, where `index` is the value's index in the list of values associated with that feature and example. The key `feature_list_dense_keys[j]` is mapped to a `Tensor` of type `feature_list_dense_types[j]` and of shape `(T,) + feature_list_dense_shapes[j]`, where `T` is the length of the associated `FeatureList` in the `SequenceExample`. Note: every key declared in `feature_list_dense_keys` **must** be provided in the `SequenceExample`'s `FeatureLists`, even if just empty. Exceptions are allowed by adding the given key to the map `feature_list_dense_defaults` with value None. Any key with value None map will be treated as empty (zero length) if not found in the `FeatureList` map. The key `feature_list_sparse_keys[j]` is mapped to a `SparseTensor` of type `feature_list_sparse_types[j]`. This `SparseTensor` represents a ragged vector. Its indices are `[time, index]`, where `time` is the FeatureList entry `index` is the value's index in the list of values associated with that time. `debug_name` may contain a descriptive name for the corresponding serialized proto. 
This may be useful for debugging purposes, but it has no effect on the output. If not `None`, `debug_name` must be a scalar. Args: serialized: A scalar (0-D Tensor) of type string, a single binary serialized `SequenceExample` proto. context_sparse_keys: A list of string keys in the `SequenceExample`'s features. The results for these keys will be returned as `SparseTensor` objects. context_sparse_types: A list of `DTypes`, the same length as `context_sparse_keys`. Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), and `tf.string` (`BytesList`) are supported. context_dense_keys: A list of string keys in the examples' features. The results for these keys will be returned as `Tensor`s. context_dense_types: A list of DTypes, same length as `context_dense_keys`. Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), and `tf.string` (`BytesList`) are supported. context_dense_defaults: A dict mapping string keys to `Tensor`s. The keys of the dict must match the context_dense_keys of the feature. context_dense_shapes: A list of tuples, same length as `context_dense_keys`. The shape of the data for each context_dense feature referenced by `context_dense_keys`. Required for any input tensors identified by `context_dense_keys` whose shapes are anything other than `[]` or `[1]`. feature_list_sparse_keys: A list of string keys in the `SequenceExample`'s feature_lists. The results for these keys will be returned as `SparseTensor` objects. feature_list_sparse_types: A list of `DTypes`, same length as `feature_list_sparse_keys`. Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), and `tf.string` (`BytesList`) are supported. feature_list_dense_keys: A list of string keys in the `SequenceExample`'s feature_lists. The results for these keys will be returned as `Tensor`s. feature_list_dense_types: A list of `DTypes`, same length as `feature_list_dense_keys`. Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), and `tf.string` (`BytesList`) are supported. feature_list_dense_shapes: A list of tuples, same length as `feature_list_dense_keys`. The shape of the data for each `FeatureList` feature referenced by `feature_list_dense_keys`. feature_list_dense_defaults: A dict mapping key strings to values. The only currently allowed value is `None`. Any key appearing in this dict with value `None` is allowed to be missing from the `SequenceExample`. If missing, the key is treated as zero-length. debug_name: A scalar (0-D Tensor) of strings (optional), the name of the serialized proto. name: A name for this operation (optional). Returns: A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s. The first dict contains the context key/values. The second dict contains the feature_list key/values. Raises: ValueError: If context_sparse and context_dense key sets intersect, if input lengths do not match up, or if a value in feature_list_dense_defaults is not None. TypeError: if feature_list_dense_defaults is not either None or a dict. 
""" # pylint: enable=line-too-long with ops.op_scope( [serialized, debug_name], name, "parse_single_sequence_example"): context_dense_defaults = ( {} if context_dense_defaults is None else context_dense_defaults) context_sparse_keys = ( [] if context_sparse_keys is None else context_sparse_keys) context_sparse_types = ( [] if context_sparse_types is None else context_sparse_types) context_dense_keys = ( [] if context_dense_keys is None else context_dense_keys) context_dense_types = ( [] if context_dense_types is None else context_dense_types) context_dense_shapes = ( [[]] * len(context_dense_keys) if context_dense_shapes is None else context_dense_shapes) feature_list_sparse_keys = ( [] if feature_list_sparse_keys is None else feature_list_sparse_keys) feature_list_sparse_types = ( [] if feature_list_sparse_types is None else feature_list_sparse_types) feature_list_dense_keys = ( [] if feature_list_dense_keys is None else feature_list_dense_keys) feature_list_dense_types = ( [] if feature_list_dense_types is None else feature_list_dense_types) feature_list_dense_shapes = ( [[]] * len(feature_list_dense_keys) if feature_list_dense_shapes is None else feature_list_dense_shapes) feature_list_dense_defaults = ( dict() if feature_list_dense_defaults is None else feature_list_dense_defaults) # Internal feature_list_dense_missing_assumed_empty = [] num_context_dense = len(context_dense_keys) num_feature_list_dense = len(feature_list_dense_keys) num_context_sparse = len(context_sparse_keys) num_feature_list_sparse = len(feature_list_sparse_keys) if len(context_dense_shapes) != num_context_dense: raise ValueError( "len(context_dense_shapes) != len(context_dense_keys): %d vs. %d" % (len(context_dense_shapes), num_context_dense)) if len(context_dense_types) != num_context_dense: raise ValueError( "len(context_dense_types) != len(num_context_dense): %d vs. %d" % (len(context_dense_types), num_context_dense)) if len(feature_list_dense_shapes) != num_feature_list_dense: raise ValueError( "len(feature_list_dense_shapes) != len(feature_list_dense_keys): " "%d vs. %d" % (len(feature_list_dense_shapes), num_feature_list_dense)) if len(feature_list_dense_types) != num_feature_list_dense: raise ValueError( "len(feature_list_dense_types) != len(num_feature_list_dense):" "%d vs. %d" % (len(feature_list_dense_types), num_feature_list_dense)) if len(context_sparse_types) != num_context_sparse: raise ValueError( "len(context_sparse_types) != len(context_sparse_keys): %d vs. %d" % (len(context_sparse_types), num_context_sparse)) if len(feature_list_sparse_types) != num_feature_list_sparse: raise ValueError( "len(feature_list_sparse_types) != len(feature_list_sparse_keys): " "%d vs. 
%d" % (len(feature_list_sparse_types), num_feature_list_sparse)) if (num_context_dense + num_context_sparse + num_feature_list_dense + num_feature_list_sparse) == 0: raise ValueError( "Must provide at least one context_sparse key, context_dense key, " ", feature_list_sparse key, or feature_list_dense key") if not set(context_dense_keys).isdisjoint(set(context_sparse_keys)): raise ValueError( "context_dense and context_sparse keys must not intersect; " "intersection: %s" % set(context_dense_keys).intersection(set(context_sparse_keys))) if not set(feature_list_dense_keys).isdisjoint( set(feature_list_sparse_keys)): raise ValueError( "feature_list_dense and feature_list_sparse keys must not intersect; " "intersection: %s" % set(feature_list_dense_keys).intersection( set(feature_list_sparse_keys))) if not isinstance(feature_list_dense_defaults, dict): raise TypeError("feature_list_dense_defaults must be a dict") for k, v in feature_list_dense_defaults.items(): if v is not None: raise ValueError("Value feature_list_dense_defaults[%s] must be None" % k) feature_list_dense_missing_assumed_empty.append(k) context_dense_defaults_vec = [] for i, key in enumerate(context_dense_keys): default_value = context_dense_defaults.get(key) if default_value is None: default_value = constant_op.constant([], dtype=context_dense_types[i]) elif not isinstance(default_value, ops.Tensor): key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key) default_value = ops.convert_to_tensor( default_value, dtype=context_dense_types[i], name=key_name) default_value = array_ops.reshape( default_value, context_dense_shapes[i]) context_dense_defaults_vec.append(default_value) context_dense_shapes = [tensor_util.MakeTensorShapeProto(shape) if isinstance(shape, (list, tuple)) else shape for shape in context_dense_shapes] feature_list_dense_shapes = [tensor_util.MakeTensorShapeProto(shape) if isinstance(shape, (list, tuple)) else shape for shape in feature_list_dense_shapes] outputs = gen_parsing_ops._parse_single_sequence_example( serialized=serialized, debug_name=debug_name, context_dense_defaults=context_dense_defaults_vec, context_sparse_keys=context_sparse_keys, context_sparse_types=context_sparse_types, context_dense_keys=context_dense_keys, context_dense_shapes=context_dense_shapes, feature_list_sparse_keys=feature_list_sparse_keys, feature_list_sparse_types=feature_list_sparse_types, feature_list_dense_keys=feature_list_dense_keys, feature_list_dense_types=feature_list_dense_types, feature_list_dense_shapes=feature_list_dense_shapes, feature_list_dense_missing_assumed_empty=( feature_list_dense_missing_assumed_empty), name=name) (context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values) = outputs context_sparse_tensors = [ ops.SparseTensor(ix, val, shape) for (ix, val, shape) in zip(context_sparse_indices, context_sparse_values, context_sparse_shapes)] feature_list_sparse_tensors = [ ops.SparseTensor(ix, val, shape) for (ix, val, shape) in zip(feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes)] context_output = dict( zip(context_sparse_keys + context_dense_keys, context_sparse_tensors + context_dense_values)) feature_list_output = dict( zip(feature_list_sparse_keys + feature_list_dense_keys, feature_list_sparse_tensors + feature_list_dense_values)) return (context_output, feature_list_output)
def log_sigma_det(self, name="log_sigma_det"): """Log of determinant of covariance matrix.""" with ops.name_scope(self.name): with ops.op_scope(self._cov.inputs, name): return self._cov.log_det()
def random_uniform(shape, minval=0, maxval=None, dtype=dtypes.float32, seed=None, name=None): """Outputs random values from a uniform distribution. The generated values follow a uniform distribution in the range `[minval, maxval)`. The lower bound `minval` is included in the range, while the upper bound `maxval` is excluded. For floats, the default range is `[0, 1)`. For ints, at least `maxval` must be specified explicitly. In the integer case, the random integers are slightly biased unless `maxval - minval` is an exact power of two. The bias is small for values of `maxval - minval` significantly smaller than the range of the output (either `2**32` or `2**64`). Args: shape: A 1-D integer Tensor or Python array. The shape of the output tensor. minval: A 0-D Tensor or Python value of type `dtype`. The lower bound on the range of random values to generate. Defaults to 0. maxval: A 0-D Tensor or Python value of type `dtype`. The upper bound on the range of random values to generate. Defaults to 1 if `dtype` is floating point. dtype: The type of the output: `float32`, `float64`, `int32`, or `int64`. seed: A Python integer. Used to create a random seed for the distribution. See [`set_random_seed`](../../api_docs/python/constant_op.md#set_random_seed) for behavior. name: A name for the operation (optional). Returns: A tensor of the specified shape filled with random uniform values. Raises: ValueError: If `dtype` is integral and `maxval` is not specified. """ dtype = dtypes.as_dtype(dtype) if maxval is None: if dtype.is_integer: raise ValueError("Must specify maxval for integer dtype %r" % dtype) maxval = 1 with ops.op_scope([shape, minval, maxval], name, "random_uniform") as name: shape = _ShapeTensor(shape) minval = ops.convert_to_tensor(minval, dtype=dtype, name="min") maxval = ops.convert_to_tensor(maxval, dtype=dtype, name="max") seed1, seed2 = random_seed.get_seed(seed) if dtype.is_integer: return gen_random_ops._random_uniform_int(shape, minval, maxval, seed=seed1, seed2=seed2, name=name) else: rnd = gen_random_ops._random_uniform(shape, dtype, seed=seed1, seed2=seed2) return math_ops.add(rnd * (maxval - minval), minval, name=name)
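# Quick sketch of the two code paths above (float vs. integer): a float sample
# scales a unit-uniform sample into [minval, maxval), while an integer sample
# requires an explicit maxval. Shapes and seeds here are arbitrary.
import tensorflow as tf

floats = tf.random_uniform([2, 3], minval=-1.0, maxval=1.0, seed=42)
ints = tf.random_uniform([2, 3], minval=0, maxval=10,
                         dtype=tf.int32, seed=42)
# Omitting maxval for an integer dtype would raise the ValueError above.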
def separable_conv2d(input, depthwise_filter, pointwise_filter, strides, padding, name=None): """2-D convolution with separable filters. Performs a depthwise convolution that acts separately on channels followed by a pointwise convolution that mixes channels. Note that this is separability between dimensions `[1, 2]` and `3`, not spatial separability between dimensions `1` and `2`. In detail, output[b, i, j, k] = sum_{di, dj, q, r} input[b, strides[1] * i + di, strides[2] * j + dj, q] * depthwise_filter[di, dj, q, r] * pointwise_filter[0, 0, q * channel_multiplier + r, k] `strides` controls the strides for the depthwise convolution only, since the pointwise convolution has implicit strides of `[1, 1, 1, 1]`. Must have `strides[0] = strides[3] = 1`. For the most common case of the same horizontal and vertical strides, `strides = [1, stride, stride, 1]`. Args: input: 4-D `Tensor` with shape `[batch, in_height, in_width, in_channels]`. depthwise_filter: 4-D `Tensor` with shape `[filter_height, filter_width, in_channels, channel_multiplier]`. Contains `in_channels` convolutional filters of depth 1. pointwise_filter: 4-D `Tensor` with shape `[1, 1, channel_multiplier * in_channels, out_channels]`. Pointwise filter to mix channels after `depthwise_filter` has convolved spatially. strides: 1-D of size 4. The strides for the depthwise convolution for each dimension of `input`. padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. name: A name for this operation (optional). Returns: A 4-D `Tensor` of shape `[batch, out_height, out_width, out_channels]`. """ with ops.op_scope([input, depthwise_filter, pointwise_filter], name, "separable_conv2d") as name: input = ops.convert_to_tensor(input, name="tensor_in") depthwise_filter = ops.convert_to_tensor(depthwise_filter, name="depthwise_filter") pointwise_filter = ops.convert_to_tensor(pointwise_filter, name="pointwise_filter") if pointwise_filter.get_shape().ndims is not None: assert len(pointwise_filter.get_shape()) == 4 assert pointwise_filter.get_shape()[0] == 1 assert pointwise_filter.get_shape()[1] == 1 if depthwise_filter.get_shape().ndims and input.get_shape().ndims: channel_multiplier = depthwise_filter.get_shape()[3] in_channels = input.get_shape()[3] out_channels = pointwise_filter.get_shape()[3] # Otherwise the separable convolution would be over-parameterized. assert channel_multiplier * in_channels < out_channels # The layout of the ops in the graph is expected to be as follows: # separable_conv2d // Conv2D op corresponding to the pointwise conv. # separable_conv2d/depthwise // Concat op for the depthwise outputs. # separable_conv2d/depthwise/depth0 // Conv2D op for depth 0 # separable_conv2d/depthwise/depth1 // Conv2D op for depth 1 # separable_conv2d/depthwise/depth2 // Conv2D op for depth 2 depthwise = depthwise_conv2d(input, depthwise_filter, strides, padding, name="depthwise") return nn_ops.conv2d(depthwise, pointwise_filter, [1, 1, 1, 1], padding="VALID", name=name)
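# Shape sketch for separable_conv2d above. The sizes are made up; note that
# channel_multiplier * in_channels (2 * 3 = 6) must be smaller than
# out_channels (16) to satisfy the over-parameterization assert above.
import tensorflow as tf

images = tf.zeros([8, 32, 32, 3])      # [batch, h, w, in_channels]
depthwise = tf.zeros([3, 3, 3, 2])     # channel_multiplier = 2
pointwise = tf.zeros([1, 1, 6, 16])    # mixes 6 channels into 16
out = tf.nn.separable_conv2d(images, depthwise, pointwise,
                             strides=[1, 1, 1, 1], padding="SAME")
# out has shape [8, 32, 32, 16].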
def weighted_cross_entropy_with_logits(logits, targets, pos_weight, name=None): """Computes a weighted cross entropy. This is like `sigmoid_cross_entropy_with_logits()` except that `pos_weight` allows one to trade off recall and precision by up- or down-weighting the cost of a positive error relative to a negative error. The usual cross-entropy cost is defined as: targets * -log(sigmoid(logits)) + (1 - targets) * -log(1 - sigmoid(logits)) The argument `pos_weight` is used as a multiplier for the positive targets: targets * -log(sigmoid(logits)) * pos_weight + (1 - targets) * -log(1 - sigmoid(logits)) For brevity, let `x = logits`, `z = targets`, `q = pos_weight`. The loss is: qz * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) = qz * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x))) = qz * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x))) = qz * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x))) = (1 - z) * x + (qz + 1 - z) * log(1 + exp(-x)) = (1 - z) * x + (1 + (q - 1) * z) * log(1 + exp(-x)) Setting `l = (1 + (q - 1) * z)`, to ensure stability and avoid overflow, the implementation uses (1 - z) * x + l * (log(1 + exp(-abs(x))) + max(-x, 0)) `logits` and `targets` must have the same type and shape. Args: logits: A `Tensor` of type `float32` or `float64`. targets: A `Tensor` of the same type and shape as `logits`. pos_weight: A coefficient to use on the positive examples. name: A name for the operation (optional). Returns: A `Tensor` of the same shape as `logits` with the componentwise weighted logistic losses. Raises: ValueError: If `logits` and `targets` do not have the same shape. """ with ops.op_scope([logits, targets], name, "logistic_loss") as name: logits = ops.convert_to_tensor(logits, name="logits") targets = ops.convert_to_tensor(targets, name="targets") try: targets.get_shape().merge_with(logits.get_shape()) except ValueError: raise ValueError( "logits and targets must have the same shape (%s vs %s)" % (logits.get_shape(), targets.get_shape())) # The logistic loss formula from above is # (1 - z) * x + (1 + (q - 1) * z) * log(1 + exp(-x)) # For x < 0, a more numerically stable formula is # (1 - z) * x + (1 + (q - 1) * z) * log(1 + exp(x)) - l * x # To avoid branching, we use the combined version # (1 - z) * x + l * (log(1 + exp(-abs(x))) + max(-x, 0)) log_weight = 1 + (pos_weight - 1) * targets return math_ops.add( (1 - targets) * logits, log_weight * (math_ops.log(1 + math_ops.exp(-math_ops.abs(logits))) + nn_ops.relu(-logits)), name=name)
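# A small numeric check (pure Python, not from the original source) that the
# numerically stable form used above matches the naive weighted logistic loss
# for both signs of x. The values of x, z, and q are arbitrary.
import math

def naive_loss(x, z, q):
    l = 1 + (q - 1) * z
    return (1 - z) * x + l * math.log(1 + math.exp(-x))

def stable_loss(x, z, q):
    l = 1 + (q - 1) * z
    return (1 - z) * x + l * (math.log(1 + math.exp(-abs(x))) + max(-x, 0.0))

for x in (-30.0, -1.0, 0.5, 30.0):
    for z in (0.0, 1.0):
        assert abs(naive_loss(x, z, 3.0) - stable_loss(x, z, 3.0)) < 1e-9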
def sufficient_statistics(x, axes, shift=True, keep_dims=False, name=None): """Calculate the sufficient statistics for the mean and variance of `x`. These sufficient statistics are computed using the one pass algorithm on an input that's optionally shifted using the value of the 1st element in `x`. See: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Computing_shifted_data Args: x: A `Tensor`. axes: Array of ints. Axes along which to compute mean and variance. shift: If true, shift the data to provide more numerically stable results. keep_dims: produce statistics with the same dimensionality as the input. name: Name used to scope the operations that compute the sufficient stats. Returns: Four `Tensor` objects of the same type as `x`: * the count (number of elements to average over). * the (possibly shifted) sum of the elements in the array. * the (possibly shifted) sum of squares of the elements in the array. * the shift by which the mean must be corrected or None if `shift` is False. """ with ops.op_scope([x, axes], name, "sufficient_statistics"): x = ops.convert_to_tensor(x, name="x") x_shape = x.get_shape() if x_shape.is_fully_defined(): counts = 1 m_shape = [] for d in xrange(x_shape.ndims): dim = x_shape[d].value if d in set(axes): counts *= dim dim = 1 m_shape.append(dim) counts = constant_op.constant(counts, dtype=x.dtype) else: # shape needs to be inferred at runtime. x_shape = array_ops.shape(x) select_axes = sparse_ops.sparse_to_dense(axes, array_ops.shape(x_shape), True, False) m_shape = math_ops.select(select_axes, array_ops.ones_like(x_shape), x_shape) counts = math_ops.cast(math_ops.reduce_prod(x_shape / m_shape), x.dtype, name="count") if shift: shift_value = array_ops.slice(x, array_ops.zeros_like(m_shape), m_shape) m_ss = math_ops.sub(x, shift_value) v_ss = math_ops.squared_difference(x, shift_value) if keep_dims: shift_value = array_ops.identity(shift_value, name="shift") else: shift_value = array_ops.squeeze(shift_value, squeeze_dims=axes, name="shift") else: # not shift. m_ss = x v_ss = math_ops.square(x) shift_value = None m_ss = math_ops.reduce_sum(m_ss, axes, keep_dims=keep_dims, name="mean_ss") v_ss = math_ops.reduce_sum(v_ss, axes, keep_dims=keep_dims, name="var_ss") return counts, m_ss, v_ss, shift_value
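# How the four outputs above combine into mean and variance (the shifted
# one-pass formulas from the linked Wikipedia page). This is a plain NumPy
# re-derivation for illustration, not code from the original source; it
# reduces over all axes, so the shift is the first element of x.
import numpy as np

x = np.random.randn(4, 5)
shift = x.flat[0]                  # the 1st element, as with `shift=True`
count = x.size
m_ss = np.sum(x - shift)           # shifted sum
v_ss = np.sum((x - shift) ** 2)    # shifted sum of squares
mean = shift + m_ss / count
variance = v_ss / count - (m_ss / count) ** 2
assert np.allclose(mean, x.mean()) and np.allclose(variance, x.var())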
def safe_embedding_lookup_sparse(embedding_weights, sparse_ids, sparse_weights=None, combiner="mean", default_id=None, name=None, partition_strategy="div"): """Lookup embedding results, accounting for invalid IDs and empty features. The partitioned embedding tensors in `embedding_weights` must all have the same shape except for the first dimension. The first dimension is allowed to vary as the vocabulary size is not necessarily a multiple of `P`. Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs with non-positive weight. For an entry with no features, the embedding vector for `default_id` is returned, or the 0-vector if `default_id` is not supplied. Args: embedding_weights: A list of `P` float tensors or values representing partitioned embedding tensors. sparse_ids: `SparseTensor` of shape `[batch_size, ?]` containing the ids. sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing float weights corresponding to `sparse_ids`, or `None` if all weights are assumed to be 1.0. combiner: A string specifying how to combine embedding results for each entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean" the default. default_id: The id to use for an entry with no features. name: A name for this operation (optional). partition_strategy: A string specifying the partitioning strategy. Currently `"div"` and `"mod"` are supported. Default is `"div"`. Returns: Dense tensor of shape `[batch_size, embed_dim]`. Raises: ValueError: if `embedding_weights` is empty. """ if embedding_weights is None or len(embedding_weights) < 1: raise ValueError("Missing embedding_weights %s." % embedding_weights) dtype = sparse_weights.dtype if sparse_weights is not None else None embedding_weights = [ ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights ] contrib_tensor_util.assert_same_float_dtype(embedding_weights + [sparse_weights]) with ops.op_scope(embedding_weights + [sparse_ids, sparse_weights], name, "embedding_lookup") as scope: # Prune invalid ids and weights. sparse_ids, sparse_weights = _prune_invalid_ids( sparse_ids, sparse_weights) # Fill in dummy values for empty features, if necessary. sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows( sparse_ids, default_id or 0) if sparse_weights is not None: sparse_weights, _ = sparse_ops.sparse_fill_empty_rows( sparse_weights, 1.0) result = tf_embedding_ops.embedding_lookup_sparse( embedding_weights, sparse_ids, sparse_weights, combiner=combiner, partition_strategy=partition_strategy, name=None if default_id is None else scope) if default_id is None: # Broadcast is_row_empty to the same shape as embedding_lookup_result, # for use in Select. is_row_empty = array_ops.tile( array_ops.reshape(is_row_empty, [-1, 1]), array_ops.pack([1, array_ops.shape(result)[1]])) result = math_ops.select(is_row_empty, array_ops.zeros_like(result), result, name=scope) return result
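# Usage sketch for safe_embedding_lookup_sparse above. The export path
# (tf.contrib.layers) and all sizes/ids are assumptions for illustration.
import tensorflow as tf

embeddings = [tf.Variable(tf.random_uniform([50, 8]))]  # single partition
# Row 1 has ids {3, -1}; row 0 is empty. The invalid id -1 is pruned, and the
# empty row 0 falls back to the 0-vector since default_id is not given.
sparse_ids = tf.SparseTensor(indices=[[1, 0], [1, 1]],
                             values=tf.constant([3, -1], dtype=tf.int64),
                             shape=[2, 2])
embedded = tf.contrib.layers.safe_embedding_lookup_sparse(
    embeddings, sparse_ids, combiner="mean")
# embedded has shape [2, 8].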
def depthwise_conv2d(input, filter, strides, padding, name=None): """Depthwise 2-D convolution. Given an input tensor of shape `[batch, in_height, in_width, in_channels]` and a filter tensor of shape `[filter_height, filter_width, in_channels, channel_multiplier]` containing `in_channels` convolutional filters of depth 1, `depthwise_conv2d` applies a different filter to each input channel (expanding from 1 channel to `channel_multiplier` channels for each), then concatenates the results together. The output has `in_channels * channel_multiplier` channels. In detail, output[b, i, j, k * channel_multiplier + q] = sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, k] * filter[di, dj, k, q] Must have `strides[0] = strides[3] = 1`. For the most common case of the same horizontal and vertical strides, `strides = [1, stride, stride, 1]`. Args: input: 4-D with shape `[batch, in_height, in_width, in_channels]`. filter: 4-D with shape `[filter_height, filter_width, in_channels, channel_multiplier]`. strides: 1-D of size 4. The stride of the sliding window for each dimension of `input`. padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. name: A name for this operation (optional). Returns: A 4-D `Tensor` of shape `[batch, out_height, out_width, in_channels * channel_multiplier].` """ with ops.op_scope([input, filter], name, "depthwise") as name: input = ops.convert_to_tensor(input, name="tensor_in") filter = ops.convert_to_tensor(filter, name="filter_in") # A shape is required to statically compute the number of separable filters. if filter.get_shape().ndims is not None: assert len(filter.get_shape()) == 4 in_channels = filter.get_shape()[2] # Sanity checks, if shape information is available for the inputs. if input.get_shape().ndims is not None: assert len(input.get_shape()) == 4 assert input.get_shape()[3] == in_channels, ( "Mismatched input depth %d and number of depthwise filters %d." % (input.get_shape()[3].value, in_channels)) else: assert input.get_shape().ndims is not None, ( "Either tensor must provide static shape information.") assert input.get_shape().ndims == 4 in_channels = input.get_shape()[3] if in_channels == 1: return nn_ops.conv2d(input, filter, strides, padding, name=name) else: # Create one separate convolution per channel. convs = [] for channel in xrange(in_channels): with ops.name_scope("depth%d" % channel) as channel_scope: t_in = array_ops.slice(input, [0, 0, 0, channel], [-1, -1, -1, 1], name="slice_inputs") f_in = array_ops.slice(filter, [0, 0, channel, 0], [-1, -1, 1, -1], name="slice_params") convs.append( nn_ops.conv2d(t_in, f_in, strides, padding, name=channel_scope)) # Concatenate the per-channel convolutions along the channel dimension. return array_ops.concat(3, convs, name=name)
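# Shape sketch for depthwise_conv2d above; sizes are arbitrary. Each of the 3
# input channels is convolved with its own stack of 2 depth-1 filters, so the
# output ends up with 3 * 2 = 6 channels.
import tensorflow as tf

images = tf.zeros([8, 32, 32, 3])  # [batch, h, w, in_channels]
filters = tf.zeros([3, 3, 3, 2])   # [fh, fw, in_channels, channel_multiplier]
out = tf.nn.depthwise_conv2d(images, filters,
                             strides=[1, 1, 1, 1], padding="SAME")
# out has shape [8, 32, 32, 6].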
def mean(self, name="mean"): """Mean of this distribution.""" with ops.name_scope(self.name): with ops.op_scope([self._scale, self._loc], name): return self._loc + array_ops.zeros_like(self._scale)
def sigma_det(self, name="sigma_det"): """Determinant of covariance matrix.""" with ops.name_scope(self.name): with ops.op_scope(self._cov.inputs, name): return math_ops.exp(self._cov.log_det())
def std(self, name="std"): """Standard deviation of this distribution.""" with ops.name_scope(self.name): with ops.op_scope([self._scale, self._loc], name): sqrt_2 = constant_op.constant(math.sqrt(2.), dtype=self.dtype) return sqrt_2 * self._scale + array_ops.zeros_like(self._loc)
def _init_from_args(self, initial_value=None, trainable=True, collections=None, validate_shape=True, caching_device=None, name=None, dtype=None): """Creates a new variable from arguments. Args: initial_value: A `Tensor`, or Python object convertible to a `Tensor`. The initial value for the Variable. Must have a shape specified unless `validate_shape` is set to False. trainable: If `True`, the default, also adds the variable to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as the default list of variables to use by the `Optimizer` classes. collections: List of graph collections keys. The new variable is added to these collections. Defaults to `[GraphKeys.VARIABLES]`. validate_shape: If `False`, allows the variable to be initialized with a value of unknown shape. If `True`, the default, the shape of `initial_value` must be known. caching_device: Optional device string or function describing where the Variable should be cached for reading. Defaults to the Variable's device. If not `None`, caches on another device. Typical use is to cache on the device where the Ops using the Variable reside, to deduplicate copying through `Switch` and other conditional statements. name: Optional name for the variable. Defaults to `'Variable'` and gets uniquified automatically. dtype: If set, initial_value will be converted to the given type. If None, either the datatype will be kept (if initial_value is a Tensor) or float32 will be used (if it is a Python object convertible to a Tensor). Raises: ValueError: If the initial value is not specified, or does not have a shape and `validate_shape` is `True`. """ if initial_value is None: raise ValueError("initial_value must be specified.") if collections is None: collections = [ops.GraphKeys.VARIABLES] if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ ops.GraphKeys.TRAINABLE_VARIABLES ] with ops.control_dependencies(None): with ops.op_scope([initial_value], name, "Variable") as name: self._initial_value = ops.convert_to_tensor( initial_value, name="initial_value", dtype=dtype) initial_value_shape = self._initial_value.get_shape() if validate_shape and not initial_value_shape.is_fully_defined( ): raise ValueError( "initial_value must have a shape specified: %s" % self._initial_value) shape_to_set = initial_value_shape if validate_shape else [] self._variable = state_ops.variable_op( shape_to_set, self._initial_value.dtype.base_dtype, set_shape=validate_shape, name=name) with ops.colocate_with(self._variable.op): self._initializer_op = state_ops.assign( self._variable, self._initial_value, validate_shape=validate_shape).op # TODO(vrv): Change this class to not take caching_device, but # to take the op to colocate the snapshot with, so we can use # colocation rather than devices. if caching_device is not None: with ops.device(caching_device): self._snapshot = array_ops.identity(self._variable, name="read") else: with ops.colocate_with(self._variable.op): self._snapshot = array_ops.identity(self._variable, name="read") ops.add_to_collections(collections, self) self._caching_device = caching_device self._save_slice_info = None
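# A minimal sketch of the constructor arguments documented above; this goes
# through tf.Variable, which delegates to _init_from_args. Names and shapes
# are illustrative only.
import tensorflow as tf

v = tf.Variable(tf.zeros([10, 10]), trainable=True, name="weights")
# v lands in GraphKeys.VARIABLES and, because trainable=True, also in
# GraphKeys.TRAINABLE_VARIABLES; reads go through the "read" snapshot above.
init = tf.initialize_all_variables()  # groups each variable's initializer op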
def _get_partitioned_variable( self, name, partitioner, shape=None, dtype=dtypes.float32, initializer=None, regularizer=None, reuse=None, trainable=True, collections=None, caching_device=None, validate_shape=True): """Gets or creates a sharded variable list with these parameters. The `partitioner` must be a callable that accepts a fully defined `TensorShape` and returns a sequence of integers (the `partitions`). These integers describe how to partition the given sharded `Variable` along the given dimension. That is, `partitions[1] = 3` means split the `Variable` into 3 shards along dimension 1. Currently, sharding along only one axis is supported. If the list of variables with the given name (prefix) is already stored, we return the stored variables. Otherwise, we create a new one. Set `reuse` to `True` when you only want to reuse existing Variables. Set `reuse` to `False` when you only want to create new Variables. If `reuse` is `None` (the default), both new and existing variables are returned. If initializer is `None` (the default), the default initializer passed in the constructor is used. If that one is `None` too, we use a new `UniformUnitScalingInitializer`. If initializer is a Tensor, we use it as a value and derive the shape from the initializer. If the initializer is a callable, then it will be called for each shard. Otherwise the initializer should match the shape of the entire sharded Variable, and it will be sliced accordingly for each shard. Some useful partitioners are available. See, e.g., `variable_axis_size_partitioner`. Args: name: the name of the new or existing sharded variable. partitioner: Optional callable that accepts a fully defined `TensorShape` and `dtype` of the Variable to be created, and returns a list of partitions for each axis (currently only one axis can be partitioned). shape: shape of the new or existing sharded variable. dtype: type of the new or existing sharded variable (defaults to `DT_FLOAT`). initializer: initializer for the sharded variable. regularizer: a (Tensor -> Tensor or None) function; the result of applying it on a newly created variable will be added to the collection GraphKeys.REGULARIZATION_LOSSES and can be used for regularization. reuse: a Boolean or `None`. Controls reuse or creation of variables. trainable: If `True` also add the variable to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). collections: List of graph collections keys to add the Variable to. Defaults to `[GraphKeys.VARIABLES]` (see tf.Variable). caching_device: Optional device string or function describing where the Variable should be cached for reading. Defaults to the Variable's device. If not `None`, caches on another device. Typical use is to cache on the device where the Ops using the Variable reside, to deduplicate copying through `Switch` and other conditional statements. validate_shape: If False, allows the variable to be initialized with a value of unknown shape. If True, the default, the shape of initial_value must be known. Returns: A tuple `(shards, partitions)` where `shards` is the list of `Variable` shards and `partitions` is the output of the partitioner on the input shape. Raises: ValueError: when creating a new variable and shape is not declared, when reusing a variable and specifying a conflicting shape, when violating reuse during variable creation, or if an existing sharded variable exists for the given name but with different sharding. 
""" initializing_from_value = initializer is not None and isinstance( initializer, ops.Tensor) reuse_without_partition = reuse is True and partitioner is None if name in self._vars: raise ValueError( "A partitioner was provided, but an unpartitioned version of the " "variable was found: %s. Perhaps a variable of the same name was " "already created without partitioning?" % name) shape = tensor_shape.as_shape(shape) if initializing_from_value: shape = initializer.get_shape() if not reuse_without_partition: if not shape.is_fully_defined(): raise ValueError("Shape of a new partitioned variable (%s) must be " "fully defined, but instead was %s." % (name, shape)) if shape.ndims < 1: raise ValueError("A partitioned Variable must have rank at least 1, " "shape: %s" % shape) partitions = partitioner(shape=shape, dtype=dtype) if not isinstance(partitions, collections_lib.Sequence): raise ValueError("Partitioner must return a sequence, but saw: %s" % partitions) if len(partitions) != shape.ndims: raise ValueError( "Partitioner returned a partition list that does not match the " "Variable's rank: %s vs. %s" % (partitions, shape)) if any([p < 1 for p in partitions]): raise ValueError( "Partitioner returned zero partitions for some axes: %s" % partitions) should_check = reuse is not None if name in self._partitioned_vars: if should_check and not reuse: raise ValueError( "Partitioned variable with name %s already exists. Did you mean to " "set reuse=True in VarScope?" % name) existing_var = self._partitioned_vars[name] if not shape.is_compatible_with(existing_var.get_shape()): raise ValueError( "Trying to reuse partitioned variable %s, but specified shape %s " "and found shape %s." % (name, shape, existing_var.get_shape())) if not dtype.is_compatible_with(existing_var.dtype): raise ValueError( "Trying to reuse partitioned variable %s, but specified dtype %s " "and found dtype %s." % (name, dtype.name, existing_var.dtype.name)) # pylint: disable=protected-access if (not reuse_without_partition and existing_var._get_partitions() != partitions): raise ValueError( "Trying to reuse partitioned variable %s, but specified partitions " "%s and found partitions %s." % (name, partitions, existing_var._get_partitions())) # pylint: enable=protected-access return existing_var if should_check and reuse: raise ValueError("PartitionedVariable %s does not exist, disallowed." " Did you mean to set reuse=None in VarScope?" % name) slice_dim, slice_shape = _compute_slice_dim_and_shape( shape.as_list(), partitions) vs = [] num_slices = partitions[slice_dim] num_slices_with_excess = shape[slice_dim].value % num_slices slice_offset = [0] * shape.ndims if "%s/part_0" % name in self._vars: if "%s/part_%d" % (name, num_slices - 1) not in self._vars: raise ValueError( "Partitioner returned a different partitioning than what was " "already found. Partitioner returned %d shards, and shard " "%s/part_0 was found, but %s/part_%d was not." % (num_slices, name, name, num_slices - 1)) if "%s/part_%d" % (name, num_slices) in self._vars: raise ValueError( "Partitioner returned a different partitioning than what was " "already found. Partitioner returned %d shards, and shard " "%s/part_0 was found, but so was the extra shard %s/part_%d." 
% (num_slices, name, name, num_slices)) for i in xrange(num_slices): var_shape = slice_shape[:] var_offset = slice_offset[:] if i < num_slices_with_excess: var_shape[slice_dim] += 1 slice_offset[slice_dim] += var_shape[slice_dim] var_full_name = "%s/part_%d" % (name, i) with ops.op_scope([], var_full_name + "/Initializer"): if initializer is None: init = init_ops.uniform_unit_scaling_initializer( full_shape=shape.as_list()) init_shape = var_shape elif callable(initializer): init = initializer init_shape = var_shape elif isinstance(initializer, ops.Tensor): init = array_ops.slice(initializer, var_offset, var_shape) # Use the dtype of the given tensor. dtype = init.dtype.base_dtype init_shape = None else: init = ops.convert_to_tensor(initializer, dtype=dtype) init = array_ops.slice(init, var_offset, var_shape) init_shape = None with ops.name_scope(None): var = self._get_single_variable( name=var_full_name, shape=init_shape, dtype=dtype, initializer=init, regularizer=regularizer, reuse=reuse, trainable=trainable, collections=collections, caching_device=caching_device, validate_shape=validate_shape) # pylint: disable=protected-access var._set_save_slice_info(variables.Variable.SaveSliceInfo( name, shape.as_list(), var_offset, var_shape)) vs.append(var) # pylint: enable=protected-access # pylint: disable=protected-access partitioned_var = variables._PartitionedVariable(name=name, shape=shape, dtype=dtype, variable_list=vs, partitions=partitions) # pylint: enable=protected-access self._partitioned_vars[name] = partitioned_var return partitioned_var
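# The partitioner contract described above, as a standalone sketch: a callable
# taking (shape, dtype) and returning one integer per axis. This hypothetical
# example splits axis 0 into 4 shards and leaves every other axis whole.
def axis0_partitioner(shape, dtype):
    # `shape` is expected to be fully defined (a TensorShape or list of ints);
    # `dtype` is part of the contract but unused here.
    return [4] + [1] * (len(shape) - 1)

print(axis0_partitioner([10, 20], "float32"))  # [4, 1]
# For shape [10, 20] this yields four shards along axis 0; the 10 % 4 = 2
# excess rows are spread one each over the first two shards, matching the
# num_slices_with_excess logic above.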
def std(self, name="std"): with ops.name_scope(self.name): with ops.op_scope([self.range()], name): return self.range() / math_ops.sqrt(12.)
def range(self, name="range"): """`b - a`.""" with ops.name_scope(self.name): with ops.op_scope([self.a, self.b], name): return self.b - self.a
def mean(self, name="mean"): with ops.name_scope(self.name): with ops.op_scope([self._a, self._b], name): return (self.a + self.b) / 2
def variance(self, name="variance"): with ops.name_scope(self.name): with ops.op_scope([self.range()], name): return math_ops.square(self.range()) / 12.
def event_shape(self, name="event_shape"): with ops.name_scope(self.name): with ops.op_scope([], name): return constant_op.constant([], dtype=dtypes.int32)
def log_cdf(self, x, name="log_cdf"): with ops.name_scope(self.name): with ops.op_scope([self.a, self.b, x], name): x = ops.convert_to_tensor(x, name="x") return math_ops.log(self.cdf(x))
def minimize(self, global_step=None, name=None): """Add operations to train a linear model by minimizing the loss function. Args: global_step: Optional `Variable` to increment by one after the variables have been updated. name: Optional name for the returned operation. Returns: An Operation that updates the variables passed in the constructor. """ # Technically, the op depends on a lot more than the variables, # but we'll keep the list short. with op_scope([], name, 'sdca/minimize'): sparse_features_indices = [] sparse_features_values = [] for sf in self._examples['sparse_features']: sparse_features_indices.append(convert_to_tensor(sf.indices)) sparse_features_values.append(convert_to_tensor(sf.values)) example_ids_hashed = _sdca_ops.sdca_fprint(convert_to_tensor( self._examples['example_ids'])) example_state_data = self._hashtable.lookup(example_ids_hashed) example_state_data_updated = _sdca_ops.sdca_solver( sparse_features_indices, sparse_features_values, self._convert_n_to_tensor(self._examples['dense_features']), convert_to_tensor(self._examples['example_weights']), convert_to_tensor(self._examples['example_labels']), self._convert_n_to_tensor( self._slots['unshrinked_sparse_features_weights'], as_ref=True), self._convert_n_to_tensor( self._slots['unshrinked_dense_features_weights'], as_ref=True), example_state_data, l1=self._options['symmetric_l1_regularization'], l2=self._symmetric_l2_regularization(), # TODO(sibyl-Aix6ihai): Provide empirical evidence for this. It is better # to run more than one iteration on a single mini-batch as we want to # spend more time in compute. SDCA works better with larger # mini-batches, and there is also recent work that shows it's better to # reuse old samples than to train on new samples. # See: http://arxiv.org/abs/1602.02136. num_inner_iterations=2, loss_type=self._options['loss_type']) with ops.control_dependencies([example_state_data_updated]): insert_op = self._hashtable.insert(example_ids_hashed, example_state_data_updated) update_ops = [insert_op] # Use a distinct loop variable so the `name` argument is not shadowed # before it is used for the assign_add op below. for slot_name in ['sparse_features_weights', 'dense_features_weights']: for var, slot_var in zip(self._variables[slot_name], self._slots['unshrinked_' + slot_name]): update_ops.append(var.assign(slot_var)) update_group = control_flow_ops.group(*update_ops) with ops.control_dependencies([update_group]): shrink_l1 = _sdca_ops.sdca_shrink_l1( self._convert_n_to_tensor( self._variables['sparse_features_weights'], as_ref=True), self._convert_n_to_tensor( self._variables['dense_features_weights'], as_ref=True), l1=self._options['symmetric_l1_regularization'], l2=self._symmetric_l2_regularization()) if global_step is None: return shrink_l1 with ops.control_dependencies([shrink_l1]): return state_ops.assign_add(global_step, 1, name=name).op
def batch_shape(self, name="batch_shape"): with ops.name_scope(self.name): with ops.op_scope([], name): return array_ops.shape(self._ones())
def mode(self, name="mode"): """Mode of each batch member.""" with ops.name_scope(self.name): with ops.op_scope([self._mu], name): return array_ops.identity(self._mu)
def _compute_sampled_logits(weights, biases, inputs, labels, num_sampled, num_classes, num_true=1, sampled_values=None, subtract_log_q=True, remove_accidental_hits=False, partition_strategy="mod", name=None): """Helper function for nce_loss and sampled_softmax_loss functions. Computes sampled output training logits and labels suitable for implementing e.g. noise-contrastive estimation (see nce_loss) or sampled softmax (see sampled_softmax_loss). Note: In the case where num_true > 1, we assign to each target class the target probability 1 / num_true so that the target probabilities sum to 1 per-example. Args: weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor` objects whose concatenation along dimension 0 has shape `[num_classes, dim]`. The (possibly-partitioned) class embeddings. biases: A `Tensor` of shape `[num_classes]`. The class biases. inputs: A `Tensor` of shape `[batch_size, dim]`. The forward activations of the input network. labels: A `Tensor` of type `int64` and shape `[batch_size, num_true]`. The target classes. Note that this format differs from the `labels` argument of `nn.softmax_cross_entropy_with_logits`. num_sampled: An `int`. The number of classes to randomly sample per batch. num_classes: An `int`. The number of possible classes. num_true: An `int`. The number of target classes per training example. sampled_values: A tuple of (`sampled_candidates`, `true_expected_count`, `sampled_expected_count`) returned by a `*_candidate_sampler` function. (If `None`, we default to `log_uniform_candidate_sampler`.) subtract_log_q: A `bool`. Whether to subtract the log expected count of the labels in the sample to get the logits of the true labels. Default is True. Turn off for Negative Sampling. remove_accidental_hits: A `bool`. Whether to remove "accidental hits" where a sampled class equals one of the target classes. Default is False. partition_strategy: A string specifying the partitioning strategy, relevant if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported. Default is `"mod"`. See `tf.nn.embedding_lookup` for more details. name: A name for the operation (optional). Returns: out_logits, out_labels: `Tensor` objects each with shape `[batch_size, num_true + num_sampled]`, for passing to either `nn.sigmoid_cross_entropy_with_logits` (NCE) or `nn.softmax_cross_entropy_with_logits` (sampled softmax). """ if not isinstance(weights, list): weights = [weights] with ops.op_scope(weights + [biases, inputs, labels], name, "compute_sampled_logits"): if labels.dtype != dtypes.int64: labels = math_ops.cast(labels, dtypes.int64) labels_flat = array_ops.reshape(labels, [-1]) # Sample the negative labels. 
# sampled shape: [num_sampled] tensor # true_expected_count shape = [batch_size, 1] tensor # sampled_expected_count shape = [num_sampled] tensor if sampled_values is None: sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler( true_classes=labels, num_true=num_true, num_sampled=num_sampled, unique=True, range_max=num_classes) # NOTE: pylint cannot tell that 'sampled_values' is a sequence # pylint: disable=unpacking-non-sequence sampled, true_expected_count, sampled_expected_count = sampled_values # pylint: enable=unpacking-non-sequence # labels_flat is a [batch_size * num_true] tensor # sampled is a [num_sampled] int tensor all_ids = array_ops.concat(0, [labels_flat, sampled]) # weights shape is [num_classes, dim] all_w = embedding_ops.embedding_lookup( weights, all_ids, partition_strategy=partition_strategy) all_b = embedding_ops.embedding_lookup(biases, all_ids) # true_w shape is [batch_size * num_true, dim] # true_b is a [batch_size * num_true] tensor true_w = array_ops.slice( all_w, [0, 0], array_ops.pack([array_ops.shape(labels_flat)[0], -1])) true_b = array_ops.slice(all_b, [0], array_ops.shape(labels_flat)) # inputs shape is [batch_size, dim] # true_w shape is [batch_size * num_true, dim] # row_wise_dots is [batch_size, num_true, dim] dim = array_ops.shape(true_w)[1:2] new_true_w_shape = array_ops.concat(0, [[-1, num_true], dim]) row_wise_dots = math_ops.mul( array_ops.expand_dims(inputs, 1), array_ops.reshape(true_w, new_true_w_shape)) # We want the row-wise dot plus biases which yields a # [batch_size, num_true] tensor of true_logits. dots_as_matrix = array_ops.reshape(row_wise_dots, array_ops.concat(0, [[-1], dim])) true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true]) true_b = array_ops.reshape(true_b, [-1, num_true]) true_logits += true_b # Lookup weights and biases for sampled labels. # sampled_w shape is [num_sampled, dim] # sampled_b is a [num_sampled] float tensor sampled_w = array_ops.slice( all_w, array_ops.pack([array_ops.shape(labels_flat)[0], 0]), [-1, -1]) sampled_b = array_ops.slice(all_b, array_ops.shape(labels_flat), [-1]) # inputs has shape [batch_size, dim] # sampled_w has shape [num_sampled, dim] # sampled_b has shape [num_sampled] # Apply X*W'+B, which yields [batch_size, num_sampled] sampled_logits = math_ops.matmul(inputs, sampled_w, transpose_b=True) + sampled_b if remove_accidental_hits: acc_hits = candidate_sampling_ops.compute_accidental_hits( labels, sampled, num_true=num_true) acc_indices, acc_ids, acc_weights = acc_hits # This is how SparseToDense expects the indices. acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1]) acc_ids_2d_int32 = array_ops.reshape( math_ops.cast(acc_ids, dtypes.int32), [-1, 1]) sparse_indices = array_ops.concat( 1, [acc_indices_2d, acc_ids_2d_int32], "sparse_indices") # Create sampled_logits_shape = [batch_size, num_sampled] sampled_logits_shape = array_ops.concat(0, [ array_ops.shape(labels)[:1], array_ops.expand_dims(num_sampled, 0) ]) if sampled_logits.dtype != acc_weights.dtype: acc_weights = math_ops.cast(acc_weights, sampled_logits.dtype) sampled_logits += sparse_ops.sparse_to_dense( sparse_indices, sampled_logits_shape, acc_weights, default_value=0.0, validate_indices=False) if subtract_log_q: # Subtract log of Q(l), prior probability that l appears in sampled. true_logits -= math_ops.log(true_expected_count) sampled_logits -= math_ops.log(sampled_expected_count) # Construct output logits and labels. The true labels/logits start at col 0. 
out_logits = array_ops.concat(1, [true_logits, sampled_logits]) # true_logits is a float tensor, ones_like(true_logits) is a float tensor # of ones. We then divide by num_true to ensure the per-example labels sum # to 1.0, i.e. form a proper probability distribution. out_labels = array_ops.concat(1, [ array_ops.ones_like(true_logits) / num_true, array_ops.zeros_like(sampled_logits) ]) return out_logits, out_labels
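# NumPy illustration (not from the original source) of how the final labels
# above are laid out: the true classes fill the first num_true columns with
# probability 1 / num_true, and the num_sampled sampled classes get 0.
import numpy as np

batch_size, num_true, num_sampled = 2, 2, 3
out_labels = np.concatenate(
    [np.ones((batch_size, num_true)) / num_true,
     np.zeros((batch_size, num_sampled))], axis=1)
# Each row is [0.5, 0.5, 0., 0., 0.] -- a proper probability distribution over
# num_true + num_sampled columns, matching the out_labels returned above.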