def to_weighted_sum(self,
                    input_tensor,
                    num_outputs=1,
                    weight_collections=None,
                    trainable=True):
  """Returns a Tensor as linear predictions and a list of created Variable.

  The bucket ids in `input_tensor` are flattened into a `SparseTensor` with
  one entry per (example, source dimension) pair and handed to
  `_create_embedding_lookup` with a "sum" combiner.

  Args:
    input_tensor: Tensor of bucket indices. Its first dimension is read as the
      batch size. (Presumably shaped `[batch_size, dimension]` -- confirm
      against the caller.)
    num_outputs: Forwarded to `_create_embedding_lookup`.
    weight_collections: Passed through `_add_variable_collection` and then
      forwarded to `_create_embedding_lookup`.
    trainable: Forwarded to `_create_embedding_lookup`.

  Returns:
    The result of `_create_embedding_lookup`.
  """
  dimension = self.source_column.dimension
  batch_size = array_ops.shape(input_tensor)[0]

  if dimension > 1:
    # i1: example index, repeated once per source dimension.
    i1 = array_ops.reshape(
        array_ops.tile(
            array_ops.expand_dims(math_ops.range(0, batch_size), 1),
            [1, dimension]),
        [-1])
    # i2: source-dimension index, cycling 0..dimension-1 for every example.
    i2 = array_ops.tile(math_ops.range(0, dimension), [batch_size])
    # Flatten the bucket indices and unique them across dimensions
    # E.g. 2nd dimension indices will range from k to 2*k-1 with k buckets
    # TODO(chapelle): move that logic to insert_transformed_feature to ensure
    # unique buckets across dimensions after crossing.
    bucket_indices = array_ops.reshape(input_tensor, [-1]) + self.length * i2
  else:
    # Simpler indices when dimension=1
    i1 = math_ops.range(0, batch_size)
    i2 = array_ops.zeros([batch_size], dtype=dtypes.int32)
    bucket_indices = array_ops.reshape(input_tensor, [-1])

  indices = math_ops.to_int64(array_ops.transpose(array_ops.pack((i1, i2))))
  # The second column of `indices` (i2) ranges over [0, dimension), so the
  # dense shape needs `dimension` columns. A hard-coded width of 1 declares
  # every entry with i2 >= 1 out of bounds. For dimension == 1 this is
  # identical to the previous [batch_size, 1].
  shape = math_ops.to_int64(array_ops.pack([batch_size, dimension]))
  sparse_id_values = ops.SparseTensor(indices, bucket_indices, shape)
  vocab_size = self.length * self.source_column.dimension

  return _create_embedding_lookup(
      sparse_id_values, vocab_size, num_outputs,
      _add_variable_collection(weight_collections), 0., "sum",
      trainable, self.name + "_weights")
def crop_to_1d_bounding_box(image, offset_height, target_height,
                            dynamic_shape=False):
  """Crops `image` along its first axis only.

  Takes `target_height` entries along axis 0, starting at `offset_height`,
  and keeps the second axis whole.

  Args:
    image: Tensor (or value convertible to one) to crop.
      NOTE(review): the slice uses two-element begin/size vectors, so a 2-D
      input is presumably expected -- confirm against callers.
    offset_height: Index along axis 0 at which the crop starts.
    target_height: Number of entries to keep along axis 0.
    dynamic_shape: Forwarded to `_ImageDimensions`.

  Returns:
    The sliced tensor.

  Raises:
    ValueError: Presumably raised by `_ImageDimensions` when the shape of
      `image` is unsuitable and `dynamic_shape` is `False` -- verify.
  """
  image = tf.convert_to_tensor(image, name='image')
  # The returned dimensions are not used below. The call (and the two-way
  # unpacking) is kept so that any checking done by `_ImageDimensions` still
  # runs.
  _unused_height, _ = _ImageDimensions(image, dynamic_shape=dynamic_shape)

  begin = array_ops.pack([offset_height, 0])
  # A size of -1 keeps everything remaining along that axis.
  size = array_ops.pack([target_height, -1])
  return array_ops.slice(image, begin, size)
def zero_state(self, batch_size, dtype):
  """Builds an all-zero state matching `self.state_size`.

  Args:
    batch_size: int, float, or unit Tensor representing the batch size.
    dtype: the data type to use for the state.

  Returns:
    If `state_size` is not a nested sequence, a single zero tensor whose
    shape is `batch_size` followed by `state_size`. Otherwise a structure
    with the same nesting as `state_size`, holding one such zero tensor per
    leaf. The static batch dimension of every tensor is left as `None`.
  """
  state_size = self.state_size

  if not nest.is_sequence(state_size):
    dims = _state_size_with_prefix(state_size, prefix=[batch_size])
    zeros = array_ops.zeros(array_ops.pack(dims), dtype=dtype)
    zeros.set_shape(_state_size_with_prefix(state_size, prefix=[None]))
    return zeros

  flat_sizes = nest.flatten(state_size)
  flat_zeros = []
  for size in flat_sizes:
    dims = _state_size_with_prefix(size, prefix=[batch_size])
    flat_zeros.append(array_ops.zeros(array_ops.pack(dims), dtype=dtype))
  # Record the static shape separately; the batch dimension stays unknown.
  for size, zero in zip(flat_sizes, flat_zeros):
    zero.set_shape(_state_size_with_prefix(size, prefix=[None]))
  return nest.pack_sequence_as(structure=state_size,
                               flat_sequence=flat_zeros)
def zero_state(self, batch_size, dtype):
  """Builds an all-zero state matching `self.state_size`.

  Args:
    batch_size: int, float, or unit Tensor representing the batch size.
    dtype: the data type to use for the state.

  Returns:
    If `state_size` is not a sequence, a `2-D` zero tensor of shape
    `[batch_size x state_size]`. Otherwise a structure with the same nesting
    as `state_size`, holding a `[batch_size x s]` zero tensor for each `s`.
    The static batch dimension of every tensor is left as `None`.
  """
  state_size = self.state_size

  if not _is_sequence(state_size):
    zeros = array_ops.zeros(
        array_ops.pack([batch_size, state_size]), dtype=dtype)
    zeros.set_shape([None, state_size])
    return zeros

  flat_sizes = _unpacked_state(state_size)
  flat_zeros = []
  for size in flat_sizes:
    flat_zeros.append(
        array_ops.zeros(array_ops.pack([batch_size, size]), dtype=dtype))
  # Record the static shape separately; the batch dimension stays unknown.
  for size, zero in zip(flat_sizes, flat_zeros):
    zero.set_shape([None, size])
  return _packed_state(structure=state_size, state=flat_zeros)
def __call__(self, inputs, initial_state=None, dtype=None,
             sequence_length=None, scope=None):
  """Runs the wrapped cell over `inputs` and returns `(outputs, state)`.

  `inputs` may be a Python list or a single value. It is converted to the
  form the selected implementation takes (packed for `rnn.dynamic_rnn`,
  unpacked for `rnn.rnn`), and the outputs are converted back so they come
  out in the same form the inputs came in.

  Args:
    inputs: A list, or a single value that `array_ops.unpack` can split.
    initial_state: Forwarded to the underlying RNN function.
    dtype: Forwarded to the underlying RNN function.
    sequence_length: Forwarded to the underlying RNN function.
    scope: Forwarded to the underlying RNN function.

  Returns:
    A pair `(outputs, state)`.
  """
  inputs_are_list = isinstance(inputs, list)

  if not self._use_dynamic_rnn:
    # Non-dynamic rnn consumes a list.
    step_inputs = inputs if inputs_are_list else array_ops.unpack(inputs)
    outputs, state = rnn.rnn(
        self._cell,
        step_inputs,
        initial_state=initial_state,
        dtype=dtype,
        sequence_length=sequence_length,
        scope=scope)
    if not inputs_are_list:
      # Convert outputs back to tensor
      outputs = array_ops.pack(outputs)
    return outputs, state

  # dynamic_rnn consumes one packed value.
  packed_inputs = array_ops.pack(inputs) if inputs_are_list else inputs
  outputs, state = rnn.dynamic_rnn(
      self._cell,
      packed_inputs,
      sequence_length=sequence_length,
      initial_state=initial_state,
      dtype=dtype,
      time_major=True,
      scope=scope)
  if inputs_are_list:
    # Convert outputs back to list
    outputs = array_ops.unpack(outputs)
  return outputs, state
def zero_state(self, batch_size, dtype):
  """Builds an all-zero state matching `self.state_size`.

  Args:
    batch_size: int, float, or unit Tensor representing the batch size.
    dtype: the data type to use for the state.

  Returns:
    If `state_size` is a list or tuple, a tuple of `2-D` zero tensors with
    shape `[batch_size x s]` for each `s` in `state_size`. Otherwise a single
    `2-D` zero tensor of shape `[batch_size x state_size]`. The static batch
    dimension of every tensor is left as `None`.
  """
  state_size = self.state_size

  if not isinstance(state_size, (list, tuple)):
    zeros = array_ops.zeros(
        array_ops.pack([batch_size, state_size]), dtype=dtype)
    zeros.set_shape([None, state_size])
    return zeros

  per_size_zeros = []
  for size in state_size:
    per_size_zeros.append(
        array_ops.zeros(array_ops.pack([batch_size, size]), dtype=dtype))
  zeros = tuple(per_size_zeros)
  # Record the static shape separately; the batch dimension stays unknown.
  for size, zero in zip(state_size, zeros):
    zero.set_shape([None, size])
  return zeros
def testConst(self):
  """Checks that pack/stack of constant data produces a `Const` op."""
  np.random.seed(7)
  shapes = [(2,), (3,), (2, 3), (3, 2), (4, 3, 2)]
  with self.test_session(use_gpu=True):
    for shape in shapes:
      data = np.random.randn(*shape).astype(np.float32)

      # Packing a numpy array directly yields a single Const op.
      packed = array_ops.pack(data)
      self.assertEqual(packed.op.type, "Const")
      self.assertAllEqual(packed.eval(), data)

      # Python lists also work for 1-D case:
      if len(shape) == 1:
        data_list = list(data)

        from_pack = array_ops.pack(data_list)
        self.assertEqual(from_pack.op.type, "Const")
        self.assertAllEqual(from_pack.eval(), data)

        from_stack = array_ops.stack(data_list)
        self.assertEqual(from_stack.op.type, "Const")
        self.assertAllEqual(from_stack.eval(), data)

    # Verify that shape induction works with shapes produced via const pack
    flat = constant_op.constant([1, 2, 3, 4, 5, 6])

    reshaped = array_ops.reshape(flat, array_ops.pack([2, 3]))
    self.assertAllEqual(reshaped.get_shape(), [2, 3])

    reshaped = array_ops.reshape(flat, array_ops.stack([2, 3]))
    self.assertAllEqual(reshaped.get_shape(), [2, 3])
def confusion_matrix(predictions, labels, num_classes=None,
                     dtype=dtypes.int32, name=None):
  """Computes the confusion matrix from predictions and labels.

  Calculate the Confusion Matrix for a pair of prediction and
  label 1-D int arrays.

  Considering a prediction array such as: `[1, 2, 3]`
  And a label array such as: `[2, 2, 3]`

  The confusion matrix returned would be the following one:
      [[0, 0, 0, 0]
       [0, 0, 1, 0]
       [0, 0, 1, 0]
       [0, 0, 0, 1]]

  Where the matrix rows represent the prediction labels and the columns
  represent the real labels. The confusion matrix is always a 2-D array
  of shape [n, n], where n is the number of valid labels for a given
  classification task. Both prediction and labels must be 1-D arrays of
  the same shape in order for this function to work.

  Args:
    predictions: A 1-D array representing the predictions for a given
      classification.
    labels: A 1-D array representing the real labels for the classification
      task.
    num_classes: The possible number of labels the classification task can
      have. If this value is not provided, it is computed as one more than
      the largest value found in `predictions` and `labels`.
    dtype: Data type of the confusion matrix.
    name: Scope name.

  Returns:
    A k X k matrix representing the confusion matrix, where k is the number
    of possible labels in the classification task.

  Raises:
    ValueError: If both predictions and labels are not 1-D vectors and do not
      have the same size. (Presumably raised by the helper ops used below --
      this function performs no explicit check of its own.)
  """
  with ops.name_scope(name, 'confusion_matrix',
                      [predictions, labels, num_classes]) as name:
    predictions, labels = metric_ops_util.remove_squeezable_dimensions(
        ops.convert_to_tensor(
            predictions, name='predictions', dtype=dtypes.int64),
        ops.convert_to_tensor(labels, name='labels', dtype=dtypes.int64))

    if num_classes is None:
      num_classes = math_ops.maximum(math_ops.reduce_max(predictions),
                                     math_ops.reduce_max(labels)) + 1

    shape = array_ops.pack([num_classes, num_classes])
    # One (prediction, label) index pair per example.
    indices = array_ops.transpose(array_ops.pack([predictions, labels]))
    values = array_ops.ones_like(predictions, dtype)
    # A Python-int `num_classes` packs to int32, while the sparse dense shape
    # is int64; cast explicitly so both ways of supplying `num_classes` work.
    # The cast changes nothing when `shape` is already int64.
    cm_sparse = ops.SparseTensor(
        indices=indices, values=values, shape=math_ops.to_int64(shape))

    # Adding the sparse counts onto a dense zero matrix yields a dense result.
    zero_matrix = array_ops.zeros(math_ops.to_int32(shape), dtype)

    return sparse_ops.sparse_add(zero_matrix, cm_sparse)
def build_memory(self, M_prev, read_w_prev, write_w_prev, last_output):
  """Builds the read heads, the write heads and the updated memory.

  With a single head, the previous weighting is reshaped to a column and
  passed to the head builder directly. With several heads, each head is
  built from its own slice of the previous weighting and the per-head
  results are stacked.

  The new memory is `M_prev * M_erase + M_write`, where each head contributes
  an erase factor `1 - OuterProd(write_w, erase)` (the factors of all heads
  are multiplied together) and an additive term `OuterProd(write_w, write)`
  (the terms of all heads are summed).

  Args:
    M_prev: Memory from the previous step.
    read_w_prev: Previous read weighting(s); indexed per head with
      `tf.gather` when there are several read heads.
    write_w_prev: Previous write weighting(s); indexed per head with
      `tf.gather` when there are several write heads.
    last_output: Passed to `build_read_head` / `build_write_head`.

  Returns:
    A tuple `(M, read_w, write_w, read)`.
  """
  with tf.variable_scope("memory"):
    # 3.1 Reading
    if self.read_head_size == 1:
      read_w, read = self.build_read_head(
          M_prev, tf.reshape(read_w_prev, [-1, 1]), last_output, 0)
    else:
      read_w_list = []
      read_list = []
      for idx in xrange(self.read_head_size):
        read_w_prev_idx = tf.reshape(tf.gather(read_w_prev, idx), [-1, 1])
        read_w_idx, read_idx = self.build_read_head(
            M_prev, read_w_prev_idx, last_output, idx)
        read_w_list.append(tf.transpose(read_w_idx))
        read_list.append(
            tf.reshape(read_idx, [1, self.mem_size, self.mem_dim]))
      read_w = array_ops.pack(read_w_list)
      read = array_ops.pack(read_list)

    # 3.2 Writing
    if self.write_head_size == 1:
      write_w, write, erase = self.build_write_head(
          M_prev, tf.reshape(write_w_prev, [-1, 1]), last_output, 0)
      M_erase = (tf.ones([self.mem_size, self.mem_dim])
                 - OuterProd(write_w, erase))
      M_write = OuterProd(write_w, write)
    else:
      write_w_list = []
      write_list = []
      erase_list = []
      M_erases = []
      M_writes = []
      for idx in xrange(self.write_head_size):
        write_w_prev_idx = tf.reshape(tf.gather(write_w_prev, idx), [-1, 1])
        write_w_idx, write_idx, erase_idx = self.build_write_head(
            M_prev, write_w_prev_idx, last_output, idx)
        write_w_list.append(tf.transpose(write_w_idx))
        write_list.append(
            tf.reshape(write_idx, [1, self.mem_size, self.mem_dim]))
        erase_list.append(tf.reshape(erase_idx, [1, 1, self.mem_dim]))
        # Per-head erase factor is (1 - w e^T), exactly as in the single-head
        # branch above. Multiplying the ones matrix by the outer product
        # instead would keep only what should be erased.
        M_erases.append(tf.ones([self.mem_size, self.mem_dim])
                        - OuterProd(write_w_idx, erase_idx))
        M_writes.append(OuterProd(write_w_idx, write_idx))
      write_w = array_ops.pack(write_w_list)
      # `write` and `erase` are stacked for parity with the single-head branch
      # but are not part of the return value.
      write = array_ops.pack(write_list)
      erase = array_ops.pack(erase_list)
      # Erase factors combine multiplicatively, write terms additively.
      M_erase = reduce(lambda x, y: x*y, M_erases)
      M_write = tf.add_n(M_writes)

    M = M_prev * M_erase + M_write
    return M, read_w, write_w, read
def crop_to_bounding_box(image, offset_height, offset_width, target_height,
                         target_width):
  """Cuts a rectangular region out of `image`.

  The top-left corner of the result sits at `(offset_height, offset_width)`
  in `image`; the bottom-right corner sits at
  `(offset_height + target_height, offset_width + target_width)`. All
  channels are kept.

  Args:
    image: 3-D tensor with shape `[height, width, channels]`
    offset_height: Vertical coordinate of the top-left corner of the result
      in the input.
    offset_width: Horizontal coordinate of the top-left corner of the result
      in the input.
    target_height: Height of the result.
    target_width: Width of the result.

  Returns:
    3-D tensor of image with shape `[target_height, target_width, channels]`

  Raises:
    ValueError: If the shape of `image` is incompatible with the `offset_*`
      or `target_*` arguments, or either `offset_height` or `offset_width`
      is negative, or either `target_height` or `target_width` is not
      positive.
  """
  image = ops.convert_to_tensor(image, name='image')

  checks = []
  checks.extend(_Check3DImage(image, require_static=False))

  height, width, depth = _ImageDimensions(image, static_only=False)

  checks.extend(
      _assert(offset_width >= 0, ValueError, 'offset_width must be >= 0.'))
  checks.extend(
      _assert(offset_height >= 0, ValueError, 'offset_height must be >= 0.'))
  checks.extend(
      _assert(target_width > 0, ValueError, 'target_width must be > 0.'))
  checks.extend(
      _assert(target_height > 0, ValueError, 'target_height must be > 0.'))
  checks.extend(
      _assert(width >= (target_width + offset_width), ValueError,
              'width must be >= target + offset.'))
  checks.extend(
      _assert(height >= (target_height + offset_height), ValueError,
              'height must be >= target + offset.'))

  # Tie the collected checks to the image so they run before the slice.
  image = control_flow_ops.with_dependencies(checks, image)

  begin = array_ops.pack([offset_height, offset_width, 0])
  # A size of -1 keeps every channel.
  size = array_ops.pack([target_height, target_width, -1])
  cropped = array_ops.slice(image, begin, size)

  # Dimensions given as tensors are left unknown in the static shape.
  static_dims = []
  for dim in (target_height, target_width, depth):
    static_dims.append(None if is_tensor(dim) else dim)
  cropped.set_shape(static_dims)

  return cropped
def testOpsBetweenUnreachable(self):
  """`_OpsBetween` still lists the to-op when the from-op cannot reach it."""
  with ops.Graph().as_default() as g:
    # First, unrelated pack: t1 feeds only this op.
    t1 = constant(1.0)
    t2 = constant(2.0)
    _ = array_ops.pack([t1, t2])
    # Second pack, built from separate constants.
    t4 = constant(1.0)
    t5 = constant(2.0)
    t6 = array_ops.pack([t4, t5])
  # Elements of to_ops are always listed.
  expected = [t6.op]
  actual = _OpsBetween(g, [t6.op], [t1.op])
  self._assertOpListEqual(expected, actual)
def testIndexedSlicesToTensorList(self):
  """Packing IndexedSlices gives the same values as packing dense tensors."""
  with self.test_session():
    numpy_list = []
    dense_list = []
    sparse_list = []
    for _ in range(3):
      values = np.random.rand(4, 4, 4, 4).astype(np.float32)
      dense = constant_op.constant(values)
      as_slices = math_ops._as_indexed_slices(dense)
      numpy_list.append(values)
      dense_list.append(dense)
      sparse_list.append(as_slices)

    from_dense = array_ops.pack(dense_list)
    from_sparse = array_ops.pack(sparse_list)
    self.assertAllClose(from_dense.eval(), from_sparse.eval())
def sample(self, n, seed=None, name=None):
  """Sample `n` observations from the Multivariate Normal Distributions.

  Standard-normal noise is drawn, multiplied by `self._sigma_chol` with a
  batched matmul, transposed so the sample axis comes first, and shifted by
  `self.mu`.

  Args:
    n: `Scalar`, type int32, the number of observations to sample.
    seed: Python integer, the random seed.
    name: The name to give this op.

  Returns:
    samples: `[n, ...]`, a `Tensor` of `n` samples for each
      of the distributions determined by broadcasting the hyperparameters.
  """
  with ops.op_scope(
      [self._mu, self._sigma_chol, n], name, "MultivariateNormalSample"):
    # TODO(ebrevdo): Is there a better way to get broadcast_shape?
    broadcast_shape = self.mu.get_shape()
    n = ops.convert_to_tensor(n)

    # All dimensions of sigma_chol except its last two.
    chol_shape = array_ops.shape(self._sigma_chol)
    num_leading = array_ops.pack([array_ops.rank(self._sigma_chol) - 2])
    sigma_shape_left = array_ops.slice(chol_shape, [0], num_leading)

    k_n = array_ops.pack([self._k, n])
    noise_shape = array_ops.concat(0, [sigma_shape_left, k_n])
    white_samples = random_ops.random_normal(
        shape=noise_shape, mean=0, stddev=1, dtype=self._mu.dtype, seed=seed)

    correlated_samples = math_ops.batch_matmul(
        self._sigma_chol, white_samples)

    # Move the last dimension to the front
    last_axis = array_ops.pack([array_ops.rank(correlated_samples) - 1])
    leading_axes = math_ops.range(
        0, array_ops.rank(correlated_samples) - 1)
    perm = array_ops.concat(0, (last_axis, leading_axes))

    # TODO(ebrevdo): Once we get a proper tensor contraction op,
    # perform the inner product using that instead of batch_matmul
    # and this slow transpose can go away!
    correlated_samples = array_ops.transpose(correlated_samples, perm)

    samples = correlated_samples + self.mu

    # Provide some hints to shape inference
    n_val = tensor_util.constant_value(n)
    final_shape = tensor_shape.vector(n_val).concatenate(broadcast_shape)
    samples.set_shape(final_shape)

    return samples
def call(self, inputs):
  """Applies the transposed convolution, then optional bias and activation.

  The output shape handed to `nn.conv2d_transpose` is computed from the
  dynamic input shape. The static shape is then computed the same way from
  `inputs.get_shape()` and recorded on the result.

  Args:
    inputs: 4-D input tensor, laid out according to `self.data_format`
      (`'channels_first'` puts channels on axis 1, anything else on axis 3).

  Returns:
    The output tensor, after `self.activation` if one is set.
  """
  inputs_shape = array_ops.shape(inputs)
  batch_size = inputs_shape[0]
  if self.data_format == 'channels_first':
    c_axis, h_axis, w_axis = 1, 2, 3
  else:
    c_axis, h_axis, w_axis = 3, 1, 2

  height, width = inputs_shape[h_axis], inputs_shape[w_axis]
  kernel_h, kernel_w = self.kernel_size
  stride_h, stride_w = self.strides

  def get_deconv_dim(dim_size, stride_size, kernel_size, padding):
    """Output size along one spatial axis; `None` stays `None`."""
    if isinstance(dim_size, ops.Tensor):
      dim_size = math_ops.mul(dim_size, stride_size)
    elif dim_size is not None:
      dim_size *= stride_size

    if padding == 'valid' and dim_size is not None:
      dim_size += max(kernel_size - stride_size, 0)
    return dim_size

  # Infer the dynamic output shape:
  out_height = get_deconv_dim(height, stride_h, kernel_h, self.padding)
  out_width = get_deconv_dim(width, stride_w, kernel_w, self.padding)

  if self.data_format == 'channels_first':
    output_shape = (batch_size, self.filters, out_height, out_width)
    strides = (1, 1, stride_h, stride_w)
  else:
    output_shape = (batch_size, out_height, out_width, self.filters)
    strides = (1, stride_h, stride_w, 1)

  output_shape_tensor = array_ops.pack(output_shape)
  outputs = nn.conv2d_transpose(
      inputs,
      self.kernel,
      output_shape_tensor,
      strides,
      padding=self.padding.upper(),
      data_format=utils.convert_data_format(self.data_format, ndim=4))

  # Infer the static output shape:
  out_shape = inputs.get_shape().as_list()
  out_shape[c_axis] = self.filters
  out_shape[h_axis] = get_deconv_dim(
      out_shape[h_axis], stride_h, kernel_h, self.padding)
  out_shape[w_axis] = get_deconv_dim(
      out_shape[w_axis], stride_w, kernel_w, self.padding)
  outputs.set_shape(out_shape)

  # Test for presence explicitly rather than relying on the truth value of a
  # variable/tensor object; this mirrors the `activation` check below.
  # NOTE(review): assumes a disabled bias is stored as None -- confirm.
  if self.bias is not None:
    outputs = nn.bias_add(
        outputs,
        self.bias,
        data_format=utils.convert_data_format(self.data_format, ndim=4))

  if self.activation is not None:
    return self.activation(outputs)
  return outputs
def report_uninitialized_resources(resource_list=None,
                                   name="report_uninitialized_resources"):
  """Lists the handle names of resources that are not yet initialized.

  An empty result means every resource has been initialized.

  Args:
    resource_list: resources to check. If None, will use shared_resources() +
      local_resources().
    name: name for the resource-checking op.

  Returns:
    Tensor containing names of the handles of all resources which have not
    yet been initialized.
  """
  if resource_list is None:
    resource_list = shared_resources() + local_resources()

  with ops.name_scope(name):
    if resource_list:
      # 1-D boolean mask, True where the resource is NOT initialized.
      init_flags = [r.is_initialized for r in resource_list]
      uninitialized_mask = math_ops.logical_not(array_ops.pack(init_flags))
      # 1-D string tensor with every resource's handle name.
      handle_names = [s.handle.name for s in resource_list]
      names_tensor = array_ops.constant(handle_names)
      # Keep only the names whose mask entry is True.
      return array_ops.boolean_mask(names_tensor, uninitialized_mask)

    # Nothing to check: return an empty tensor so callers only need to test
    # for size 0 as the "everything is ready" signal.
    return array_ops.constant([], dtype=dtypes.string)
def __init__(self,
             logits,
             dtype=dtypes.int32,
             strict=True,
             strict_statistics=True,
             name="Categorical"):
  """Initialize Categorical distributions using class log-probabilities.

  Args:
    logits: An N-D `Tensor`, `N >= 1`, representing the log probabilities
      of a set of Categorical distributions. The first `N - 1` dimensions
      index into a batch of independent distributions and the last dimension
      indexes into the classes.
    dtype: The type of the event samples (default: int32).
    strict: Unused in this distribution.
    strict_statistics: Boolean, default True. If True, raise an exception if
      a statistic (e.g. mean/mode/etc...) is undefined for any batch member.
      If False, batch members with valid parameters leading to undefined
      statistics will return NaN for this statistic.
    name: A name for this distribution (optional).
  """
  self._strict_statistics = strict_statistics
  self._name = name
  self._dtype = dtype
  self._strict = strict
  with ops.op_scope([logits], name):
    self._logits = ops.convert_to_tensor(logits, name="logits")
    shape_of_logits = array_ops.shape(self._logits)
    # Every dimension but the last one indexes the batch.
    self._batch_rank = array_ops.size(shape_of_logits) - 1
    batch_dims = array_ops.pack([self._batch_rank])
    self._batch_shape = array_ops.slice(shape_of_logits, [0], batch_dims)
    # The size of the last dimension is the number of classes.
    self._num_classes = array_ops.gather(shape_of_logits, self._batch_rank)
def sample(self, n, seed=None, name="sample"):
  """Sample `n` observations from the Categorical distribution.

  The logits are flattened to 2-D, sampled with `random_ops.multinomial`,
  cast to `self._dtype`, and reshaped so the sample axis comes first.

  Args:
    n: 0-D.  Number of independent samples to draw for each distribution.
    seed: Random seed (optional).
    name: A name for this operation (optional).

  Returns:
    A `Tensor` of type `self._dtype` whose dynamic shape is `n` followed by
    the batch shape.
  """
  with ops.name_scope(self.name):
    with ops.op_scope([self.logits, n], name):
      n = ops.convert_to_tensor(n, name="n")

      logits = self.logits
      flat_shape = array_ops.pack([-1, self.num_classes])
      logits_2d = array_ops.reshape(logits, flat_shape)

      draws = random_ops.multinomial(logits_2d, n, seed=seed)
      draws = math_ops.cast(draws, self._dtype)

      # The draws are transposed so the sample axis comes first.
      draws_t = array_ops.transpose(draws)
      out_shape = array_ops.concat(
          0, [array_ops.expand_dims(n, 0), self.batch_shape()])
      ret = array_ops.reshape(draws_t, out_shape)

      # Static shape hint: [n] + static batch shape.
      static_n = tensor_shape.vector(tensor_util.constant_value(n))
      ret.set_shape(static_n.concatenate(self.get_batch_shape()))
      return ret
def sequence_classifier(decoding, labels, sampling_decoding=None, name=None):
  """Returns predictions and loss for sequence of predictions.

  Args:
    decoding: List of Tensors with predictions.
    labels: List of Tensors with labels.
    sampling_decoding: Optional, List of Tensor with predictions to be used
      in sampling. E.g. they shouldn't have dependency on outputs.
      If not provided, decoding is used.
    name: Operation name.

  Returns:
    Predictions and losses tensors: the per-step softmax outputs packed
    along axis 1, and the summed cross-entropy.
  """
  with ops.name_scope(name, "sequence_classifier", [decoding, labels]):
    predictions = []
    xent_list = []
    for step, logits in enumerate(decoding):
      step_xent = nn.softmax_cross_entropy_with_logits(
          logits, labels[step],
          name="sequence_loss/xent_raw{0}".format(step))
      xent_list.append(step_xent)
      # Predictions come from the sampling logits when they are supplied.
      source = sampling_decoding[step] if sampling_decoding else logits
      predictions.append(nn.softmax(source))

    xent = math_ops.add_n(xent_list, name="sequence_loss/xent")
    loss = math_ops.reduce_sum(xent, name="sequence_loss")
    return array_ops_.pack(predictions, axis=1), loss
def sample_n(self, n, seed=None, name="sample_n"):
  """Sample `n` observations from the Categorical distribution.

  The logits are flattened to 2-D, sampled with `random_ops.multinomial`,
  cast to `self._dtype`, and reshaped so the sample axis comes first.

  Args:
    n: `Scalar` `Tensor` of type `int32` or `int64`, the number of
      observations to sample.
    seed: Random seed (optional).
    name: A name for this operation (optional).

  Returns:
    A `Tensor` of type `self._dtype` whose dynamic shape is `n` followed by
    the batch shape.
  """
  with ops.name_scope(self.name):
    with ops.name_scope(name, values=[self.logits, n]):
      n = ops.convert_to_tensor(n, name="n")

      logits = self.logits
      flat_shape = array_ops.pack([-1, self.num_classes])
      logits_2d = array_ops.reshape(logits, flat_shape)

      draws = random_ops.multinomial(logits_2d, n, seed=seed)
      draws = math_ops.cast(draws, self._dtype)

      # The draws are transposed so the sample axis comes first.
      draws_t = array_ops.transpose(draws)
      out_shape = array_ops.concat(0, ([n], self.batch_shape()))
      ret = array_ops.reshape(draws_t, out_shape)

      # Static shape hint: [n] + static batch shape.
      static_n = tensor_shape.vector(tensor_util.constant_value(n))
      ret.set_shape(static_n.concatenate(self.get_batch_shape()))
      return ret
def sample(self, n, seed=None, name="sample"):
  """Sample `n` observations from the Uniform Distributions.

  Draws from `random_ops.random_uniform`, then scales by `self.range()` and
  shifts by `self.a`.

  Args:
    n: `Scalar`, type int32, the number of observations to sample.
    seed: Python integer, the random seed.
    name: The name to give this op.

  Returns:
    samples: a `Tensor` of shape `(n,) + self.batch_shape + self.event_shape`
      with values of type `self.dtype`.
  """
  with ops.name_scope(self.name):
    with ops.op_scope([self.a, self.b, n], name):
      n = ops.convert_to_tensor(n, name="n")
      static_n = tensor_util.constant_value(n)

      draw_shape = array_ops.concat(
          0, [array_ops.pack([n]), self.batch_shape()])
      samples = random_ops.random_uniform(
          shape=draw_shape, dtype=self.dtype, seed=seed)

      # Provide some hints to shape inference
      samples.set_shape(
          tensor_shape.vector(static_n).concatenate(self.get_batch_shape()))

      # A leading axis is added to both parameters before combining them
      # with the samples.
      offset = array_ops.expand_dims(self.a, 0)
      width = array_ops.expand_dims(self.range(), 0)
      return offset + width * samples
def _TopKGrad(op, grad, _):
  """Return the gradients for TopK.

  The incoming gradient is scattered back to the positions named by the
  op's index output; every other position of the input gets zero.

  Args:
    op: The TopKOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the TopKOp.

  Returns:
    A list of two tensors, the first being the gradient w.r.t to the input
    and TopK, and the second being the gradient w.r.t. to the indices (all
    zero).
  """
  in_shape = array_ops.shape(op.inputs[0])
  ind_shape = array_ops.shape(op.outputs[1])

  ind_lastdim = array_ops.gather(ind_shape, array_ops.size(ind_shape) - 1)
  # Flatten indices to 2D.
  ind_2d = array_ops.reshape(op.outputs[1], array_ops.pack([-1, ind_lastdim]))

  in_lastdim = array_ops.gather(in_shape, array_ops.size(in_shape) - 1)
  outerdim = array_ops.shape(ind_2d)[0]

  # Compute linear indices (flattened to 1D): each row of indices is offset
  # by the start of that row in the flattened input.
  row_offsets = array_ops.expand_dims(
      math_ops.range(0, outerdim * in_lastdim, in_lastdim), -1)
  ind = array_ops.reshape(ind_2d + row_offsets, [-1])

  # Substitute grad to appropriate locations and fill the rest with zeros,
  # finally reshaping it to the original input shape.
  flat_size = array_ops.reshape(math_ops.reduce_prod(in_shape), [1])
  flat_grad = array_ops.reshape(grad, [-1])
  scattered = sparse_ops.sparse_to_dense(
      ind, flat_size, flat_grad, validate_indices=False)
  input_grad = array_ops.reshape(scattered, in_shape)

  indices_grad = array_ops.zeros([], dtype=dtypes.int32)
  return [input_grad, indices_grad]
def _reverse_seq(input_seq, lengths):
  """Reverse a list of Tensors up to specified lengths.

  Args:
    input_seq: Sequence of seq_len tensors of dimension (batch_size, depth)
    lengths:   A tensor of dimension batch_size, containing lengths for each
               sequence in the batch. If "None" is specified, simply reverses
               the list.

  Returns:
    time-reversed sequence
  """
  if lengths is None:
    return list(reversed(input_seq))

  # Every step is pinned to rank 2.
  for step in input_seq:
    step.set_shape(step.get_shape().with_rank(2))

  # Join into (time, batch_size, depth)
  joined = array_ops_.pack(input_seq)
  # Reverse along dimension 0
  flipped = array_ops_.reverse_sequence(joined, lengths, 0, 1)
  # Split again into list
  return array_ops_.unpack(flipped)
def _assert_has_shape(x, shape):
  """Returns an Assert op that checks the runtime shape of `x`.

  Args:
    x: Tensor whose shape is checked; its name appears in the message.
    shape: Expected shape, as something `array_ops.pack` accepts.

  Returns:
    The `logging_ops.Assert` op. Its condition requires every dimension of
    `x` to equal the matching entry of `shape`.
  """
  actual_shape = array_ops.shape(x)
  expected_shape = array_ops.pack(shape)
  shapes_match = math_ops.reduce_all(
      math_ops.equal(actual_shape, expected_shape))
  message = [
      "Expected shape for Tensor %s is " % x.name,
      expected_shape,
      " but saw shape: ",
      actual_shape,
  ]
  return logging_ops.Assert(shapes_match, message)
def _reverse_seq(input_seq, lengths):
  """Reverse a list of Tensors up to specified lengths.

  Args:
    input_seq: Sequence of seq_len tensors of dimension (batch_size,
               n_features)
    lengths:   A tensor of dimension batch_size, containing lengths for each
               sequence in the batch. If "None" is specified, simply reverses
               the list.

  Returns:
    time-reversed sequence

  Raises:
    ValueError: Raised by `merge_with` if the static shapes of the tensors in
      `input_seq` are incompatible with one another.
  """
  if lengths is None:
    return list(reversed(input_seq))

  # `merge_with` returns the merged shape instead of updating in place, so
  # the result has to be kept. Discarding it would leave `input_shape`
  # fully unknown and make every `set_shape` below a no-op.
  input_shape = tensor_shape.unknown_shape(
      ndims=input_seq[0].get_shape().ndims)
  for input_ in input_seq:
    input_shape = input_shape.merge_with(input_.get_shape())
  # Apply the fully merged shape to every step.
  for input_ in input_seq:
    input_.set_shape(input_shape)

  # Join into (time, batch_size, depth)
  s_joined = array_ops.pack(input_seq)

  # TODO(schuster, ebrevdo): Remove cast when reverse_sequence takes int32
  # (`lengths` is known to be non-None here because of the early return.)
  lengths = math_ops.to_int64(lengths)

  # Reverse along dimension 0
  s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1)
  # Split again into list
  result = array_ops.unpack(s_reversed)
  for r in result:
    r.set_shape(input_shape)
  return result
def sample(self, n, seed=None, name="sample"):
  """Sample `n` observations from the Laplace Distributions.

  Uniform draws `u` from the open interval (-1, 1) are mapped through
  `loc - scale * sign(u) * log(1 - |u|)`.

  Args:
    n: `Scalar`, type int32, the number of observations to sample.
    seed: Python integer, the random seed.
    name: The name to give this op.

  Returns:
    samples: `[n, ...]`, a `Tensor` of `n` samples for each
      of the distributions determined by broadcasting the parameters.
  """
  with ops.name_scope(self.name):
    with ops.op_scope([self._loc, self._scale, n], name):
      n = ops.convert_to_tensor(n)
      static_n = tensor_util.constant_value(n)
      draw_shape = array_ops.concat(
          0, [array_ops.pack([n]), self.batch_shape()])

      # Sample uniformly-at-random from the open-interval (-1, 1).
      # The lower bound is the next representable value above -1.
      as_np = self.dtype.as_numpy_dtype
      lower = np.nextafter(as_np(-1.), as_np(0.))
      upper = as_np(1.)
      uniform_samples = random_ops.random_uniform(
          shape=draw_shape,
          minval=lower,
          maxval=upper,
          dtype=self.dtype,
          seed=seed)

      # Provide some hints to shape inference
      uniform_samples.set_shape(
          tensor_shape.vector(static_n).concatenate(self.get_batch_shape()))

      signed_log = (math_ops.sign(uniform_samples) *
                    math_ops.log(1. - math_ops.abs(uniform_samples)))
      return self._loc - self._scale * signed_log
def inference_graph(self, input_data, data_spec=None, **inference_args):
  """Constructs a TF graph for evaluating a random forest.

  Each tree's inference graph is built on the device assigned to it; the
  per-tree outputs are then packed and averaged on the device of tree 0.

  Args:
    input_data: A tensor or SparseTensor or placeholder for input data.
    data_spec: A list of tf.dtype values specifying the original types of
      each column. Defaults to `[constants.DATA_FLOAT]`.
    **inference_args: Keyword arguments to pass through to each tree.

  Returns:
    The last op in the random forest inference graph.
  """
  if data_spec is None:
    data_spec = [constants.DATA_FLOAT]

  per_tree = []
  for tree_index in range(self.params.num_trees):
    with ops.device(self.device_assigner.get_device(tree_index)):
      if self.params.bagged_features:
        tree_data = self._bag_features(tree_index, input_data)
      else:
        tree_data = input_data
      per_tree.append(self.trees[tree_index].inference_graph(
          tree_data, data_spec, **inference_args))

  with ops.device(self.device_assigner.get_device(0)):
    stacked = array_ops.pack(per_tree)
    # Average over the tree axis.
    summed = math_ops.reduce_sum(stacked, 0)
    return math_ops.div(summed, self.params.num_trees, name='probabilities')
def seq2seq_inputs(x, y, input_length, output_length, sentinel=None,
                   name=None):
  """Processes inputs for Sequence to Sequence models.

  Args:
    x: Input Tensor [batch_size, input_length, embed_dim].
    y: Output Tensor [batch_size, output_length, embed_dim].
    input_length: length of input x.
    output_length: length of output y.
    sentinel: optional first input to decoder and final output expected.
      If sentinel is not provided, zeros are used. Because y is not
      available at sampling time, the batch size of the sentinel is taken
      from x.
    name: Operation name.

  Returns:
    Encoder input from x, and decoder inputs and outputs from y.
  """
  with ops.op_scope([x, y], name, "seq2seq_inputs"):
    in_x = array_ops.split_squeeze(1, input_length, x)
    y = array_ops.split_squeeze(1, output_length, y)
    # "Not provided" means None. Testing the truth value of a supplied
    # Tensor is not a reliable way to detect that, so compare against None
    # explicitly.
    if sentinel is None:
      # Set to zeros of shape of y[0], using x for batch size.
      sentinel_shape = array_ops_.pack(
          [array_ops_.shape(x)[0], y[0].get_shape()[1]])
      sentinel = array_ops_.zeros(sentinel_shape)
      sentinel.set_shape(y[0].get_shape())
    # The decoder sees the sentinel first and is expected to emit it last.
    in_y = [sentinel] + y
    out_y = y + [sentinel]
    return in_x, in_y, out_y
def sample(self, n, seed=None, name="sample"):
  """Generate `n` samples.

  `self.p` is flattened to a column, paired with `1 - p` to form two-class
  probabilities, and sampled through `random_ops.multinomial` on their log.

  Args:
    n: scalar.  Number of samples to draw from each distribution.
    seed: Python integer seed for RNG.
    name: name to give to the op.

  Returns:
    samples: a `Tensor` of shape `(n,) + self.batch_shape` with values of
      type `self.dtype`.
  """
  with ops.name_scope(self.name):
    with ops.op_scope([self.p, n], name):
      n = ops.convert_to_tensor(n, name="n")

      p = self.p
      column_shape = array_ops.pack([-1, 1])
      p_2d = array_ops.reshape(p, column_shape)
      q_2d = 1. - p_2d
      # Class 0 carries probability q = 1 - p, class 1 carries p.
      probs = array_ops.concat(1, [q_2d, p_2d])

      draws = random_ops.multinomial(math_ops.log(probs), n, seed=seed)

      # The draws are transposed so the sample axis comes first.
      draws_t = array_ops.transpose(draws)
      out_shape = array_ops.concat(
          0, [array_ops.expand_dims(n, 0), self.batch_shape()])
      ret = array_ops.reshape(draws_t, out_shape)

      # Static shape hint: [n] + static batch shape.
      static_n = tensor_shape.vector(tensor_util.constant_value(n))
      ret.set_shape(static_n.concatenate(self.get_batch_shape()))
      return math_ops.cast(ret, self.dtype)
def sample(self, n, seed=None, name=None):
  """Sample `n` observations from the Exponential Distributions.

  Uniform draws `u` are mapped through `-log(u) / lam`.

  Args:
    n: `Scalar`, type int32, the number of observations to sample.
    seed: Python integer, the random seed.
    name: The name to give this op.

  Returns:
    samples: `[n, ...]`, a `Tensor` of `n` samples for each
      of the distributions determined by the hyperparameters.
  """
  broadcast_shape = self._lam.get_shape()
  with ops.op_scope([self.lam, n], name, "ExponentialSample"):
    n = ops.convert_to_tensor(n, name="n")
    shape = array_ops.concat(
        0, [array_ops.pack([n]), array_ops.shape(self._lam)])
    # Forward `seed` so that callers asking for reproducible samples get
    # them; without it the argument would be silently ignored.
    # NOTE(review): random_uniform's range includes 0, for which -log gives
    # inf -- confirm whether that needs guarding.
    sampled = random_ops.random_uniform(
        shape,
        maxval=math_ops.cast(1.0, dtype=self.dtype),
        dtype=self.dtype,
        seed=seed)

    # Provide some hints to shape inference
    n_val = tensor_util.constant_value(n)
    final_shape = tensor_shape.vector(n_val).concatenate(broadcast_shape)
    sampled.set_shape(final_shape)

    return -math_ops.log(sampled) / self._lam
def sample(self, n, seed=None, name="sample"):
  """Sample `n` observations from the Normal Distributions.

  Standard-normal noise is drawn and then scaled by `sigma` and shifted by
  `mu`.

  Args:
    n: `Scalar`, type int32, the number of observations to sample.
    seed: Python integer, the random seed.
    name: The name to give this op.

  Returns:
    samples: `[n, ...]`, a `Tensor` of `n` samples for each
      of the distributions determined by broadcasting the hyperparameters.
  """
  with ops.name_scope(self.name):
    with ops.op_scope([self._mu, self._sigma, n], name):
      # Static shape of the broadcast of mu and sigma.
      static_batch = (self._mu + self._sigma).get_shape()
      n = ops.convert_to_tensor(n)
      leading = array_ops.pack([n])
      trailing = array_ops.shape(self.mean())
      noise = random_ops.random_normal(
          shape=array_ops.concat(0, [leading, trailing]),
          mean=0, stddev=1, dtype=self._mu.dtype, seed=seed)

      # Provide some hints to shape inference
      static_n = tensor_shape.vector(tensor_util.constant_value(n))
      noise.set_shape(static_n.concatenate(static_batch))

      return noise * self._sigma + self._mu
def global_norm(t_list, name=None):
  """Computes the global norm of multiple tensors.

  Given a tuple or list of tensors `t_list`, this operation returns the
  global norm of the elements in all tensors in `t_list`. The global norm is
  computed as:

  `global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))`

  Any entries in `t_list` that are of type None are ignored.

  Args:
    t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None.
    name: A name for the operation (optional).

  Returns:
    A 0-D (scalar) `Tensor` of type `float`.

  Raises:
    TypeError: If `t_list` is not a sequence.
  """
  if (not isinstance(t_list, collections.Sequence)
      or isinstance(t_list, six.string_types)):
    raise TypeError("t_list should be a sequence")
  t_list = list(t_list)
  with ops.op_scope(t_list, name, "global_norm") as name:
    # Convert every non-None entry to a dense tensor; for IndexedSlices only
    # the `values` contribute to the norm. None entries are kept as None.
    values = [
        ops.convert_to_tensor(
            t.values if isinstance(t, ops.IndexedSlices) else t,
            name="t_%d" % i)
        if t is not None else t
        for i, t in enumerate(t_list)]
    # Filter with an explicit None check: the entries are Tensors, which must
    # not be evaluated for truthiness.
    squared_norms = array_ops.pack(
        [math_ops.reduce_sum(v * v) for v in values if v is not None])

    norm = math_ops.sqrt(
        math_ops.reduce_sum(squared_norms), name="global_norm")

  return norm
def _reverse_seq(input_seq, lengths): """Reverse a list of Tensors up to specified lengths. Args: input_seq: Sequence of seq_len tensors of dimension (batch_size, depth) lengths: A tensor of dimension batch_size, containing lengths for each sequence in the batch. If "None" is specified, simply reverses the list. Returns: time-reversed sequence """ if lengths is None: return list(reversed(input_seq)) # Join into (time, batch_size, depth) s_joined = array_ops.pack(input_seq) # Reverse along dimension 0 s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1) # Split again into list result = array_ops.unpack(s_reversed) return result
def random_flip_up_down(image, seed=None):
  """Randomly flips an image vertically (upside down).

  With a 1 in 2 chance, outputs the contents of `image` flipped along the
  first dimension, which is `height`.  Otherwise output the image as-is.

  Args:
    image: A 3-D tensor of shape `[height, width, channels].`
    seed: A Python integer. Used to create a random seed. See
      [`set_random_seed`](../../api_docs/python/constant_op.md#set_random_seed)
      for behavior.

  Returns:
    A 3-D tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` not supported.
  """
  _Check3DImage(image, require_static=False)
  coin = random_ops.random_uniform([], 0, 1.0, seed=seed)
  # Per-dimension reverse flags: only the first entry can fall below 0.5, so
  # only the first (height) dimension is ever reversed.
  thresholds = array_ops.pack([coin, 1.0, 1.0])
  flip_dims = math_ops.less(thresholds, 0.5)
  return array_ops.reverse(image, flip_dims)
def _SliceGrad(op, grad):
  """Gradient for Slice op.

  The incoming gradient is zero-padded back to the shape of the sliced input.
  An Nx2 padding matrix is built: column 0 holds the number of zeros to
  prepend per dimension (the slice `begin`), column 1 the number to append
  (input shape minus slice size minus `begin`).

  Args:
    op: The Slice op being differentiated.
    grad: Gradient with respect to the op's output.

  Returns:
    A tuple `(padded_grad, None, None)`; the second and third inputs of the
    op receive no gradient.
  """
  sliced_input = op.inputs[0]
  begin = op.inputs[1]
  rank = array_ops.rank(sliced_input)
  out_shape = array_ops.shape(op.outputs[0])
  # Each padding column is reshaped to [rank, 1] so the two can be
  # concatenated side by side.
  column_shape = array_ops.pack([rank, 1])
  pad_before = array_ops.reshape(begin, column_shape)
  trailing = array_ops.shape(sliced_input) - out_shape - begin
  pad_after = array_ops.reshape(trailing, column_shape)
  paddings = array_ops.concat(1, [pad_before, pad_after])
  return array_ops.pad(grad, paddings), None, None
def assert_variables_initialized(var_list=None):
  """Returns an Op to check if variables are initialized.

  NOTE: This function is obsolete and will be removed in 6 months.  Please
  change your implementation to use `report_uninitialized_variables()`.

  When run, the returned Op will raise the exception `FailedPreconditionError`
  if any of the variables has not yet been initialized.

  Note: This function is implemented by trying to fetch the values of the
  variables. If one of the variables is not initialized a message may be
  logged by the C++ runtime. This is expected.

  Args:
    var_list: List of `Variable` objects to check. Defaults to the
      value of `all_variables() + local_variables()`.

  Returns:
    An Op, or None if there are no variables.
  """
  if var_list is None:
    var_list = all_variables() + local_variables()
  # Backwards compatibility for old-style variables. TODO(touts): remove.
  if not var_list:
    var_list = [
        graph_op.outputs[0]
        for graph_op in ops.get_default_graph().get_operations()
        if graph_op.type in ["Variable", "AutoReloadVariable"]
    ]
  if not var_list:
    return None

  # One rank op per variable, colocated with the variable it reads.
  rank_ops = []
  for variable in var_list:
    with ops.colocate_with(variable.op):
      rank_ops.append(array_ops.rank_internal(variable, optimize=False))
  if len(rank_ops) == 1:
    return rank_ops[0]
  return array_ops.pack(rank_ops)
def _sample_n(self, n, seed=None):
  """Draw `n` samples: white normal noise, correlated via `_cov`, plus `mu`.

  Args:
    n: Number of samples to draw.
    seed: Random seed passed to `random_normal`.

  Returns:
    The correlated samples with the sample dimension moved to the front and
    `self.mu` added.
  """
  # Recall _assert_valid_mu ensures mu and self._cov have same batch shape.
  noise_shape = array_ops.concat(0, [self._cov.vector_shape(), [n]])
  noise = random_ops.random_normal(
      shape=noise_shape, mean=0., stddev=1., dtype=self.dtype, seed=seed)

  correlated = self._cov.sqrt_matmul(noise)

  # Move the last dimension to the front
  last_axis = array_ops.pack([array_ops.rank(correlated) - 1])
  other_axes = math_ops.range(0, array_ops.rank(correlated) - 1)
  perm = array_ops.concat(0, (last_axis, other_axes))

  # TODO(ebrevdo): Once we get a proper tensor contraction op,
  # perform the inner product using that instead of batch_matmul
  # and this slow transpose can go away!
  correlated = array_ops.transpose(correlated, perm)

  return correlated + self.mu
def _TopKGrad(op, grad, _):
  """Return the gradients for TopK.

  The gradient is scattered back to the positions selected by the op's index
  output; every other position of the input gradient is zero.

  Args:
    op: The TopKOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the TopKOp.

  Returns:
    A list of two tensors, the first being the gradient w.r.t to the input and
    TopK, and the second being the gradient w.r.t. to the indices (all zero).
  """
  input_shape = array_ops.shape(op.inputs[0])
  indices_shape = array_ops.shape(op.outputs[1])

  k_dim = array_ops.gather(indices_shape, array_ops.size(indices_shape) - 1)
  # Flatten indices to 2D.
  indices_2d = array_ops.reshape(op.outputs[1], array_ops.pack([-1, k_dim]))

  input_last = array_ops.gather(input_shape, array_ops.size(input_shape) - 1)
  num_rows = array_ops.shape(indices_2d)[0]
  # Compute linear indices (flattened to 1D): each row is offset by its start
  # position in the flattened input.
  row_starts = array_ops.expand_dims(
      math_ops.range(0, num_rows * input_last, input_last), -1)
  flat_indices = array_ops.reshape(indices_2d + row_starts, [-1])

  # Substitute grad to appropriate locations and fill the rest with zeros,
  # finally reshaping it to the original input shape.
  flat_grad = sparse_ops.sparse_to_dense(
      flat_indices,
      array_ops.reshape(math_ops.reduce_prod(input_shape), [1]),
      array_ops.reshape(grad, [-1]),
      validate_indices=False)
  input_grad = array_ops.reshape(flat_grad, input_shape)
  indices_grad = array_ops.zeros([], dtype=dtypes.int32)
  return [input_grad, indices_grad]
def call(self, inputs):
  """Apply the layer: `inputs` times `self.w`, plus optional bias/activation.

  Inputs whose rank is greater than 2 are flattened to 2D for the matmul and
  reshaped back afterwards, with the last dimension replaced by `self.units`.

  Args:
    inputs: Input tensor.

  Returns:
    The output tensor, passed through `self.activation` when one is set.
  """
  static_in = inputs.get_shape().as_list()
  depth = static_in[-1]
  static_out = static_in[:-1] + [self.units]
  needs_reshape = len(static_out) > 2

  if needs_reshape:
    # Reshape the input to 2D, remembering the dynamic output shape.
    dynamic_dims = array_ops.unpack(array_ops.shape(inputs))
    dynamic_dims[-1] = self.units
    dynamic_out = array_ops.pack(dynamic_dims)
    inputs = array_ops.reshape(inputs, [-1, depth])

  outputs = standard_ops.matmul(inputs, self.w)
  if self.use_bias:
    outputs = nn.bias_add(outputs, self.bias)

  if needs_reshape:
    # Reshape the output back to the original ndim of the input.
    outputs = array_ops.reshape(outputs, dynamic_out)
    outputs.set_shape(static_out)

  if self.activation is None:
    return outputs
  return self.activation(outputs)  # pylint: disable=not-callable
def tensors_to_item(self, keys_to_tensors):
  """Build a SparseTensor (optionally densified) from the keyed tensors.

  The last column of the indices tensor's `indices` is replaced by its
  `values` cast to int64.

  Args:
    keys_to_tensors: Mapping from key to tensor; read with
      `self._indices_key`, `self._values_key` and, when set,
      `self._shape_key`.

  Returns:
    A `SparseTensor`, or its dense form when `self._densify` is set.
  """
  sp_indices = keys_to_tensors[self._indices_key]
  sp_values = keys_to_tensors[self._values_key]

  # Shape precedence: keyed shape tensor, then the fixed shape, then the
  # shape of the indices tensor.
  if self._shape_key:
    shape = keys_to_tensors[self._shape_key]
    if isinstance(shape, ops.SparseTensor):
      shape = sparse_ops.sparse_tensor_to_dense(shape)
  elif self._shape:
    shape = self._shape
  else:
    shape = sp_indices.shape

  num_columns = array_ops.shape(sp_indices.indices)[1]
  last_column = math_ops.to_int64(sp_indices.values)
  # Keep every index column except the last one.
  kept_columns = array_ops.slice(
      sp_indices.indices, [0, 0], array_ops.pack([-1, num_columns - 1]))
  new_indices = array_ops.concat(
      1, [kept_columns, array_ops.reshape(last_column, [-1, 1])])

  tensor = ops.SparseTensor(new_indices, sp_values.values, shape)
  if not self._densify:
    return tensor
  return sparse_ops.sparse_tensor_to_dense(tensor, self._default_value)
def _flip_front_dims_to_back(self, x):
  """Flip x to make x.shape = chol.shape[:-1] + [M1*...*Mr]."""
  # E.g. suppose
  #   chol.shape = [N1,...,Nn, k, k]
  #   x.shape = [M1,...,Mm, N1,...,Nn, k]
  # Then we want to return x_flipped where
  #   x_flipped.shape = [N1,...,Nn, k, M1*...*Mm].
  shape_of_x = array_ops.shape(x)
  rank_of_x = array_ops.rank(x)
  # Number of leading (M) dimensions.
  num_front = rank_of_x + 1 - self.rank()
  front_dims = array_ops.slice(shape_of_x, [0], [num_front])

  # Permutation corresponding to [N1,...,Nn, k, M1,...,Mm]
  back_first = math_ops.range(num_front, rank_of_x)
  front_last = math_ops.range(0, num_front)
  x_permuted = array_ops.transpose(
      x, perm=array_ops.concat(0, (back_first, front_last)))

  # Now that things are ordered correctly, condense the last dimensions.
  # condensed_shape = [M1*...*Mm]
  condensed_shape = array_ops.pack([math_ops.reduce_prod(front_dims)])
  target_shape = array_ops.concat(0, (self.vector_shape(), condensed_shape))
  return array_ops.reshape(x_permuted, target_shape)
def __init__(self, logits, dtype=dtypes.int32, strict=True,
             name="Categorical"):
  """Initialize Categorical distributions using class log-probabilities.

  Args:
    logits: An N-D `Tensor`, `N >= 1`, representing the log probabilities
        of a set of Categorical distributions. The first `N - 1` dimensions
        index into a batch of independent distributions and the last dimension
        indexes into the classes.
    dtype: The type of the event samples (default: int32).
    strict: Unused in this distribution.
    name: A name for this distribution (optional).
  """
  self._name = name
  self._dtype = dtype
  self._strict = strict
  with ops.op_scope([logits], name):
    self._logits = ops.convert_to_tensor(logits, name="logits")
    shape = array_ops.shape(self._logits)
    # Every dimension but the last is a batch dimension.
    self._batch_rank = array_ops.size(shape) - 1
    batch_extent = array_ops.pack([self._batch_rank])
    self._batch_shape = array_ops.slice(shape, [0], batch_extent)
    # The last dimension indexes the classes.
    self._num_classes = array_ops.gather(shape, self._batch_rank)
def reverse_seq(input_seq, lengths):
  """Reverse a list of tensors along time, up to per-example lengths.

  Args:
    input_seq: Sequence of tensors, one per time step; each is expected to be
      compatible with a rank-2 shape.
    lengths: Per-example lengths passed to `reverse_sequence`, or `None` to
      simply reverse the list.

  Returns:
    The time-reversed sequence as a list.
  """
  if lengths is None:
    return list(reversed(input_seq))

  input_shape = tensor_shape.matrix(None, None)
  for input_ in input_seq:
    # Keep the merged shape: `merge_with` returns the combined shape instead
    # of updating `input_shape` in place, so discarding the result lost the
    # static shape information gathered from the inputs.
    input_shape = input_shape.merge_with(input_.get_shape())
    input_.set_shape(input_shape)

  # Join into (time, batch_size, depth)
  s_joined = array_ops.pack(input_seq)

  # TODO(schuster, ebrevdo): Remove cast when reverse_sequence takes int32
  # (`lengths` cannot be None here because of the early return above.)
  lengths = math_ops.to_int64(lengths)

  # Reverse along dimension 0
  s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1)
  # Split again into list
  result = array_ops.unpack(s_reversed)
  for r in result:
    r.set_shape(input_shape)
  return result
def RNN(x, output_w, output_b):
  """Unroll the recurrent network over `n_steps` and project the last state.

  Reads the module-level names `n_input`, `n_steps`, `n_hidden`, `periods`,
  `input_w`, `hidden_w`, `clockwork_mask` and `tanh`.

  Args:
    x: Input tensor; transposed with `[1, 0, 2]`, flattened to
      `[-1, n_input]` and split into `n_steps` pieces.
    output_w: Mapping holding the output weights under `'out_w'`.
    output_b: Mapping holding the output bias under `'out_b'`.

  Returns:
    A pair `(output, outputs)`: the projection of the last state, and the
    list of states appended at every time step.
  """
  # NOTE(review): the statement nesting below was reconstructed from a
  # whitespace-collapsed source - confirm against the original layout.
  x = tf.transpose(x, [1, 0, 2])
  x = tf.reshape(x, [-1, n_input])
  # One tensor per time step.
  x = tf.split(0, n_steps, x)

  batch_size = array_ops.shape(x[0])[0]
  state = array_ops.zeros(
      array_ops.pack([batch_size, n_hidden]), dtype="float")
  state.set_shape([None, n_hidden])

  outputs = []
  with tf.variable_scope("RNN_network") as scope:
    for time_step in range(n_steps):
      if time_step > 0:
        scope.reuse_variables()
      # The last period that divides the time step decides how many leading
      # hidden columns are updated at this step.
      for period in periods:
        if time_step % period == 0:
          group_index = period
      active_cols = group_index * n_hidden // n_steps
      WI_i = tf.matmul(x[time_step], input_w[:, 0:active_cols])
      hidden_mask = tf.mul(hidden_w, clockwork_mask)
      WH_i = tf.matmul(state, hidden_mask[:, 0:active_cols])
      y_update = tanh(tf.add(WH_i, WI_i))
      # Updated leading columns followed by the untouched remainder.
      state = tf.concat(1, [y_update, state[:, active_cols:n_hidden]])
      outputs.append(state)

  output = tf.matmul(outputs[-1], output_w['out_w'])
  output = tf.nn.bias_add(output, output_b['out_b'])
  return output, outputs
def report_uninitialized_variables(var_list=None,
                                   name="report_uninitialized_variables"):
  """Adds ops to list the names of uninitialized variables.

  When run, it returns a 1-D tensor containing the names of uninitialized
  variables if there are any, or an empty array if there are none.

  Args:
    var_list: List of `Variable` objects to check. Defaults to the
      value of `global_variables() + local_variables()`
    name: Optional name of the `Operation`.

  Returns:
    A 1-D tensor containing names of the uninitialized variables, or an empty
    1-D tensor if there are no variables or no uninitialized variables.
  """
  if var_list is None:
    var_list = global_variables() + local_variables()
  # Backwards compatibility for old-style variables. TODO(touts): remove.
  if not var_list:
    var_list = [
        graph_op.outputs[0]
        for graph_op in ops.get_default_graph().get_operations()
        if graph_op.type in ["Variable", "AutoReloadVariable"]
    ]
  with ops.name_scope(name):
    if not var_list:
      # Return an empty tensor so we only need to check for returned tensor
      # size being 0 as an indication of model ready.
      return array_ops.constant([], dtype=dtypes.string)

    # Get a 1-D boolean tensor listing whether each variable is
    # uninitialized.
    initialized_flags = array_ops.pack(
        [state_ops.is_variable_initialized(v) for v in var_list])
    uninitialized_mask = math_ops.logical_not(initialized_flags)
    # Get a 1-D string tensor containing all the variable names.
    all_names = array_ops.constant([s.op.name for s in var_list])
    # Return a 1-D tensor containing all the names of uninitialized variables.
    return array_ops.boolean_mask(all_names, uninitialized_mask)
def sample(self, n, seed=None, name="sample"):
  """Sample `n` observations from the Categorical distribution.

  Args:
    n: 0-D.  Number of independent samples to draw for each distribution.
    seed: Random seed (optional).
    name: A name for this operation (optional).

  Returns:
    An `int64` `Tensor` with shape `[n, batch_shape, event_shape]`
  """
  with ops.name_scope(self.name):
    with ops.op_scope([self.logits, n], name):
      n = ops.convert_to_tensor(n, name="n")
      # Flatten the batch so that every row is one distribution.
      flat_shape = array_ops.pack([-1, self.num_classes])
      flat_logits = array_ops.reshape(self.logits, flat_shape)
      draws = random_ops.multinomial(flat_logits, n, seed=seed)
      # Put the sample axis first, then restore the batch shape.
      draws_t = array_ops.transpose(draws)
      dynamic_shape = array_ops.concat(
          0, [array_ops.expand_dims(n, 0), self.batch_shape()])
      ret = array_ops.reshape(draws_t, dynamic_shape)
      static_n = tensor_shape.vector(tensor_util.constant_value(n))
      ret.set_shape(static_n.concatenate(self.get_batch_shape()))
      return ret
def _assert_compatible_shapes(mu, sigma):
  """Build a grouped op asserting that `mu` and `sigma` shapes are compatible.

  Three runtime checks are grouped: rank(mu) + 1 == rank(sigma); the last two
  dimensions of sigma are equal; mu.shape equals sigma.shape[:-1].

  Args:
    mu: Tensor whose shape is checked against `sigma`.
    sigma: Tensor; must expose a `name` (used in one assertion message).

  Returns:
    The op produced by `control_flow_ops.group` over the three assertions.
  """
  r_mu = array_ops.rank(mu)
  r_sigma = array_ops.rank(sigma)
  sigma_shape = array_ops.shape(sigma)
  sigma_rank = array_ops.rank(sigma)
  mu_shape = array_ops.shape(mu)

  rank_check = logging_ops.Assert(
      math_ops.equal(r_mu + 1, r_sigma),
      ["Rank of mu should be one less than rank of sigma, but saw: ",
       r_mu, " vs. ", r_sigma])

  second_to_last = array_ops.gather(sigma_shape, sigma_rank - 2)
  last = array_ops.gather(sigma_shape, sigma_rank - 1)
  square_check = logging_ops.Assert(
      math_ops.equal(second_to_last, last),
      ["Last two dimensions of sigma (%s) must be equal: " % sigma.name,
       sigma_shape])

  sigma_leading = array_ops.slice(
      sigma_shape, [0], array_ops.pack([sigma_rank - 1]))
  match_check = logging_ops.Assert(
      math_ops.reduce_all(math_ops.equal(mu_shape, sigma_leading)),
      ["mu.shape and sigma.shape[:-1] must match, but saw: ",
       mu_shape, " vs. ", sigma_shape])

  return control_flow_ops.group(rank_check, square_check, match_check)
def assert_variables_initialized(var_list=None):
  """Returns an Op to check if variables are initialized.

  When run, the returned Op will raise the exception `FailedPreconditionError`
  if any of the variables has not yet been initialized.

  Note: This function is implemented by trying to fetch the values of the
  variables. If one of the variables is not initialized a message may be
  logged by the C++ runtime. This is expected.

  Args:
    var_list: List of `Variable` objects to check. Defaults to the
      value of `all_variables().`

  Returns:
    An Op, or None if there are no variables.
  """
  if var_list is None:
    var_list = all_variables()
  # Backwards compatibility for old-style variables. TODO(mdevin): remove.
  if not var_list:
    var_list = [
        graph_op.outputs[0]
        for graph_op in ops.get_default_graph().get_operations()
        if graph_op.type in ["Variable", "AutoReloadVariable"]
    ]
  if not var_list:
    return None

  # One rank op per variable, placed on the variable's device.
  rank_ops = []
  for variable in var_list:
    with ops.device(variable.device):
      rank_ops.append(array_ops.rank(variable))
  if len(rank_ops) == 1:
    return rank_ops[0]
  return array_ops.pack(rank_ops)
def inference_graph(self, input_data, data_spec=None):
  """Constructs a TF graph for evaluating a random forest.

  Each tree's inference graph is built on the device assigned to that tree;
  the per-tree results are stacked and averaged on the device of tree 0.

  Args:
    input_data: A tensor or SparseTensor or placeholder for input data.
    data_spec: A list of tf.dtype values specifying the original types of
      each column.

  Returns:
    The last op in the random forest inference graph.
  """
  if data_spec is None:
    data_spec = [constants.DATA_FLOAT] * self.params.num_features

  per_tree = []
  for tree_index in range(self.params.num_trees):
    with ops.device(self.device_assigner.get_device(tree_index)):
      if self.params.bagged_features:
        tree_data = self._bag_features(tree_index, input_data)
      else:
        tree_data = input_data
      tree_output = self.trees[tree_index].inference_graph(
          tree_data, data_spec)
      per_tree.append(tree_output)

  with ops.device(self.device_assigner.get_device(0)):
    stacked = array_ops.pack(per_tree)
    return math_ops.reduce_sum(stacked, 0) / self.params.num_trees
def __call__(self, inputs, initial_state=None, dtype=None,
             sequence_length=None, scope=None):
  """Run this LSTM on inputs, starting from the given state.

  Args:
    inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`
      or a list of `time_len` tensors of shape `[batch_size, input_size]`.
    initial_state: a tuple `(initial_cell_state, initial_output)` with tensors
      of shape `[batch_size, self._num_units]`. If this is not provided, the
      cell is expected to create a zero initial state of type `dtype`.
    dtype: The data type for the initial state and expected output. Required
      if `initial_state` is not provided or RNN state has a heterogeneous
      dtype.
    sequence_length: Specifies the length of each sequence in inputs. An
      `int32` or `int64` vector (tensor) size `[batch_size]`, values in
      `[0, time_len).` Defaults to `time_len` for each element.
    scope: `VariableScope` for the created subgraph; defaults to class name.

  Returns:
    A pair containing:

    - Output: A `3-D` tensor of shape `[time_len, batch_size, output_size]`
      or a list of time_len tensors of shape `[batch_size, output_size]`,
      to match the type of the `inputs`.
    - Final state: a tuple `(cell_state, output)` matching `initial_state`.

  Raises:
    ValueError: in case of shape mismatches
  """
  with vs.variable_scope(scope or type(self).__name__):
    got_list = isinstance(inputs, list)
    if got_list:
      inputs = array_ops.pack(inputs)

    static_shape = inputs.get_shape().with_rank(3)
    if not static_shape[2]:
      raise ValueError("Expecting inputs_shape[2] to be set: %s" %
                       static_shape)

    # Prefer static sizes; fall back to dynamic ones when unknown.
    batch_size = static_shape[1].value
    if batch_size is None:
      batch_size = array_ops.shape(inputs)[1]
    time_len = static_shape[0].value
    if time_len is None:
      time_len = array_ops.shape(inputs)[0]

    # Provide default values for initial_state and dtype
    if initial_state is not None:
      if len(initial_state) != 2:
        raise ValueError(
            "Expecting initial_state to be a tuple with length 2 or None")
      if dtype is None:
        dtype = initial_state[0].dtype
    elif dtype is None:
      raise ValueError(
          "Either initial_state or dtype needs to be specified")
    else:
      zero_state = array_ops.zeros(
          array_ops.pack([batch_size, self.num_units]), dtype=dtype)
      initial_state = zero_state, zero_state

    # create the actual cell
    if sequence_length is not None:
      sequence_length = ops.convert_to_tensor(sequence_length)
    initial_cell_state, initial_output = initial_state  # pylint: disable=unpacking-non-sequence
    cell_states, outputs = self._call_cell(
        inputs, initial_cell_state, initial_output, dtype, sequence_length)

    if sequence_length is None:
      # No sequence_lengths used: final state is the last state
      final_cell_state = cell_states[-1]
      final_output = outputs[-1]
    else:
      # Mask out the part beyond sequence_length
      time_major_mask = array_ops.transpose(
          array_ops.sequence_mask(sequence_length, time_len, dtype=dtype),
          [1, 0])
      mask = array_ops.tile(
          array_ops.expand_dims(time_major_mask, [-1]),
          [1, 1, self.num_units])
      outputs *= mask
      # Prepend initial states to cell_states and outputs for indexing to work
      # correctly, since we want to access the last valid state at
      # sequence_length - 1, which can even be -1, corresponding to the
      # initial state.
      mod_cell_states = array_ops.concat(
          0, [array_ops.expand_dims(initial_cell_state, [0]), cell_states])
      mod_outputs = array_ops.concat(
          0, [array_ops.expand_dims(initial_output, [0]), outputs])
      final_cell_state = self._gather_states(
          mod_cell_states, sequence_length, batch_size)
      final_output = self._gather_states(
          mod_outputs, sequence_length, batch_size)

    if got_list:
      # Input was a list, so return a list
      outputs = array_ops.unpack(outputs)
    return outputs, (final_cell_state, final_output)
def _block_lstm(seq_len_max, x, w, b, cs_prev=None, h_prev=None, wci=None,
                wcf=None, wco=None, forget_bias=None, cell_clip=None,
                use_peephole=None, name=None):
  r"""Runs the fused block LSTM op over a list of per-step inputs.

  TODO(williamchan): add doc.

  Missing previous states default to zeros of shape `[batch_size, cell_size]`
  where `cell_size` is a quarter of the length of `b`. When `wci` is not
  given, `wci`, `wco` and `wcf` are all set to the same zero vector.

  Args:
    seq_len_max: A `Tensor` of type `int64`.
    x: A list of at least 1 `Tensor` objects of the same type in: `float32`.
    w: A `Tensor`. Must have the same type as `x`.
    b: A `Tensor`. Must have the same type as `x`.
    cs_prev: A `Tensor`. Must have the same type as `x`.
    h_prev: A `Tensor`. Must have the same type as `x`.
    wci: A `Tensor`. Must have the same type as `x`.
    wcf: A `Tensor`. Must have the same type as `x`.
    wco: A `Tensor`. Must have the same type as `x`.
    forget_bias: An optional `float`. Defaults to `1`.
    cell_clip: An optional `float`. Defaults to `3`.
    use_peephole: An optional `bool`. Defaults to `False`.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (i, cs, f, o, ci, co, h). Each of them is a
    list with the same number of `Tensor` objects as `x`, of the same type
    as `x`.

  Raises:
    ValueError: If `b` does not have a valid shape.
  """
  batch_size = x[0].get_shape().with_rank(2)[0].value
  cell_size4 = b.get_shape().with_rank(1)[0].value
  if cell_size4 is None:
    raise ValueError("`b` shape must not be None.")
  # Integer division: `cell_size` is used as a shape dimension below, so it
  # must stay an int rather than become a float under true division.
  cell_size = cell_size4 // 4
  zero_state = None
  if cs_prev is None or h_prev is None:
    zero_state = array_ops.constant(0,
                                    dtype=dtypes.float32,
                                    shape=[batch_size, cell_size])
  if cs_prev is None:
    cs_prev = zero_state
  if h_prev is None:
    h_prev = zero_state
  if wci is None:
    wci = array_ops.constant(0, dtype=dtypes.float32, shape=[cell_size])
    wco = wci
    wcf = wci

  # pylint: disable=protected-access
  i, cs, f, o, ci, co, h = _lstm_ops_so.block_lstm(seq_len_max=seq_len_max,
                                                   x=array_ops.pack(x),
                                                   cs_prev=cs_prev,
                                                   h_prev=h_prev,
                                                   w=w,
                                                   wci=wci,
                                                   wco=wco,
                                                   wcf=wcf,
                                                   b=b,
                                                   forget_bias=forget_bias,
                                                   cell_clip=cell_clip,
                                                   name=name,
                                                   use_peephole=use_peephole)

  # Split every stacked result back into a per-step list.
  return array_ops.unpack(i), array_ops.unpack(cs), array_ops.unpack(
      f), array_ops.unpack(o), array_ops.unpack(ci), array_ops.unpack(
          co), array_ops.unpack(h)
def _event_shape(self):
  """Return the event shape: a 1-D tensor holding `_cov`'s vector space dimension."""
  dimension = self._cov.vector_space_dimension()
  return array_ops.pack([dimension])
def fully_connected(inputs,
                    num_outputs,
                    activation_fn=nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_normalizer_fn=None,
                    weights_normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=init_ops.zeros_initializer(),
                    biases_regularizer=None,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    trainable=True,
                    scope=None):
  # Copied and modified from tensorflow-0.12.0.contrib.layer.fully_connected;
  # adds the weights_normalizer_* options.
  """Adds a fully connected layer.

  `fully_connected` creates a variable called `weights`, representing a fully
  connected weight matrix, which is multiplied by the `inputs` to produce a
  `Tensor` of hidden units. If a `weights_normalizer_fn` is provided, it is
  applied to `weights` before the multiplication. If a `normalizer_fn` is
  provided (such as `batch_norm`), it is then applied. Otherwise, if
  `normalizer_fn` is None and a `biases_initializer` is provided then a
  `biases` variable would be created and added the hidden units. Finally, if
  `activation_fn` is not `None`, it is applied to the hidden units as well.

  Note: that if `inputs` have a rank greater than 2, then `inputs` is flattened
  prior to the initial matrix multiply by `weights`.

  Args:
    inputs: A tensor of with at least rank 2 and value for the last dimension,
      i.e. `[batch_size, depth]`, `[None, None, None, channels]`.
    num_outputs: Integer or long, the number of output units in the layer.
    activation_fn: activation function, set to None to skip it and maintain
      a linear activation.
    normalizer_fn: normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
      default set to None for no normalizer function
    normalizer_params: normalization function parameters.
    weights_normalizer_fn: weights normalization function.
    weights_normalizer_params: weights normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional list of collections for all the variables or
      a dictionary containing a different list of collections per variable.
    outputs_collections: collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_scope.

  Returns:
     the tensor variable representing the result of the series of operations.

  Raises:
    ValueError: if `num_outputs` is not an integer, if x has rank less than 2
      or if its last dimension is not set.
  """
  if not isinstance(num_outputs, six.integer_types):
    # Format the offending value into the message; passing it as a second
    # argument to ValueError left the '%s' placeholder unexpanded.
    raise ValueError(
        'num_outputs should be int or long, got %s.' % (num_outputs,))
  with variable_scope.variable_scope(scope, 'fully_connected', [inputs],
                                     reuse=reuse) as sc:
    inputs = ops.convert_to_tensor(inputs)
    dtype = inputs.dtype.base_dtype
    inputs_shape = inputs.get_shape()
    num_input_units = utils.last_dimension(inputs_shape, min_rank=2)

    # Static and dynamic output shapes: the input shape with the last
    # dimension replaced by num_outputs.
    static_shape = inputs_shape.as_list()
    static_shape[-1] = num_outputs

    out_shape = array_ops.unpack(array_ops.shape(inputs), len(static_shape))
    out_shape[-1] = num_outputs

    weights_shape = [num_input_units, num_outputs]
    weights_collections = utils.get_variable_collections(
        variables_collections, 'weights')
    weights = variables.model_variable('weights',
                                       shape=weights_shape,
                                       dtype=dtype,
                                       initializer=weights_initializer,
                                       regularizer=weights_regularizer,
                                       collections=weights_collections,
                                       trainable=trainable)
    if weights_normalizer_fn is not None:
      weights_normalizer_params = weights_normalizer_params or {}
      weights = weights_normalizer_fn(weights, **weights_normalizer_params)
    if len(static_shape) > 2:
      # Reshape inputs
      inputs = array_ops.reshape(inputs, [-1, num_input_units])
    outputs = standard_ops.matmul(inputs, weights)
    if normalizer_fn is not None:
      normalizer_params = normalizer_params or {}
      outputs = normalizer_fn(outputs, **normalizer_params)
    else:
      if biases_initializer is not None:
        biases_collections = utils.get_variable_collections(
            variables_collections, 'biases')
        biases = variables.model_variable(
            'biases',
            shape=[
                num_outputs,
            ],
            dtype=dtype,
            initializer=biases_initializer,
            regularizer=biases_regularizer,
            collections=biases_collections,
            trainable=trainable)
        outputs = nn.bias_add(outputs, biases)
    if activation_fn is not None:
      outputs = activation_fn(outputs)
    if len(static_shape) > 2:
      # Reshape back outputs
      outputs = array_ops.reshape(outputs, array_ops.pack(out_shape))
      outputs.set_shape(static_shape)
    return utils.collect_named_outputs(outputs_collections,
                                       sc.original_name_scope, outputs)
def fill_lower_triangular(x, validate_args=False, name="fill_lower_triangular"):
  """Creates a (batch of) lower triangular matrix from a vector of inputs.

  If `x.get_shape()` is `[b1, b2, ..., bK, d]` then the output shape is `[b1,
  b2, ..., bK, n, n]` where `n` is such that `d = n(n+1)/2`, i.e.,
  `n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))`.

  Although the non-batch complexity is O(n^2), large constants and sub-optimal
  vectorization means the complexity of this function is 5x slower than zeroing
  out the upper triangular, i.e., `tf.matrix_band_part(X, -1, 0)`.  This
  function becomes competitive only when several matmul/cholesky/etc ops can be
  elided in constructing the input.  Example: wiring a fully connected layer as
  a covariance matrix; this function reduces the final layer by 2x and possibly
  reduces the network arch complexity considerably.  In most cases it is better
  to simply build a full matrix and zero out the upper triangular elements,
  e.g., `tril = tf.matrix_band_part(full, -1, 0)`, rather than directly
  construct a lower triangular.

  Example:

  ```python
  fill_lower_triangular([1, 2, 3, 4, 5, 6])
  # Returns: [[1, 0, 0],
  #           [2, 3, 0],
  #           [4, 5, 6]]
  ```

  For comparison, a pure numpy version of this function can be found in
  `distribution_util_test.py`, function `_fill_lower_triangular`.

  Args:
    x: `Tensor` representing lower triangular elements.
    validate_args: `Boolean`, default `False`.  Whether to ensure the shape of
      `x` can be mapped to a lower triangular matrix (controls non-static checks
      only).
    name: `String`. The name to give this op.

  Returns:
    tril: `Tensor` with lower triangular elements filled from `x`.

  Raises:
    ValueError: if `x` has a static shape which cannot be mapped to a lower
      triangular matrix.
  """
  # TODO(jvdillon): Replace this code with dedicated op when it exists.
  with ops.name_scope(name, values=(x,)):
    x = ops.convert_to_tensor(x, name="x")
    if (x.get_shape().ndims is not None and
        x.get_shape()[-1].value is not None):
      d = x.get_shape()[-1].value
      # Integer size of the last dimension, used for the reshape below.
      last_dim = d
      # d = n(n+1)/2 implies n is:
      n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))
      d_inferred = n * (n + 1) / 2
      if d != d_inferred:
        raise ValueError(
            "Input cannot be mapped to a lower triangular; "
            "n*(n+1)/2 = %d != %d" % (d_inferred, d))
      final_shape = x.get_shape()[:-1].concatenate(
          tensor_shape.TensorShape([n, n]))
    else:
      # Keep the integer-typed size for the reshape below; `d` itself is cast
      # to float32 for the square-root computation.
      last_dim = array_ops.shape(x)[-1]
      d = math_ops.cast(last_dim, dtype=dtypes.float32)
      # d = n(n+1)/2 implies n is:
      # (sqrt lives in math_ops; the original called it on `dtypes`.)
      n = math_ops.cast(0.5 * (math_ops.sqrt(1. + 8. * d) - 1.),
                        dtype=dtypes.int32)
      if validate_args:
        # NOTE(review): this compares an expression built from the int32 `n`
        # against the float32 `d` - confirm the dtypes agree at runtime.
        is_valid_input_shape = check_ops.assert_equal(
            n * (n + 1) / 2, d,
            message="Input cannot be mapped to a lower triangular.")
        n = control_flow_ops.with_dependencies([is_valid_input_shape], n)
      final_shape = x.get_shape()[:-1].concatenate(
          tensor_shape.TensorShape([None, None]))

    def tril_ids(n):
      """Internal helper to create vector of linear indices into y."""
      # Build the ids statically; chose 512 because it implies 1MiB.
      if not contrib_framework.is_tensor(n) and n <= 512:
        ids = np.arange(n**2, dtype=np.int32)
        rows = (ids / n).astype(np.int32)  # Implicit floor.
        # We need to stop incrementing the index when we encounter
        # upper-triangular elements.  The idea here is to compute the
        # lower-right number of zeros then by "symmetry" subtract this from the
        # total number of zeros, n(n-1)/2.
        # Then we note that: n(n-1)/2 - (n-r)*(n-r-1)/2 = r(2n-r-1)/2
        offset = (rows * (2 * n - rows - 1) / 2).astype(np.int32)
        # We could also zero out when (rows < cols) == (rows < ids-n*rows).
        # mask = (ids <= (n + 1) * rows).astype(np.int32)
      else:
        ids = math_ops.range(n**2)
        rows = math_ops.cast(ids / n, dtype=dtypes.int32)
        offset = math_ops.cast(rows * (2 * n - rows - 1) / 2,
                               dtype=dtypes.int32)
      return ids - offset

    # Special-case non-batch case.
    if x.get_shape().ndims == 1:
      y = array_ops.gather(x, array_ops.reshape(tril_ids(n), [n, n]))
      y = array_ops.matrix_band_part(y, -1, 0)
      y.set_shape(y.get_shape().merge_with(final_shape))
      return y

    # Make ids for each batch dim.
    if (x.get_shape().ndims is not None and
        x.get_shape()[:-1].is_fully_defined()):
      batch_shape = np.asarray(x.get_shape()[:-1].as_list(), dtype=np.int32)
      m = np.prod(batch_shape).astype(np.int32)
    else:
      batch_shape = array_ops.shape(x)[:-1]
      # reduce_prod lives in math_ops; the original called it on `array_ops`.
      m = math_ops.reduce_prod(array_ops.shape(x)[:-1])
    batch_ids = math_ops.range(m)

    # Assemble the tril_ids into batch,tril_id pairs.
    idx = array_ops.pack([
        array_ops.tile(array_ops.expand_dims(batch_ids, 1), [1, n * n]),
        array_ops.tile(array_ops.expand_dims(tril_ids(n), 0), [m, 1])
    ])
    idx = array_ops.transpose(idx, [1, 2, 0])

    # Gather up, reshape, and return.
    # Use the integer-typed `last_dim`: in the dynamic branch `d` is float32
    # and cannot be used as part of a shape.
    y = array_ops.reshape(x, [-1, last_dim])
    y = array_ops.gather_nd(y, idx)
    y = array_ops.reshape(y, array_ops.concat_v2([batch_shape, [n, n]], 0))
    y = array_ops.matrix_band_part(y, -1, 0)
    y.set_shape(y.get_shape().merge_with(final_shape))
    return y
def sparse_to_indicator(sp_input, vocab_size, name=None):
  """Builds a dense bool indicator tensor from a `SparseTensor` of ids.

  The last dimension of `sp_input` is dropped and a dimension of size
  `vocab_size`, indexed by the *values* of `sp_input`, takes its place.
  For `sp_input.shape = [D0, D1, ..., Dn, K]` the result has shape
  `[D0, D1, ..., Dn, vocab_size]`, with

      output[d_0, d_1, ..., d_n, sp_input[d_0, d_1, ..., d_n, k]] = True

  and `False` everywhere else.

  Example, for `sp_input.shape = [2, 3, 5]` holding the entries

      [0, 0, 0]: 0
      [0, 1, 0]: 10
      [1, 0, 3]: 103
      [1, 1, 2]: 150
      [1, 1, 3]: 149
      [1, 1, 4]: 150
      [1, 2, 1]: 121

  and `vocab_size = 200`, the output is a `[2, 3, 200]` bool tensor that is
  `True` only at (0, 0, 0), (0, 1, 10), (1, 0, 103), (1, 1, 149),
  (1, 1, 150) and (1, 2, 121). The id 150 occurs twice in row (1, 1):
  repeated ids are accepted and simply map to the same output cell.

  Useful for handing sparse id data to ops that only accept dense tensors.
  The input `SparseTensor` must be in row-major order.

  Args:
    sp_input: A `SparseTensor` of type `int32` or `int64`.
    vocab_size: Size of the new last dimension; callers must ensure
      `all(0 <= sp_input.values < vocab_size)`.
    name: A name prefix for the returned tensors (optional).

  Returns:
    A dense bool indicator tensor marking the positions named by the ids.

  Raises:
    TypeError: If `sp_input` is not a `SparseTensor`.
  """
  if not isinstance(sp_input, ops.SparseTensor):
    raise TypeError("Input must be a SparseTensor")

  with ops.op_scope([sp_input], name, "SparseToIndicator") as op_name:
    index_dims = array_ops.shape(sp_input.indices)
    entry_count = index_dims[0]
    input_rank = index_dims[1]

    id_values = sp_input.values
    if id_values.dtype != dtypes.int64:
      id_values = math_ops.cast(id_values, dtypes.int64)

    # Keep every index column except the last one, then append the ids as the
    # new trailing column.
    leading_size = array_ops.pack([-1, input_rank - 1])
    leading_indices = array_ops.slice(sp_input.indices, [0, 0], leading_size)
    id_column = array_ops.reshape(id_values, [-1, 1])
    indicator_indices = array_ops.concat(1, [leading_indices, id_column])

    # One `True` per sparse entry.
    indicator_values = array_ops.fill(
        array_ops.expand_dims(entry_count, 0), True)

    # Same leading dimensions as the input, with vocab_size as the last one.
    leading_shape = array_ops.slice(
        sp_input.shape, [0], array_ops.expand_dims(input_rank - 1, 0))
    indicator_shape = array_ops.concat(0, [leading_shape, [vocab_size]])

    sp_indicator = ops.SparseTensor(
        indicator_indices, indicator_values, indicator_shape)

    # Index validation is switched off on purpose: duplicate ids produce
    # duplicate indices, which are legitimate for an indicator matrix.
    return sparse_tensor_to_dense(sp_indicator,
                                  default_value=False,
                                  validate_indices=False,
                                  name=op_name)
def dynamic_distraction_m2_decoder(decoder_inputs,
                                   initial_state,
                                   distract_initial_state,
                                   attention_states,
                                   attention_states_query,
                                   cell1, cell2,
                                   distraction_cell,
                                   output_size=None,
                                   num_heads=1,
                                   loop_function=None,
                                   dtype=None,
                                   scope=None,
                                   initial_state_attention=False):
  """Two-cell RNN decoder with query attention and history-aware attention.

  Each decoding step does the following:

    1. `cell2` is run on `linear([state_1, input])` with its own state.
    2. The output of `cell2` is used to read `attention_states_query`
       (a plain softmax attention; only the first head's read is kept).
    3. The output of `cell2` together with that query read is used to read
       `attention_states`. Before the softmax, the accumulated attention
       weight each position received on earlier reads, scaled by the learned
       row vector `b_a`, is subtracted from that position's features.
    4. `cell1` is run on `linear([cell2_output, state_read])` with `state_1`.
    5. `output = tanh(linear([input, cell1_output, state_read]))`.

  Only the first head's read of `attention_states` feeds `cell1` and the
  output projection.

  The general attention scheme follows http://arxiv.org/abs/1412.7449.

  Args:
    decoder_inputs: A non-empty list of 2D Tensors [batch_size x input_size].
    initial_state: 2D Tensor. It is projected with a learned affine map to
      `cell1.output_size`; the result seeds the state of both `cell1` and
      `cell2`.
    distract_initial_state: Accepted for interface compatibility; not read by
      this function.
    attention_states: 3D Tensor [batch_size x attn_length x attn_size]. Both
      attn_length and attn_size must be statically known.
    attention_states_query: 3D Tensor holding the second attention memory
      [batch_size x attn_length_query x attn_size_query].
    cell1: Cell called as `cell1(inputs, state) -> (output, new_state)`; its
      `output_size` attribute is read.
    cell2: Cell called as `cell2(inputs, state) -> (output, new_state)`.
    distraction_cell: Accepted for interface compatibility; not read by this
      function.
    output_size: Size of the output vectors; if None, `cell1.output_size`.
    num_heads: Number of attention heads that read from each memory.
    loop_function: If not None, this function will be applied to i-th output
      in order to generate i+1-th input, and decoder_inputs will be ignored,
      except for the first element ("GO" symbol). This can be used for
      decoding, but also for training to emulate
      http://arxiv.org/abs/1506.03099.
      Signature -- loop_function(prev, i) = next
        * prev is a 2D Tensor of shape [batch_size x output_size],
        * i is an integer, the step number (when advanced control is needed),
        * next is a 2D Tensor of shape [batch_size x input_size].
    dtype: dtype handed to the variable scope (the scope's dtype is used when
      None).
    scope: VariableScope for the created subgraph; default:
      "dynamic_distraction_m2_decoder".
    initial_state_attention: If False (default), the attention history starts
      at zero. If True, both memories are first read using `initial_state`,
      which seeds the attention history before the first decoding step.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors of
        shape [batch_size x output_size].
      state: The state of `cell1` after the final time-step.

  Raises:
    ValueError: when num_heads is not positive, there are no inputs, shapes
      of attention_states are not set, or input size cannot be inferred
      from the input.
  """
  # `not` (rather than `is None`) also rejects an empty list, which would
  # otherwise fail below on `decoder_inputs[0]`.
  if not decoder_inputs:
    raise ValueError("Must provide at least 1 input to attention decoder.")
  if num_heads < 1:
    raise ValueError("With less than 1 heads, use a non-attention decoder.")
  if attention_states.get_shape()[2].value is None:
    raise ValueError("Shape[2] of attention_states must be known: %s"
                     % attention_states.get_shape())
  # The attention history is kept as one tensor per memory position, built
  # with a Python loop, so the number of positions has to be static.
  if attention_states.get_shape()[1].value is None:
    raise ValueError("Shape[1] of attention_states must be known: %s"
                     % attention_states.get_shape())
  if output_size is None:
    output_size = cell1.output_size

  with variable_scope.variable_scope(
      scope or "dynamic_distraction_m2_decoder", dtype=dtype) as scope:
    dtype = scope.dtype

    batch_size = array_ops.shape(decoder_inputs[0])[0]  # Needed for reshaping.
    attn_length_state = attention_states.get_shape()[1].value
    attn_length_query = attention_states_query.get_shape()[1].value

    dim_1 = initial_state.get_shape()[1].value
    dim_2 = cell1.output_size
    project_initial_state_W = variable_scope.get_variable(
        "Initial_State_W", [dim_1, dim_2])
    project_initial_state_B = variable_scope.get_variable(
        "Initial_State_Bias", [dim_2])

    if attn_length_query is None:
      # The query memory is only ever reshaped, so a dynamic length is fine.
      attn_length_query = array_ops.shape(attention_states_query)[1]

    attn_size_state = attention_states.get_shape()[2].value
    attn_size_query = attention_states_query.get_shape()[2].value

    # Learned scale applied to the attention history of each position.
    b_a = variable_scope.get_variable("b_a", [1, attn_size_state])

    # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before.
    hidden_states = array_ops.reshape(
        attention_states, [-1, attn_length_state, 1, attn_size_state])
    hidden_states_query = array_ops.reshape(
        attention_states_query, [-1, attn_length_query, 1, attn_size_query])

    attention_vec_size_state = attn_size_state  # Size of query vectors.
    hidden_features_states = []
    v_state = []
    for head in range(num_heads):
      k = variable_scope.get_variable(
          "AttnW_State_%d" % head,
          [1, 1, attn_size_state, attention_vec_size_state])
      hidden_features_states.append(
          nn_ops.conv2d(hidden_states, k, [1, 1, 1, 1], "SAME"))
      v_state.append(variable_scope.get_variable(
          "AttnV_State_%d" % head, [attention_vec_size_state]))

    attention_vec_size_query = attn_size_query  # Size of query vectors.
    hidden_features_query = []
    v_query = []
    for head in range(num_heads):
      k = variable_scope.get_variable(
          "AttnW_Query_%d" % head,
          [1, 1, attn_size_query, attention_vec_size_query])
      hidden_features_query.append(
          nn_ops.conv2d(hidden_states_query, k, [1, 1, 1, 1], "SAME"))
      v_query.append(variable_scope.get_variable(
          "AttnV_Query_%d" % head, [attention_vec_size_query]))

    state_1 = (math_ops.matmul(initial_state, project_initial_state_W) +
               project_initial_state_B)
    state_2 = state_1

    # Attention history: one [batch_size] tensor per memory position holding
    # the attention weight that position has accumulated so far.
    prev_states = [array_ops.zeros([batch_size])
                   for _ in range(attn_length_state)]

    def _flatten_query(query):
      """Concatenates a (possibly nested) query into a single 2D tensor."""
      if nest.is_sequence(query):  # If the query is a tuple, flatten it.
        query_list = nest.flatten(query)
        for q in query_list:  # Check that ndims == 2 if specified.
          ndims = q.get_shape().ndims
          if ndims:
            assert ndims == 2
        query = array_ops.concat(1, query_list)
      return query

    def attention(query, prev_states, b_a):
      """Reads attention_states, discounting previously attended positions.

      Returns the per-head reads and the updated attention history (a list
      with one [batch_size] tensor per memory position).
      """
      ds = []  # Results of attention reads will be stored here.
      query = _flatten_query(query)
      for head in range(num_heads):
        with variable_scope.variable_scope("Attention_%d" % head):
          y = linear(query, attention_vec_size_state, True)
          y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size_state])
          new_states = array_ops.squeeze(hidden_features_states[head] + y, [2])
          new_states_list = array_ops.unpack(new_states, axis=1)
          # Subtract history * b_a from the features of every position.
          distract_states_list = []
          for i, new_state in enumerate(new_states_list):
            history = array_ops.reshape(prev_states[i], [-1, 1])
            distract_states_list.append(
                new_state - math_ops.matmul(history, b_a))
          distract_states = array_ops.pack(distract_states_list, axis=1)
          # Attention mask is a softmax of v^T * tanh(...).
          s = math_ops.reduce_sum(
              v_state[head] * math_ops.tanh(distract_states), [2])
          attn_weights = nn_ops.softmax(s)
          # Fold the new weights into the history and go straight back to the
          # per-position list form, so a further head can still index it by
          # position.
          prev_states = array_ops.unpack(
              array_ops.pack(prev_states, axis=1) + attn_weights, axis=1)
          # Now calculate the attention-weighted vector d.
          d = math_ops.reduce_sum(
              array_ops.reshape(attn_weights, [-1, attn_length_state, 1, 1]) *
              hidden_states, [1, 2])
          ds.append(array_ops.reshape(d, [-1, attn_size_state]))
      return ds, prev_states

    def attention_query(query):
      """Reads attention_states_query; returns the first head's read."""
      ds = []  # Results of attention reads will be stored here.
      query = _flatten_query(query)
      for head in range(num_heads):
        with variable_scope.variable_scope("Attention_Query_%d" % head):
          y = linear(query, attention_vec_size_query, True)
          y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size_query])
          # Attention mask is a softmax of v^T * tanh(...).
          s = math_ops.reduce_sum(
              v_query[head] * math_ops.tanh(hidden_features_query[head] + y),
              [2, 3])
          attn_weights = nn_ops.softmax(s)
          # Now calculate the attention-weighted vector d.
          d = math_ops.reduce_sum(
              array_ops.reshape(attn_weights, [-1, attn_length_query, 1, 1]) *
              hidden_states_query, [1, 2])
          ds.append(array_ops.reshape(d, [-1, attn_size_query]))
      return ds[0]

    outputs = []
    prev = None

    if initial_state_attention:
      attns_query = attention_query(initial_state)
      attns_state, prev_states = attention(
          [initial_state, attns_query], prev_states, b_a)

    for i, inp in enumerate(decoder_inputs):
      if i > 0:
        variable_scope.get_variable_scope().reuse_variables()
      # If loop_function is set, we use it instead of decoder_inputs.
      if loop_function is not None and prev is not None:
        with variable_scope.variable_scope("loop_function", reuse=True):
          inp = loop_function(prev, i)
      input_size = inp.get_shape().with_rank(2)[1]
      if input_size.value is None:
        raise ValueError("Could not infer input size from input: %s" % inp.name)

      with variable_scope.variable_scope("Cell2"):
        input_2 = linear([state_1] + [inp], input_size, True)
        output_2, state_2 = cell2(input_2, state_2)

      # Run the attention mechanism.
      if i == 0 and initial_state_attention:
        # The attention variables were already created by the pre-loop read.
        with variable_scope.variable_scope(
            variable_scope.get_variable_scope(), reuse=True):
          attns_query = attention_query(output_2)
          # NOTE(review): the original referenced an undefined name `state`
          # here; `output_2` mirrors the non-initial branch below -- confirm.
          attns_state, prev_states = attention(
              [output_2, attns_query], prev_states, b_a)
      else:
        attns_query = attention_query(output_2)
        attns_state, prev_states = attention(
            [output_2, attns_query], prev_states, b_a)

      with variable_scope.variable_scope("AttnOutputProjection"):
        with variable_scope.variable_scope("Cell1"):
          input_1 = linear([output_2] + [attns_state[0]], input_size, True)
          output_1, state_1 = cell1(input_1, state_1)
        output = math_ops.tanh(
            linear([inp] + [output_1] + [attns_state[0]], output_size, True))

      if loop_function is not None:
        prev = output
      outputs.append(output)

  return outputs, state_1
def legacy_fully_connected(x,
                           num_output_units,
                           activation_fn=None,
                           weight_init=initializers.xavier_initializer(),
                           bias_init=init_ops.zeros_initializer,
                           name=None,
                           weight_collections=(ops.GraphKeys.WEIGHTS,),
                           bias_collections=(ops.GraphKeys.BIASES,),
                           output_collections=(ops.GraphKeys.ACTIVATIONS,),
                           trainable=True,
                           weight_regularizer=None,
                           bias_regularizer=None):
  # pylint: disable=anomalous-backslash-in-string
  r"""Creates the variables of a fully connected layer and applies it to `x`.

  The layer computes `y = f(w * x + b)`, with `f` given by `activation_fn`.
  When `activation_fn` is `None` the affine result `y = w * x + b` is
  returned unchanged.

  For an input of shape
  [\\\(\\text{dim}_0, \\text{dim}_1, ..., \\text{dim}_n\\\)] with more than
  two dimensions (\\\(n > 1\\\)), the matrix multiply is repeated along the
  leading dimensions. The result r has shape
  [\\\(\\text{dim}_0, ..., \\text{dim}_{n-1},\\\) `num_output_units`], where
  \\\( r_{i_0, ..., i_{n-1}, k} =
  \\sum_{0 \\leq j < \\text{dim}_n} x_{i_0, ... i_{n-1}, j} \cdot w_{j, k}\\\).
  Internally `x` is reshaped to the 2-D shape
  [\\\(\\text{dim}_0 \\cdot ... \\cdot \\text{dim}_{n-1}, \\text{dim}_n\\\)]
  for the multiply and the product is reshaped back to
  [\\\(\\text{dim}_0, ..., \\text{dim}_{n-1},\\\) `num_output_units`].

  The op creates `w` and, unless `bias_init` is `None`, `b`. Variables are
  obtained through `get_variable`, so they can be shared with
  `tf.variable_scope` or `tf.make_template`. Initializers (`weight_init`,
  `bias_init`) and target collections (`weight_collections`,
  `bias_collections`) are configurable; the `VARIABLES` collection is always
  added to both. The layer output can be registered in custom collections
  through `output_collections`. The collection arguments default to
  `WEIGHTS`, `BIASES` and `ACTIVATIONS` respectively.

  Per-layer regularization is available through `weight_regularizer` and
  `bias_regularizer`, which are applied to the weights and biases
  respectively and whose output is added to the `REGULARIZATION_LOSSES`
  collection.

  Args:
    x: The input `Tensor`.
    num_output_units: The size of the output.
    activation_fn: A function taking a single Tensor, applied as the
      non-linearity. `None` skips the activation.
    weight_init: An optional weight initialization, defaults to
      `xavier_initializer`.
    bias_init: An initializer for the bias, defaults to 0. Set to `None` in
      order to disable bias.
    name: The name for this operation, used to name operations and to find
      variables. If specified it must be unique for this scope, otherwise a
      unique name starting with "fully_connected" will be created. See
      `tf.variable_op_scope` for details.
    weight_collections: List of graph collections to which weights are added.
    bias_collections: List of graph collections to which biases are added.
    output_collections: List of graph collections to which outputs are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    weight_regularizer: A regularizer like the result of `l1_regularizer` or
      `l2_regularizer`. Used for weights.
    bias_regularizer: A regularizer like the result of `l1_regularizer` or
      `l2_regularizer`. Used for biases.

  Returns:
    The output of the fully connected layer.

  Raises:
    ValueError: if x has rank less than 2 or if its last dimension is not set.
  """
  with variable_scope.variable_op_scope([x], name, 'fully_connected'):
    static_dims = x.get_shape().dims
    if static_dims is None:
      raise ValueError('dims of x must be known but is None')
    rank = len(static_dims)
    if rank < 2:
      raise ValueError('rank of x must be at least 2 not: %d' % rank)
    num_input_units = static_dims[-1].value
    if num_input_units is None:
      raise ValueError('last dimension of x must be known but is None')

    dtype = x.dtype.base_dtype
    # Inputs of rank > 2 are flattened for the multiply and restored after.
    is_batched = rank > 2

    weight_keys = list(weight_collections or [])
    weight_keys.append(ops.GraphKeys.VARIABLES)
    w = variable_scope.get_variable('weights',
                                    shape=[num_input_units, num_output_units],
                                    dtype=dtype,
                                    initializer=weight_init,
                                    collections=set(weight_keys),
                                    regularizer=weight_regularizer,
                                    trainable=trainable)

    if is_batched:
      flat_x = array_ops.reshape(x, [-1, num_input_units])
    else:
      flat_x = x
    y = standard_ops.matmul(flat_x, w)

    if bias_init is not None:
      bias_keys = list(bias_collections or [])
      bias_keys.append(ops.GraphKeys.VARIABLES)
      b = variable_scope.get_variable('bias',
                                      shape=[num_output_units],
                                      dtype=dtype,
                                      initializer=bias_init,
                                      collections=set(bias_keys),
                                      regularizer=bias_regularizer,
                                      trainable=trainable)
      y = nn.bias_add(y, b)

    if is_batched:
      # Restore the leading dimensions, swapping the last one for the output
      # size, both dynamically (reshape) and statically (set_shape).
      runtime_dims = array_ops.unpack(array_ops.shape(x))
      runtime_dims[-1] = num_output_units
      y = array_ops.reshape(y, array_ops.pack(runtime_dims))

      known_dims = x.get_shape().as_list()
      known_dims[-1] = num_output_units
      y.set_shape(known_dims)

    return _apply_activation(y, activation_fn, output_collections)
def _compute_sampled_logits(weights, biases, inputs, labels, num_sampled,
                            num_classes, num_true=1,
                            sampled_values=None,
                            subtract_log_q=True,
                            remove_accidental_hits=False,
                            partition_strategy="mod",
                            name=None):
  """Shared implementation behind nce_loss and sampled_softmax_loss.

  Produces the logits and labels for the true classes plus a set of sampled
  classes, as needed by e.g. noise-contrastive estimation (see nce_loss) or
  sampled softmax (see sampled_softmax_loss).

  Note: when num_true > 1, every target class gets target probability
  1 / num_true, so the target probabilities of an example sum to 1.

  Args:
    weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor`
        objects whose concatenation along dimension 0 has shape
        `[num_classes, dim]`.  The (possibly-partitioned) class embeddings.
    biases: A `Tensor` of shape `[num_classes]`.  The class biases.
    inputs: A `Tensor` of shape `[batch_size, dim]`.  The forward
        activations of the input network.
    labels: A `Tensor` of type `int64` and shape `[batch_size,
        num_true]`. The target classes.  Note that this format differs from
        the `labels` argument of `nn.softmax_cross_entropy_with_logits`.
        Other dtypes are cast to `int64`.
    num_sampled: An `int`.  The number of classes to randomly sample per batch.
    num_classes: An `int`. The number of possible classes.
    num_true: An `int`.  The number of target classes per training example.
    sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`,
        `sampled_expected_count`) returned by a `*_candidate_sampler` function.
        (if None, we default to `log_uniform_candidate_sampler`)
    subtract_log_q: A `bool`.  Whether to subtract the log of the expected
        counts from the true and sampled logits.  Default is True.  Turn off
        for Negative Sampling.
    remove_accidental_hits:  A `bool`.  Whether to remove "accidental hits"
        where a sampled class equals one of the target classes.  Default is
        False.
    partition_strategy: A string specifying the partitioning strategy, relevant
        if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
        Default is `"mod"`. See `tf.nn.embedding_lookup` for more details.
    name: A name for the operation (optional).

  Returns:
    out_logits, out_labels: `Tensor` objects each with shape
        `[batch_size, num_true + num_sampled]`, for passing to either
        `nn.sigmoid_cross_entropy_with_logits` (NCE) or
        `nn.softmax_cross_entropy_with_logits` (sampled softmax).
  """
  weight_shards = weights if isinstance(weights, list) else [weights]

  with ops.op_scope(
      weight_shards + [biases, inputs, labels], name,
      "compute_sampled_logits"):
    if labels.dtype != dtypes.int64:
      labels = math_ops.cast(labels, dtypes.int64)
    flat_labels = array_ops.reshape(labels, [-1])

    # Draw the negative classes unless the caller supplied them.
    #   sampled:                [num_sampled]
    #   true_expected_count:    [batch_size, 1]
    #   sampled_expected_count: [num_sampled]
    if sampled_values is None:
      sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler(
          true_classes=labels,
          num_true=num_true,
          num_sampled=num_sampled,
          unique=True,
          range_max=num_classes)
    # NOTE: pylint cannot tell that 'sampled_values' is a sequence
    # pylint: disable=unpacking-non-sequence
    sampled, true_expected_count, sampled_expected_count = sampled_values
    # pylint: enable=unpacking-non-sequence

    # Look up true and sampled classes together: the first
    # batch_size * num_true rows belong to the labels, the rest to `sampled`.
    combined_ids = array_ops.concat(0, [flat_labels, sampled])
    combined_w = embedding_ops.embedding_lookup(
        weight_shards, combined_ids, partition_strategy=partition_strategy)
    combined_b = embedding_ops.embedding_lookup(biases, combined_ids)

    # True-class rows: weights [batch_size * num_true, dim] and biases
    # [batch_size * num_true].
    true_w_size = array_ops.pack([array_ops.shape(flat_labels)[0], -1])
    true_w = array_ops.slice(combined_w, [0, 0], true_w_size)
    true_b = array_ops.slice(combined_b, [0], array_ops.shape(flat_labels))

    # Element-wise product of each input row with its true-class weights:
    # [batch_size, 1, dim] * [batch_size, num_true, dim].
    dim = array_ops.shape(true_w)[1:2]
    per_example_w_shape = array_ops.concat(0, [[-1, num_true], dim])
    row_wise_dots = math_ops.mul(
        array_ops.expand_dims(inputs, 1),
        array_ops.reshape(true_w, per_example_w_shape))
    # Summing over dim and adding the biases gives the [batch_size, num_true]
    # true logits.
    flat_dots = array_ops.reshape(row_wise_dots,
                                  array_ops.concat(0, [[-1], dim]))
    true_logits = array_ops.reshape(_sum_rows(flat_dots), [-1, num_true])
    true_b = array_ops.reshape(true_b, [-1, num_true])
    true_logits = true_logits + true_b

    # Sampled-class rows: weights [num_sampled, dim], biases [num_sampled].
    sampled_w_begin = array_ops.pack([array_ops.shape(flat_labels)[0], 0])
    sampled_w = array_ops.slice(combined_w, sampled_w_begin, [-1, -1])
    sampled_b = array_ops.slice(
        combined_b, array_ops.shape(flat_labels), [-1])

    # X * W' + B, giving [batch_size, num_sampled].
    sampled_logits = math_ops.matmul(inputs,
                                     sampled_w,
                                     transpose_b=True) + sampled_b

    if remove_accidental_hits:
      acc_indices, acc_ids, acc_weights = (
          candidate_sampling_ops.compute_accidental_hits(
              labels, sampled, num_true=num_true))

      # This is how SparseToDense expects the indices.
      acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1])
      acc_ids_2d_int32 = array_ops.reshape(
          math_ops.cast(acc_ids, dtypes.int32), [-1, 1])
      sparse_indices = array_ops.concat(
          1, [acc_indices_2d, acc_ids_2d_int32], "sparse_indices")
      # Create sampled_logits_shape = [batch_size, num_sampled]
      sampled_logits_shape = array_ops.concat(
          0,
          [array_ops.shape(labels)[:1], array_ops.expand_dims(num_sampled, 0)])
      if sampled_logits.dtype != acc_weights.dtype:
        acc_weights = math_ops.cast(acc_weights, sampled_logits.dtype)
      sampled_logits = sampled_logits + sparse_ops.sparse_to_dense(
          sparse_indices, sampled_logits_shape, acc_weights,
          default_value=0.0, validate_indices=False)

    if subtract_log_q:
      # Subtract log of Q(l), prior probability that l appears in sampled.
      true_logits = true_logits - math_ops.log(true_expected_count)
      sampled_logits = sampled_logits - math_ops.log(sampled_expected_count)

    # Output columns: the true classes first, then the sampled classes.
    out_logits = array_ops.concat(1, [true_logits, sampled_logits])
    # Each true class is labelled 1 / num_true so an example's labels sum to
    # 1.0; sampled classes are labelled 0.
    true_labels = array_ops.ones_like(true_logits) / num_true
    sampled_labels = array_ops.zeros_like(sampled_logits)
    out_labels = array_ops.concat(1, [true_labels, sampled_labels])

  return out_logits, out_labels
def attention_decoder(decoder_inputs, initial_state, attention_states, cell,
                      output_size=None, num_heads=1, loop_function=None,
                      dtype=dtypes.float32, scope=None,
                      initial_state_attention=False):
  """RNN decoder with attention for the sequence-to-sequence model.

  Args:
    decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
    initial_state: 2D Tensor [batch_size x cell.state_size].
    attention_states: 3D Tensor [batch_size x attn_length x attn_size]. Both
      attn_length and attn_size must be statically known.
    cell: rnn_cell.RNNCell defining the cell function and size.
    output_size: size of the output vectors; if None, we use cell.output_size.
    num_heads: number of attention heads that read from attention_states.
    loop_function: if not None, this function will be applied to i-th output
      in order to generate i+1-th input, and decoder_inputs will be ignored,
      except for the first element ("GO" symbol). This can be used for decoding,
      but also for training to emulate http://arxiv.org/pdf/1506.03099v2.pdf.
      Signature -- loop_function(prev, i) = next
        * prev is a 2D Tensor of shape [batch_size x cell.output_size],
        * i is an integer, the step number (when advanced control is needed),
        * next is a 2D Tensor of shape [batch_size x cell.input_size].
    dtype: The dtype of the zero-initialized attention vectors
      (default: tf.float32).
    scope: VariableScope for the created subgraph; default: "attention_decoder".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states -- useful when we wish to resume decoding from a previously
      stored decoder state and attention states.

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors of shape
      [batch_size x output_size]. These represent the generated outputs.
      Output i is computed from input i (which is either i-th decoder_inputs or
      loop_function(output {i-1}, i)) as follows. First, we run the cell
      on a combination of the input and previous attention masks:
        cell_output, new_state = cell(linear(input, prev_attn), prev_state).
      Then, we calculate new attention masks:
        new_attn = softmax(V^T * tanh(W * attention_states + U * new_state))
      and then we calculate the output:
        output = linear(cell_output, new_attn).
    state: The state of each decoder cell the final time-step.
      It is a 2D Tensor of shape [batch_size x cell.state_size].

  Raises:
    ValueError: when num_heads is not positive, there are no inputs, or shapes
      of attention_states are not set.
  """
  if not decoder_inputs:
    raise ValueError("Must provide at least 1 input to attention decoder.")
  if num_heads < 1:
    raise ValueError("With less than 1 heads, use a non-attention decoder.")
  # Slice [1:3] covers dimensions 1 AND 2; both are needed below to size the
  # reshape and the attention variables.
  if not attention_states.get_shape()[1:3].is_fully_defined():
    raise ValueError("Shape[1] and [2] of attention_states must be known: %s"
                     % attention_states.get_shape())
  if output_size is None:
    output_size = cell.output_size

  with vs.variable_scope(scope or "attention_decoder"):
    batch_size = array_ops.shape(decoder_inputs[0])[0]  # Needed for reshaping.
    attn_length = attention_states.get_shape()[1].value
    attn_size = attention_states.get_shape()[2].value

    # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before.
    hidden = array_ops.reshape(
        attention_states, [-1, attn_length, 1, attn_size])
    hidden_features = []
    v = []
    attention_vec_size = attn_size  # Size of query vectors for attention.
    for head in range(num_heads):
      k = vs.get_variable("AttnW_%d" % head,
                          [1, 1, attn_size, attention_vec_size])
      hidden_features.append(nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME"))
      v.append(vs.get_variable("AttnV_%d" % head, [attention_vec_size]))

    state = initial_state

    def attention(query):
      """Put attention masks on hidden using hidden_features and query."""
      ds = []  # Results of attention reads will be stored here.
      for head in range(num_heads):
        with vs.variable_scope("Attention_%d" % head):
          y = rnn_cell.linear(query, attention_vec_size, True)
          y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
          # Attention mask is a softmax of v^T * tanh(...).
          s = math_ops.reduce_sum(
              v[head] * math_ops.tanh(hidden_features[head] + y), [2, 3])
          attn_weights = nn_ops.softmax(s)
          # Now calculate the attention-weighted vector d.
          d = math_ops.reduce_sum(
              array_ops.reshape(attn_weights, [-1, attn_length, 1, 1]) * hidden,
              [1, 2])
          ds.append(array_ops.reshape(d, [-1, attn_size]))
      return ds

    outputs = []
    prev = None
    batch_attn_size = array_ops.pack([batch_size, attn_size])
    attns = [array_ops.zeros(batch_attn_size, dtype=dtype)
             for _ in range(num_heads)]
    for attn in attns:  # Ensure the second shape of attention vectors is set.
      attn.set_shape([None, attn_size])
    if initial_state_attention:
      attns = attention(initial_state)

    for i, inp in enumerate(decoder_inputs):
      if i > 0:
        vs.get_variable_scope().reuse_variables()
      # If loop_function is set, we use it instead of decoder_inputs.
      if loop_function is not None and prev is not None:
        with vs.variable_scope("loop_function", reuse=True):
          inp = array_ops.stop_gradient(loop_function(prev, i))
      # Merge input and previous attentions into one vector of the right size.
      x = rnn_cell.linear([inp] + attns, cell.input_size, True)
      # Run the RNN.
      cell_output, state = cell(x, state)
      # Run the attention mechanism.
      if i == 0 and initial_state_attention:
        # The attention variables already exist from the initial read above,
        # so they must be reused rather than created.
        with vs.variable_scope(vs.get_variable_scope(), reuse=True):
          attns = attention(state)
      else:
        attns = attention(state)

      with vs.variable_scope("AttnOutputProjection"):
        output = rnn_cell.linear([cell_output] + attns, output_size, True)
      if loop_function is not None:
        # We do not propagate gradients over the loop function.
        prev = array_ops.stop_gradient(output)
      outputs.append(output)

  return outputs, state
def safe_embedding_lookup_sparse(embedding_weights,
                                 sparse_ids,
                                 sparse_weights=None,
                                 combiner=None,
                                 default_id=None,
                                 name=None,
                                 partition_strategy="div"):
  """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding in `embedding_weights` must all be the same shape
  except for the first dimension. The first dimension is allowed to vary as the
  vocabulary size is not necessarily a multiple of `P`.  `embedding_weights`
  may be a `PartitionedVariable` as returned by using `tf.get_variable()` with a
  partitioner.

  Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding vector
  for `default_id` is returned, or the 0-vector if `default_id` is not supplied.

  The ids and weights may be multi-dimensional. Embeddings are always aggregated
  along the last dimension.

  Args:
    embedding_weights:  A list (or tuple) of `P` float tensors or values
      representing partitioned embedding tensors.  Alternatively, a
      `PartitionedVariable`, created by partitioning along dimension 0.  A
      single unpartitioned tensor or value is also accepted and is treated as
      a one-element partition list.  The total unpartitioned shape should be
      `[e_0, e_1, ..., e_m]`, where `e_0` represents the vocab size and
      `e_1, ..., e_m` are the embedding dimensions.
    sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
      ids. `d_0` is typically batch size.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
      float weights corresponding to `sparse_ids`, or `None` if all weights
      are to be assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
      entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
      the default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
      Currently `"div"` and `"mod"` are supported. Default is `"div"`.

  Returns:
    Dense tensor of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`.

  Raises:
    ValueError: if `embedding_weights` is `None` or empty.
  """
  if combiner is None:
    logging.warn("The default value of combiner will change from \"mean\" "
                 "to \"sqrtn\" after 2016/11/01.")
    combiner = "mean"

  if embedding_weights is None:
    raise ValueError("Missing embedding_weights %s." % (embedding_weights,))
  # Normalize every accepted input form to a plain list *before* checking for
  # emptiness, so that the check and the error message never operate on a
  # tuple (which `%` would unpack) or on an object without `len()`.
  if isinstance(embedding_weights, (list, tuple)):
    embedding_weights = list(embedding_weights)
  elif isinstance(embedding_weights, variables.PartitionedVariable):
    embedding_weights = list(embedding_weights)  # Underlying partitions.
  else:
    # A single (unpartitioned) embedding tensor or value.
    embedding_weights = [embedding_weights]
  if not embedding_weights:
    raise ValueError("Missing embedding_weights %s." % (embedding_weights,))

  # Convert the partitions using the weights' dtype when weights are supplied.
  dtype = sparse_weights.dtype if sparse_weights is not None else None
  embedding_weights = [
      ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
  ]

  contrib_tensor_util.assert_same_float_dtype(embedding_weights +
                                              [sparse_weights])

  with ops.name_scope(name, "embedding_lookup",
                      embedding_weights + [sparse_ids,
                                           sparse_weights]) as scope:
    # Reshape higher-rank sparse ids and weights to linear segment ids.
    original_shape = sparse_ids.shape
    original_rank_dim = sparse_ids.shape.get_shape()[0]
    # Use the static rank when it is known, otherwise compute it in-graph.
    original_rank = (
        array_ops.size(original_shape)
        if original_rank_dim.value is None
        else original_rank_dim.value)
    sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [
        math_ops.reduce_prod(
            array_ops.slice(original_shape, [0], [original_rank - 1])),
        array_ops.gather(original_shape, original_rank - 1)])
    if sparse_weights is not None:
      # Re-index the weights with the reshaped ids' indices and shape.
      sparse_weights = ops.SparseTensor(sparse_ids.indices,
                                        sparse_weights.values,
                                        sparse_ids.shape)

    # Prune invalid ids and weights.
    sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights)

    # Fill in dummy values for empty features, if necessary. The explicit
    # `None` test avoids coercing `default_id` to a Python bool.
    fill_id = 0 if default_id is None else default_id
    sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(sparse_ids,
                                                                 fill_id)
    if sparse_weights is not None:
      sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0)

    # When no default id is given, the final op of this scope is the select
    # below, so only name the lookup after the scope when it is the last op.
    result = embedding_ops.embedding_lookup_sparse(
        embedding_weights,
        sparse_ids,
        sparse_weights,
        combiner=combiner,
        partition_strategy=partition_strategy,
        name=None if default_id is None else scope)

    if default_id is None:
      # Broadcast is_row_empty to the same shape as embedding_lookup_result,
      # for use in Select.
      is_row_empty = array_ops.tile(
          array_ops.reshape(is_row_empty, [-1, 1]),
          array_ops.pack([1, array_ops.shape(result)[1]]))

      # Rows that had no features get the zero vector instead of the
      # embedding of the dummy id 0 that was filled in above.
      result = math_ops.select(is_row_empty,
                               array_ops.zeros_like(result),
                               result,
                               name=scope)

    # Reshape back from linear ids back into higher-dimensional dense result.
    final_result = array_ops.reshape(result, array_ops.concat(0, [
        array_ops.slice(
            math_ops.cast(original_shape, dtypes.int32),
            [0], [original_rank - 1]),
        array_ops.slice(array_ops.shape(result), [1], [-1])]))
    # Restore whatever static shape information is available: unknown leading
    # dims of the original rank minus one, followed by the embedding dims.
    final_result.set_shape(tensor_shape.unknown_shape(
        (original_rank_dim - 1).value).concatenate(result.get_shape()[1:]))
    return final_result