def __call__(self, inputs, initial_state=None, dtype=None,
             sequence_length=None, scope=None):
  is_list = isinstance(inputs, list)
  if self._use_dynamic_rnn:
    if is_list:
      inputs = array_ops.pack(inputs)
    outputs, state = rnn.dynamic_rnn(
        self._cell,
        inputs,
        sequence_length=sequence_length,
        initial_state=initial_state,
        dtype=dtype,
        time_major=True,
        scope=scope)
    if is_list:
      # Convert outputs back to list
      outputs = array_ops.unpack(outputs)
  else:  # non-dynamic rnn
    if not is_list:
      inputs = array_ops.unpack(inputs)
    outputs, state = rnn.rnn(self._cell,
                             inputs,
                             initial_state=initial_state,
                             dtype=dtype,
                             sequence_length=sequence_length,
                             scope=scope)
    if not is_list:
      # Convert outputs back to tensor
      outputs = array_ops.pack(outputs)
  return outputs, state

def seq2seq_inputs(x, y, input_length, output_length, sentinel=None,
                   name=None):
  """Processes inputs for Sequence to Sequence models.

  Args:
    x: Input Tensor [batch_size, input_length, embed_dim].
    y: Output Tensor [batch_size, output_length, embed_dim].
    input_length: length of input x.
    output_length: length of output y.
    sentinel: optional first input to decoder and final output expected.
      If sentinel is not provided, zeros are used. Because y is not available
      at sampling time, the shape of the sentinel will be inferred from x.
    name: Operation name.

  Returns:
    Encoder input from x, and decoder inputs and outputs from y.
  """
  with ops.name_scope(name, "seq2seq_inputs", [x, y]):
    in_x = array_ops_.unpack(x, axis=1)
    y = array_ops_.unpack(y, axis=1)
    if not sentinel:
      # Set to zeros of shape of y[0], using x for batch size.
      sentinel_shape = array_ops_.pack(
          [array_ops_.shape(x)[0], y[0].get_shape()[1]])
      sentinel = array_ops_.zeros(sentinel_shape)
      sentinel.set_shape(y[0].get_shape())
    in_y = [sentinel] + y
    out_y = y + [sentinel]
    return in_x, in_y, out_y

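# Usage sketch (not from the original source): `_seq2seq_inputs_sketch` is an
# illustrative helper showing the sequence arrangement seq2seq_inputs
# produces, using plain Python lists in place of tensors. The sentinel is
# prepended to form decoder inputs and appended to form decoder targets, so
# the decoder learns to predict y[t] from y[t - 1].
def _seq2seq_inputs_sketch(x_steps, y_steps, sentinel):
  in_x = list(x_steps)                # encoder inputs, one entry per time step
  in_y = [sentinel] + list(y_steps)   # decoder inputs start with the sentinel
  out_y = list(y_steps) + [sentinel]  # decoder targets end with the sentinel
  return in_x, in_y, out_y

assert _seq2seq_inputs_sketch(["x0", "x1"], ["y0", "y1"], "<GO>") == (
    ["x0", "x1"], ["<GO>", "y0", "y1"], ["y0", "y1", "<GO>"])
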
def testCannotInferNumFromNoneShape(self):
  x = array_ops.placeholder(np.float32, shape=(None,))
  with self.assertRaisesRegexp(ValueError,
                               r'Cannot infer num from shape \(\?,\)'):
    array_ops.unpack(x)
  with self.assertRaisesRegexp(ValueError,
                               r'Cannot infer num from shape \(\?,\)'):
    array_ops.unstack(x)

def testAxisOutOfNegativeRange(self):
  a = constant_op.constant([[1, 2, 3], [4, 5, 6]], name='a')
  with self.assertRaisesRegexp(ValueError, r'axis = -3 not in \[-2, 2\)'):
    array_ops.unpack(a, axis=-3)
  with self.assertRaisesRegexp(ValueError, r'axis = -3 not in \[-2, 2\)'):
    array_ops.unstack(a, axis=-3)

def testCannotInferNumFromUnknownShape(self):
  x = array_ops.placeholder(np.float32)
  with self.assertRaisesRegexp(ValueError,
                               r'Cannot infer num from shape <unknown>'):
    array_ops.unpack(x)
  with self.assertRaisesRegexp(ValueError,
                               r'Cannot infer num from shape <unknown>'):
    array_ops.unstack(x)

def attention(query, prev_states, b_a):
  """Put attention masks on hidden using hidden_features and query."""
  ds = []  # Results of attention reads will be stored here.
  if nest.is_sequence(query):  # If the query is a tuple, flatten it.
    query_list = nest.flatten(query)
    for q in query_list:  # Check that ndims == 2 if specified.
      ndims = q.get_shape().ndims
      if ndims:
        assert ndims == 2
    query = array_ops.concat(1, query_list)

  for a in xrange(num_heads):
    with variable_scope.variable_scope("Attention_%d" % a):
      y = linear(query, attention_vec_size_state, True)
      y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size_state])

      # Attention mask is a softmax of v^T * tanh(...).
      temp = hidden_features_states[a] + y
      new_states = array_ops.squeeze(temp, [2])
      new_states_list = array_ops.unpack(new_states, axis=1)

      # Subtract the projection of the previous attention states from each
      # time step ("distraction") before scoring.
      distract_states_list = []
      for i, _ in enumerate(new_states_list):
        temp = array_ops.reshape(prev_states[i], [-1, 1])
        t1 = math_ops.matmul(temp, b_a)
        distract_states_list.append(new_states_list[i] - t1)

      distract_states = array_ops.pack(distract_states_list, axis=1)

      s = math_ops.reduce_sum(
          v_state[a] * math_ops.tanh(distract_states), [2])

      a = nn_ops.softmax(s)
      prev_states = array_ops.pack(prev_states, axis=1)
      prev_states = prev_states + a

      # Now calculate the attention-weighted vector d.
      d = math_ops.reduce_sum(
          array_ops.reshape(a, [-1, attn_length_state, 1, 1]) * hidden_states,
          [1, 2])
      ds.append(array_ops.reshape(d, [-1, attn_size_state]))

  return ds, array_ops.unpack(prev_states, axis=1)

def testSimple(self):
  np.random.seed(7)
  with self.test_session(use_gpu=True):
    for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
      data = np.random.randn(*shape)
      # Convert data to a single tensorflow tensor
      x = constant_op.constant(data)
      # Unpack into a list of tensors
      cs_unpacked = array_ops.unpack(x, num=shape[0])
      cs_unstacked = array_ops.unstack(x, num=shape[0])
      for cs in (cs_unpacked, cs_unstacked):
        self.assertEqual(type(cs), list)
        self.assertEqual(len(cs), shape[0])
        cs = [c.eval() for c in cs]
        self.assertAllEqual(cs, data)

def _cat_probs(self, log_probs):
  """Get a list of num_components batchwise probabilities."""
  which_softmax = nn_ops.log_softmax if log_probs else nn_ops.softmax
  cat_probs = which_softmax(self.cat.logits)
  cat_probs = array_ops.unpack(cat_probs, num=self.num_components, axis=-1)
  return cat_probs

def __call__(self, inputs, state, scope=None):
  """Run this multi-layer cell on inputs, starting from state."""
  with vs.variable_scope(scope or type(self).__name__):  # "MultiRNNCell"
    cur_inp = inputs
    new_states = []
    for i, cell in enumerate(self._cells):
      with vs.variable_scope("Cell%d" % i):
        if self._state_is_tuple:
          if not nest.is_sequence(state):
            raise ValueError(
                "Expected state to be a tuple of length %d, but received: %s"
                % (len(self.state_size), state))
          cur_state = state[i]
        else:
          # State is packed along axis 0 (one entry per layer) instead of
          # being sliced out of a single concatenated state vector.
          cur_state = array_ops.unpack(state)[i]
        cur_inp, new_state = cell(cur_inp, cur_state)
        new_states.append(new_state)
  new_states = array_ops.pack(new_states)
  return cur_inp, new_states

def _reverse_seq(input_seq, lengths):
  """Reverse a list of Tensors up to specified lengths.

  Args:
    input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features)
    lengths: A tensor of dimension batch_size, containing lengths for each
      sequence in the batch. If "None" is specified, simply reverses the list.

  Returns:
    time-reversed sequence
  """
  if lengths is None:
    return list(reversed(input_seq))

  input_shape = tensor_shape.unknown_shape(
      ndims=input_seq[0].get_shape().ndims)
  for input_ in input_seq:
    input_shape.merge_with(input_.get_shape())
    input_.set_shape(input_shape)

  # Join into (time, batch_size, depth)
  s_joined = array_ops.pack(input_seq)

  # TODO(schuster, ebrevdo): Remove cast when reverse_sequence takes int32
  if lengths is not None:
    lengths = math_ops.to_int64(lengths)

  # Reverse along dimension 0
  s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1)
  # Split again into list
  result = array_ops.unpack(s_reversed)
  for r in result:
    r.set_shape(input_shape)
  return result

def _reverse_seq(input_seq, lengths):
  """Reverse a list of Tensors up to specified lengths.

  Args:
    input_seq: Sequence of seq_len tensors of dimension (batch_size, depth)
    lengths: A tensor of dimension batch_size, containing lengths for each
      sequence in the batch. If "None" is specified, simply reverses the list.

  Returns:
    time-reversed sequence
  """
  if lengths is None:
    return list(reversed(input_seq))

  for input_ in input_seq:
    input_.set_shape(input_.get_shape().with_rank(2))

  # Join into (time, batch_size, depth)
  s_joined = array_ops_.pack(input_seq)
  # Reverse along dimension 0
  s_reversed = array_ops_.reverse_sequence(s_joined, lengths, 0, 1)
  # Split again into list
  result = array_ops_.unpack(s_reversed)
  return result

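# NumPy sketch (not from the original source): `reverse_up_to_lengths` is an
# illustrative helper showing what _reverse_seq computes on a
# (time, batch, depth) array. Each batch entry is reversed only over its
# first `length` time steps; padding beyond that length stays in place,
# mirroring reverse_sequence along the time dimension.
import numpy as np

def reverse_up_to_lengths(seq, lengths):
  # seq: array of shape (time, batch, depth); lengths: iterable of ints.
  out = seq.copy()
  for b, length in enumerate(lengths):
    out[:length, b] = seq[:length, b][::-1]
  return out

seq = np.array([[[1.], [10.]],
                [[2.], [20.]],
                [[3.], [30.]]])        # time=3, batch=2, depth=1
rev = reverse_up_to_lengths(seq, [3, 2])
assert rev[:, 0, 0].tolist() == [3., 2., 1.]    # full length: fully reversed
assert rev[:, 1, 0].tolist() == [20., 10., 30.]  # length 2: padding untouched
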
def testZeroLengthDim(self):
  with self.test_session():
    x = array_ops.zeros(shape=(0, 1, 2))
    y = array_ops.unpack(x, axis=1)[0].eval()
    self.assertEqual(y.shape, (0, 2))

    y = array_ops.unstack(x, axis=1)[0].eval()
    self.assertEqual(y.shape, (0, 2))

def testInferNum(self):
  with self.test_session():
    for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
      x = array_ops.placeholder(np.float32, shape=shape)
      cs = array_ops.unpack(x)
      self.assertEqual(type(cs), list)
      self.assertEqual(len(cs), shape[0])

      cs = array_ops.unstack(x)
      self.assertEqual(type(cs), list)
      self.assertEqual(len(cs), shape[0])

def testAxis0Default(self):
  with self.test_session() as sess:
    a = constant_op.constant([[1, 2, 3], [4, 5, 6]], name='a')

    unpacked = sess.run(array_ops.unpack(a))
    unstacked = sess.run(array_ops.unstack(a))

  self.assertEqual(len(unpacked), 2)
  self.assertAllEqual(unpacked[0], [1, 2, 3])
  self.assertAllEqual(unpacked[1], [4, 5, 6])
  self.assertEqual(len(unstacked), 2)
  self.assertAllEqual(unstacked[0], [1, 2, 3])
  self.assertAllEqual(unstacked[1], [4, 5, 6])

def build(self):
  self.input_0 = tf.placeholder(
      tf.float32,
      [self.config.max_length_0_input, 1, self.config.embedding_size])
  self.input_0_length = tf.placeholder(tf.int32)
  self.input_1 = tf.placeholder(
      tf.float32,
      [self.config.max_length_0_input, 1, self.config.embedding_size])
  self.input_1_length = tf.placeholder(tf.int32)

  input_0 = array_ops.unpack(self.input_0)
  input_1 = array_ops.unpack(self.input_1)

  # bidirectional rnn
  cell = rnn_cell.GRUCell(self.config.embedding_size)
  initial_state_fw = array_ops.zeros(
      array_ops.pack([1, cell.state_size]), dtype=tf.float32)
  initial_state_fw.set_shape([1, cell.state_size])
  initial_state_bw = array_ops.zeros(
      array_ops.pack([1, cell.state_size]), dtype=tf.float32)
  initial_state_bw.set_shape([1, cell.state_size])

  states = bidirectional_rnn(
      cell,
      cell,
      input_0,
      initial_state_fw=initial_state_fw,
      initial_state_bw=initial_state_bw,
      dtype=tf.float32,
      # sequence_length=3
  )

  self.test = array_ops.pack(states)

def _sample_n(self, n, seed=None):
  # We use 2 uniform random floats to generate polar random variates.
  # http://dl.acm.org/citation.cfm?id=179631
  # Theorem 2. Let G, H be iid variates, uniformly distributed on [0,1].
  # Let theta = 2*pi*H, let R = sqrt(df*(G^(-2/df) - 1)) for df > 0.
  # Let X = R*cos(theta), and let Y = R*sin(theta).
  # Then X ~ t_df and Y ~ t_df.
  # The variates X and Y are not independent.
  shape = array_ops.concat(0, ([2, n], self.batch_shape()))
  uniform = random_ops.random_uniform(shape=shape,
                                      dtype=self.dtype,
                                      seed=seed)
  samples_g, samples_h = array_ops.unpack(uniform, num=2)
  theta = (2.0 * math.pi) * samples_h
  r = math_ops.sqrt(self.df * (math_ops.pow(samples_g, -2 / self.df) - 1))
  samples = r * math_ops.cos(theta)
  return samples * self.sigma + self.mu

def testAgainstNumpy(self):
  # For 1 to 5 dimensions.
  for i in range(1, 6):
    a = np.random.random(np.random.permutation(i) + 1)

    # For all the possible axis to split it, including negative indices.
    for j in range(-i, i):
      expected = np_split_squeeze(a, j)

      with self.test_session() as sess:
        actual_unpack = sess.run(array_ops.unpack(a, axis=j))
        actual_unstack = sess.run(array_ops.unstack(a, axis=j))

      self.assertAllEqual(expected, actual_unpack)
      self.assertAllEqual(expected, actual_unstack)

def testGradientsAxis0(self):
  for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
    data = np.random.randn(*shape)
    shapes = [shape[1:]] * shape[0]
    for i in xrange(shape[0]):
      with self.test_session(use_gpu=True):
        x = constant_op.constant(data)
        cs = array_ops.unpack(x, num=shape[0])
        err = gradient_checker.compute_gradient_error(x, shape, cs[i],
                                                      shapes[i])
        self.assertLess(err, 1e-6)

        cs = array_ops.unstack(x, num=shape[0])
        err = gradient_checker.compute_gradient_error(x, shape, cs[i],
                                                      shapes[i])
        self.assertLess(err, 1e-6)

def call_rnn_bidir_dynamic(cell_encoder_fw, cell_encoder_bw, embeddings,
                           sequence_length, dtype):
  embeddings = array_ops.pack(embeddings, axis=1)
  encoder_outputs, encoder_state = rnn.bidirectional_dynamic_rnn(
      cell_encoder_fw, cell_encoder_bw, embeddings, sequence_length,
      dtype=dtype)

  encoder_outputs = array_ops.concat(2, encoder_outputs)
  encoder_state = array_ops.concat(1, encoder_state)
  encoder_outputs = array_ops.unpack(encoder_outputs, axis=1)

  return encoder_outputs, encoder_state

def _reverse_seq(input_seq, lengths):
  """Reverse a list of Tensors up to specified lengths.

  Args:
    input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features)
      or nested tuples of tensors.
    lengths: A tensor of dimension batch_size, containing lengths for each
      sequence in the batch. If "None" is specified, simply reverses the list.

  Returns:
    time-reversed sequence
  """
  if lengths is None:
    return list(reversed(input_seq))

  input_is_tuple = nest.is_sequence(input_seq[0])
  flat_input_seq = (nest.flatten(input_) if input_is_tuple else [input_]
                    for input_ in input_seq)

  flat_results = [[] for _ in range(len(input_seq))]
  for sequence in zip(*flat_input_seq):
    input_shape = tensor_shape.unknown_shape(
        ndims=sequence[0].get_shape().ndims)
    for input_ in sequence:
      input_shape.merge_with(input_.get_shape())
      input_.set_shape(input_shape)

    # Join into (time, batch_size, depth)
    s_joined = array_ops.pack(sequence)

    # TODO(schuster, ebrevdo): Remove cast when reverse_sequence takes int32
    if lengths is not None:
      lengths = math_ops.to_int64(lengths)

    # Reverse along dimension 0
    s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1)
    # Split again into list
    result = array_ops.unpack(s_reversed)
    for r, flat_result in zip(result, flat_results):
      r.set_shape(input_shape)
      flat_result.append(r)

  results = [nest.pack_sequence_as(structure=input_, flat_sequence=flat_result)
             if input_is_tuple else flat_result[0]
             for input_, flat_result in zip(input_seq, flat_results)]
  return results

def testGradientsAxis1(self):
  for shape in (2, 3), (3, 2), (4, 3, 2):
    data = np.random.randn(*shape)
    out_shape = list(shape)
    del out_shape[1]
    for i in xrange(shape[1]):
      with self.test_session(use_gpu=True):
        x = constant_op.constant(data)
        cs = array_ops.unpack(x, num=shape[1], axis=1)
        err = gradient_checker.compute_gradient_error(x, shape, cs[i],
                                                      out_shape)
        self.assertLess(err, 1e-6)

        cs = array_ops.unstack(x, num=shape[1], axis=1)
        err = gradient_checker.compute_gradient_error(x, shape, cs[i],
                                                      out_shape)
        self.assertLess(err, 1e-6)

def dense_to_sparse_tensor(dense_tensor, ignore_value=None):
  """Converts a dense Tensor to a SparseTensor, dropping ignore_value cells.

  Args:
    dense_tensor: An `Output`.
    ignore_value: Entries in `dense_tensor` equal to this value will be
      absent from the return `SparseTensor`. If `None`, default value of
      dense_tensor's dtype will be used (e.g. '' for `str`, 0 for `int`).

  Returns:
    A `SparseTensor` with the same shape as `dense_tensor`.

  Raises:
    ValueError: when `dense_tensor`'s rank is `None`.
  """
  with ops.name_scope("DenseToSparseTensor"):
    dense_t = ops.convert_to_tensor(dense_tensor)
    if dense_t.get_shape().ndims is None:
      # TODO(b/32318825): Implement dense_to_sparse_tensor for undefined rank.
      raise ValueError(
          "dense_tensor.get_shape() should be defined, got None.")
    if ignore_value is None:
      if dense_t.dtype == dtypes.string:
        # Exception due to TF strings are converted to numpy objects by
        # default.
        ignore_value = ""
      else:
        ignore_value = dense_t.dtype.as_numpy_dtype()
    dense_shape = math_ops.cast(array_ops.shape(dense_t), dtypes.int64)
    indices = array_ops.where(
        math_ops.not_equal(dense_t,
                           math_ops.cast(ignore_value, dense_t.dtype)))
    index_dims = len(dense_t.get_shape())
    # Flattens the tensor and indices for use with gather.
    flat_tensor = array_ops.reshape(dense_t, [-1])
    flat_indices = indices[:, index_dims - 1]
    # Computes the correct flattened indices for 2d (or higher) tensors.
    if index_dims > 1:
      higher_dims = indices[:, :index_dims - 1]
      shape_multipliers = array_ops.pack(
          _multiplier_helper(array_ops.unpack(dense_shape)[1:]))
      offsets = math_ops.reduce_sum(
          math_ops.mul(higher_dims, shape_multipliers),
          reduction_indices=[1])
      flat_indices = math_ops.add(flat_indices, offsets)
    values = array_ops.gather(flat_tensor, flat_indices)
    return sparse_tensor.SparseTensor(indices, values, dense_shape)

def _ImageDimensions(image):
  """Returns the dimensions of an image tensor.

  Args:
    image: A 3-D Tensor of shape `[height, width, channels]`.

  Returns:
    A list of `[height, width, channels]` corresponding to the dimensions of
    the input image. Dimensions that are statically known are python
    integers, otherwise they are integer scalar tensors.
  """
  if image.get_shape().is_fully_defined():
    return image.get_shape().as_list()
  else:
    static_shape = image.get_shape().with_rank(3).as_list()
    dynamic_shape = array_ops.unpack(array_ops.shape(image), 3)
    return [s if s is not None else d
            for s, d in zip(static_shape, dynamic_shape)]

def _ImageDimensions(images, dynamic_shape=False):
  """Returns the dimensions of an image tensor.

  Args:
    images: 4-D Tensor of shape [batch, height, width, channels]
    dynamic_shape: Whether the input image has undetermined shape. If set to
      `True`, shape information will be retrieved at run time. Default to
      `False`.

  Returns:
    list of integers [batch, height, width, channels]
  """
  # A simple abstraction to provide names for each dimension. This abstraction
  # should make it simpler to switch dimensions in the future (e.g. if we ever
  # want to switch height and width.)
  if dynamic_shape:
    return array_ops.unpack(array_ops.shape(images))
  else:
    return images.get_shape().as_list()

def dense_to_sparse_tensor(dense_tensor, ignore_value=None):
  """Converts a dense Tensor to a SparseTensor, dropping ignore_value cells.

  Args:
    dense_tensor: A `Tensor`.
    ignore_value: Entries in `dense_tensor` equal to this value will be
      absent from the return `SparseTensor`. If `None`, default value of
      dense_tensor's dtype will be used (e.g. '' for `str`, 0 for `int`).

  Returns:
    A `SparseTensor` with the same shape as `dense_tensor`.

  Raises:
    ValueError: when `dense_tensor`'s rank is `None`.
  """
  with ops.name_scope("DenseToSparseTensor"):
    dense_t = ops.convert_to_tensor(dense_tensor)
    if dense_t.get_shape().ndims is None:
      # TODO(b/32318825): Implement dense_to_sparse_tensor for undefined rank.
      raise ValueError("dense_tensor.get_shape() should be defined, got None.")
    if ignore_value is None:
      if dense_t.dtype == dtypes.string:
        # Exception due to TF strings are converted to numpy objects by
        # default.
        ignore_value = ""
      else:
        ignore_value = dense_t.dtype.as_numpy_dtype()
    dense_shape = math_ops.cast(array_ops.shape(dense_t), dtypes.int64)
    indices = array_ops.where(
        math_ops.not_equal(dense_t,
                           math_ops.cast(ignore_value, dense_t.dtype)))
    index_dims = len(dense_t.get_shape())
    # Flattens the tensor and indices for use with gather.
    flat_tensor = array_ops.reshape(dense_t, [-1])
    flat_indices = indices[:, index_dims - 1]
    # Computes the correct flattened indices for 2d (or higher) tensors.
    if index_dims > 1:
      higher_dims = indices[:, :index_dims - 1]
      shape_multipliers = array_ops.pack(
          _multiplier_helper(array_ops.unpack(dense_shape)[1:]))
      offsets = math_ops.reduce_sum(
          math_ops.mul(higher_dims, shape_multipliers), reduction_indices=[1])
      flat_indices = math_ops.add(flat_indices, offsets)
    values = array_ops.gather(flat_tensor, flat_indices)
    return sparse_tensor.SparseTensor(indices, values, dense_shape)

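# NumPy sketch (not from the original source): the flattened-index arithmetic
# used above for rank >= 2 inputs. For each non-ignored cell, the flat index
# into the reshaped [-1] tensor is the last coordinate plus the higher
# coordinates weighted by row-major shape multipliers (for a 2-D tensor:
# row * num_cols + col).
import numpy as np

dense = np.array([[0, 7, 0],
                  [5, 0, 9]])
indices = np.argwhere(dense != 0)    # [[0, 1], [1, 0], [1, 2]]
multipliers = [dense.shape[1], 1]    # row-major multipliers for a 2-D shape
flat_indices = indices @ np.array(multipliers)
values = dense.reshape(-1)[flat_indices]
assert flat_indices.tolist() == [1, 3, 5]
assert values.tolist() == [7, 5, 9]
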
def _ImageDimensions(images, static_only=True):
  """Returns the dimensions of an image tensor.

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]`
    static_only: Boolean, whether to return only static shape.

  Returns:
    list of integers `[batch, height, width, channels]`, when static shape is
    fully defined or `static_only` is `True`.
    list of integer scalar tensors `[batch, height, width, channels]`, when
    static shape is not fully defined.
  """
  # A simple abstraction to provide names for each dimension. This abstraction
  # should make it simpler to switch dimensions in the future (e.g. if we ever
  # want to switch height and width.)
  if static_only or images.get_shape().is_fully_defined():
    return images.get_shape().as_list()
  else:
    return array_ops.unpack(array_ops.shape(images))

def sample(self, n, seed=None, name="sample"):
  """Sample `n` observations from the Student t Distributions.

  Args:
    n: `Scalar`, type int32, the number of observations to sample.
    seed: Python integer, the random seed.
    name: The name to give this op.

  Returns:
    samples: a `Tensor` of shape `(n,) + self.batch_shape + self.event_shape`
        with values of type `self.dtype`.
  """
  with ops.name_scope(self.name):
    with ops.op_scope([self._df, self._mu, self._sigma, n], name):
      n = ops.convert_to_tensor(n, name="n")
      n_val = tensor_util.constant_value(n)

      # We use 2 uniform random floats to generate polar random variates.
      # http://dl.acm.org/citation.cfm?id=179631
      # Theorem 2. Let G, H be iid variates, uniformly distributed on [0,1].
      # Let theta = 2*pi*H, let R = sqrt(df*(G^(-2/df) - 1)) for df > 0.
      # Let X = R*cos(theta), and let Y = R*sin(theta).
      # Then X ~ t_df and Y ~ t_df.
      # The variates X and Y are not independent.
      shape = array_ops.concat(0, [array_ops.pack([2, n]),
                                   self.batch_shape()])
      uniform = random_ops.random_uniform(shape=shape,
                                          dtype=self.dtype,
                                          seed=seed)
      samples_g, samples_h = array_ops.unpack(uniform, num=2)
      theta = (2 * np.pi) * samples_h
      r = math_ops.sqrt(self._df *
                        (math_ops.pow(samples_g, -2 / self._df) - 1))
      samples = r * math_ops.cos(theta)

      # Provide some hints to shape inference
      inferred_shape = tensor_shape.vector(n_val).concatenate(
          self.get_batch_shape())
      samples.set_shape(inferred_shape)
      return samples * self._sigma + self._mu

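# NumPy sketch (not from the original source): `sample_student_t` is an
# illustrative implementation of the polar method described in the comments
# above. G, H are iid Uniform(0, 1); theta = 2*pi*H and
# R = sqrt(df * (G**(-2/df) - 1)); then R*cos(theta) is Student t with df
# degrees of freedom. A rough moment check against the known variance
# df / (df - 2) (valid for df > 2) serves as a sanity test.
import numpy as np

def sample_student_t(df, size, rng):
  g = rng.uniform(size=size)
  h = rng.uniform(size=size)
  theta = 2.0 * np.pi * h
  r = np.sqrt(df * (g ** (-2.0 / df) - 1.0))
  return r * np.cos(theta)

rng = np.random.default_rng(0)
samples = sample_student_t(df=5.0, size=200000, rng=rng)
assert abs(samples.var() - 5.0 / 3.0) < 0.1  # df / (df - 2) = 5/3 for df = 5
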
def testBatch(self):
  # Build an arbitrary RGB image
  np.random.seed(7)
  batch_size = 5
  shape = (batch_size, 2, 7, 3)
  inp = np.random.rand(*shape).astype(np.float32)

  # Convert to HSV and back, as a batch and individually
  with self.test_session() as sess:
    batch0 = constant_op.constant(inp)
    batch1 = image_ops.rgb_to_hsv(batch0)
    batch2 = image_ops.hsv_to_rgb(batch1)
    split0 = array_ops.unpack(batch0)
    split1 = map(image_ops.rgb_to_hsv, split0)
    split2 = map(image_ops.hsv_to_rgb, split1)
    join1 = array_ops.pack(split1)
    join2 = array_ops.pack(split2)
    batch1, batch2, join1, join2 = sess.run([batch1, batch2, join1, join2])

  # Verify that processing batch elements together is the same as separate
  self.assertAllClose(batch1, join1)
  self.assertAllClose(batch2, join2)
  self.assertAllClose(batch2, inp)

def call(self, inputs):
  shape = inputs.get_shape().as_list()
  input_dim = shape[-1]
  output_shape = shape[:-1] + [self.units]
  if len(output_shape) > 2:
    # Reshape the input to 2D.
    output_shape_tensors = array_ops.unpack(array_ops.shape(inputs))
    output_shape_tensors[-1] = self.units
    output_shape_tensor = array_ops.pack(output_shape_tensors)
    inputs = array_ops.reshape(inputs, [-1, input_dim])

  outputs = standard_ops.matmul(inputs, self.w)
  if self.use_bias:
    outputs = nn.bias_add(outputs, self.bias)

  if len(output_shape) > 2:
    # Reshape the output back to the original ndim of the input.
    outputs = array_ops.reshape(outputs, output_shape_tensor)
    outputs.set_shape(output_shape)

  if self.activation is not None:
    return self.activation(outputs)  # pylint: disable=not-callable
  return outputs

def hessians(ys, xs, name="hessians", colocate_gradients_with_ops=False,
             gate_gradients=False, aggregation_method=None):
  """Constructs the Hessian of sum of `ys` with respect to `x` in `xs`.

  `hessians()` adds ops to the graph to output the Hessian matrix of `ys`
  with respect to `xs`.  It returns a list of `Tensor` of length `len(xs)`
  where each tensor is the Hessian of `sum(ys)`. This function currently
  only supports evaluating the Hessian with respect to (a list of)
  one-dimensional tensors.

  The Hessian is a matrix of second-order partial derivatives of a scalar
  tensor (see https://en.wikipedia.org/wiki/Hessian_matrix for more details).

  Args:
    ys: A `Tensor` or list of tensors to be differentiated.
    xs: A `Tensor` or list of tensors to be used for differentiation.
    name: Optional name to use for grouping all the gradient ops together.
      defaults to 'hessians'.
    colocate_gradients_with_ops: See `gradients()` documentation for details.
    gate_gradients: See `gradients()` documentation for details.
    aggregation_method: See `gradients()` documentation for details.

  Returns:
    A list of Hessian matrices of `sum(y)` for each `x` in `xs`.

  Raises:
    LookupError: if one of the operations between `xs` and `ys` does not
      have a registered gradient function.
    ValueError: if the arguments are invalid or not supported. Currently,
      this function only supports one-dimensional `x` in `xs`.
  """
  xs = _AsList(xs)
  kwargs = {
      'colocate_gradients_with_ops': colocate_gradients_with_ops,
      'gate_gradients': gate_gradients,
      'aggregation_method': aggregation_method
  }
  # Compute a hessian matrix for each x in xs
  hessians = []
  for i, x in enumerate(xs):
    # Check dimensions
    ndims = x.get_shape().ndims
    if ndims is None:
      raise ValueError('Cannot compute Hessian because the dimensionality of '
                       'element number %d of `xs` cannot be determined' % i)
    elif ndims != 1:
      raise ValueError('Computing hessians is currently only supported for '
                       'one-dimensional tensors. Element number %d of `xs` '
                       'has %d dimensions.' % (i, ndims))
    with ops.name_scope(name + '_first_derivative'):
      # Compute the partial derivatives of the input with respect to all
      # elements of `x`
      _gradients = gradients(ys, x, **kwargs)[0]
      # Unpack the gradients into a list so we can take derivatives with
      # respect to each element
      _gradients = array_ops.unpack(_gradients)
    with ops.name_scope(name + '_second_derivative'):
      # Compute the partial derivatives with respect to each element of the
      # list
      _hess = [gradients(_gradient, x, **kwargs)[0]
               for _gradient in _gradients]
      # Pack the list into a matrix and add to the list of hessians
      hessians.append(array_ops.stack(_hess, name=name))
  return hessians

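# NumPy sketch (not from the original source): `numeric_grad` and
# `numeric_hessian` are illustrative helpers mirroring the construction in
# hessians() above, building one Hessian row per gradient element, but using
# central finite differences instead of graph ops. For f(x) = x^T A x the
# Hessian is A + A^T, which gives a quick check of the row-by-row assembly.
import numpy as np

def numeric_grad(f, x, eps=1e-5):
  g = np.zeros_like(x)
  for i in range(x.size):
    e = np.zeros_like(x)
    e[i] = eps
    g[i] = (f(x + e) - f(x - e)) / (2 * eps)
  return g

def numeric_hessian(f, x, eps=1e-5):
  # One Hessian row per gradient element, mirroring the unpack/gradients loop.
  rows = [numeric_grad(lambda z, i=i: numeric_grad(f, z, eps)[i], x, eps)
          for i in range(x.size)]
  return np.stack(rows)

A = np.array([[1.0, 2.0], [0.0, 3.0]])
f = lambda x: x @ A @ x
x0 = np.array([0.5, -1.0])
assert np.allclose(numeric_hessian(f, x0), A + A.T, atol=1e-3)
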
def _PackGrad(op, grad):
  """Gradient for pack op."""
  return array_ops.unpack(grad, num=op.get_attr("N"))

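# NumPy sketch (not from the original source): why the gradient of pack is
# unpack. pack (stack) copies each input into one slice of the output along
# axis 0, so the incoming gradient splits back into one slice per input,
# which is exactly what unpack/unstack returns.
import numpy as np

inputs = [np.array([1.0, 2.0]), np.array([3.0, 4.0])]
packed = np.stack(inputs)                  # forward pass: shape (2, 2)
upstream_grad = np.ones_like(packed)       # dL/d(packed)
input_grads = [upstream_grad[i] for i in range(len(inputs))]  # "unpack"
assert all(g.shape == x.shape for g, x in zip(input_grads, inputs))
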
def legacy_fully_connected(x,
                           num_output_units,
                           activation_fn=None,
                           weight_init=initializers.xavier_initializer(),
                           bias_init=init_ops.zeros_initializer,
                           name=None,
                           weight_collections=(ops.GraphKeys.WEIGHTS,),
                           bias_collections=(ops.GraphKeys.BIASES,),
                           output_collections=(ops.GraphKeys.ACTIVATIONS,),
                           trainable=True,
                           weight_regularizer=None,
                           bias_regularizer=None):
  # pylint: disable=anomalous-backslash-in-string
  r"""Adds the parameters for a fully connected layer and returns the output.

  A fully connected layer is generally defined as a matrix multiply:
  `y = f(w * x + b)` where `f` is given by `activation_fn`. If
  `activation_fn` is `None`, the result of `y = w * x + b` is returned.

  If `x` has shape [\\\(\\text{dim}_0, \\text{dim}_1, ..., \\text{dim}_n\\\)]
  with more than 2 dimensions (\\\(n > 1\\\)), then we repeat the matrix
  multiply along the first dimensions. The result r is a tensor of shape
  [\\\(\\text{dim}_0, ..., \\text{dim}_{n-1},\\\) `num_output_units`],
  where \\\( r_{i_0, ..., i_{n-1}, k} =
  \\sum_{0 \\leq j < \\text{dim}_n} x_{i_0, ... i_{n-1}, j} \cdot w_{j, k}\\\).
  This is accomplished by reshaping `x` to 2-D
  [\\\(\\text{dim}_0 \\cdot ... \\cdot \\text{dim}_{n-1}, \\text{dim}_n\\\)]
  before the matrix multiply and afterwards reshaping it to
  [\\\(\\text{dim}_0, ..., \\text{dim}_{n-1},\\\) `num_output_units`].

  This op creates `w` and optionally `b`. Bias (`b`) can be disabled by
  setting `bias_init` to `None`.

  The variable creation is compatible with `tf.variable_scope` and so can be
  reused with `tf.variable_scope` or `tf.make_template`.

  Most of the details of variable creation can be controlled by specifying the
  initializers (`weight_init` and `bias_init`) and in which collections to
  place the created variables (`weight_collections` and `bias_collections`;
  note that the variables are always added to the `VARIABLES` collection). The
  output of the layer can be placed in custom collections using
  `output_collections`. The collections arguments default to `WEIGHTS`,
  `BIASES` and `ACTIVATIONS`, respectively.

  A per layer regularization can be specified by setting `weight_regularizer`
  and `bias_regularizer`, which are applied to the weights and biases
  respectively, and whose output is added to the `REGULARIZATION_LOSSES`
  collection.

  Args:
    x: The input `Tensor`.
    num_output_units: The size of the output.
    activation_fn: A function that requires a single Tensor that is applied
      as a non-linearity. If None is used, do not apply any activation.
    weight_init: An optional weight initialization, defaults to
      `xavier_initializer`.
    bias_init: An initializer for the bias, defaults to 0. Set to `None` in
      order to disable bias.
    name: The name for this operation is used to name operations and to find
      variables. If specified it must be unique for this scope, otherwise a
      unique name starting with "fully_connected" will be created.  See
      `tf.variable_op_scope` for details.
    weight_collections: List of graph collections to which weights are added.
    bias_collections: List of graph collections to which biases are added.
    output_collections: List of graph collections to which outputs are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    weight_regularizer: A regularizer like the result of `l1_regularizer` or
      `l2_regularizer`. Used for weights.
    bias_regularizer: A regularizer like the result of `l1_regularizer` or
      `l2_regularizer`. Used for biases.

  Returns:
    The output of the fully connected layer.

  Raises:
    ValueError: if x has rank less than 2 or if its last dimension is not set.
  """
  with variable_scope.variable_op_scope([x], name, 'fully_connected'):
    dims = x.get_shape().dims
    if dims is None:
      raise ValueError('dims of x must be known but is None')
    if len(dims) < 2:
      raise ValueError('rank of x must be at least 2 not: %d' % len(dims))
    num_input_units = dims[-1].value
    if num_input_units is None:
      raise ValueError('last dimension of x must be known but is None')
    dtype = x.dtype.base_dtype

    weight_collections = set(list(weight_collections or []) +
                             [ops.GraphKeys.VARIABLES])
    w = variable_scope.get_variable('weights',
                                    shape=[num_input_units, num_output_units],
                                    dtype=dtype,
                                    initializer=weight_init,
                                    collections=weight_collections,
                                    regularizer=weight_regularizer,
                                    trainable=trainable)
    x_2_dim = x if len(dims) <= 2 else array_ops.reshape(
        x, [-1, num_input_units])
    y = standard_ops.matmul(x_2_dim, w)

    if bias_init is not None:
      bias_collections = set(list(bias_collections or []) +
                             [ops.GraphKeys.VARIABLES])
      b = variable_scope.get_variable('bias',
                                      shape=[num_output_units],
                                      dtype=dtype,
                                      initializer=bias_init,
                                      collections=bias_collections,
                                      regularizer=bias_regularizer,
                                      trainable=trainable)

      y = nn.bias_add(y, b)

    if len(dims) > 2:
      out_shape = array_ops.unpack(array_ops.shape(x))
      out_shape[-1] = num_output_units

      y = array_ops.reshape(y, array_ops.pack(out_shape))

      static_shape = x.get_shape().as_list()
      static_shape[-1] = num_output_units
      y.set_shape(static_shape)

    return _apply_activation(y, activation_fn, output_collections)

def fully_connected(inputs,
                    num_outputs,
                    activation_fn=nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=init_ops.zeros_initializer,
                    biases_regularizer=None,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    trainable=True,
                    scope=None):
  """Adds a fully connected layer.

  `fully_connected` creates a variable called `weights`, representing a fully
  connected weight matrix, which is multiplied by the `inputs` to produce a
  `Tensor` of hidden units. If a `normalizer_fn` is provided (such as
  `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is None and
  a `biases_initializer` is provided then a `biases` variable would be created
  and added to the hidden units. Finally, if `activation_fn` is not `None`, it
  is applied to the hidden units as well.

  Note: that if `inputs` have a rank greater than 2, then `inputs` is
  flattened prior to the initial matrix multiply by `weights`.

  Args:
    inputs: A tensor with at least rank 2 and a value for the last dimension,
      i.e. `[batch_size, depth]`, `[None, None, None, channels]`.
    num_outputs: Integer, the number of output units in the layer.
    activation_fn: activation function.
    normalizer_fn: normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
    normalizer_params: normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional list of collections for all the variables
      or a dictionary containing a different list of collections per variable.
    outputs_collections: collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_op_scope.

  Returns:
    the tensor variable representing the result of the series of operations.

  Raises:
    ValueError: if x has rank less than 2 or if its last dimension is not set.
  """
  if not isinstance(num_outputs, int):
    raise ValueError('num_outputs should be integer, got %s.' % num_outputs)
  with variable_scope.variable_op_scope([inputs],
                                        scope,
                                        'fully_connected',
                                        reuse=reuse) as sc:
    dtype = inputs.dtype.base_dtype
    num_input_units = utils.last_dimension(inputs.get_shape(), min_rank=2)

    static_shape = inputs.get_shape().as_list()
    static_shape[-1] = num_outputs

    out_shape = array_ops.unpack(array_ops.shape(inputs))
    out_shape[-1] = num_outputs

    weights_shape = [num_input_units, num_outputs]
    weights_collections = utils.get_variable_collections(
        variables_collections, 'weights')
    weights = variables.model_variable('weights',
                                       shape=weights_shape,
                                       dtype=dtype,
                                       initializer=weights_initializer,
                                       regularizer=weights_regularizer,
                                       collections=weights_collections,
                                       trainable=trainable)
    if len(static_shape) > 2:
      # Reshape inputs
      inputs = array_ops.reshape(inputs, [-1, num_input_units])
    outputs = standard_ops.matmul(inputs, weights)
    if normalizer_fn:
      normalizer_params = normalizer_params or {}
      outputs = normalizer_fn(outputs, **normalizer_params)
    else:
      if biases_initializer is not None:
        biases_collections = utils.get_variable_collections(
            variables_collections, 'biases')
        biases = variables.model_variable('biases',
                                           shape=[num_outputs,],
                                           dtype=dtype,
                                           initializer=biases_initializer,
                                           regularizer=biases_regularizer,
                                           collections=biases_collections,
                                           trainable=trainable)
        outputs = nn.bias_add(outputs, biases)
    if len(static_shape) > 2:
      # Reshape back outputs
      outputs = array_ops.reshape(outputs, array_ops.pack(out_shape))
      outputs.set_shape(static_shape)
    if activation_fn:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)

def __init__(self):
  self.max_len = 300
  self.seg_len = 5
  self.batch_size = 100
  self.number_of_layers = 1
  self.dim = 59 + 2
  self.num_epoch = 10000
  self.num_hidden_1 = 2
  self.num_hidden_2 = 2

  self.input = tf.placeholder(tf.float32, [self.max_len, None, self.dim])
  self.target = tf.placeholder(tf.float32, [None, 2])
  self.keep_prob = tf.placeholder(tf.float32)

  input = array_ops.unpack(self.input)
  batch_size = tf.shape(self.input)[1]

  def _rnn(cell, inputs):
    with tf.variable_scope("GRU_RNN") as scope:
      state = cell.zero_state(batch_size, tf.float32)
      for time, input_ in enumerate(inputs):
        if time > 0:
          scope.reuse_variables()
        output, state = cell(input_, state)
      return state

  def h_rnn(input):
    i = 0
    num_layer = 0
    layer = [input]
    while True:
      print(num_layer)
      layer.append([])
      _input = layer[num_layer]
      length = len(_input)
      with tf.variable_scope("RNN_" + str(num_layer)) as scope:
        cell = rnn_cell.BasicLSTMCell(self.dim)
        cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=self.keep_prob)
        stacked_cell = rnn_cell.MultiRNNCell([cell] * self.number_of_layers)
        i = 0
        while i < length:
          state = _rnn(stacked_cell, _input[i:min(i + self.seg_len, length)])
          layer[num_layer + 1].append(state)
          scope.reuse_variables()
          i += self.seg_len
      num_layer += 1
      if length <= self.seg_len:
        break
    return layer[num_layer][0]

  state = h_rnn(input)

  with tf.variable_scope("NN", initializer=tf.random_uniform_initializer()):
    self.W_1 = tf.get_variable("W_1",
                               [state.get_shape()[1], self.num_hidden_1])
    self.b_1 = tf.get_variable("b_1", [self.num_hidden_1])
    # self.W_2 = tf.get_variable("W_2", [self.num_hidden_1, self.num_hidden_2])
    # self.b_2 = tf.get_variable("b_2", [self.num_hidden_2])

  y_1 = tf.matmul(state, self.W_1) + self.b_1
  # y_1 = tf.nn.sigmoid(tf.matmul(state, self.W_1) + self.b_1)
  # y_2 = tf.matmul(y_1, self.W_2) + self.b_2
  self.y_pred = tf.nn.softmax(y_1)
  self.cross_entropy = -tf.reduce_mean(self.target * tf.log(self.y_pred))

  correct_prediction = tf.equal(
      tf.argmax(self.target, 1), tf.argmax(self.y_pred, 1))
  self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
  accuracy_summary = tf.scalar_summary("accuracy", self.accuracy)
  ce_summ = tf.scalar_summary("cross entropy", self.cross_entropy)
  self.merged = tf.merge_all_summaries()

  # Optimizer.
  global_step = tf.Variable(0)
  # optimizer = tf.train.GradientDescentOptimizer(0.1)
  optimizer = tf.train.AdamOptimizer(0.01)
  gradients, v = zip(*optimizer.compute_gradients(self.cross_entropy))
  gradients, _ = tf.clip_by_global_norm(gradients, 10)
  self.optimizer = optimizer.apply_gradients(
      zip(gradients, v), global_step=global_step)

def __call__(self,
             inputs,
             initial_state=None,
             dtype=None,
             sequence_length=None,
             scope=None):
  """Run this LSTM on inputs, starting from the given state.

  Args:
    inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`
      or a list of `time_len` tensors of shape `[batch_size, input_size]`.
    initial_state: a tuple `(initial_cell_state, initial_output)` with tensors
      of shape `[batch_size, self._num_units]`. If this is not provided, the
      cell is expected to create a zero initial state of type `dtype`.
    dtype: The data type for the initial state and expected output. Required
      if `initial_state` is not provided or RNN state has a heterogeneous
      dtype.
    sequence_length: Specifies the length of each sequence in inputs. An
      `int32` or `int64` vector (tensor) size `[batch_size]`, values in
      `[0, time_len)`. Defaults to `time_len` for each element.
    scope: `VariableScope` for the created subgraph; defaults to class name.

  Returns:
    A pair containing:

    - Output: A `3-D` tensor of shape `[time_len, batch_size, output_size]`
      or a list of time_len tensors of shape `[batch_size, output_size]`,
      to match the type of the `inputs`.
    - Final state: a tuple `(cell_state, output)` matching `initial_state`.

  Raises:
    ValueError: in case of shape mismatches
  """
  with vs.variable_scope(scope or type(self).__name__):
    is_list = isinstance(inputs, list)
    if is_list:
      inputs = array_ops.pack(inputs)
    inputs_shape = inputs.get_shape().with_rank(3)
    if not inputs_shape[2]:
      raise ValueError("Expecting inputs_shape[2] to be set: %s" %
                       inputs_shape)
    batch_size = inputs_shape[1].value
    if batch_size is None:
      batch_size = array_ops.shape(inputs)[1]
    time_len = inputs_shape[0].value
    if time_len is None:
      time_len = array_ops.shape(inputs)[0]

    # Provide default values for initial_state and dtype
    if initial_state is None:
      if dtype is None:
        raise ValueError(
            "Either initial_state or dtype needs to be specified")
      z = array_ops.zeros(
          array_ops.pack([batch_size, self.num_units]), dtype=dtype)
      initial_state = z, z
    else:
      if len(initial_state) != 2:
        raise ValueError(
            "Expecting initial_state to be a tuple with length 2 or None")
      if dtype is None:
        dtype = initial_state[0].dtype

    # create the actual cell
    if sequence_length is not None:
      sequence_length = ops.convert_to_tensor(sequence_length)
    initial_cell_state, initial_output = initial_state  # pylint: disable=unpacking-non-sequence
    cell_states, outputs = self._call_cell(inputs, initial_cell_state,
                                           initial_output, dtype,
                                           sequence_length)

    if sequence_length is not None:
      # Mask out the part beyond sequence_length
      mask = array_ops.transpose(
          array_ops.sequence_mask(sequence_length, time_len, dtype=dtype),
          [1, 0])
      mask = array_ops.tile(
          array_ops.expand_dims(mask, [-1]), [1, 1, self.num_units])
      outputs *= mask
      # Prepend initial states to cell_states and outputs for indexing to
      # work correctly, since we want to access the last valid state at
      # sequence_length - 1, which can even be -1, corresponding to the
      # initial state.
      mod_cell_states = array_ops.concat(
          0, [array_ops.expand_dims(initial_cell_state, [0]), cell_states])
      mod_outputs = array_ops.concat(
          0, [array_ops.expand_dims(initial_output, [0]), outputs])
      final_cell_state = self._gather_states(mod_cell_states,
                                             sequence_length, batch_size)
      final_output = self._gather_states(mod_outputs, sequence_length,
                                         batch_size)
    else:
      # No sequence_lengths used: final state is the last state
      final_cell_state = cell_states[-1]
      final_output = outputs[-1]

    if is_list:
      # Input was a list, so return a list
      outputs = array_ops.unpack(outputs)

    return outputs, (final_cell_state, final_output)