def _entropy(self):
  if (not self.distribution.is_continuous
      or not self.bijector.is_constant_jacobian):
    raise NotImplementedError("entropy is not implemented")
  # Suppose Y = g(X) where g is a diffeomorphism and X is a continuous rv. It
  # can be shown that:
  #   H[Y] = H[X] + E_X[(log o abs o det o J o g)(X)].
  # If is_constant_jacobian then:
  #   E_X[(log o abs o det o J o g)(X)] = (log o abs o det o J o g)(c)
  # where c can be anything.
  entropy = self.distribution.entropy()
  if self._is_maybe_event_override:
    # H[X] = sum_i H[X_i] if X_i are mutually independent.
    # This means that a reduce_sum is a simple rescaling.
    entropy *= math_ops.cast(
        math_ops.reduce_prod(self._override_event_shape),
        dtype=entropy.dtype.base_dtype)
  if self._is_maybe_batch_override:
    new_shape = array_ops.concat_v2([
        _ones_like(self._override_batch_shape),
        self.distribution.batch_shape()
    ], 0)
    entropy = array_ops.reshape(entropy, new_shape)
    multiples = array_ops.concat_v2([
        self._override_batch_shape,
        _ones_like(self.distribution.batch_shape())
    ], 0)
    entropy = array_ops.tile(entropy, multiples)
  dummy = 0.
  return entropy - self.bijector.inverse_log_det_jacobian(dummy)
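# A minimal NumPy sketch (not part of the library code) illustrating the
# identity used above, H[g(X)] = H[X] + log|det J_g|, for the affine map
# g(x) = a*x + b applied to a Gaussian.  For X ~ N(mu, sigma^2) the
# differential entropy is 0.5*log(2*pi*e*sigma^2), so scaling by `a` shifts
# the entropy by log|a|; the values of a, b, sigma below are hypothetical.
import numpy as np

def gaussian_entropy(sigma):
  # Differential entropy of N(mu, sigma^2); independent of the mean.
  return 0.5 * np.log(2.0 * np.pi * np.e * sigma**2)

a, b, sigma = 3.0, -1.0, 2.0
h_x = gaussian_entropy(sigma)
h_y = gaussian_entropy(abs(a) * sigma)        # Y = a*X + b is N(b, (a*sigma)^2).
assert np.isclose(h_y, h_x + np.log(abs(a)))  # H[Y] = H[X] + log|det J_g|.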
def _BiasAddGradGrad(op, received_grad):
  """Gradient for the BiasAddGrad op.

  Args:
    op: BiasAddGrad op for which we are calculating gradients.
    received_grad: The gradients passed to the BiasAddGrad op.

  Returns:
    A single gradient Tensor for the input to BiasAddGrad (which
    is the gradient of the bias term in BiasAdd)
  """
  try:
    data_format = op.get_attr("data_format")
  except ValueError:
    data_format = None
  shape = array_ops.shape(op.inputs[0])
  rank = array_ops.rank(op.inputs[0])
  bias_shape = array_ops.shape(received_grad)

  if data_format == b"NCHW":
    expanded_shape = array_ops.concat_v2([
        array_ops.ones_like(shape[:-3]), bias_shape,
        array_ops.ones_like(shape[-2:])
    ], 0)
    tile_mults = array_ops.concat_v2([shape[:-3], [1], shape[-2:]], 0)
  else:
    expanded_shape = array_ops.concat_v2(
        [array_ops.ones_like(shape[:-1]), bias_shape], 0)
    tile_mults = array_ops.concat_v2([shape[:-1], [1]], 0)

  expanded_grad = array_ops.reshape(received_grad, expanded_shape)
  return array_ops.tile(expanded_grad, tile_mults)
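# A minimal NumPy sketch (assuming NHWC layout and a hypothetical input
# shape) of what the reshape + tile above computes: the incoming per-channel
# gradient is expanded to the input's rank and tiled back over the batch and
# spatial dimensions, i.e. broadcast to the shape of the BiasAdd input.
import numpy as np

input_shape = (2, 5, 5, 3)                       # N, H, W, C (hypothetical).
received_grad = np.arange(3, dtype=np.float32)   # One value per channel.

expanded = received_grad.reshape((1, 1, 1, 3))       # ones for all but the bias dim.
tiled = np.tile(expanded, input_shape[:-1] + (1,))   # shape (2, 5, 5, 3).
assert tiled.shape == input_shape
assert np.allclose(tiled[1, 4, 2], received_grad)    # same bias grad everywhere.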
def same_dynamic_shape(a, b):
  """Returns whether a and b have the same dynamic shape.

  Args:
    a: `Tensor`
    b: `Tensor`

  Returns:
    `Boolean` `Tensor` representing if both tensors have the same shape.
  """
  a = ops.convert_to_tensor(a, name="a")
  b = ops.convert_to_tensor(b, name="b")

  # One of the shapes isn't fully defined, so we need to use the dynamic
  # shape.
  return control_flow_ops.cond(
      math_ops.equal(array_ops.rank(a), array_ops.rank(b)),
      # Here we can't just do math_ops.equal(a.shape, b.shape), since
      # static shape inference may break the equality comparison between
      # shape(a) and shape(b) in math_ops.equal.
      lambda: math_ops.reduce_all(
          math_ops.equal(
              array_ops.concat_v2((array_ops.shape(a), array_ops.shape(b)), 0),
              array_ops.concat_v2((array_ops.shape(b), array_ops.shape(a)), 0))),
      lambda: constant_op.constant(False))
def _SparseDenseCwiseMulOrDivGrad(op, grad, is_mul):
  """Common code for SparseDenseCwise{Mul,Div} gradients."""
  x_indices = op.inputs[0]
  x_shape = op.inputs[2]
  y = op.inputs[3]

  y_shape = math_ops.to_int64(array_ops.shape(y))
  num_added_dims = array_ops.expand_dims(
      array_ops.size(x_shape) - array_ops.size(y_shape), 0)
  augmented_y_shape = array_ops.concat_v2(
      [array_ops.ones(num_added_dims, ops.dtypes.int64), y_shape], 0)

  scaling = x_shape // augmented_y_shape
  scaled_indices = x_indices // scaling
  scaled_indices = array_ops.slice(
      scaled_indices, array_ops.concat_v2([[0], num_added_dims], 0), [-1, -1])
  dense_vals = array_ops.gather_nd(y, scaled_indices)

  if is_mul:
    dx = grad * dense_vals
    dy_val = grad * op.inputs[1]
  else:
    dx = grad / dense_vals
    dy_val = grad * (-op.inputs[1] / math_ops.square(dense_vals))
  # indices can repeat after scaling, so we can't use sparse_to_dense().
  dy = sparse_ops.sparse_add(
      array_ops.zeros_like(y),
      sparse_tensor.SparseTensor(scaled_indices, dy_val, y_shape))

  # (sp_indices, sp_vals, sp_shape, dense)
  return (None, dx, None, dy)
def _define_partial_maximization_operation(self, shard_id, shard):
  """Computes the partial statistics of the means and covariances.

  Args:
    shard_id: current shard id.
    shard: current data shard, 1 X num_examples X dimensions.
  """
  # Soft assignment of each data point to each of the `self._num_classes`
  # clusters.
  self._points_in_k[shard_id] = math_ops.reduce_sum(
      self._w[shard_id], 0, keep_dims=True)
  # Partial means.
  w_mul_x = array_ops.expand_dims(
      math_ops.matmul(
          self._w[shard_id], array_ops.squeeze(shard, [0]), transpose_a=True),
      1)
  self._w_mul_x.append(w_mul_x)
  # Partial covariances.
  x = array_ops.concat_v2([shard for _ in range(self._num_classes)], 0)
  x_trans = array_ops.transpose(x, perm=[0, 2, 1])
  x_mul_w = array_ops.concat_v2([
      array_ops.expand_dims(x_trans[k, :, :] * self._w[shard_id][:, k], 0)
      for k in range(self._num_classes)
  ], 0)
  self._w_mul_x2.append(math_ops.matmul(x_mul_w, x))
def eye(num_rows,
        num_columns=None,
        batch_shape=None,
        dtype=dtypes.float32,
        name=None):
  """Construct an identity matrix, or a batch of matrices.

  ```python
  # Construct one identity matrix.
  tf.eye(2)
  ==> [[1., 0.],
       [0., 1.]]

  # Construct a batch of 3 identity matrices, each 2 x 2.
  # batch_identity[i, :, :] is a 2 x 2 identity matrix, i = 0, 1, 2.
  batch_identity = tf.eye(2, batch_shape=[3])

  # Construct one 2 x 3 "identity" matrix
  tf.eye(2, num_columns=3)
  ==> [[ 1.,  0.,  0.],
       [ 0.,  1.,  0.]]
  ```

  Args:
    num_rows: Non-negative `int32` scalar `Tensor` giving the number of rows
      in each batch matrix.
    num_columns: Optional non-negative `int32` scalar `Tensor` giving the
      number of columns in each batch matrix.  Defaults to `num_rows`.
    batch_shape: `int32` `Tensor`.  If provided, returned `Tensor` will have
      leading batch dimensions of this shape.
    dtype: The type of an element in the resulting `Tensor`.
    name: A name for this `Op`.  Defaults to "eye".

  Returns:
    A `Tensor` of shape `batch_shape + [num_rows, num_columns]`.
  """
  with ops.name_scope(
      name, default_name='eye', values=[num_rows, num_columns, batch_shape]):
    batch_shape = [] if batch_shape is None else batch_shape
    batch_shape = ops.convert_to_tensor(
        batch_shape, name='shape', dtype=dtypes.int32)

    if num_columns is None:
      diag_size = num_rows
    else:
      diag_size = math_ops.minimum(num_rows, num_columns)
    diag_shape = array_ops.concat_v2((batch_shape, [diag_size]), 0)
    diag_ones = array_ops.ones(diag_shape, dtype=dtype)

    if num_columns is None:
      return array_ops.matrix_diag(diag_ones)
    else:
      shape = array_ops.concat_v2((batch_shape, [num_rows, num_columns]), 0)
      zero_matrix = array_ops.zeros(shape, dtype=dtype)
      return array_ops.matrix_set_diag(zero_matrix, diag_ones)
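# A minimal NumPy sketch (hypothetical shapes, not the TF implementation) of
# the construction used above for the rectangular case: build a zero matrix
# of shape [num_rows, num_columns] and write ones onto its main diagonal,
# whose length is min(num_rows, num_columns).
import numpy as np

def eye_sketch(num_rows, num_columns=None, batch_shape=()):
  num_columns = num_rows if num_columns is None else num_columns
  diag_size = min(num_rows, num_columns)
  out = np.zeros(tuple(batch_shape) + (num_rows, num_columns), dtype=np.float32)
  idx = np.arange(diag_size)
  out[..., idx, idx] = 1.0  # matrix_set_diag analogue.
  return out

assert np.array_equal(eye_sketch(2, 3), np.eye(2, 3, dtype=np.float32))
assert eye_sketch(2, batch_shape=[3]).shape == (3, 2, 2)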
def _GRUBlockCellGrad(op, *grad):
  r"""Gradient for GRUBlockCell.

  Args:
    op: Op for which the gradient is defined.
    *grad: Gradients of the optimization function wrt output
      for the Op.

  Returns:
    d_x: Gradient wrt x
    d_h_prev: Gradient wrt h_prev
    d_w_ru: Gradient wrt w_ru
    d_w_c: Gradient wrt w_c
    d_b_ru: Gradient wrt b_ru
    d_b_c: Gradient wrt b_c

  Mathematics behind the gradients below:
  ```
  d_c_bar = d_h \circ (1-u) \circ (1-c \circ c)
  d_u_bar = d_h \circ (h-c) \circ u \circ (1-u)

  d_r_bar_u_bar = [d_r_bar d_u_bar]

  [d_x_component_1 d_h_prev_component_1] = d_r_bar_u_bar * w_ru^T

  [d_x_component_2 d_h_prevr] = d_c_bar * w_c^T

  d_x = d_x_component_1 + d_x_component_2

  d_h_prev = d_h_prev_component_1 + d_h_prevr \circ r + d_h \circ u
  ```

  The calculation below is performed in the Python wrapper for the gradients
  (not in the gradient kernel):
  ```
  d_w_ru = x_h_prev^T * d_r_bar_u_bar

  d_w_c = x_h_prevr^T * d_c_bar

  d_b_ru = sum of d_r_bar_u_bar along axis = 0

  d_b_c = sum of d_c_bar along axis = 0
  ```
  """
  x, h_prev, w_ru, w_c, b_ru, b_c = op.inputs
  r, u, c, _ = op.outputs
  _, _, _, d_h = grad

  d_x, d_h_prev, d_c_bar, d_r_bar_u_bar = _gru_ops_so.gru_block_cell_grad(
      x, h_prev, w_ru, w_c, b_ru, b_c, r, u, c, d_h)

  x_h_prev = array_ops.concat_v2([x, h_prev], 1)
  d_w_ru = math_ops.matmul(x_h_prev, d_r_bar_u_bar, transpose_a=True)
  d_b_ru = nn_ops.bias_add_grad(d_r_bar_u_bar)

  x_h_prevr = array_ops.concat_v2([x, h_prev * r], 1)
  d_w_c = math_ops.matmul(x_h_prevr, d_c_bar, transpose_a=True)
  d_b_c = nn_ops.bias_add_grad(d_c_bar)

  return d_x, d_h_prev, d_w_ru, d_w_c, d_b_ru, d_b_c
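# A minimal NumPy sketch (hypothetical helper, not the fused kernel) of the
# forward pass that the gradient formulas above assume: gates r and u are
# computed from [x, h_prev] via w_ru/b_ru, and the candidate c is computed
# from [x, h_prev * r] via w_c/b_c.  The r-then-u packing order is an
# assumption taken from the d_r_bar_u_bar naming above.
import numpy as np

def sigmoid(z):
  return 1.0 / (1.0 + np.exp(-z))

def gru_block_forward(x, h_prev, w_ru, w_c, b_ru, b_c):
  x_h_prev = np.concatenate([x, h_prev], axis=1)
  r_u_bar = x_h_prev @ w_ru + b_ru
  r, u = np.split(sigmoid(r_u_bar), 2, axis=1)   # reset and update gates.
  x_h_prevr = np.concatenate([x, h_prev * r], axis=1)
  c = np.tanh(x_h_prevr @ w_c + b_c)             # candidate state.
  h = u * h_prev + (1.0 - u) * c                 # new hidden state.
  return r, u, c, h

rng = np.random.default_rng(0)
x, h_prev = rng.standard_normal((2, 3)), rng.standard_normal((2, 4))
w_ru, b_ru = rng.standard_normal((7, 8)), np.zeros(8)
w_c, b_c = rng.standard_normal((7, 4)), np.zeros(4)
r, u, c, h = gru_block_forward(x, h_prev, w_ru, w_c, b_ru, b_c)
assert h.shape == (2, 4)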
def testAttentionCellWrapperCorrectResult(self):
  num_units = 4
  attn_length = 6
  batch_size = 2
  expected_output = np.array(
      [[0.955392, 0.408507, -0.60122, 0.270718],
       [0.903681, 0.331165, -0.500238, 0.224052]],
      dtype=np.float32)
  expected_state = np.array(
      [[0.81331915, 0.32036272, 0.28079176, 1.08888793, 0.41264394,
        0.1062041, 0.10444493, 0.32050529, 0.64655536, 0.70794445,
        0.51896095, 0.31809306, 0.58086717, 0.49446869, 0.7641536,
        0.12814975, 0.92231739, 0.89857256, 0.21889746, 0.38442063,
        0.53481543, 0.8876909, 0.45823169, 0.5905602, 0.78038228,
        0.56501579, 0.03971386, 0.09870267, 0.8074435, 0.66821432,
        0.99211812, 0.12295902, 1.01412082, 0.33123279, -0.71114945,
        0.40583119],
       [0.59962207, 0.42597458, -0.22491696, 0.98063421, 0.32548007,
        0.11623692, -0.10100613, 0.27708149, 0.76956916, 0.6360054,
        0.51719815, 0.50458527, 0.73000264, 0.66986895, 0.73576689,
        0.86301267, 0.87887371, 0.35185754, 0.93417215, 0.64732957,
        0.63173044, 0.66627824, 0.53644657, 0.20477486, 0.98458421,
        0.38277245, 0.03746676, 0.92510188, 0.57714164, 0.84932971,
        0.36127412, 0.12125921, 0.99780077, 0.31886846, -0.67595094,
        0.56531656]],
      dtype=np.float32)
  seed = 12345
  random_seed.set_random_seed(seed)
  for state_is_tuple in [False, True]:
    with session.Session() as sess:
      with variable_scope.variable_scope(
          "state_is_tuple", reuse=state_is_tuple):
        lstm_cell = core_rnn_cell_impl.BasicLSTMCell(
            num_units, state_is_tuple=state_is_tuple)
        cell = rnn_cell.AttentionCellWrapper(
            lstm_cell, attn_length, state_is_tuple=state_is_tuple)
        zeros1 = random_ops.random_uniform(
            (batch_size, num_units), 0.0, 1.0, seed=seed + 1)
        zeros2 = random_ops.random_uniform(
            (batch_size, num_units), 0.0, 1.0, seed=seed + 2)
        zeros3 = random_ops.random_uniform(
            (batch_size, num_units), 0.0, 1.0, seed=seed + 3)
        attn_state_zeros = random_ops.random_uniform(
            (batch_size, attn_length * num_units), 0.0, 1.0, seed=seed + 4)
        zero_state = ((zeros1, zeros2), zeros3, attn_state_zeros)
        if not state_is_tuple:
          zero_state = array_ops.concat_v2([
              zero_state[0][0], zero_state[0][1], zero_state[1], zero_state[2]
          ], 1)
        inputs = random_ops.random_uniform(
            (batch_size, num_units), 0.0, 1.0, seed=seed + 5)
        output, state = cell(inputs, zero_state)
        if state_is_tuple:
          state = array_ops.concat_v2(
              [state[0][0], state[0][1], state[1], state[2]], 1)
        sess.run(variables.global_variables_initializer())
        self.assertAllClose(sess.run(output), expected_output)
        self.assertAllClose(sess.run(state), expected_state)
def boston_eval_fn():
  boston = base.load_boston()
  n_examples = len(boston.target)
  features = array_ops.reshape(
      constant_op.constant(boston.data), [n_examples, _BOSTON_INPUT_DIM])
  labels = array_ops.reshape(
      constant_op.constant(boston.target), [n_examples, 1])
  return array_ops.concat_v2([features, features], 0), array_ops.concat_v2(
      [labels, labels], 0)
def _sample_n(self, n, seed):
  batch_shape = self.batch_shape()
  event_shape = self.event_shape()
  batch_ndims = array_ops.shape(batch_shape)[0]

  ndims = batch_ndims + 3  # sample_ndims=1, event_ndims=2
  shape = array_ops.concat_v2(((n,), batch_shape, event_shape), 0)

  # Complexity: O(nbk^2)
  x = random_ops.random_normal(shape=shape,
                               mean=0.,
                               stddev=1.,
                               dtype=self.dtype,
                               seed=seed)

  # Complexity: O(nbk)
  # This parametrization is equivalent to Chi2, i.e.,
  # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
  g = random_ops.random_gamma(shape=(n,),
                              alpha=self._multi_gamma_sequence(
                                  0.5 * self.df, self.dimension),
                              beta=0.5,
                              dtype=self.dtype,
                              seed=distribution_util.gen_new_seed(
                                  seed, "wishart"))

  # Complexity: O(nbk^2)
  x = array_ops.matrix_band_part(x, -1, 0)  # Tri-lower.

  # Complexity: O(nbk)
  x = array_ops.matrix_set_diag(x, math_ops.sqrt(g))

  # Make batch-op ready.
  # Complexity: O(nbk^2)
  perm = array_ops.concat_v2((math_ops.range(1, ndims), (0,)), 0)
  x = array_ops.transpose(x, perm)
  shape = array_ops.concat_v2((batch_shape, (event_shape[0], -1)), 0)
  x = array_ops.reshape(x, shape)

  # Complexity: O(nbM) where M is the complexity of the operator solving a
  # vector system.  E.g., for OperatorPDDiag, each matmul is O(k^2), so
  # this complexity is O(nbk^2).  For OperatorPDCholesky, each matmul is
  # O(k^3) so this step has complexity O(nbk^3).
  x = self.scale_operator_pd.sqrt_matmul(x)

  # Undo make batch-op ready.
  # Complexity: O(nbk^2)
  shape = array_ops.concat_v2((batch_shape, event_shape, (n,)), 0)
  x = array_ops.reshape(x, shape)
  perm = array_ops.concat_v2(((ndims - 1,), math_ops.range(0, ndims - 1)), 0)
  x = array_ops.transpose(x, perm)

  if not self.cholesky_input_output_matrices:
    # Complexity: O(nbk^3)
    x = math_ops.matmul(x, x, adjoint_b=True)

  return x
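# A minimal NumPy sketch (hypothetical standalone sampler, not the class
# method above) of the same Bartlett-decomposition idea: fill a lower
# triangle with standard normals, put sqrt(chi^2_{df-i}) on the diagonal,
# and push the resulting factor through the square root of the scale matrix.
import numpy as np

def wishart_sample_sketch(df, scale, rng=np.random.default_rng(0)):
  k = scale.shape[-1]
  a = np.tril(rng.standard_normal((k, k)), k=-1)   # strict lower triangle.
  chi2 = rng.chisquare(df - np.arange(k))          # df, df-1, ..., df-k+1 dof.
  a[np.arange(k), np.arange(k)] = np.sqrt(chi2)
  sqrt_scale = np.linalg.cholesky(scale)
  factor = sqrt_scale @ a                          # analogue of sqrt_matmul.
  return factor @ factor.T                         # one Wishart(df, scale) draw.

sample = wishart_sample_sketch(df=10.0, scale=np.eye(3))
assert np.allclose(sample, sample.T)               # symmetric positive-definite draw.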
def _propagate(dim_indices, conf, cell, c_prev, m_prev, new_output, new_state,
               first_call):
  """Propagates through all the cells in dim_indices dimensions."""
  if len(dim_indices) == 0:
    return

  # Because of the way RNNCells are implemented, we take the last dimension
  # (H_{N-1}) out and feed it as the state of the RNN cell
  # (in `last_dim_output`).
  # The inputs of the cell (H_0 to H_{N-2}) are concatenated into
  # `cell_inputs`.
  if conf.num_dims > 1:
    ls_cell_inputs = [None] * (conf.num_dims - 1)
    for d in conf.dims[:-1]:
      ls_cell_inputs[d.idx] = new_output[d.idx] if new_output[
          d.idx] is not None else m_prev[d.idx]
    cell_inputs = array_ops.concat_v2(ls_cell_inputs, 1)
  else:
    cell_inputs = array_ops.zeros([m_prev[0].get_shape().as_list()[0], 0],
                                  m_prev[0].dtype)

  last_dim_output = new_output[-1] if new_output[-1] is not None else m_prev[-1]

  for i in dim_indices:
    d = conf.dims[i]
    if d.non_recurrent_fn:
      linear_args = array_ops.concat_v2(
          [cell_inputs, last_dim_output],
          1) if conf.num_dims > 1 else last_dim_output
      with vs.variable_scope('non_recurrent' if conf.tied else
                             'non_recurrent/cell_{}'.format(i)):
        if conf.tied and not (first_call and i == dim_indices[0]):
          vs.get_variable_scope().reuse_variables()
        new_output[d.idx] = layers.legacy_fully_connected(
            linear_args,
            num_output_units=conf.num_units,
            activation_fn=d.non_recurrent_fn,
            weight_init=vs.get_variable_scope().initializer or
            layers.initializers.xavier_initializer)
    else:
      if c_prev[i] is not None:
        cell_state = array_ops.concat_v2([c_prev[i], last_dim_output], 1)
      else:
        # for GRU/RNN, the state is just the previous output
        cell_state = last_dim_output
      with vs.variable_scope('recurrent' if conf.tied else
                             'recurrent/cell_{}'.format(i)):
        if conf.tied and not (first_call and i == dim_indices[0]):
          vs.get_variable_scope().reuse_variables()
        new_output[d.idx], new_state[d.idx] = cell(cell_inputs, cell_state)
def testOpsBetweenCut(self):
  with ops.Graph().as_default() as g:
    t1 = constant(1.0)
    t2 = constant(2.0)
    t3 = array_ops.stack([t1, t2])
    t4 = constant([1.0])
    t5 = array_ops.concat_v2([t4, t3], 0)
    t6 = constant([2.0])
    t7 = array_ops.concat_v2([t5, t6], 0)
    self._assertOpListEqual([t7.op, t5.op, t4.op],
                            _OpsBetween(g, [t7.op], [t4.op]))
def testConcat(self):
  tf_val = array_ops.concat_v2(
      [[16, 37], array_ops.placeholder(dtypes.int32, shape=(2,))], 0)
  c_val = tensor_util.constant_value_as_shape(tf_val)
  self.assertEqual([16, 37, None, None], c_val.as_list())

  tf_val = array_ops.concat_v2(
      [[16, 37], array_ops.placeholder(dtypes.int32, shape=(1,)), [48]], 0)
  c_val = tensor_util.constant_value_as_shape(tf_val)
  self.assertEqual([16, 37, None, 48], c_val.as_list())
def refresh_shortlist():
  """Update the shortlist with the highest scores in id_to_score."""
  new_scores, new_ids = nn_ops.top_k(self.id_to_score, self.shortlist_size)
  smallest_new_score = math_ops.reduce_min(new_scores)
  new_length = math_ops.reduce_sum(
      math_ops.to_int32(math_ops.greater(new_scores, dtypes.float32.min)))
  u1 = self.sl_ids.assign(
      math_ops.to_int64(array_ops.concat_v2([[new_length], new_ids], 0)))
  u2 = self.sl_scores.assign(
      array_ops.concat_v2([[smallest_new_score], new_scores], 0))
  self.last_ops = [u1, u2]
  return control_flow_ops.group(u1, u2)
def _flip_matrix_to_vector_dynamic(mat, batch_shape):
  """Flip matrix to vector with dynamic shapes."""
  mat_rank = array_ops.rank(mat)
  k = array_ops.gather(array_ops.shape(mat), mat_rank - 2)
  final_shape = array_ops.concat_v2((batch_shape, [k]), 0)

  # mat.shape = matrix_batch_shape + [k, M]
  # Permutation corresponding to [M] + matrix_batch_shape + [k]
  perm = array_ops.concat_v2(
      ([mat_rank - 1], math_ops.range(0, mat_rank - 1)), 0)
  mat_with_end_at_beginning = array_ops.transpose(mat, perm=perm)
  vector = array_ops.reshape(mat_with_end_at_beginning, final_shape)
  return vector
def _testGradientsForAxis(
    self, inp_tensors, axis, output_shape, feed_dict=None):
  with self.test_session():
    c = array_ops.concat_v2(inp_tensors, axis)
    grad_inp = np.random.rand(*output_shape).astype("f")
    grad_tensor = constant_op.constant(
        [float(x) for x in grad_inp.flatten()], shape=output_shape)
    grad = gradients_impl.gradients([c], inp_tensors, [grad_tensor])
    concated_grad = array_ops.concat_v2(grad, axis)
    result = concated_grad.eval(feed_dict=feed_dict)
    self.assertAllEqual(result, grad_inp)
def testOpsBetweenCycle(self):
  with ops.Graph().as_default() as g:
    t1 = constant(1.0)
    t2 = constant(2.0)
    t3 = array_ops.pack([t1, t2])
    t4 = array_ops.concat_v2([t3, t3, t3], 0)
    t5 = constant([1.0])
    t6 = array_ops.concat_v2([t4, t5], 0)
    t7 = array_ops.concat_v2([t6, t3], 0)
    self._assertOpListEqual([t6.op, t4.op, t3.op],
                            _OpsBetween(g, [t6.op], [t3.op]))
    self._assertOpListEqual([t7.op, t6.op, t5.op, t4.op, t3.op, t1.op],
                            _OpsBetween(g, [t7.op], [t1.op, t5.op]))
    self._assertOpListEqual([t6.op, t5.op, t4.op, t3.op, t2.op],
                            _OpsBetween(g, [t6.op], [t2.op, t5.op]))
def test_broadcast_apply_and_solve(self):
  # These cannot be done in the automated (base test class) tests since they
  # test shapes that tf.matmul cannot handle.
  # In particular, tf.matmul does not broadcast.
  with self.test_session() as sess:
    x = random_ops.random_normal(shape=(2, 2, 3, 4))

    # This LinearOperatorDiag will be broadcast to (2, 2, 3, 3) during solve
    # and apply with 'x' as the argument.
    diag = random_ops.random_uniform(shape=(2, 1, 3))
    operator = linalg.LinearOperatorDiag(diag, is_self_adjoint=True)
    self.assertAllEqual((2, 1, 3, 3), operator.shape)

    # Create a batch matrix with the broadcast shape of operator.
    diag_broadcast = array_ops.concat_v2((diag, diag), 1)
    mat = array_ops.matrix_diag(diag_broadcast)
    self.assertAllEqual((2, 2, 3, 3), mat.get_shape())  # being pedantic.

    operator_apply = operator.apply(x)
    mat_apply = math_ops.matmul(mat, x)
    self.assertAllEqual(operator_apply.get_shape(), mat_apply.get_shape())
    self.assertAllClose(*sess.run([operator_apply, mat_apply]))

    operator_solve = operator.solve(x)
    mat_solve = linalg_ops.matrix_solve(mat, x)
    self.assertAllEqual(operator_solve.get_shape(), mat_solve.get_shape())
    self.assertAllClose(*sess.run([operator_solve, mat_solve]))
def construct_fn(attention_query, attention_keys, attention_values):
  context = attention_score_fn(attention_query, attention_keys,
                               attention_values)
  concat_input = array_ops.concat_v2([attention_query, context], 1)
  attention = layers.linear(
      concat_input, num_units, biases_initializer=None, scope=scope)
  return attention
def _concat(self):
  """Returns the overall concatenated value as a `Tensor`.

  This is different from using the partitioned variable directly as a tensor
  (through tensor conversion and `as_tensor`) in that it creates a new set of
  operations that keeps the control dependencies from its scope.

  Returns:
    `Tensor` containing the concatenated value.
  """
  if len(self._variable_list) == 1:
    with ops.name_scope(None):
      return array_ops.identity(self._variable_list[0], name=self._name)

  partition_axes = self._partition_axes()

  if len(partition_axes) > 1:
    raise NotImplementedError(
        "Cannot concatenate along more than one dimension: %s.  "
        "Multi-axis partition concat is not supported" % str(partition_axes))
  partition_ix = partition_axes[0]

  with ops.name_scope(self._name + "/ConcatPartitions/"):
    concatenated = array_ops.concat_v2(self._variable_list, partition_ix)

  with ops.name_scope(None):
    return array_ops.identity(concatenated, name=self._name)
def _transpose_batch_time(x):
  """Transpose the batch and time dimensions of a Tensor.

  Retains as much of the static shape information as possible.

  Args:
    x: A tensor of rank 2 or higher.

  Returns:
    x transposed along the first two dimensions.

  Raises:
    ValueError: if `x` is rank 1 or lower.
  """
  x_static_shape = x.get_shape()
  if x_static_shape.ndims is not None and x_static_shape.ndims < 2:
    raise ValueError(
        "Expected input tensor %s to have rank at least 2, but saw shape: %s" %
        (x, x_static_shape))
  x_rank = array_ops.rank(x)
  x_t = array_ops.transpose(
      x, array_ops.concat_v2(([1, 0], math_ops.range(2, x_rank)), axis=0))
  x_t.set_shape(
      tensor_shape.TensorShape(
          [x_static_shape[1].value, x_static_shape[0].value]).concatenate(
              x_static_shape[2:]))
  return x_t
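# A minimal NumPy sketch (hypothetical shapes) of the same batch/time swap:
# only the first two axes are exchanged, every trailing axis is left alone.
import numpy as np

x = np.arange(2 * 3 * 4).reshape(2, 3, 4)   # [batch, time, depth].
perm = [1, 0] + list(range(2, x.ndim))      # analogue of concat(([1, 0], range(2, rank))).
x_t = np.transpose(x, perm)                 # [time, batch, depth].
assert x_t.shape == (3, 2, 4)
assert np.array_equal(x_t[1, 0], x[0, 1])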
def sample(self, sample_shape=(), seed=None, name="sample",
           **condition_kwargs):
  """Generate samples of the specified shape.

  Note that a call to `sample()` without arguments will generate a single
  sample.

  Args:
    sample_shape: 0D or 1D `int32` `Tensor`. Shape of the generated samples.
    seed: Python integer seed for RNG
    name: name to give to the op.
    **condition_kwargs: Named arguments forwarded to subclass implementation.

  Returns:
    samples: a `Tensor` with prepended dimensions `sample_shape`.
  """
  with self._name_scope(name, values=[sample_shape]):
    sample_shape = ops.convert_to_tensor(
        sample_shape, dtype=dtypes.int32, name="sample_shape")
    sample_shape, n = self._expand_sample_shape_to_vector(
        sample_shape, "sample_shape")
    samples = self._sample_n(n, seed, **condition_kwargs)
    batch_event_shape = array_ops.shape(samples)[1:]
    final_shape = array_ops.concat_v2([sample_shape, batch_event_shape], 0)
    samples = array_ops.reshape(samples, final_shape)
    samples = self._set_sample_static_shape(samples, sample_shape)
    return samples
def testPartialShapes(self):
  x = array_ops.placeholder(dtypes.float32)

  # Unknown input shape, partial new shape.
  y = array_ops.reshape(x, [1, 1, -1, 1])
  self.assertEqual([1, 1, None, 1], y.get_shape().as_list())

  # Unknown input shape, unknown new shape.
  y = array_ops.reshape(x, array_ops.placeholder(dtypes.int32))
  self.assertEqual(None, y.get_shape().ndims)

  # Unknown input shape, known rank for new shape.
  y = array_ops.reshape(x, array_ops.placeholder(dtypes.int32, shape=(3,)))
  self.assertEqual([None, None, None], y.get_shape().as_list())

  # Unknown input shape, partial new shape using `tf.stack()`.
  y = array_ops.reshape(x, [array_ops.placeholder(dtypes.int32), 37])
  self.assertEqual([None, 37], y.get_shape().as_list())

  # Unknown input shape, partial new shape using `tf.concat_v2()`.
  y = array_ops.reshape(
      x,
      array_ops.concat_v2(
          [array_ops.placeholder(dtypes.int32, shape=(2,)), [37, 42]], 0))
  self.assertEqual([None, None, 37, 42], y.get_shape().as_list())

  # Unknown input shape, partial new shape using `tf.shape()`.
  y = array_ops.reshape(
      x,
      array_ops.shape(
          array_ops.placeholder(dtypes.float32, shape=[None, 37, None])))
  self.assertEqual([None, 37, None], y.get_shape().as_list())
def _sample_n(self, n, seed=None):
  shape = array_ops.concat_v2(([n], self.batch_shape()), 0)
  samples = random_ops.random_uniform(
      shape=shape, dtype=self.dtype, seed=seed)
  return (array_ops.expand_dims(self.a, 0) +
          array_ops.expand_dims(self.range(), 0) * samples)
def testDynamicAttentionDecoderStateIsTuple(self):
  with self.test_session() as sess:
    with variable_scope.variable_scope(
        "root", initializer=init_ops.constant_initializer(0.5)):
      cell = core_rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=True)
      cell = core_rnn_cell_impl.MultiRNNCell(
          cells=[cell] * 2, state_is_tuple=True)
      inp = constant_op.constant(0.5, shape=[2, 2, 2])
      enc_outputs, enc_state = core_rnn.static_rnn(
          cell, inp, dtype=dtypes.float32)
      attn_states = array_ops.concat_v2([
          array_ops.reshape(e, [-1, 1, cell.output_size])
          for e in enc_outputs
      ], 1)
      dec_inp = [constant_op.constant(0.4, shape=[2, 2])] * 3
      dec, mem = seq2seq_lib.attention_decoder(
          dec_inp, enc_state, attn_states, cell, output_size=4)
      sess.run([variables.global_variables_initializer()])
      res = sess.run(dec)
      self.assertEqual(3, len(res))
      self.assertEqual((2, 4), res[0].shape)

      res = sess.run([mem])
      self.assertEqual(2, len(res[0]))
      self.assertEqual((2, 2), res[0][0].c.shape)
      self.assertEqual((2, 2), res[0][0].h.shape)
      self.assertEqual((2, 2), res[0][1].c.shape)
      self.assertEqual((2, 2), res[0][1].h.shape)
def logits_to_predictions(self, logits, proba=False):
  if proba:
    raise ValueError(
        "logits to probabilities is not supported for _BinarySvmTargetColumn")

  logits = array_ops.concat_v2([array_ops.zeros_like(logits), logits], 1)
  return math_ops.argmax(logits, 1)
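# A minimal NumPy sketch (hypothetical logits) of why the concat-with-zeros
# trick works for a binary SVM: argmax over [0, logit] per row is 1 exactly
# when the logit is positive, i.e. it is a sign threshold at zero.
import numpy as np

logits = np.array([[-0.3], [0.0], [2.1]], dtype=np.float32)
padded = np.concatenate([np.zeros_like(logits), logits], axis=1)
predictions = np.argmax(padded, axis=1)
assert np.array_equal(predictions, (logits[:, 0] > 0).astype(np.int64))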
def testTimeReversedFusedRNN(self):
  with self.test_session() as sess:
    initializer = init_ops.random_uniform_initializer(
        -0.01, 0.01, seed=19890213)
    cell = core_rnn_cell_impl.BasicRNNCell(10)
    batch_size = 5
    input_size = 20
    timelen = 15
    inputs = constant_op.constant(
        np.random.randn(timelen, batch_size, input_size))

    # test bi-directional rnn
    with variable_scope.variable_scope("basic", initializer=initializer):
      unpacked_inputs = array_ops.unstack(inputs)
      outputs, fw_state, bw_state = core_rnn.static_bidirectional_rnn(
          cell, cell, unpacked_inputs, dtype=dtypes.float64)
      packed_outputs = array_ops.stack(outputs)
      basic_vars = [
          v for v in variables.trainable_variables()
          if v.name.startswith("basic/")
      ]
      sess.run([variables.global_variables_initializer()])
      basic_outputs, basic_fw_state, basic_bw_state = sess.run(
          [packed_outputs, fw_state, bw_state])
      basic_grads = sess.run(gradients_impl.gradients(packed_outputs, inputs))
      basic_wgrads = sess.run(
          gradients_impl.gradients(packed_outputs, basic_vars))

    with variable_scope.variable_scope("fused", initializer=initializer):
      fused_cell = fused_rnn_cell.FusedRNNCellAdaptor(cell)
      fused_bw_cell = fused_rnn_cell.TimeReversedFusedRNN(fused_cell)
      fw_outputs, fw_state = fused_cell(
          inputs, dtype=dtypes.float64, scope="fw")
      bw_outputs, bw_state = fused_bw_cell(
          inputs, dtype=dtypes.float64, scope="bw")
      outputs = array_ops.concat_v2([fw_outputs, bw_outputs], 2)
      fused_vars = [
          v for v in variables.trainable_variables()
          if v.name.startswith("fused/")
      ]
      sess.run([variables.global_variables_initializer()])
      fused_outputs, fused_fw_state, fused_bw_state = sess.run(
          [outputs, fw_state, bw_state])
      fused_grads = sess.run(gradients_impl.gradients(outputs, inputs))
      fused_wgrads = sess.run(gradients_impl.gradients(outputs, fused_vars))

    self.assertAllClose(basic_outputs, fused_outputs)
    self.assertAllClose(basic_fw_state, fused_fw_state)
    self.assertAllClose(basic_bw_state, fused_bw_state)
    self.assertAllClose(basic_grads, fused_grads)
    for basic, fused in zip(basic_wgrads, fused_wgrads):
      self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)
def _SplitVGrad(op, *grads):
  returnval = array_ops.concat_v2(list(grads), op.inputs[2])
  # The gradient flows only to the value input; size_splits and the split
  # dimension receive no gradient.
  returnval = [returnval] + [None] * (len(op.inputs) - 1)
  return returnval
def __call__(self, inputs, state, scope=None):
  """LSTM as mentioned in paper."""
  with vs.variable_scope(scope or "basic_lstm_cell"):
    # Parameters of gates are concatenated into one multiply for
    # efficiency.
    if self._state_is_tuple:
      c, h = state
    else:
      c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)
    g = array_ops.concat_v2([inputs, h], 1)
    concat = linear([g], 4 * self._num_units)
    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1)

    new_c = (c * sigmoid(f + self._forget_bias) +
             sigmoid(i) * self._activation(j))
    new_h = self._activation(new_c) * sigmoid(o)

    if self._state_is_tuple:
      new_state = LSTMStateTuple(new_c, new_h)
    else:
      new_state = array_ops.concat_v2([new_c, new_h], 1)
    return new_h, new_state
def _possibly_broadcast_batch_shape(self, x):
  """Return 'x', possibly after broadcasting the leading dimensions."""
  # If we have no batch shape, our batch shape broadcasts with everything!
  if self._batch_shape_arg is None:
    return x

  # Static attempt:
  #   If we determine that no broadcast is necessary, pass x through
  #   If we need a broadcast, add to an array of zeros.
  #
  # special_shape is the shape that, when broadcast with x's shape, will give
  # the correct broadcast_shape.  Note that
  #   We have already verified the second to last dimension of self.shape
  #   matches x's shape in assert_compatible_matrix_dimensions.
  #   Also, the final dimension of 'x' can have any shape.
  # Therefore, the final two dimensions of special_shape are 1's.
  special_shape = self.batch_shape.concatenate([1, 1])
  bshape = array_ops.broadcast_static_shape(x.get_shape(), special_shape)
  if special_shape.is_fully_defined():
    # bshape.is_fully_defined iff special_shape.is_fully_defined.
    if bshape == x.get_shape():
      return x
    # Use the built in broadcasting of addition.
    zeros = array_ops.zeros(shape=special_shape, dtype=self.dtype)
    return x + zeros

  # Dynamic broadcast:
  #   Always add to an array of zeros, rather than using a "cond", since a
  #   cond would require copying data from GPU --> CPU.
  special_shape = array_ops.concat_v2((self.batch_shape_dynamic(), [1, 1]), 0)
  zeros = array_ops.zeros(shape=special_shape, dtype=self.dtype)
  return x + zeros
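# A minimal NumPy sketch (hypothetical shapes) of the "add zeros" broadcast
# used above: adding a zeros tensor whose trailing two dims are 1 leaves the
# values of x unchanged but expands its leading batch dimensions.
import numpy as np

batch_shape = (2, 5)                       # hypothetical operator batch shape.
x = np.random.randn(3, 4)
zeros = np.zeros(batch_shape + (1, 1))     # analogue of the special_shape zeros.
broadcast_x = x + zeros
assert broadcast_x.shape == (2, 5, 3, 4)
assert np.allclose(broadcast_x[1, 3], x)   # values are unchanged, just tiled.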
def tensors_to_item(self, keys_to_tensors):
  indices = keys_to_tensors[self._indices_key]
  values = keys_to_tensors[self._values_key]
  if self._shape_key:
    shape = keys_to_tensors[self._shape_key]
    if isinstance(shape, sparse_tensor.SparseTensor):
      shape = sparse_ops.sparse_tensor_to_dense(shape)
  elif self._shape:
    shape = self._shape
  else:
    shape = indices.dense_shape
  indices_shape = array_ops.shape(indices.indices)
  rank = indices_shape[1]
  ids = math_ops.to_int64(indices.values)
  indices_columns_to_preserve = array_ops.slice(
      indices.indices, [0, 0], array_ops.stack([-1, rank - 1]))
  new_indices = array_ops.concat_v2(
      [indices_columns_to_preserve, array_ops.reshape(ids, [-1, 1])], 1)

  tensor = sparse_tensor.SparseTensor(new_indices, values.values, shape)
  if self._densify:
    tensor = sparse_ops.sparse_tensor_to_dense(tensor, self._default_value)
  return tensor
def make_batch_of_event_sample_matrices(
    self, x, expand_batch_dim=True,
    name="make_batch_of_event_sample_matrices"):
  """Reshapes/transposes `Distribution` `Tensor` from S+B+E to B_+E_+S_.

  Where:
    - `B_ = B if B or not expand_batch_dim else [1]`,
    - `E_ = E if E else [1]`,
    - `S_ = [tf.reduce_prod(S)]`.

  Args:
    x: `Tensor`.
    expand_batch_dim: Python `Boolean` scalar. If `True` the batch dims will
      be expanded such that batch_ndims>=1.
    name: `String`. The name to give this op.

  Returns:
    x: `Tensor`. Input transposed/reshaped to `B_+E_+S_`.
    sample_shape: `Tensor` (1D, `int32`).
  """
  with self._name_scope(name, values=[x]):
    x = ops.convert_to_tensor(x, name="x")
    sample_shape, batch_shape, event_shape = self.get_shape(x)
    event_shape = distribution_util.pick_vector(
        self._event_ndims_is_0, [1], event_shape)
    if expand_batch_dim:
      batch_shape = distribution_util.pick_vector(
          self._batch_ndims_is_0, [1], batch_shape)
    new_shape = array_ops.concat_v2([[-1], batch_shape, event_shape], 0)
    x = array_ops.reshape(x, shape=new_shape)
    x = distribution_util.rotate_transpose(x, shift=-1)
    return x, sample_shape
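# A minimal NumPy sketch (hypothetical S, B, E shapes) of the reshape +
# rotate performed above: all sample dims S are flattened onto the leading
# axis, then rotated to the back, turning an S+B+E tensor into B+E+[prod(S)].
import numpy as np

s, b, e = (2, 3), (4,), (5,)            # sample, batch, event shapes.
x = np.random.randn(*(s + b + e))       # shape (2, 3, 4, 5).
x = x.reshape((-1,) + b + e)            # flatten S: (6, 4, 5).
x = np.moveaxis(x, 0, -1)               # rotate S to the back: (4, 5, 6).
assert x.shape == b + e + (int(np.prod(s)),)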
def _shape_dynamic(self):
  matrix_shape = array_ops.stack((self._num_rows, self._num_rows), axis=0)
  if self._batch_shape_arg is None:
    return matrix_shape
  return array_ops.concat_v2((self._batch_shape_arg, matrix_shape), 0)
def embedding_lookup(params, ids, name='embedding_lookup'):
  """Provides a N dimensional version of tf.embedding_lookup.

  Ids are flattened to a 1d tensor before being passed to embedding_lookup,
  then they are unflattened to match the original ids shape plus an extra
  trailing dimension of the size of the embeddings.

  Args:
    params: List of tensors of size D0 x D1 x ... x Dn-2 x Dn-1.
    ids: N-dimensional tensor of B0 x B1 x .. x Bn-2 x Bn-1.
      Must contain indexes into params.
    name: Optional name for the op.

  Returns:
    A tensor of size B0 x B1 x .. x Bn-2 x Bn-1 x D1 x ... x Dn-2 x Dn-1
    containing the values from the params tensor(s) for indices in ids.

  Raises:
    ValueError: if some parameters are invalid.
  """
  with ops.name_scope(name, 'embedding_lookup', [params, ids]):
    params = ops.convert_to_tensor(params)
    ids = ops.convert_to_tensor(ids)
    shape = array_ops_.shape(ids)
    ids_flat = array_ops_.reshape(
        ids, math_ops.reduce_prod(shape, keep_dims=True))
    embeds_flat = nn.embedding_lookup(params, ids_flat, name)
    embed_shape = array_ops_.concat_v2([shape, [-1]], 0)
    embeds = array_ops_.reshape(embeds_flat, embed_shape)
    embeds.set_shape(ids.get_shape().concatenate(params.get_shape()[1:]))
    return embeds
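# A minimal NumPy sketch (hypothetical params/ids) of the flatten -> lookup
# -> reshape pattern above: gather rows for the flattened ids, then restore
# the original ids shape with the embedding dimension appended.
import numpy as np

params = np.arange(12, dtype=np.float32).reshape(4, 3)   # 4 ids, dim-3 embeddings.
ids = np.array([[0, 2], [3, 3], [1, 0]])                  # 2-D ids.

ids_flat = ids.reshape(-1)
embeds_flat = params[ids_flat]                             # embedding_lookup analogue.
embeds = embeds_flat.reshape(ids.shape + (-1,))            # shape (3, 2, 3).
assert embeds.shape == ids.shape + (params.shape[-1],)
assert np.array_equal(embeds[1, 0], params[3])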
def embedding_lookup_unique(params, ids, name=None):
  """Version of embedding_lookup that avoids duplicate lookups.

  This can save communication in the case of repeated ids.
  Same interface as embedding_lookup, except it supports multi-dimensional
  `ids`, which avoids reshaping the input/output to fit gather.

  Args:
    params: A list of tensors with the same shape and type, or a
      `PartitionedVariable`. Shape `[index, d1, d2, ...]`.
    ids: A `Tensor` with type `int32` or `int64` containing the ids to be
      looked up in `params`. Shape `[ids1, ids2, ...]`.
    name: A name for this operation (optional).

  Returns:
    A `Tensor` with the same type as the tensors in `params` and dimension of
    `[ids1, ids2, d1, d2, ...]`.

  Raises:
    ValueError: If `params` is empty.
  """
  with ops.name_scope(name, "EmbeddingLookupUnique", [params, ids]):
    ids = ops.convert_to_tensor(ids)
    shape = array_ops.shape(ids)
    ids_flat = array_ops.reshape(
        ids, math_ops.reduce_prod(shape, keep_dims=True))
    unique_ids, idx = array_ops.unique(ids_flat)
    unique_embeddings = embedding_ops.embedding_lookup(params, unique_ids)
    embeds_flat = array_ops.gather(unique_embeddings, idx)
    embed_shape = array_ops.concat_v2(
        [shape, array_ops.shape(unique_embeddings)[1:]], 0)
    embeds = array_ops.reshape(embeds_flat, embed_shape)
    embeds.set_shape(ids.get_shape().concatenate(
        unique_embeddings.get_shape()[1:]))
    return embeds
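# A minimal NumPy sketch (hypothetical ids) of the dedup trick above: look
# up each distinct id once, then scatter the results back using the inverse
# index returned by unique.
import numpy as np

params = np.arange(20, dtype=np.float32).reshape(5, 4)
ids = np.array([[3, 1, 3], [1, 1, 0]])                      # many repeats.

unique_ids, idx = np.unique(ids.reshape(-1), return_inverse=True)
unique_embeddings = params[unique_ids]                       # one lookup per distinct id.
embeds = unique_embeddings[idx].reshape(ids.shape + (-1,))
assert np.array_equal(embeds, params[ids])                   # same result as a full lookup.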
def _flip_vector_to_matrix_dynamic(vec, batch_shape):
  """flip_vector_to_matrix with dynamic shapes."""
  # Shapes associated with batch_shape
  batch_rank = array_ops.size(batch_shape)

  # Shapes associated with vec.
  vec = ops.convert_to_tensor(vec, name="vec")
  vec_shape = array_ops.shape(vec)
  vec_rank = array_ops.rank(vec)
  vec_batch_rank = vec_rank - 1

  m = vec_batch_rank - batch_rank
  # vec_shape_left = [M1,...,Mm] or [].
  vec_shape_left = array_ops.strided_slice(vec_shape, [0], [m])
  # If vec_shape_left = [], then condensed_shape = [1] since reduce_prod([]) = 1
  # If vec_shape_left = [M1,...,Mm], condensed_shape = [M1*...*Mm]
  condensed_shape = [math_ops.reduce_prod(vec_shape_left)]
  k = array_ops.gather(vec_shape, vec_rank - 1)
  new_shape = array_ops.concat_v2((batch_shape, [k], condensed_shape), 0)

  def _flip_front_dims_to_back():
    # Permutation corresponding to [N1,...,Nn] + [k, M1,...,Mm]
    perm = array_ops.concat_v2(
        (math_ops.range(m, vec_rank), math_ops.range(0, m)), 0)
    return array_ops.transpose(vec, perm=perm)

  x_flipped = control_flow_ops.cond(
      math_ops.less(0, m),
      _flip_front_dims_to_back,
      lambda: array_ops.expand_dims(vec, -1))

  return array_ops.reshape(x_flipped, new_shape)
def _sample_n(self, n, seed=None):
  new_shape = array_ops.concat_v2(([n], self.batch_shape()), 0)
  uniform = random_ops.random_uniform(
      new_shape, seed=seed, dtype=self.p.dtype)
  sample = math_ops.less(uniform, self.p)
  return math_ops.cast(sample, self.dtype)
def ParseLabelTensorOrDict(labels):
  """Return a tensor to use for input labels to tensor_forest.

  The incoming targets can be a dict where keys are the string names of the
  columns, which we turn into a single 1-D tensor for classification or
  2-D tensor for regression.

  Converts sparse tensors to dense ones.

  Args:
    labels: `Tensor` or `dict` of `Tensor` objects.

  Returns:
    A 2-D tensor for labels/outputs.
  """
  if isinstance(labels, dict):
    return math_ops.to_float(
        array_ops.concat_v2(
            [
                sparse_ops.sparse_tensor_to_dense(labels[k], default_value=-1)
                if isinstance(labels[k], sparse_tensor.SparseTensor)
                else labels[k]
                for k in sorted(labels.keys())
            ],
            1))
  else:
    if isinstance(labels, sparse_tensor.SparseTensor):
      return math_ops.to_float(
          sparse_ops.sparse_tensor_to_dense(labels, default_value=-1))
    else:
      return math_ops.to_float(labels)
def testEmbeddingAttentionDecoder(self):
  with self.test_session() as sess:
    with variable_scope.variable_scope(
        "root", initializer=init_ops.constant_initializer(0.5)):
      inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
      cell = core_rnn_cell_impl.GRUCell(2)
      enc_outputs, enc_state = core_rnn.static_rnn(
          cell, inp, dtype=dtypes.float32)
      attn_states = array_ops.concat_v2([
          array_ops.reshape(e, [-1, 1, cell.output_size])
          for e in enc_outputs
      ], 1)
      dec_inp = [
          constant_op.constant(i, dtypes.int32, shape=[2]) for i in range(3)
      ]
      dec, mem = seq2seq_lib.embedding_attention_decoder(
          dec_inp,
          enc_state,
          attn_states,
          cell,
          num_symbols=4,
          embedding_size=2,
          output_size=3)
      sess.run([variables.global_variables_initializer()])
      res = sess.run(dec)
      self.assertEqual(3, len(res))
      self.assertEqual((2, 3), res[0].shape)

      res = sess.run([mem])
      self.assertEqual((2, 2), res[0].shape)