def _SubGrad(op, grad):
  """Gradient for Sub: `grad` flows to the first input, `-grad` to the second.

  Each gradient is summed over the reduction axes reported by
  `_broadcast_gradient_args` and reshaped back to the shape of its input.

  Args:
    op: The Sub op whose inputs are differentiated.
    grad: Gradient with respect to the output of `op`.

  Returns:
    A pair `(grad_wrt_x, grad_wrt_y)`.
  """
  lhs, rhs = op.inputs[0], op.inputs[1]
  lhs_shape = array_ops.shape(lhs)
  rhs_shape = array_ops.shape(rhs)
  # pylint: disable=protected-access
  lhs_axes, rhs_axes = gen_array_ops._broadcast_gradient_args(
      lhs_shape, rhs_shape)
  # pylint: enable=protected-access
  lhs_grad = array_ops.reshape(math_ops.reduce_sum(grad, lhs_axes), lhs_shape)
  rhs_grad = array_ops.reshape(-math_ops.reduce_sum(grad, rhs_axes), rhs_shape)
  return (lhs_grad, rhs_grad)
def crf_unary_score(tag_indices, sequence_lengths, inputs):
  """Computes the unary scores of tag sequences.

  The potentials are flattened and the score of each (batch, step, tag)
  triple is fetched with a single gather on linearised indices; steps beyond
  the sequence length are masked out before summing.

  Args:
    tag_indices: A [batch_size, max_seq_len] matrix of tag indices.
    sequence_lengths: A [batch_size] vector of true sequence lengths.
    inputs: A [batch_size, max_seq_len, num_tags] tensor of unary potentials.

  Returns:
    unary_scores: A [batch_size] vector of unary scores.
  """
  n_batch = array_ops.shape(inputs)[0]
  n_steps = array_ops.shape(inputs)[1]
  n_tags = array_ops.shape(inputs)[2]

  flat_potentials = array_ops.reshape(inputs, [-1])

  # Flat position of tag 0 for every (batch, step) pair.
  batch_offsets = array_ops.expand_dims(
      math_ops.range(n_batch) * n_steps * n_tags, 1)
  step_offsets = array_ops.expand_dims(math_ops.range(n_steps) * n_tags, 0)
  base_offsets = batch_offsets + step_offsets
  flat_positions = array_ops.reshape(base_offsets + tag_indices, [-1])

  gathered = array_ops.gather(flat_potentials, flat_positions)
  per_step_scores = array_ops.reshape(gathered, [n_batch, n_steps])

  masks = _lengths_to_masks(sequence_lengths,
                            array_ops.shape(tag_indices)[1])
  return math_ops.reduce_sum(per_step_scores * masks, 1)
def embedding_lookup_unique(params, ids, name=None):
  """Version of embedding_lookup that avoids duplicate lookups.

  Only the distinct ids are looked up; the result is then expanded back to
  one embedding per entry of `ids`. This can save communication in the case
  of repeated ids. Same interface as embedding_lookup, except that `ids` may
  have any rank, so callers need not reshape input/output to fit gather.

  Args:
    params: A list of tensors with the same shape and type, or a
      `PartitionedVariable`. Shape `[index, d1, d2, ...]`.
    ids: A `Tensor` with type `int32` or `int64` containing the ids to be
      looked up in `params`. Shape `[ids1, ids2, ...]`.
    name: A name for this operation (optional).

  Returns:
    A `Tensor` with the same type as the tensors in `params` and dimension of
    `[ids1, ids2, d1, d2, ...]`.

  Raises:
    ValueError: If `params` is empty.
  """
  with ops.name_scope(name, "EmbeddingLookupUnique", [params, ids]):
    ids = ops.convert_to_tensor(ids)
    ids_shape = array_ops.shape(ids)
    flat_size = math_ops.reduce_prod(ids_shape, keep_dims=True)
    flat_ids = array_ops.reshape(ids, flat_size)

    # `inverse_idx` maps every flat id to its row in `distinct_embeddings`.
    distinct_ids, inverse_idx = array_ops.unique(flat_ids)
    distinct_embeddings = embedding_ops.embedding_lookup(params, distinct_ids)
    flat_embeddings = array_ops.gather(distinct_embeddings, inverse_idx)

    trailing_dims = array_ops.shape(distinct_embeddings)[1:]
    full_shape = array_ops.concat([ids_shape, trailing_dims], 0)
    result = array_ops.reshape(flat_embeddings, full_shape)

    # Restore the static shape information lost by the dynamic reshape.
    static_shape = ids.get_shape().concatenate(
        distinct_embeddings.get_shape()[1:])
    result.set_shape(static_shape)
    return result
def _sample_n(self, n, seed):
  """Draws `n` samples by filling a lower-triangular factor and scaling it.

  A standard-normal tensor of shape `[n] + batch_shape + event_shape` is cut
  down to its lower triangle, its diagonal is replaced by the square root of
  gamma draws, and the result is multiplied by `self.scale_operator`. Unless
  `self.cholesky_input_output_matrices` is set, the factor `x` is finally
  expanded to `x @ adjoint(x)`.

  Args:
    n: Number of samples; used as the leading dimension of the draw.
    seed: Seed passed to the normal draw and, via
      `distribution_util.gen_new_seed`, to the gamma draw.

  Returns:
    The sampled tensor `x`.
  """
  batch_shape = self.batch_shape_tensor()
  event_shape = self.event_shape_tensor()
  batch_ndims = array_ops.shape(batch_shape)[0]

  ndims = batch_ndims + 3  # sample_ndims=1, event_ndims=2
  shape = array_ops.concat([[n], batch_shape, event_shape], 0)

  # Complexity: O(nbk**2)
  x = random_ops.random_normal(shape=shape,
                               mean=0.,
                               stddev=1.,
                               dtype=self.dtype,
                               seed=seed)

  # Complexity: O(nbk)
  # This parametrization is equivalent to Chi2, i.e.,
  # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
  expanded_df = self.df * array_ops.ones(
      self.scale_operator.batch_shape_tensor(),
      dtype=self.df.dtype.base_dtype)
  g = random_ops.random_gamma(shape=[n],
                              alpha=self._multi_gamma_sequence(
                                  0.5 * expanded_df, self.dimension),
                              beta=0.5,
                              dtype=self.dtype,
                              seed=distribution_util.gen_new_seed(
                                  seed, "wishart"))

  # Complexity: O(nbk**2)
  x = array_ops.matrix_band_part(x, -1, 0)  # Tri-lower.

  # Complexity: O(nbk)
  x = array_ops.matrix_set_diag(x, math_ops.sqrt(g))

  # Make batch-op ready: rotate the sample dimension to the end and fold it
  # into the last axis so a single operator matmul covers all samples.
  # Complexity: O(nbk**2)
  perm = array_ops.concat([math_ops.range(1, ndims), [0]], 0)
  x = array_ops.transpose(x, perm)
  shape = array_ops.concat([batch_shape, [event_shape[0]], [-1]], 0)
  x = array_ops.reshape(x, shape)

  # Complexity: O(nbM) where M is the complexity of the operator solving a
  # vector system. E.g., for LinearOperatorDiag, each matmul is O(k**2), so
  # this complexity is O(nbk**2). For LinearOperatorLowerTriangular,
  # each matmul is O(k^3) so this step has complexity O(nbk^3).
  x = self.scale_operator.matmul(x)

  # Undo make batch-op ready: unfold the last axis and rotate the sample
  # dimension back to the front.
  # Complexity: O(nbk**2)
  shape = array_ops.concat([batch_shape, event_shape, [n]], 0)
  x = array_ops.reshape(x, shape)
  perm = array_ops.concat([[ndims - 1], math_ops.range(0, ndims - 1)], 0)
  x = array_ops.transpose(x, perm)

  if not self.cholesky_input_output_matrices:
    # Complexity: O(nbk^3)
    x = math_ops.matmul(x, x, adjoint_b=True)

  return x
def _full_batch_training_op(self, inputs, cluster_idx_list, cluster_centers):
  """Creates an op for training for full batch case.

  For every input shard the per-cluster sum of rows and per-cluster row count
  are accumulated; the new centers are the total sums divided by the total
  counts (plus a small epsilon that guards against empty clusters).

  Args:
    inputs: list of input Tensors.
    cluster_idx_list: A vector (or list of vectors). Each element in the
      vector corresponds to an input row in 'inp' and specifies the cluster
      id corresponding to the input.
    cluster_centers: Tensor Ref of cluster centers.

  Returns:
    An op that assigns the recomputed centers to `cluster_centers`.
  """
  per_input_sums = []
  per_input_counts = []
  epsilon = constant_op.constant(1e-6, dtype=inputs[0].dtype)

  for inp, cluster_idx in zip(inputs, cluster_idx_list):
    with ops.colocate_with(inp):
      per_input_sums.append(
          math_ops.unsorted_segment_sum(inp, cluster_idx,
                                        self._num_clusters))
      # A column of ones, one per input row, so the segment sum counts rows.
      num_rows = array_ops.reshape(array_ops.shape(inp)[0], [-1])
      unit_column = array_ops.reshape(array_ops.ones(num_rows), [-1, 1])
      per_input_counts.append(
          math_ops.unsorted_segment_sum(unit_column, cluster_idx,
                                        self._num_clusters))

  with ops.colocate_with(cluster_centers):
    total_sums = math_ops.add_n(per_input_sums)
    total_counts = math_ops.cast(
        math_ops.add_n(per_input_counts), per_input_sums[0].dtype)
    updated_centers = total_sums / (total_counts + epsilon)
    if self._clusters_l2_normalized():
      updated_centers = nn_impl.l2_normalize(updated_centers, dim=1)
  return state_ops.assign(cluster_centers, updated_centers)
def mask_activations_and_labels(activations, labels, sequence_lengths):
  """Remove entries outside `sequence_lengths` and returned flattened results.

  Args:
    activations: Output of the RNN, shape `[batch_size, padded_length, k]`.
    labels: Label values, shape `[batch_size, padded_length]`.
    sequence_lengths: A `Tensor` of shape `[batch_size]` with the unpadded
      length of each sequence. If `None`, then each sequence is unpadded.

  Returns:
    activations_masked: `logit` values with those beyond `sequence_lengths`
      removed for each batch. Batches are then concatenated. Shape
      `[tf.sum(sequence_lengths), k]` if `sequence_lengths` is not `None` and
      shape `[batch_size * padded_length, k]` otherwise.
    labels_masked: Label values after removing unneeded entries. Shape
      `[tf.sum(sequence_lengths)]` if `sequence_lengths` is not `None` and
      shape `[batch_size * padded_length]` otherwise.
  """
  with ops.name_scope('mask_activations_and_labels',
                      values=[activations, labels, sequence_lengths]):
    dims = array_ops.shape(labels)
    n_batch = dims[0]
    n_padded = dims[1]

    if sequence_lengths is not None:
      # Keep only the positions that fall inside each sequence.
      keep = array_ops.sequence_mask(sequence_lengths, n_padded)
      return (array_ops.boolean_mask(activations, keep),
              array_ops.boolean_mask(labels, keep))

    # No lengths given: every position is valid, so just flatten.
    n_flat = n_padded * n_batch
    flat_activations = array_ops.reshape(activations, [n_flat, -1])
    flat_labels = array_ops.reshape(labels, [n_flat])
    return flat_activations, flat_labels
def _SumGrad(op, grad):
  """Gradient for Sum.

  Args:
    op: The Sum op; `op.inputs[0]` is the reduced tensor and `op.inputs[1]`
      the reduction axes.
    grad: Gradient with respect to the output of `op`.

  Returns:
    A two-element list: `grad` tiled back to the input's shape, and `None`
    for the reduction-axes input.
  """
  # Fast path for when reducing to a scalar and ndims is known: adds only
  # Reshape and Tile ops (and possibly a Shape).
  static_shape = op.inputs[0]._shape_tuple()  # pylint: disable=protected-access
  if static_shape is not None:
    const_axes = tensor_util.constant_value(op.inputs[1])
    if const_axes is not None:
      ndims = len(static_shape)
      if np.array_equal(const_axes, np.arange(ndims)):  # Reduce all dims.
        grad = array_ops.reshape(grad, [1] * ndims)
        # If shape is not fully defined (but rank is), we use Shape.
        if None in static_shape:
          multiples = array_ops.shape(op.inputs[0])
        else:
          multiples = static_shape
        return [array_ops.tile(grad, multiples), None]

  # General path: reshape grad to the kept-dims output shape and tile it by
  # the ratio between input and output shapes.
  dynamic_shape = array_ops.shape(op.inputs[0])
  # TODO(apassos) remove this once device placement for eager ops makes more
  # sense.
  with ops.colocate_with(dynamic_shape):
    kept_dims_shape = math_ops.reduced_shape(dynamic_shape, op.inputs[1])
    multiples = _safe_shape_div(dynamic_shape, kept_dims_shape)
  grad = array_ops.reshape(grad, kept_dims_shape)
  return [array_ops.tile(grad, multiples), None]
def _weighted_loss(loss, weight):
  """Returns the element-wise product of flattened `loss` and `weight`.

  Both arguments are reshaped to vectors before being multiplied.
  """
  flat_loss = array_ops.reshape(loss, shape=(-1,))
  flat_weight = array_ops.reshape(weight, shape=(-1,))
  return math_ops.mul(flat_loss, flat_weight)
def __call__(self, inputs, state, scope=None):
  """Long short-term memory cell with attention (LSTMA).

  Args:
    inputs: Input tensor for this step; its second static dimension is used
      as the projection size when `self._input_size` is `None`.
    state: Either a `(cell_state, attns, attn_states)` tuple (when
      `self._state_is_tuple`) or a single tensor holding the three parts
      concatenated along axis 1.
    scope: Optional variable scope; defaults to the class name.

  Returns:
    An `(output, new_state)` pair, where `new_state` has the same layout
    (tuple or concatenated) as the incoming `state`.
  """
  with vs.variable_scope(scope or type(self).__name__):
    if self._state_is_tuple:
      state, attns, attn_states = state
    else:
      # Split the concatenated state into its three consecutive slices.
      states = state
      state = array_ops.slice(states, [0, 0], [-1, self._cell.state_size])
      attns = array_ops.slice(
          states, [0, self._cell.state_size], [-1, self._attn_size])
      attn_states = array_ops.slice(
          states, [0, self._cell.state_size + self._attn_size],
          [-1, self._attn_size * self._attn_length])
    attn_states = array_ops.reshape(attn_states,
                                    [-1, self._attn_length, self._attn_size])
    input_size = self._input_size
    if input_size is None:
      input_size = inputs.get_shape().as_list()[1]
    # Mix the previous attention vector into the cell input.
    inputs = _linear([inputs, attns], input_size, True)
    lstm_output, new_state = self._cell(inputs, state)
    if self._state_is_tuple:
      new_state_cat = array_ops.concat(1, _unpacked_state(new_state))
    else:
      new_state_cat = new_state
    new_attns, new_attn_states = self._attention(new_state_cat, attn_states)
    with vs.variable_scope("AttnOutputProjection"):
      output = _linear([lstm_output, new_attns], self._attn_size, True)
    # Append this step's output to the attention states and flatten them.
    new_attn_states = array_ops.concat(1, [new_attn_states,
                                           array_ops.expand_dims(output, 1)])
    new_attn_states = array_ops.reshape(
        new_attn_states, [-1, self._attn_length * self._attn_size])
    new_state = (new_state, new_attns, new_attn_states)
    if not self._state_is_tuple:
      new_state = array_ops.concat(1, list(new_state))
    return output, new_state
def testBiasVec(self):
  """bias_add must raise ValueError when the bias has shape [1, 2]."""
  with self.assertRaises(ValueError):
    value = array_ops.reshape([1, 2], shape=[1, 2])
    bias = array_ops.reshape([1, 2], shape=[1, 2])
    nn_ops.bias_add(value, bias)
def _strict_conv1d(x, h):
  """Return x * h for rank 1 tensors x and h.

  The 1-D convolution is expressed as a 'SAME'-padded conv2d over reshaped
  4-D views of the operands, and the result is flattened back to a vector.
  """
  with ops.op_scope([x, h], 'strict_conv1d'):
    signal = array_ops.reshape(x, (1, -1, 1, 1))
    kernel = array_ops.reshape(h, (-1, 1, 1, 1))
    convolved = nn_ops.conv2d(signal, kernel, [1, 1, 1, 1], 'SAME')
    return array_ops.reshape(convolved, [-1])
def _inplace_helper(x, i, v, op):
  """Applies an inplace op on (x, i, v).

  op is one of gen_array_ops.alias_inplace_update,
  gen_array_ops.alias_inplace_add, or gen_array_ops.alias_inplace_sub.

  If i is None, x and v must be the same shape. Computes
    x op v;
  If i is a scalar, x has a rank 1 higher than v's. Computes
    x[i, :] op v;
  Otherwise, x and v must have the same rank. Computes
    x[i, :] op v;

  Args:
    x: A Tensor.
    i: None, a scalar or a vector.
    v: A Tensor.
    op: alias_inplace_update, alias_inplace_add, or alias_inplace_sub.

  Returns:
    Returns x.
  """
  x = ops.convert_to_tensor(x)
  v = ops.convert_to_tensor(v, x.dtype)

  if i is None:
    # Full tensor: treat both operands as a single row and update row 0.
    row_x = array_ops.reshape(x, [1, -1])
    row_v = array_ops.reshape(v, [1, -1])
    updated = op(row_x, [0], row_v)
    return array_ops.reshape(updated, array_ops.shape(x))

  i = math_ops.cast(i, dtypes.int32)
  if i.get_shape().ndims != 0:
    return op(x, i, v)

  # Single 0-dim update: promote the index and the value by one dimension.
  return op(x, array_ops.reshape(i, [1]), array_ops.expand_dims(v, 0))
def _expand_sample_shape_to_vector(self, x, name):
  """Helper to `sample` which ensures input is 1D.

  Args:
    x: Scalar or vector tensor.
    name: Name given to the tensor created when `x` is a statically known
      scalar.

  Returns:
    A pair `(x, prod)`: `x` as a vector (a scalar is expanded to length 1)
    and the product of the entries of the original `x`.

  Raises:
    ValueError: if the static rank of `x` is known and is neither 0 nor 1.
  """
  static_value = tensor_util.constant_value(x)
  if static_value is not None:
    prod = np.prod(static_value, dtype=x.dtype.as_numpy_dtype())
  else:
    prod = math_ops.reduce_prod(x)

  static_rank = x.get_shape().ndims  # != sample_ndims
  if static_rank is None:
    # Maybe expand_dims: the choice has to be made at graph run time.
    dynamic_rank = array_ops.rank(x)
    target_shape = util.pick_vector(
        math_ops.equal(dynamic_rank, 0),
        np.array([1], dtype=np.int32),
        array_ops.shape(x))
    return array_ops.reshape(x, target_shape), prod

  if static_rank == 0:
    # Definitely expand_dims.
    if static_value is None:
      return array_ops.reshape(x, [1]), prod
    as_vector = ops.convert_to_tensor(
        np.array([static_value], dtype=x.dtype.as_numpy_dtype()),
        name=name)
    return as_vector, prod

  if static_rank != 1:
    raise ValueError("Input is neither scalar nor vector.")
  return x, prod
def testInputDims(self):
  """pad must raise ValueError for a [1, 2, 1, 1, 1, 1] input with [1, 2] paddings."""
  with self.test_session(use_gpu=True), self.assertRaises(ValueError):
    tensor = array_ops.reshape([1, 2], shape=[1, 2, 1, 1, 1, 1])
    paddings = array_ops.reshape([1, 2], shape=[1, 2])
    array_ops.pad(tensor, paddings)
def testPaddingsDim4(self):
  """pad must raise ValueError for a [1, 2] input with [3, 2] paddings."""
  with self.test_session(use_gpu=True), self.assertRaises(ValueError):
    tensor = array_ops.reshape([1, 2], shape=[1, 2])
    paddings = array_ops.reshape([1, 2, 3, 4, 5, 6], shape=[3, 2])
    array_ops.pad(tensor, paddings)
def testListValuedElementwiseOp(self, inputs, op=math_ops.add_n,
                                **extra_args):
  """Checks `op` on a list of (possibly ragged) inputs against dense values.

  The op is applied to the converted inputs and, separately, to their flat
  values; the result must have the shape of the first input and the same
  flattened values.
  """
  use_kwargs = extra_args.pop('use_kwargs', False)
  inputs = [
      ragged_tensor.convert_to_tensor_or_ragged_tensor(item)
      for item in inputs
  ]
  if use_kwargs:
    result = op(inputs=inputs, **extra_args)
  else:
    result = op(inputs, **extra_args)

  def _flat(value):
    """Returns the flat values of a RaggedTensor, or `value` itself."""
    if isinstance(value, ragged_tensor.RaggedTensor):
      return value.flat_values
    return value

  # Run the wrapped op on the dense values, for comparison.
  dense_inputs = [_flat(item) for item in inputs]
  expected_flat_values = array_ops.reshape(
      op(dense_inputs, **extra_args), [-1])

  # Check that the result has the expected shape.
  self.assertSameShape(inputs[0], result)

  # Check that the result has the expected (flattened) values.
  result_flat_values = array_ops.reshape(_flat(result), [-1])
  self.assertAllEqual(expected_flat_values, result_flat_values)
def reduce_to_final(images, num_filters_out, nhidden=None, scope=None):
  """Reduce an image to a final state by running two LSTMs.

  The first LSTM runs along the height of the image, the second along the
  width of the resulting hidden states.

  Args:
    images: (num_images, height, width, depth) tensor
    num_filters_out: output layer depth
    nhidden: hidden layer depth (defaults to num_filters_out)
    scope: optional scope name

  Returns:
    A (num_images, num_filters_out) batch.
  """
  with variable_scope.variable_scope(scope, "ReduceToFinal", [images]):
    nhidden = nhidden or num_filters_out
    n_images, n_rows, n_cols, n_channels = _shape(images)

    # Put height first and fold width into the batch dimension.
    height_major = array_ops.transpose(images, [1, 0, 2, 3])
    column_sequences = array_ops.reshape(
        height_major, [n_rows, n_images * n_cols, n_channels])

    with variable_scope.variable_scope("reduce1"):
      column_states = lstm1d.sequence_to_final(column_sequences, nhidden)
      per_image_states = array_ops.reshape(column_states,
                                           [n_images, n_cols, nhidden])
      width_major = array_ops.transpose(per_image_states, [1, 0, 2])

    with variable_scope.variable_scope("reduce2"):
      return lstm1d.sequence_to_final(width_major, num_filters_out)
def column_to_tensors(tensors_template, colvec):
  """Converts a column vector back to the shape of the given template.

  Args:
    tensors_template: A tensor or list of tensors.
    colvec: A 2d column vector with the same shape as the value of
      tensors_to_column(tensors_template).

  Returns:
    X, where X is tensor or list of tensors with the properties:
     1) tensors_to_column(X) = colvec
     2) X (or its elements) have the same shape as tensors_template (or its
        elements)
  """
  if not isinstance(tensors_template, (tuple, list)):
    return array_ops.reshape(colvec, tensors_template.shape)

  # Carve consecutive slices out of the column, one per template tensor.
  pieces = []
  start = 0
  for template in tensors_template:
    size = np.prod(template.shape.as_list(), dtype=np.int32)
    stop = start + size
    pieces.append(array_ops.reshape(colvec[start:stop], template.shape))
    start = stop
  return tuple(pieces)
def testBinaryElementwiseOp(self, x, y, op=math_ops.add, **extra_args):
  """Checks a binary `op` on (possibly ragged) inputs against dense values.

  `use_kwargs` (popped from `extra_args`) selects which of `x` and `y` are
  passed by keyword. The result must have the shape of `y` and the same
  flattened values as `op` applied to the flat values of the inputs.
  """
  use_kwargs = extra_args.pop('use_kwargs', ())
  x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x)
  y = ragged_tensor.convert_to_tensor_or_ragged_tensor(y)

  y_by_keyword = 'y' in use_kwargs
  if 'x' in use_kwargs and y_by_keyword:
    result = op(x=x, y=y, **extra_args)
  elif y_by_keyword:
    result = op(x, y=y, **extra_args)
  else:
    result = op(x, y, **extra_args)

  def _flat(value):
    """Returns the flat values of a RaggedTensor, or `value` itself."""
    if isinstance(value, ragged_tensor.RaggedTensor):
      return value.flat_values
    return value

  # Run the wrapped op on the dense values, for comparison.
  expected_flat_values = array_ops.reshape(
      op(_flat(x), _flat(y), **extra_args), [-1])

  # Check that the result has the expected shape.
  self.assertSameShape(y, result)

  # Check that the result has the expected (flattened) values.
  result_flat_values = array_ops.reshape(_flat(result), [-1])
  self.assertAllEqual(expected_flat_values, result_flat_values)
def _TopKGrad(op, grad, _):
  """Return the gradients for TopK.

  Args:
    op: The TopKOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the TopKOp.

  Returns:
    A list of two tensors, the first being the gradient w.r.t to the input and
    TopK, and the second being the gradient w.r.t. to the indices (all zero).
  """
  input_shape = array_ops.shape(op.inputs[0])
  indices_shape = array_ops.shape(op.outputs[1])

  # Flatten indices to 2D.
  indices_last_dim = array_ops.gather(indices_shape,
                                      array_ops.size(indices_shape) - 1)
  indices_2d = array_ops.reshape(op.outputs[1],
                                 array_ops.stack([-1, indices_last_dim]))

  input_last_dim = array_ops.gather(input_shape,
                                    array_ops.size(input_shape) - 1)
  num_rows = array_ops.shape(indices_2d)[0]

  # Compute linear indices (flattened to 1D): offset every row of indices by
  # the flat position at which that row starts in the input.
  row_starts = array_ops.expand_dims(
      math_ops.range(0, num_rows * input_last_dim, input_last_dim), -1)
  flat_indices = array_ops.reshape(indices_2d + row_starts, [-1])

  # Substitute grad to appropriate locations and fill the rest with zeros,
  # finally reshaping it to the original input shape.
  flat_size = array_ops.reshape(math_ops.reduce_prod(input_shape), [1])
  flat_grad = array_ops.reshape(grad, [-1])
  dense_flat = sparse_ops.sparse_to_dense(
      flat_indices, flat_size, flat_grad, validate_indices=False)
  input_grad = array_ops.reshape(dense_flat, input_shape)
  indices_grad = array_ops.zeros([], dtype=dtypes.int32)
  return [input_grad, indices_grad]
def kronecker_product(mat1, mat2):
  """Computes the Kronecker product two matrices.

  Both operands are given singleton axes so that a broadcast multiply yields
  every pairwise product, which is then reshaped to
  `[rows1 * rows2, cols1 * cols2]`. Static shapes are read via `get_shape()`.
  """
  rows1, cols1 = mat1.get_shape().as_list()
  left = array_ops.reshape(mat1, [rows1, 1, cols1, 1])
  rows2, cols2 = mat2.get_shape().as_list()
  right = array_ops.reshape(mat2, [1, rows2, 1, cols2])
  pairwise = left * right
  return array_ops.reshape(pairwise, [rows1 * rows2, cols1 * cols2])
def to_weighted_sum(self,
                    input_tensor,
                    num_outputs=1,
                    weight_collections=None,
                    trainable=True):
  """Returns a Tensor as linear predictions and a list of created Variable.

  The bucketized `input_tensor` is turned into a SparseTensor of bucket ids
  (one entry per example and source dimension) and fed to an embedding
  lookup with a "sum" combiner.

  Args:
    input_tensor: Bucket indices; the first dimension is the batch.
    num_outputs: Number of outputs of the linear model.
    weight_collections: Collections the created weights are added to.
    trainable: Whether the created weights are trainable.

  Returns:
    The result of `_create_embedding_lookup` for the bucket ids.
  """
  dimension = self.source_column.dimension
  batch_size = array_ops.shape(input_tensor)[0]

  if dimension > 1:
    # i1 repeats each row id `dimension` times; i2 cycles over the columns.
    i1 = array_ops.reshape(array_ops.tile(array_ops.expand_dims(
        math_ops.range(0, batch_size), 1), [1, dimension]), [-1])
    i2 = array_ops.tile(math_ops.range(0, dimension), [batch_size])
    # Flatten the bucket indices and unique them across dimensions
    # E.g. 2nd dimension indices will range from k to 2*k-1 with k buckets
    # TODO(chapelle): move that logic to insert_transformed_feature to ensure
    # unique buckets across dimensions after crossing.
    bucket_indices = array_ops.reshape(input_tensor, [-1]) + self.length * i2
  else:
    # Simpler indices when dimension=1
    i1 = math_ops.range(0, batch_size)
    i2 = array_ops.zeros([batch_size], dtype=dtypes.int32)
    bucket_indices = array_ops.reshape(input_tensor, [-1])

  indices = math_ops.to_int64(array_ops.transpose(array_ops.pack((i1, i2))))
  # The column ids in `i2` span [0, dimension), so the dense shape must have
  # `dimension` columns; a single column leaves the indices out of bounds
  # whenever dimension > 1.
  shape = math_ops.to_int64(array_ops.pack([batch_size, dimension]))
  sparse_id_values = ops.SparseTensor(indices, bucket_indices, shape)
  vocab_size = self.length * self.source_column.dimension

  return _create_embedding_lookup(
      sparse_id_values, vocab_size, num_outputs,
      _add_variable_collection(weight_collections), 0., "sum", trainable,
      self.name + "_weights")
def call(self, inputs):
  """Applies the masked convolution, then the optional bias and activation.

  Args:
    inputs: Input tensor laid out according to `self.data_format`.

  Returns:
    The convolution output, with `self.bias` added when it is not `None` and
    `self.activation` applied when it is not `None`.
  """
  outputs = nn.convolution(
      input=inputs,
      filter=self.masked_kernel,
      dilation_rate=self.dilation_rate,
      strides=self.strides,
      padding=self.padding.upper(),
      data_format=utils.convert_data_format(self.data_format, self.rank + 2))

  if self.bias is not None:
    if self.data_format == 'channels_first':
      if self.rank == 1:
        # nn.bias_add does not accept a 1D input tensor.
        bias = array_ops.reshape(self.bias, (1, self.filters, 1))
        outputs += bias
      elif self.rank == 2:
        outputs = nn.bias_add(outputs, self.bias, data_format='NCHW')
      elif self.rank == 3:
        # As of Mar 2017, direct addition is significantly slower than
        # bias_add when computing gradients. To use bias_add, we collapse Z
        # and Y into a single dimension to obtain a 4D input tensor.
        outputs_shape = outputs.shape.as_list()
        # The batch dimension is frequently unknown at graph construction
        # time; reshape cannot take `None`, so let it infer that dimension.
        if outputs_shape[0] is None:
          outputs_shape[0] = -1
        outputs_4d = array_ops.reshape(outputs, [
            outputs_shape[0], outputs_shape[1],
            outputs_shape[2] * outputs_shape[3], outputs_shape[4]
        ])
        outputs_4d = nn.bias_add(outputs_4d, self.bias, data_format='NCHW')
        outputs = array_ops.reshape(outputs_4d, outputs_shape)
    else:
      outputs = nn.bias_add(outputs, self.bias, data_format='NHWC')

  if self.activation is not None:
    return self.activation(outputs)
  return outputs
def testSubsampleThreeByThree(self):
  """Subsampling a 3x3 ramp by a factor of 2 keeps entries 0, 2, 6 and 8."""
  ramp = math_ops.to_float(math_ops.range(9))
  image = array_ops.reshape(ramp, [1, 3, 3, 1])
  subsampled = resnet_utils.subsample(image, 2)
  expected = array_ops.reshape(
      constant_op.constant([0, 2, 6, 8]), [1, 2, 2, 1])
  with self.test_session():
    self.assertAllClose(subsampled.eval(), expected.eval())
def testSubsampleFourByFour(self):
  """Subsampling a 4x4 ramp by a factor of 2 keeps entries 0, 2, 8 and 10."""
  ramp = math_ops.to_float(math_ops.range(16))
  image = array_ops.reshape(ramp, [1, 4, 4, 1])
  subsampled = resnet_utils.subsample(image, 2)
  expected = array_ops.reshape(
      constant_op.constant([0, 2, 8, 10]), [1, 2, 2, 1])
  with self.test_session():
    self.assertAllClose(subsampled.eval(), expected.eval())
def test_discriminator_patch(self):
  """The discriminator loss accepts [2, 2]-shaped (patch) outputs."""
  real_patch = array_ops.reshape(self._discriminator_real_outputs, [2, 2])
  gen_patch = array_ops.reshape(self._discriminator_gen_outputs, [2, 2])
  loss = self._d_loss_fn(real_patch, gen_patch)
  self.assertEqual(self._discriminator_gen_outputs.dtype, loss.dtype)
  with self.test_session():
    self.assertAlmostEqual(self._expected_d_loss, loss.eval(), 5)
def _apply_sparse(self, grad, var):
  """Applies a sparse gradient to `var` and its "m" slot.

  Duplicate indices in `grad` are merged by summing their values; only the
  rows of `m` and `var` addressed by the distinct indices are touched.

  Args:
    grad: Sparse gradient exposing `indices` and `values`.
    var: Variable to update.

  Returns:
    An op grouping the update of `var` and the update of the "m" slot.
  """
  if len(grad.indices.get_shape()) == 1:
    flat_indices = grad.indices
    flat_values = grad.values
  else:
    # Flatten multi-dimensional indices, keeping the last axis of the values.
    flat_indices = array_ops.reshape(grad.indices, [-1])
    flat_values = array_ops.reshape(
        grad.values, [-1, grad.values.get_shape()[-1].value])

  unique_rows, segment_ids = array_ops.unique(flat_indices)
  num_unique = array_ops.size(unique_rows)
  summed_grad = math_ops.unsorted_segment_sum(flat_values, segment_ids,
                                              num_unique)

  # m_t = mu * m + (1 - mu) * g_t, restricted to the touched rows.
  m = self.get_slot(var, "m")
  scaled_grad = summed_grad * (1 - self._mu_t)
  decayed_rows = array_ops.gather(m, unique_rows) * self._mu_t
  m_t = state_ops.scatter_update(m, unique_rows, decayed_rows,
                                 use_locking=self._use_locking)
  m_t = state_ops.scatter_add(m_t, unique_rows, scaled_grad,
                              use_locking=self._use_locking)
  m_t_rows = (array_ops.gather(m_t, unique_rows) /
              (1 - self._mu2_t * self._mu_power))

  # m_bar combines the rescaled momentum rows with the rescaled gradient.
  m_bar = self._mu2_t * m_t_rows + scaled_grad / (1 - self._mu_power)
  var_update = state_ops.scatter_sub(var, unique_rows, self._lr_t * m_bar,
                                     use_locking=self._use_locking)
  return control_flow_ops.group(var_update, m_t)
def _fn(x):
  """MADE parameterized via `masked_autoregressive_default_template`.

  Returns:
    `(shift, None)` when `shift_only` is set, otherwise
    `(shift, log_scale)` with `log_scale` clipped to
    `[log_scale_min_clip, log_scale_max_clip]`.

  Raises:
    NotImplementedError: if the rightmost dimension of `x` is not statically
      known.
  """
  # TODO(b/67594795): Better support of dynamic shape.
  depth = x.shape.with_rank_at_least(1)[-1].value
  if depth is None:
    raise NotImplementedError(
        "Rightmost dimension must be known prior to graph execution.")

  if x.shape.is_fully_defined():
    original_shape = np.int32(x.shape.as_list())
  else:
    original_shape = array_ops.shape(x)

  # Only the first hidden layer uses an exclusive mask.
  for layer_index, layer_units in enumerate(hidden_layers):
    x = masked_dense(
        inputs=x,
        units=layer_units,
        num_blocks=depth,
        exclusive=(layer_index == 0),
        activation=activation,
        *args,
        **kwargs)

  params_per_input = 1 if shift_only else 2
  x = masked_dense(
      inputs=x,
      units=params_per_input * depth,
      num_blocks=depth,
      activation=None,
      *args,
      **kwargs)

  if shift_only:
    return array_ops.reshape(x, shape=original_shape), None

  # The last axis of size 2 holds (shift, log_scale) for every input entry.
  paired_shape = array_ops.concat([original_shape, [2]], axis=0)
  x = array_ops.reshape(x, shape=paired_shape)
  shift, log_scale = array_ops.unstack(x, num=2, axis=-1)

  if log_scale_clip_gradient:
    clip_fn = math_ops.clip_by_value
  else:
    clip_fn = _clip_by_value_preserve_grad
  log_scale = clip_fn(log_scale, log_scale_min_clip, log_scale_max_clip)
  return shift, log_scale
def _TileGrad(op, grad):
  """Sum reduces grad along the tiled dimensions.

  Args:
    op: The Tile op; `op.inputs[0]` is the tiled tensor and `op.inputs[1]`
      the multiples.
    grad: Gradient with respect to the output of `op`; may be an
      `IndexedSlices`.

  Returns:
    A two-element list: the gradient for the tiled input, and `None` for the
    multiples.
  """
  input_shape = array_ops.shape(op.inputs[0])
  # We interleave multiples and input_shape to get split_shape,
  # reshape grad to split_shape, and reduce along all even
  # dimensions (the tiled dimensions) to get the result
  # with shape input_shape.  For example
  #   input_shape = [20, 30, 40]
  #   multiples = [2, 3, 4]
  #   split_shape = [2, 20, 3, 30, 4, 40]
  #   axes = [0, 2, 4]
  interleaved = array_ops.transpose(
      array_ops.stack([op.inputs[1], input_shape]))
  split_shape = array_ops.reshape(interleaved, [-1])
  tiled_axes = math_ops.range(0, array_ops.size(split_shape), 2)

  # Sum reduces grad along the first dimension for IndexedSlices
  if isinstance(grad, ops.IndexedSlices):
    wrapped_rows = math_ops.mod(grad.indices, input_shape[0])
    grad = math_ops.unsorted_segment_sum(grad.values, wrapped_rows,
                                         input_shape[0])
    # The first dimension has already been reduced above.
    split_shape = array_ops.concat([[1], split_shape[1:]], axis=0)

  reshaped_grad = array_ops.reshape(grad, split_shape)
  input_grad = math_ops.reduce_sum(reshaped_grad, tiled_axes)

  # Fix shape inference
  if not context.executing_eagerly():
    input_grad.set_shape(op.inputs[0].get_shape())
  return [input_grad, None]
def embedding_lookup(params, ids, name='embedding_lookup'):
  """Provides a N dimensional version of tf.embedding_lookup.

  Ids are flattened to a 1d tensor before being passed to embedding_lookup
  then, they are unflattened to match the original ids shape plus the extra
  trailing dimensions of the embeddings.

  Args:
    params: List of tensors of size D0 x D1 x ... x Dn-2 x Dn-1.
    ids: N-dimensional tensor of B0 x B1 x .. x Bn-2 x Bn-1.
      Must contain indexes into params.
    name: Optional name for the op.

  Returns:
    A tensor of size B0 x B1 x .. x Bn-2 x Bn-1 x D1 x ... x Dn-2 x Dn-1
    containing the values from the params tensor(s) for indecies in ids.

  Raises:
    ValueError: if some parameters are invalid.
  """
  with ops.name_scope(name, 'embedding_lookup', [params, ids]):
    params = ops.convert_to_tensor(params)
    ids = ops.convert_to_tensor(ids)
    shape = array_ops_.shape(ids)
    ids_flat = array_ops_.reshape(
        ids, math_ops.reduce_prod(shape, keep_dims=True))
    # `name` must go by keyword: the third positional parameter of
    # nn.embedding_lookup is `partition_strategy`, not `name`.
    embeds_flat = nn.embedding_lookup(params, ids_flat, name=name)
    # Append every trailing embedding dimension (D1 ... Dn-1) rather than a
    # single inferred one, so the dynamic shape agrees with the static shape
    # set below for params of any rank.
    embed_shape = array_ops_.concat_v2(
        [shape, array_ops_.shape(embeds_flat)[1:]], 0)
    embeds = array_ops_.reshape(embeds_flat, embed_shape)
    embeds.set_shape(ids.get_shape().concatenate(params.get_shape()[1:]))
    return embeds
def loss_fn():
  """Returns the squared difference between the scalar `layer(x)` and 1."""
  residual = array_ops.reshape(layer(x), []) - constant_op.constant(1.)
  return residual * residual
def _parse_example_raw(serialized,
                       names=None,
                       sparse_keys=None,
                       sparse_types=None,
                       dense_keys=None,
                       dense_types=None,
                       dense_defaults=None,
                       dense_shapes=None,
                       name=None):
  """Parses `Example` protos.

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos.
    sparse_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `SparseTensor` objects.
    sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s
    dense_types: A list of DTypes of the same length as `dense_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_defaults: A dict mapping string keys to `Tensor`s.
      The keys of the dict must match the dense_keys of the feature.
    dense_shapes: A list of tuples with the same length as `dense_keys`.
      The shape of the data for each dense feature referenced by `dense_keys`.
      Required for any input tensors identified by `dense_keys` whose shapes
      are anything other than `[]` or `[1]`.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping keys to `Tensor`s and `SparseTensor`s.

  Raises:
    ValueError: If sparse and dense key sets intersect, or input lengths do
      not match up.
  """
  with ops.op_scope([serialized, names], name, "ParseExample"):
    names = [] if names is None else names
    dense_defaults = {} if dense_defaults is None else dense_defaults
    sparse_keys = [] if sparse_keys is None else sparse_keys
    sparse_types = [] if sparse_types is None else sparse_types
    dense_keys = [] if dense_keys is None else dense_keys
    dense_types = [] if dense_types is None else dense_types
    dense_shapes = ([[]] * len(dense_keys)
                    if dense_shapes is None else dense_shapes)

    num_dense = len(dense_keys)
    num_sparse = len(sparse_keys)

    if len(dense_shapes) != num_dense:
      raise ValueError(
          "len(dense_shapes) != len(dense_keys): %d vs. %d" %
          (len(dense_shapes), num_dense))
    if len(dense_types) != num_dense:
      # The count compared against is len(dense_keys); the message used to
      # name the non-existent quantity "len(num_dense)".
      raise ValueError("len(dense_types) != len(dense_keys): %d vs. %d" %
                       (len(dense_types), num_dense))
    if len(sparse_types) != num_sparse:
      raise ValueError(
          "len(sparse_types) != len(sparse_keys): %d vs. %d" %
          (len(sparse_types), num_sparse))
    if num_dense + num_sparse == 0:
      raise ValueError(
          "Must provide at least one sparse key or dense key")
    if not set(dense_keys).isdisjoint(set(sparse_keys)):
      raise ValueError(
          "Dense and sparse keys must not intersect; intersection: %s" %
          set(dense_keys).intersection(set(sparse_keys)))

    # Build one default tensor per dense key, in `dense_keys` order.
    dense_defaults_vec = []
    for i, key in enumerate(dense_keys):
      default_value = dense_defaults.get(key)
      if default_value is None:
        # No default supplied: pass an empty constant of the right dtype.
        default_value = constant_op.constant([], dtype=dense_types[i])
      elif not isinstance(default_value, ops.Tensor):
        # Op names only allow a restricted character set, so sanitize the key.
        key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
        default_value = ops.convert_to_tensor(default_value,
                                              dtype=dense_types[i],
                                              name=key_name)
        default_value = array_ops.reshape(default_value, dense_shapes[i])

      dense_defaults_vec.append(default_value)

    dense_shapes = [
        tensor_shape.as_shape(shape).as_proto() for shape in dense_shapes
    ]

    # pylint: disable=protected-access
    outputs = gen_parsing_ops._parse_example(
        serialized=serialized,
        names=names,
        dense_defaults=dense_defaults_vec,
        sparse_keys=sparse_keys,
        sparse_types=sparse_types,
        dense_keys=dense_keys,
        dense_shapes=dense_shapes,
        name=name)
    # pylint: enable=protected-access

    (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs

    sparse_tensors = [
        ops.SparseTensor(ix, val, shape)
        for (ix, val, shape) in zip(sparse_indices, sparse_values,
                                    sparse_shapes)
    ]

    return dict(
        zip(sparse_keys + dense_keys, sparse_tensors + dense_values))
def _ExtractImagePatchesGrad(op, grad):
    """Backpropagates `grad` through a patch-extraction op.

    The op's `ksizes`, `strides`, `rates` and `padding` attrs are used to
    enumerate, in Python, every (input pixel, patch element) pair connected
    by the forward pass. Those pairs form a sparse 0/1 matrix that is
    multiplied with the flattened incoming gradient, so contributions from
    overlapping patches are summed per input pixel.

    Args:
      op: The forward op. `op.inputs[0]` and `op.outputs[0]` are unpacked as
        4-D shapes; the two middle input dimensions must be statically known
        because they drive the Python loops below.
      grad: Gradient with respect to `op.outputs[0]`.

    Returns:
      A single-element list holding the gradient with respect to
      `op.inputs[0]`.
    """
    images = op.inputs[0]
    # Only the row/column extents are needed as Python integers.
    _, rows_in, cols_in, _ = [dim.value for dim in images.get_shape()]
    # First and last dimensions are read dynamically from the tensor.
    dynamic_shape = array_ops.shape(images)
    batch_size = dynamic_shape[0]
    channels = dynamic_shape[3]

    _, rows_out, cols_out, _ = [
        dim.value for dim in op.outputs[0].get_shape()
    ]
    _, ksize_r, ksize_c, _ = op.get_attr("ksizes")
    _, stride_r, stride_c, _ = op.get_attr("strides")
    _, rate_r, rate_c, _ = op.get_attr("rates")
    padding = op.get_attr("padding")

    # Kernel footprint once the dilation rate is taken into account.
    eff_r = ksize_r + (ksize_r - 1) * (rate_r - 1)
    eff_c = ksize_c + (ksize_c - 1) * (rate_c - 1)

    if padding == b"SAME":
        rows_out = int(ceil(rows_in / stride_r))
        cols_out = int(ceil(cols_in / stride_c))
        pad_rows = ((rows_out - 1) * stride_r + eff_r - rows_in) // 2
        pad_cols = ((cols_out - 1) * stride_c + eff_c - cols_in) // 2
    elif padding == b"VALID":
        rows_out = int(ceil((rows_in - eff_r + 1) / stride_r))
        cols_out = int(ceil((cols_in - eff_c + 1) / stride_c))
        pad_rows = (rows_out - 1) * stride_r + eff_r - rows_in
        pad_cols = (cols_out - 1) * stride_c + eff_c - cols_in

    pad_rows = max(0, pad_rows)
    pad_cols = max(0, pad_cols)

    # Bring the patch axes to the front and fold batch/channels together so
    # the gradient becomes a 2-D matrix with one row per patch element.
    grad_by_patch = array_ops.reshape(
        grad, (batch_size, rows_out, cols_out, ksize_r, ksize_c, channels))
    grad_by_patch = array_ops.transpose(grad_by_patch, (1, 2, 3, 4, 0, 5))
    grad_flat = array_ops.reshape(grad_by_patch, (-1, batch_size * channels))

    row_starts = range(0, rows_out * stride_r, stride_r)
    col_starts = range(0, cols_out * stride_c, stride_c)

    patch_elems = ksize_r * ksize_c
    pairs = []
    for i, row_start in enumerate(row_starts):
        r_low = row_start - pad_rows
        for j, col_start in enumerate(col_starts):
            c_low = col_start - pad_cols
            patch_offset = i * (cols_out * patch_elems) + j * patch_elems
            for ri, r in enumerate(range(r_low, r_low + eff_r, rate_r)):
                for ci, c in enumerate(range(c_low, c_low + eff_c, rate_c)):
                    # Taps that fall into the padding have no input pixel.
                    if 0 <= r < rows_in and 0 <= c < cols_in:
                        pairs.append((r * cols_in + c,
                                      patch_offset + ri * ksize_c + ci))

    jacobian_shape = (rows_in * cols_in,
                      rows_out * cols_out * ksize_r * ksize_c)
    jacobian = sparse_tensor.SparseTensor(
        array_ops.constant(pairs, dtype=ops.dtypes.int64),
        array_ops.ones((len(pairs), ), dtype=ops.dtypes.float32),
        jacobian_shape)

    summed = sparse_ops.sparse_tensor_dense_matmul(jacobian, grad_flat)

    grad_out = array_ops.reshape(
        summed, (rows_in, cols_in, batch_size, channels))
    grad_out = array_ops.transpose(grad_out, (2, 0, 1, 3))
    return [grad_out]
def _ReshapeToInput(op, grad):
    """Returns `grad` reshaped to the dynamic shape of `op.inputs[0]`."""
    input_shape = array_ops.shape(op.inputs[0])
    return array_ops.reshape(grad, input_shape)
def _ReshapeGrad(op, grad):
    """Gradient for a reshape-like op with two inputs.

    Returns:
      A two-element list: `grad` reshaped to the shape of `op.inputs[0]`, and
      `None` for the second input.
    """
    input_shape = array_ops.shape(op.inputs[0])
    input_grad = array_ops.reshape(grad, input_shape)
    return [input_grad, None]
def _GatherV2Grad(op, grad):
    """Gradient for GatherV2 op.

    Args:
      op: The forward op; its inputs are read as `(params, indices, axis)`.
      grad: Gradient with respect to the gathered output.

    Returns:
      A three-element list. The first entry is the gradient for `params`: an
      `IndexedSlices` when `axis` is statically 0, otherwise a dense tensor.
      The entries for `indices` and `axis` are `None`.
    """
    # params can be large, so colocate the shape calculation with it.
    #
    # params can be very large for sparse model, array_ops.shape raises
    # exception on the Windows platform when any dimension is larger than
    # int32. params_shape is not used in optimizer apply_sparse gradients,
    # so it's fine to convert it back to int32 regardless of truncation.
    params = op.inputs[0]
    with ops.colocate_with(params):
        params_shape = math_ops.to_int32(
            array_ops.shape(params, out_type=ops.dtypes.int64))

    gather_indices = op.inputs[1]
    # Shape `[num_indices]` used to flatten the indices and size the values.
    flat_indices_shape = array_ops.expand_dims(
        array_ops.size(gather_indices), 0)
    axis = op.inputs[2]

    # For axis 0 gathers, build an appropriately shaped IndexedSlices.
    if tensor_util.constant_value(axis) == 0:
        tail_shape = (params_shape.cpu()[1:]
                      if context.executing_eagerly() else params_shape[1:])
        slices_shape = array_ops.concat([flat_indices_shape, tail_shape], 0)
        slices = array_ops.reshape(grad, slices_shape)
        flat_indices = array_ops.reshape(gather_indices, flat_indices_shape)
        return [
            ops.IndexedSlices(slices, flat_indices, params_shape), None, None
        ]

    leading_shape = params_shape[:axis]
    num_leading = array_ops.size(leading_shape)
    trailing_shape = params_shape[axis:][1:]
    num_trailing = array_ops.size(trailing_shape)

    leading_axes = math_ops.range(num_leading)
    trailing_axes = math_ops.range(num_leading + 1,
                                   num_leading + 1 + num_trailing)

    slices_shape = array_ops.concat(
        [leading_shape, flat_indices_shape, trailing_shape], 0)
    slices = array_ops.reshape(grad, slices_shape)
    flat_indices = array_ops.reshape(gather_indices, flat_indices_shape)

    # We need to sum up every slice `values[..., i, ....]` corresponding to
    # `params[..., indices[i], ...]`. Since `unsorted_segment_sum` does not
    # support an axis parameter, we transpose the gather dimension to the
    # front, then use `unsorted_segment_sum` to build a
    # [gather_axis, outer_axes, inner_axes] tensor with all the gradients
    # affecting each index in `gather_axis` summed up.
    to_front = array_ops.concat(
        [[num_leading], leading_axes, trailing_axes], 0)
    slices_front = array_ops.transpose(slices, to_front)
    segment_count = params_shape[axis]
    params_grad = math_ops.unsorted_segment_sum(slices_front, flat_indices,
                                                segment_count)

    # Inverts the above transpose by moving dimension 0 back to its original
    # position.
    from_front = array_ops.concat([leading_axes + 1, [0], trailing_axes], 0)
    params_grad = array_ops.transpose(params_grad, from_front)
    return [params_grad, None, None]
def triplet_semihard_loss(labels, embeddings, metric, margin=1.0):
    """Computes the triplet loss with semi-hard negative mining.

    For every positive pair (two embeddings sharing a label) the loss is
    `max(margin + D_ap - D_an, 0)`, where `D_an` is the smallest negative
    distance that is still larger than `D_ap` (the semi-hard negative). If no
    such negative exists, the largest negative distance is used instead.
    See: https://arxiv.org/abs/1503.03832.

    Args:
      labels: 1-D tf.int32 `Tensor` with shape [batch_size] of multiclass
        integer labels.
      embeddings: 2-D float `Tensor` of embedding vectors.
      metric: Callable applied to `embeddings`; its result is used as the
        [batch_size, batch_size] pairwise distance matrix.
      margin: Float, margin term in the loss definition.

    Returns:
      A tuple `(triplet_loss, 0)`: the loss as a scalar tensor, followed by
      the constant integer 0.
    """
    # Turn the [batch_size] label vector into a [batch_size, 1] column.
    label_shape = array_ops.shape(labels)
    assert label_shape.shape == 1
    labels = array_ops.reshape(labels, [label_shape[0], 1])

    # Pairwise distances between all embeddings.
    pairwise_dist = metric(embeddings)

    # same_label[i, j] is True when examples i and j share a label; its
    # negation selects negatives only.
    same_label = math_ops.equal(labels, array_ops.transpose(labels))
    diff_label = math_ops.logical_not(same_label)

    batch_size = array_ops.size(labels)

    # For every (anchor, positive) pair, mark the negatives that lie farther
    # from the anchor than the positive does.
    dist_tiled = array_ops.tile(pairwise_dist, [batch_size, 1])
    diff_label_tiled = array_ops.tile(diff_label, [batch_size, 1])
    dist_column = array_ops.reshape(
        array_ops.transpose(pairwise_dist), [-1, 1])
    farther_negative = math_ops.logical_and(
        diff_label_tiled, math_ops.greater(dist_tiled, dist_column))

    # True where at least one such farther negative exists.
    farther_count = math_ops.reduce_sum(
        math_ops.cast(farther_negative, dtype=dtypes.float32),
        1,
        keepdims=True)
    has_farther_negative = array_ops.reshape(
        math_ops.greater(farther_count, 0.0), [batch_size, batch_size])
    has_farther_negative = array_ops.transpose(has_farther_negative)

    diff_label_float = math_ops.cast(diff_label, dtype=dtypes.float32)
    farther_negative_float = math_ops.cast(
        farther_negative, dtype=dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = array_ops.reshape(
        masked_minimum(dist_tiled, farther_negative_float),
        [batch_size, batch_size])
    negatives_outside = array_ops.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = array_ops.tile(
        masked_maximum(pairwise_dist, diff_label_float), [1, batch_size])

    semi_hard = array_ops.where(has_farther_negative, negatives_outside,
                                negatives_inside)

    loss_mat = math_ops.add(margin, pairwise_dist - semi_hard)

    # In lifted-struct, the authors multiply 0.5 for upper triangular;
    # in semihard, they take all positive pairs except the diagonal.
    identity = array_ops.diag(array_ops.ones([batch_size]))
    positives_mask = math_ops.cast(same_label, dtype=dtypes.float32) - identity
    num_positives = math_ops.reduce_sum(positives_mask)

    hinge = math_ops.maximum(
        math_ops.multiply(loss_mat, positives_mask), 0.0)
    triplet_loss = math_ops.truediv(
        math_ops.reduce_sum(hinge),
        num_positives,
        name='triplet_semihard_loss')
    return triplet_loss, 0
def loss_fn(x):
    """Returns the squared difference between `x @ kernel` (as a scalar) and 1.

    `kernel` is taken from the enclosing scope.
    """
    product = gen_math_ops.mat_mul(x, kernel)
    residual = array_ops.reshape(product, []) - constant_op.constant(1.)
    return residual * residual
def _argsort(a, axis, stable):
    """Argsorts `a` along `axis`; `axis=None` sorts the flattened tensor.

    Args:
      a: Tensor to sort.
      axis: Axis to sort along, or `None` to flatten `a` first and sort
        along axis 0.
      stable: Forwarded to `sort_ops.argsort` as its `stable` argument.

    Returns:
      The result of `sort_ops.argsort`.
    """
    if axis is not None:
        return sort_ops.argsort(a, axis, stable=stable)
    flattened = array_ops.reshape(a, [-1])
    return sort_ops.argsort(flattened, 0, stable=stable)
def __new__(cls,
            mode,
            predictions=None,
            loss=None,
            train_op=None,
            eval_metric_ops=None,
            export_outputs=None,
            training_chief_hooks=None,
            training_hooks=None,
            scaffold=None):
    """Creates a validated `EstimatorSpec` instance.

    Depending on the value of `mode`, different arguments are required.
    Namely

    * For `mode == ModeKeys.TRAIN`: required fields are `loss` and
      `train_op`.
    * For `mode == ModeKeys.EVAL`: required field is `loss`.
    * For `mode == ModeKeys.PREDICT`: required fields are `predictions`.

    model_fn can populate all arguments independent of mode. In this case,
    some arguments will be ignored by `Estimator`. E.g. `train_op` will be
    ignored in eval and infer modes. Example:

    ```python
    def my_model_fn(mode, features, labels):
      predictions = ...
      loss = ...
      train_op = ...
      return tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=predictions,
          loss=loss,
          train_op=train_op)
    ```

    Alternatively, model_fn can just populate the arguments appropriate to
    the given mode. Example:

    ```python
    def my_model_fn(mode, features, labels):
      if (mode == tf.estimator.ModeKeys.TRAIN or
          mode == tf.estimator.ModeKeys.EVAL):
        loss = ...
      else:
        loss = None
      if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = ...
      else:
        train_op = None
      if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = ...
      else:
        predictions = None

      return tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=predictions,
          loss=loss,
          train_op=train_op)
    ```

    Args:
      mode: A `ModeKeys`. Specifies if this is training, evaluation or
        prediction.
      predictions: Predictions `Tensor` or dict of `Tensor`.
      loss: Training loss `Tensor`. Must be either scalar, or with shape
        `[1]`.
      train_op: Op for the training step.
      eval_metric_ops: Dict of metric results keyed by name. The values of
        the dict are the results of calling a metric function, namely a
        `(metric_tensor, update_op)` tuple.
      export_outputs: Describes the output signatures to be exported to
        `SavedModel` and used during serving.
        A dict `{name: output}` where:
        * name: An arbitrary name for this output.
        * output: an `ExportOutput` object such as `ClassificationOutput`,
          `RegressionOutput`, or `PredictOutput`.
        Single-headed models only need to specify one entry in this
        dictionary. Multi-headed models should specify one entry for each
        head, one of which must be named using
        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY.
        The dict passed in is never modified; when a default entry has to be
        added it is added to a shallow copy.
      training_chief_hooks: A list of `tf.train.SessionRunHook` objects to
        run on the chief worker during training.
      training_hooks: A list of `tf.train.SessionRunHook` objects to run on
        all workers during training.
      scaffold: A `tf.train.Scaffold` object that can be used to set
        initialization, saver, and more to be used in training.

    Returns:
      A validated `EstimatorSpec` object.

    Raises:
      ValueError: If validation fails.
      TypeError: If any of the arguments is not the expected type.
    """
    # Validate train_op.
    if train_op is None:
        if mode == ModeKeys.TRAIN:
            raise ValueError('Missing train_op.')
    else:
        _check_is_tensor_or_operation(train_op, 'train_op')

    # Validate loss.
    if loss is None:
        if mode in (ModeKeys.TRAIN, ModeKeys.EVAL):
            raise ValueError('Missing loss.')
    else:
        loss = _check_is_tensor(loss, 'loss')
        loss_shape = loss.get_shape()
        if loss_shape.num_elements() not in (None, 1):
            raise ValueError('Loss must be scalar, given: {}'.format(loss))
        # A single-element loss that is not rank 0 (e.g. shape [1]) is
        # normalized to a true scalar.
        if not loss_shape.is_compatible_with(tensor_shape.scalar()):
            loss = array_ops.reshape(loss, [])

    # Validate predictions.
    if predictions is None:
        if mode == ModeKeys.PREDICT:
            raise ValueError('Missing predictions.')
        predictions = {}
    elif isinstance(predictions, dict):
        predictions = {
            k: _check_is_tensor(v, 'predictions[{}]'.format(k))
            for k, v in six.iteritems(predictions)
        }
    else:
        predictions = _check_is_tensor(predictions, 'predictions')

    # Validate eval_metric_ops.
    if eval_metric_ops is None:
        eval_metric_ops = {}
    else:
        if not isinstance(eval_metric_ops, dict):
            raise TypeError(
                'eval_metric_ops must be a dict, given: {}'.format(
                    eval_metric_ops))
        for key, metric_value in six.iteritems(eval_metric_ops):
            if (not isinstance(metric_value, tuple) or
                    len(metric_value) != 2):
                raise TypeError(
                    'Values of eval_metric_ops must be (metric_tensor, update_op) '
                    'tuples, given: {} for key: {}'.format(
                        metric_value, key))
            _check_is_tensor_or_operation(
                metric_value[0], 'eval_metric_ops[{}]'.format(key))
            _check_is_tensor_or_operation(
                metric_value[1], 'eval_metric_ops[{}]'.format(key))

    # Validate export_outputs.
    if export_outputs is not None:
        if not isinstance(export_outputs, dict):
            raise TypeError(
                'export_outputs must be dict, given: {}'.format(
                    export_outputs))
        for v in six.itervalues(export_outputs):
            if not isinstance(v, ExportOutput):
                raise TypeError(
                    'Values in export_outputs must be ExportOutput objects. '
                    'Given: {}'.format(export_outputs))
        default_key = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
        # Note export_outputs is allowed to be empty.
        if len(export_outputs) == 1:
            (key, value), = export_outputs.items()
            if key != default_key:
                # A lone output also serves as the default signature. Add the
                # alias to a shallow copy so the caller's dict is untouched.
                export_outputs = export_outputs.copy()
                export_outputs[default_key] = value
        if len(export_outputs) > 1:
            if default_key not in export_outputs:
                raise ValueError(
                    'Multiple export_outputs were provided, but none of them is '
                    'specified as the default.  Do this by naming one of them with '
                    'signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY.'
                )

    # Validate that all tensors and ops are from the default graph.
    default_graph = ops.get_default_graph()
    for value in _prediction_values(predictions):
        if value.graph is not default_graph:
            raise ValueError(
                'prediction values must be from the default graph.')
    if loss is not None and loss.graph is not default_graph:
        raise ValueError('loss must be from the default graph.')
    if train_op is not None and train_op.graph is not default_graph:
        raise ValueError('train_op must be from the default graph.')
    for value in _eval_metric_ops_values(eval_metric_ops):
        if value.graph is not default_graph:
            raise ValueError(
                'eval_metric_ops values must be from the default graph.')

    # Validate hooks.
    if training_chief_hooks is None:
        training_chief_hooks = []
    if training_hooks is None:
        training_hooks = []
    # Materialize both as lists for the check only, so a list and a tuple can
    # be validated together; the values stored on the spec are unchanged.
    for hook in list(training_hooks) + list(training_chief_hooks):
        if not isinstance(hook, session_run_hook.SessionRunHook):
            raise TypeError(
                'All hooks must be SessionRunHook instances, given: {}'.
                format(hook))

    scaffold = scaffold or monitored_session.Scaffold()
    # Validate scaffold.
    if not isinstance(scaffold, monitored_session.Scaffold):
        raise TypeError(
            'scaffold must be tf.train.Scaffold. Given: {}'.format(
                scaffold))

    return super(EstimatorSpec, cls).__new__(
        cls,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=export_outputs,
        training_chief_hooks=training_chief_hooks,
        training_hooks=training_hooks,
        scaffold=scaffold)
def f(x):
    """Reshapes `x` to 2-D using `a` and `b` from the enclosing scope.

    Both `a` and `b` are cast to int32 before being used as dimensions.
    """
    first_dim = math_ops.cast(a, dtypes.int32)
    second_dim = math_ops.cast(b, dtypes.int32)
    return array_ops.reshape(x, [first_dim, second_dim])
def f(a, b):
    """Multiplies `a` reshaped to `[-1, 1]` by `b` reshaped to `[-1]`."""
    as_column = array_ops.reshape(a, [-1, 1])
    as_vector = array_ops.reshape(b, [-1])
    return as_column * as_vector
def update_confusion_matrix_variables(variables_to_update,
                                      y_true,
                                      y_pred,
                                      thresholds,
                                      sample_weight=None):
    """Returns op to update the given confusion matrix variables.

    For every pair of values in y_true and y_pred:

    true_positive: y_true == True and y_pred > thresholds
    false_negatives: y_true == True and y_pred <= thresholds
    true_negatives: y_true == False and y_pred <= thresholds
    false_positive: y_true == False and y_pred > thresholds

    The results will be weighted and added together. When multiple thresholds
    are provided, we will repeat the same for every threshold.

    For estimation of these metrics over a stream of data, the function
    creates an `update_op` operation that updates the given variables.

    If `sample_weight` is `None`, weights default to 1.
    Use weights of 0 to mask values.

    Args:
      variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid
        keys and corresponding variables to update as values.
      y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to
        `bool`.
      y_pred: A floating point `Tensor` of arbitrary shape and whose values
        are in the range `[0, 1]`.
      thresholds: A float value or a python list or tuple of float thresholds
        in `[0, 1]`.
      sample_weight: Optional `Tensor` whose rank is either 0, or the same
        rank as `y_true`, and must be broadcastable to `y_true` (i.e., all
        dimensions must be either `1`, or the same as the corresponding
        `y_true` dimension).

    Returns:
      Update op, or `None` when `variables_to_update` is `None`.

    Raises:
      ValueError: If `y_pred` and `y_true` have mismatched shapes, or if
        `sample_weight` is not `None` and its shape doesn't match `y_pred`,
        or if `variables_to_update` contains invalid keys.
    """
    # Nothing to update: return None rather than an op.
    if variables_to_update is None:
        return
    y_true = ops.convert_to_tensor(y_true)
    y_pred = ops.convert_to_tensor(y_pred)
    y_pred.shape.assert_is_compatible_with(y_true.shape)

    # At least one key must name a confusion-matrix entry ...
    if not any(
            key for key in variables_to_update if key in list(ConfusionMatrix)):
        raise ValueError(
            'Please provide at least one valid confusion matrix '
            'variable to update. Valid variable key options are: "{}". '
            'Received: "{}"'.format(
                list(ConfusionMatrix), variables_to_update.keys()))

    # ... and no key may be anything else.
    invalid_keys = [
        key for key in variables_to_update if key not in list(ConfusionMatrix)
    ]
    if invalid_keys:
        raise ValueError(
            'Invalid keys: {}. Valid variable key options are: "{}"'.format(
                invalid_keys, list(ConfusionMatrix)))

    # The range assertions on y_pred are attached as control dependencies of
    # the tensors produced inside this block.
    with ops.control_dependencies([
            check_ops.assert_greater_equal(
                y_pred,
                math_ops.cast(0.0, dtype=y_pred.dtype),
                message='predictions must be >= 0'),
            check_ops.assert_less_equal(
                y_pred,
                math_ops.cast(1.0, dtype=y_pred.dtype),
                message='predictions must be <= 1')
    ]):
        y_pred, y_true, sample_weight = squeeze_or_expand_dimensions(
            math_ops.cast(y_pred, dtype=dtypes.float32),
            math_ops.cast(y_true, dtype=dtypes.bool), sample_weight)

    thresholds = to_list(thresholds)
    num_thresholds = len(thresholds)
    num_predictions = array_ops.size(y_pred)

    # Reshape predictions and labels.
    predictions_2d = array_ops.reshape(y_pred, [1, -1])
    labels_2d = array_ops.reshape(
        math_ops.cast(y_true, dtype=dtypes.bool), [1, -1])

    # Tile the thresholds for every prediction.
    thresh_tiled = array_ops.tile(
        array_ops.expand_dims(array_ops.constant(thresholds), 1),
        array_ops.stack([1, num_predictions]))

    # Tile the predictions for every threshold.
    preds_tiled = array_ops.tile(predictions_2d, [num_thresholds, 1])

    # Compare predictions and threshold.
    pred_is_pos = math_ops.greater(preds_tiled, thresh_tiled)

    # Tile labels by number of thresholds
    label_is_pos = array_ops.tile(labels_2d, [num_thresholds, 1])

    if sample_weight is not None:
        weights = weights_broadcast_ops.broadcast_weights(
            math_ops.cast(sample_weight, dtype=dtypes.float32), y_pred)
        weights_tiled = array_ops.tile(
            array_ops.reshape(weights, [1, -1]), [num_thresholds, 1])
    else:
        weights_tiled = None

    update_ops = []

    def weighted_assign_add(label, pred, weights, var):
        # Adds to `var`, per threshold row, the (optionally weighted) count
        # of positions where both `label` and `pred` are True.
        label_and_pred = math_ops.cast(
            math_ops.logical_and(label, pred), dtype=dtypes.float32)
        if weights is not None:
            label_and_pred *= weights
        return state_ops.assign_add(var, math_ops.reduce_sum(label_and_pred, 1))

    # Maps each confusion-matrix entry to the (label mask, prediction mask)
    # pair whose conjunction it counts. Negated masks are only built when an
    # entry that needs them was requested.
    loop_vars = {
        ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos),
    }
    update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
    update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update
    update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update

    if update_fn or update_tn:
        pred_is_neg = math_ops.logical_not(pred_is_pos)
        loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos, pred_is_neg)

    if update_fp or update_tn:
        label_is_neg = math_ops.logical_not(label_is_pos)
        loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg, pred_is_pos)
        if update_tn:
            loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = (label_is_neg, pred_is_neg)

    # Only entries the caller actually supplied a variable for are updated.
    for matrix_cond, (label, pred) in loop_vars.items():
        if matrix_cond in variables_to_update:
            update_ops.append(
                weighted_assign_add(label, pred, weights_tiled,
                                    variables_to_update[matrix_cond]))
    return control_flow_ops.group(update_ops)
def f(x, val1, val2):
    """Stores `val1`/`val2` in `a`/`b`, then reshapes `x` to `[a, b]`.

    `a` and `b` come from the enclosing scope. They are assigned the float32
    casts of `val1` and `val2`, and are cast to int32 when used as the
    target dimensions.
    """
    a.assign(math_ops.cast(val1, dtypes.float32))
    b.assign(math_ops.cast(val2, dtypes.float32))
    first_dim = math_ops.cast(a, dtypes.int32)
    second_dim = math_ops.cast(b, dtypes.int32)
    return array_ops.reshape(x, [first_dim, second_dim])
def _reshape_for_efficiency(a,
                            b,
                            transpose_a=False,
                            transpose_b=False,
                            adjoint_a=False,
                            adjoint_b=False):
    """Maybe reshape a, b, and return an inverse map.  For matmul/solve.

    When `b` has more (leading) dimensions than `a`, those extra dimensions
    may be moved to the end of `b` and squashed into its last dimension, so
    that `a` does not have to be replicated across them. The returned
    callable undoes this on the result computed from the returned `a`, `b`.

    Args:
      a: Tensor with statically known rank (otherwise nothing is changed).
      b: Tensor with statically known rank (otherwise nothing is changed).
      transpose_a: Whether the caller intends to transpose `a`.
      transpose_b: Whether the caller intends to transpose `b`.
      adjoint_a: Whether the caller intends to take the adjoint of `a`.
      adjoint_b: Whether the caller intends to take the adjoint of `b`.

    Returns:
      A tuple `(a, b, reshape_inv, still_need_to_transpose)`:
      * `a`, `b`: the possibly transposed/reshaped operands.
      * `reshape_inv`: callable mapping the result back to the expected
        layout (the identity when nothing was reshaped).
      * `still_need_to_transpose`: `False` when the requested
        transposes/adjoints were already applied here, `True` otherwise.
    """

    def identity(x):
        return x

    # At this point, we have not taken transpose/adjoint of a/b.
    still_need_to_transpose = True

    if a.shape.ndims is None or b.shape.ndims is None:
        return a, b, identity, still_need_to_transpose

    # This could be handled in the future, but seems less common.
    if a.shape.ndims >= b.shape.ndims:
        return a, b, identity, still_need_to_transpose

    # From now on, we might modify b, but will not modify a.

    # Suppose:
    #   a.shape =     C + [m, n]
    #   b.shape = S + C + [n, r]
    # Then the extra dims of b are S and its main dims are C + [n, r].
    b_extra_ndims = b.shape.ndims - a.shape.ndims

    # No reason to flip unless the extra dims of b are big enough.  Why?
    # Assume adjoint/transpose = False.  Then...
    # By not flipping, we have to replicate a to shape
    #   b_extra_sh + a.shape,
    # which could use extra memory.  But in all cases, the final output has
    # shape
    #   b_extra_sh + a.shape[:-1] + [b.shape[-1]]
    # So we only end up creating a larger object if the end dim of b is
    # smaller than the end dim of a.  This often happens, e.g. if b was a
    # vector that was expanded to a matrix (by appending a singleton).

    # Since adjoint/transpose may not be False, we must make adjustments
    # here.
    # The dim of b that holds the multiple equations.
    a_domain_sz_ = a.shape[-2 if adjoint_a or transpose_a else -1]
    b_eq_sz_ = b.shape[-2 if adjoint_b or transpose_b else -1]
    b_extra_sz_ = (
        np.prod(b.shape[:b_extra_ndims].as_list())
        if b.shape[:b_extra_ndims].is_fully_defined() else None)
    if (a_domain_sz_ is not None and b_eq_sz_ is not None and
            b_extra_sz_ is not None):
        if b_extra_sz_ < 2 or a_domain_sz_ <= b_eq_sz_:
            return a, b, identity, still_need_to_transpose

    # At this point, we're flipping for sure!
    # Any transposes/adjoints will happen here explicitly, rather than in
    # calling code.  Why?  To avoid having to write separate complex code for
    # each case.
    if adjoint_a:
        a = linalg.adjoint(a)
    elif transpose_a:
        a = linalg.transpose(a)
    if adjoint_b:
        b = linalg.adjoint(b)
    elif transpose_b:
        b = linalg.transpose(b)
    still_need_to_transpose = False

    # Dynamic shapes are taken only now, after the transpose/adjoint, since
    # that may have changed them.
    # b_extra_sh = S, b_main_sh = C + [n, r]
    b_extra_sh = array_ops.shape(b)[:b_extra_ndims]
    b_main_sh = array_ops.shape(b)[b_extra_ndims:]

    # Permutation to put the extra dims at the end.
    perm = (
        np.concatenate(
            (np.arange(b_extra_ndims, b.shape.ndims),
             np.arange(0, b_extra_ndims)), 0))
    b_extra_on_end = array_ops.transpose(b, perm=perm)

    # Now squash this end into one long dim.
    b_squashed_end = array_ops.reshape(
        b_extra_on_end, array_ops.concat((b_main_sh[:-1], [-1]), 0))

    def reshape_inv(y):
        # Expand the extra dims hanging off the end, "b_extra_sh".
        # Note we use y_sh[:-1] + [b_main_sh[-1]] rather than b_main_sh,
        # because y could have different batch dims than a and b, because of
        # broadcasting.
        y_extra_shape = array_ops.concat(
            (array_ops.shape(y)[:-1], [b_main_sh[-1]], b_extra_sh), 0)
        y_extra_on_end = array_ops.reshape(y, y_extra_shape)
        inverse_perm = np.argsort(perm)
        return array_ops.transpose(y_extra_on_end, perm=inverse_perm)

    return a, b_squashed_end, reshape_inv, still_need_to_transpose
def f(l):
    """Reshapes `l` to the shape `s` taken from the enclosing scope."""
    target_shape = s
    return array_ops.reshape(l, target_shape)
def iris_input_fn():
    """Builds constant feature/label tensors from the Iris dataset.

    Returns:
      A `(features, labels)` tuple: `iris.data` reshaped to
      `[-1, _IRIS_INPUT_DIM]` and `iris.target` flattened to 1-D.
    """
    dataset = base.load_iris()
    data_tensor = constant_op.constant(dataset.data)
    features = array_ops.reshape(data_tensor, [-1, _IRIS_INPUT_DIM])
    target_tensor = constant_op.constant(dataset.target)
    labels = array_ops.reshape(target_tensor, [-1])
    return features, labels
def attention_single_output_decoder(initial_state,
                                    attention_states,
                                    output_size=None,
                                    num_heads=1,
                                    dtype=dtypes.float32,
                                    scope=None,
                                    sequence_length=array_ops.ones([16]),
                                    initial_state_attention=True,
                                    use_attention=False):
    """Attends over `attention_states` once and projects to a single output.

    Args:
      initial_state: Query tensor; its first dimension is used as the batch
        size.
      attention_states: 3-D tensor `[batch, attn_length, attn_size]`; the
        last two dimensions must be statically known.
      output_size: Width of the linear output projection. It is used as a
        variable dimension, so the `None` default presumably has to be
        overridden by the caller (TODO confirm).
      num_heads: Number of attention heads (>= 1). Only head 0 feeds the
        output.
      dtype: dtype of the attention weights and attention vectors.
      scope: Optional variable scope name; defaults to
        "decoder_single_output".
      sequence_length: 1-D tensor with one length per batch element, used as
        the divisor for mean pooling when `use_attention` is `False`.
        NOTE(review): the default is a tensor created when this function is
        defined, so it lives in whatever graph was the default at that time
        and only fits a batch of 16; callers should pass this explicitly.
      initial_state_attention: If `True`, attention is computed from
        `initial_state`; otherwise the attention vectors and weights are
        all zeros.
      use_attention: If `False`, positions are mean-pooled with weight
        `1 / sequence_length`; otherwise softmax attention is used.

    Returns:
      A tuple `(attention_states, attn_weights[0], attns[0], [output])`.

    Raises:
      ValueError: If `num_heads` < 1, or if dimension 1 or 2 of
        `attention_states` is not statically known.
    """
    if num_heads < 1:
        raise ValueError("With less than 1 heads, use a non-attention decoder.")
    # Both dims 1 and 2 are read as Python values below, so the slice must
    # cover both of them ([1:3], not [1:2]).
    if not attention_states.get_shape()[1:3].is_fully_defined():
        raise ValueError("Shape[1] and [2] of attention_states must be known: %s"
                         % attention_states.get_shape())

    with variable_scope.variable_scope(scope or "decoder_single_output"):
        batch_size = array_ops.shape(initial_state)[0]  # Needed for reshaping.
        attn_length = attention_states.get_shape()[1].value
        attn_size = attention_states.get_shape()[2].value

        # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape
        # before.
        hidden = array_ops.reshape(
            attention_states, [-1, attn_length, 1, attn_size])
        hidden_features = []
        v = []
        attention_vec_size = attn_size  # Size of query vectors for attention.
        for head in xrange(num_heads):
            k = variable_scope.get_variable(
                "AttnW_%d" % head, [1, 1, attn_size, attention_vec_size])
            hidden_features.append(
                nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME"))
            v.append(variable_scope.get_variable(
                "AttnV_%d" % head, [attention_vec_size]))

        def attention(query, use_attention=False):
            """Put attention masks on hidden using hidden_features and query."""
            attn_weights = []
            ds = []  # Results of attention reads will be stored here.
            for i in xrange(num_heads):
                with variable_scope.variable_scope("Attention_%d" % i):
                    y = rnn_cell._linear(query, attention_vec_size, True)
                    y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
                    # Attention mask is a softmax of v^T * tanh(...).
                    s = math_ops.reduce_sum(
                        v[i] * math_ops.tanh(hidden_features[i] + y), [2, 3])
                    if use_attention is False:  # apply mean pooling
                        # Row b must hold sequence_length[b] in every
                        # column. Tiling the 1-D vector and reshaping
                        # row-major would interleave the lengths of
                        # different batch elements, so repeat along a new
                        # second axis instead.
                        weights = tf.tile(
                            array_ops.expand_dims(sequence_length, 1),
                            tf.stack([1, attn_length]))
                        weights = array_ops.reshape(weights, tf.shape(s))
                        a = (array_ops.ones(tf.shape(s), dtype=dtype) /
                             math_ops.to_float(weights))
                    else:
                        a = nn_ops.softmax(s)
                    attn_weights.append(a)
                    # Now calculate the attention-weighted vector d.
                    d = math_ops.reduce_sum(
                        array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
                        [1, 2])
                    ds.append(array_ops.reshape(d, [-1, attn_size]))
            return attn_weights, ds

        batch_attn_size = array_ops.stack([batch_size, attn_size])
        attns = [array_ops.zeros(batch_attn_size, dtype=dtype)
                 for _ in xrange(num_heads)]
        for a in attns:  # Ensure the second shape of attention vectors is set.
            a.set_shape([None, attn_size])
        # Zero weights matching the zero attention vectors, so the return
        # statement is well defined when no attention is computed.
        attn_weights = [
            array_ops.zeros(array_ops.stack([batch_size, attn_length]),
                            dtype=dtype)
            for _ in xrange(num_heads)
        ]
        if initial_state_attention:
            attn_weights, attns = attention(
                initial_state, use_attention=use_attention)

        matrix = variable_scope.get_variable(
            "Out_Matrix", [attn_size, output_size])
        # NOTE: here we temporarily assume num_head = 1
        res = math_ops.matmul(attns[0], matrix)
        bias_start = 0.0
        bias_term = variable_scope.get_variable(
            "Out_Bias", [output_size],
            initializer=init_ops.constant_initializer(bias_start))
        output = res + bias_term
    # NOTE: here we temporarily assume num_head = 1
    return attention_states, attn_weights[0], attns[0], [output]
def even_s(off, size):
    """Swaps the two entries of every consecutive pair along the last axis.

    `off` is viewed as `[-1, size // 2, 2]`, the length-2 axis is reversed,
    and the result is flattened back to `[-1, size]`.
    """
    pairs = array_ops.reshape(off, [-1, size//2, 2])
    swapped = array_ops.reverse(pairs, [2])
    return array_ops.reshape(swapped, [-1, size])
def loop_fn(i):
    """Gathers element `i` of `x` and returns it in two different shapes.

    `x` is taken from the enclosing scope.

    Returns:
      A tuple of the gathered slice reshaped to `[-1]` and to
      `[1, 3, 1, -1]`.
    """
    row = array_ops.gather(x, i)
    flat = array_ops.reshape(row, [-1])
    four_d = array_ops.reshape(row, [1, 3, 1, -1])
    return flat, four_d
def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM) with attention over image subfeatures.

    Each row of `inputs` is read as a word embedding of length 512 followed,
    optionally, by image subfeatures that are reshaped to
    `[batch, 17 * 17, 768]`. When subfeatures are present, a two-layer ReLU
    network scores every subfeature against the hidden state `h`; the softmax
    of those scores (`alpha_ti`) weights the subfeatures into a context
    vector `z_i`. The LSTM gates are then computed from
    `[word_imbeddings, h, z_i]`.

    NOTE(review): `array_ops.split(1, 2, state)`, `tf.concat(2, [...])`,
    `array_ops.concat(1, [...])` and `tf.pack` look like the pre-1.0
    TensorFlow argument order / names -- confirm the targeted TF version.
    NOTE(review): the `print` calls run every time the cell is invoked and
    look like leftover debugging output.

    Args:
      inputs: 2-D tensor; dimension 1 must be statically known.
      state: An `LSTMStateTuple` `(c, h)` if `self._state_is_tuple`,
        otherwise a single tensor that is split in two along axis 1.
      scope: Optional variable scope name; defaults to the class name.

    Returns:
      A tuple `(new_h, new_state, alpha_ti, z_i, word_imbeddings)`.
      `alpha_ti` is an empty list and `z_i` is all zeros when `inputs`
      contains no image subfeatures.
    """
    with vs.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
        # Parameters of gates are concatenated into one multiply for
        # efficiency.
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = array_ops.split(1, 2, state)

        # Separate inputs into word embedding and image subfeatures.
        shape = inputs.get_shape().as_list()
        batch_size = shape[0]
        print("inputs.get_shape[0]")
        print(batch_size)
        hsize=h.get_shape()
        print("hidden state length")
        print(hsize[1].value)
        single_input_length = shape[1]

        # Hard-coded layout of one input row: 512 embedding values followed
        # by 17 * 17 subfeatures of length 768 each.
        word_imbedding_length = 512
        subfeature_length = 768
        subfeature_num = 17*17

        # The static batch size may be unknown, so the dynamic shape is used
        # wherever a batch-sized tensor is built.
        tensorShape=tf.shape(inputs)
        # Default context vector, used when no subfeatures are present.
        z_i=array_ops.zeros(tf.pack([tensorShape[0],subfeature_length]))
        print('inputs:')
        print(inputs)
        state_length = self._num_units

        # Parameters of the two-layer attention scoring network.
        mid_layer_size = 300
        W1 = vs.get_variable(name="w1",shape=(hsize[1].value+subfeature_length,mid_layer_size),initializer=tf.contrib.layers.xavier_initializer(),dtype=tf.float32)
        W2 = vs.get_variable(name="w2",shape=(mid_layer_size,1),initializer=tf.contrib.layers.xavier_initializer(),dtype=tf.float32)
        b1 = vs.get_variable(name="b1",shape=(1,mid_layer_size),initializer=tf.zeros_initializer,dtype=tf.float32)
        b2 = vs.get_variable(name="b2",shape=(1,1),initializer=tf.zeros_initializer,dtype=tf.float32)

        word_imbeddings=inputs[:,0:word_imbedding_length]
        alpha_ti = []
        # Attention is only computed when the row carries more than the
        # word embedding.
        if single_input_length != word_imbedding_length:
            image_subfeatures=inputs[:,word_imbedding_length:single_input_length]
            image_subfeatures=array_ops.reshape(image_subfeatures,tf.pack([tensorShape[0],subfeature_num,subfeature_length]))

            # Replicate the shared parameters across the batch so that the
            # batched matmuls below can be used.
            W1_matrix=tf.expand_dims(W1,0) #[1,state_length+subfeature_length,mid_layer_size]
            W1_matrix=tf.tile(W1_matrix,tf.pack([tensorShape[0],1,1])) #[batchsize,state_length+subfeature_length,mid_layer_size]
            W2_matrix=tf.expand_dims(W2,0)
            W2_matrix=tf.tile(W2_matrix,tf.pack([tensorShape[0],1,1]))
            b1_matrix=tf.expand_dims(b1,0) #[1,1,mid_layer_size]
            b1_matrix=tf.tile(b1_matrix,tf.pack([tensorShape[0],1,1]))
            b2_matrix=tf.expand_dims(b2,0) #[1,1,1]
            b2_matrix=tf.tile(b2_matrix,tf.pack([tensorShape[0],1,1]))

            # Pair the hidden state with every subfeature.
            h_matrix=tf.expand_dims(h,1) # [batchsize,1,state_length]
            h_matrix=tf.tile(h_matrix,[1,subfeature_num,1]) #[batchsize,subfeature_num,state_length]
            x1 = tf.concat(2,[h_matrix,image_subfeatures]) #[batchsize,subfeature_num,state_length+subfeature_length]
            x2 = tf.nn.relu(math_ops.matmul(x1,W1_matrix)+b1_matrix) #[batchsize,subfeature_num,mid_layer_size]
            e_ti = tf.nn.relu(math_ops.matmul(x2,W2_matrix)+b2_matrix) #[batchsize,subfeature_num,1]
            # Normalize the scores over the subfeature axis.
            alpha_ti = nn_ops.softmax(e_ti,dim=1) #[batchsize,subfeature_num,1]

            # Attention-weighted sum of the subfeatures, then drop the
            # trailing singleton axis.
            z_i = math_ops.matmul(tf.transpose(image_subfeatures,[0,2,1]),alpha_ti)
            z_i=tf.squeeze(z_i,squeeze_dims=[2])
            print("squeezed z_i")
            print(z_i)

        concat = _linear([word_imbeddings, h, z_i], 4 * self._num_units, True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(1, 4, concat)

        new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) *
                 self._activation(j))
        new_h = self._activation(new_c) * sigmoid(o)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat(1, [new_c, new_h])
        return new_h, new_state, alpha_ti, z_i, word_imbeddings
def modify(off_normal, dist, normal_size):
    """Swap the two halves of every group of ``2**dist`` entries.

    The input is viewed as ``[-1, groups, 2, half]`` with
    ``half = 2**(dist-1)``, the axis of size 2 is reversed, and the result
    is flattened back to rows of ``normal_size`` entries.
    Assumes ``normal_size`` is a multiple of ``2**dist`` -- TODO confirm
    against the caller.
    """
    group_count = normal_size // (2 ** dist)
    half_width = 2 ** (dist - 1)
    grouped = array_ops.reshape(off_normal, [-1, group_count, 2, half_width])
    halves_swapped = array_ops.reverse(grouped, [2])
    return array_ops.reshape(halves_swapped, [-1, normal_size])
def _ReshapeGrad(op, grad):
    """Gradient helper: reshape `grad` to the shape of the op's first input.

    Returns:
      A two-element list: the reshaped gradient followed by `None`.
    """
    dense_grad = _IndexedSlicesToTensorNoWarning(grad)
    first_input_shape = array_ops.shape(op.inputs[0])
    grad_for_input = array_ops.reshape(dense_grad, first_input_shape)
    return [grad_for_input, None]
def _eunn_param(hidden_size, capacity=2, fft=False, comp=True, name=None):
    """Create the EUNN angle variables and precompute per-layer cos/sin rows.

    Variables are created through `vs.get_variable` and initialised uniformly
    in [-pi, pi].

    Args:
        hidden_size: Python int, length of each per-layer row.
        capacity: Python int, number of layers. When `fft` is true this value
            is replaced by `ceil(log2(hidden_size))`.
        fft: When true, build the FFT-style layout (a single `theta_0`
            variable sliced per layer); otherwise build the alternating
            layout backed by `theta_0` / `theta_1`.
        comp: When true, build complex rows (adds `phi_*` and `omega`
            variables); when false, build real rows.
        name: Optional string prepended to every variable name. `None` means
            no prefix.

    Returns:
        A tuple `(diag_vec, off_vec, diag, capacity)`. `diag_vec` and
        `off_vec` are `TensorArray`s unstacked along axis 0 from the cos and
        sin rows (one row of length `hidden_size` per layer). `diag` is
        `complex(cos(omega), sin(omega))` when `comp` is true and `None`
        otherwise. `capacity` is the number of layers actually used.
    """
    # `name` defaults to None; treat that as "no prefix" so the string
    # concatenations below do not raise TypeError.
    prefix = "" if name is None else name
    theta_phi_initializer = init_ops.random_uniform_initializer(-np.pi, np.pi)

    if fft:
        capacity = int(np.ceil(np.log2(hidden_size)))

        # Per-layer (size, normal_size, extra_size); computed once and reused
        # both for sizing the variable and for slicing it.
        layer_sizes = []
        for i in range(capacity):
            size = capacity - i
            normal_size = (hidden_size // (2 ** size)) * (2 ** (size - 1))
            extra_size = max(0, (hidden_size % (2 ** size)) - (2 ** (size - 1)))
            layer_sizes.append((size, normal_size, extra_size))
        varsize = sum(normal + extra for _, normal, extra in layer_sizes)

        params_theta = vs.get_variable(
            prefix + "theta_0", [varsize], initializer=theta_phi_initializer)
        cos_theta = math_ops.cos(params_theta)
        sin_theta = math_ops.sin(params_theta)

        if comp:
            params_phi = vs.get_variable(
                prefix + "phi_0", [varsize], initializer=theta_phi_initializer)
            cos_phi = math_ops.cos(params_phi)
            sin_phi = math_ops.sin(params_phi)

            cos_list_0 = math_ops.complex(
                cos_theta, array_ops.zeros_like(cos_theta))
            cos_list_1 = math_ops.complex(
                math_ops.multiply(cos_theta, cos_phi),
                math_ops.multiply(cos_theta, sin_phi))
            sin_list_0 = math_ops.complex(
                sin_theta, array_ops.zeros_like(sin_theta))
            sin_list_1 = math_ops.complex(
                -math_ops.multiply(sin_theta, cos_phi),
                -math_ops.multiply(sin_theta, sin_phi))

        diag_list_0 = []
        off_list_0 = []
        last = 0  # Running offset into the flat theta/phi vectors.
        for size, normal_size, extra_size in layer_sizes:
            # Entries of the row not covered by any angle: cos=1, sin=0.
            pad_size = hidden_size - 2 * normal_size - 2 * extra_size

            if comp:
                cos_list_normal = array_ops.concat(
                    [array_ops.slice(cos_list_0, [last], [normal_size]),
                     array_ops.slice(cos_list_1, [last], [normal_size])], 0)
                sin_list_normal = array_ops.concat(
                    [array_ops.slice(sin_list_0, [last], [normal_size]),
                     -array_ops.slice(sin_list_1, [last], [normal_size])], 0)
                last += normal_size

                cos_list_extra = array_ops.concat(
                    [array_ops.slice(cos_list_0, [last], [extra_size]),
                     math_ops.complex(tf.ones([pad_size]),
                                      tf.zeros([pad_size])),
                     array_ops.slice(cos_list_1, [last], [extra_size])], 0)
                sin_list_extra = array_ops.concat(
                    [array_ops.slice(sin_list_0, [last], [extra_size]),
                     math_ops.complex(tf.zeros([pad_size]),
                                      tf.zeros([pad_size])),
                     -array_ops.slice(sin_list_1, [last], [extra_size])], 0)
                last += extra_size
            else:
                cos_list_normal = array_ops.slice(
                    cos_theta, [last], [normal_size])
                cos_list_normal = array_ops.concat(
                    [cos_list_normal, cos_list_normal], 0)
                cos_list_extra = array_ops.slice(
                    cos_theta, [last + normal_size], [extra_size])
                cos_list_extra = array_ops.concat(
                    [cos_list_extra, tf.ones([pad_size]), cos_list_extra], 0)

                sin_list_normal = array_ops.slice(
                    sin_theta, [last], [normal_size])
                sin_list_normal = array_ops.concat(
                    [sin_list_normal, -sin_list_normal], 0)
                sin_list_extra = array_ops.slice(
                    sin_theta, [last + normal_size], [extra_size])
                sin_list_extra = array_ops.concat(
                    [sin_list_extra, tf.zeros([pad_size]), -sin_list_extra], 0)

                last += normal_size + extra_size

            if normal_size != 0:
                # Interleave the two concatenated halves via
                # reshape -> transpose -> flatten.
                cos_list_normal = array_ops.reshape(
                    array_ops.transpose(array_ops.reshape(
                        cos_list_normal,
                        [-1, 2 * normal_size // (2 ** size)])), [-1])
                sin_list_normal = array_ops.reshape(
                    array_ops.transpose(array_ops.reshape(
                        sin_list_normal,
                        [-1, 2 * normal_size // (2 ** size)])), [-1])

            cos_list = array_ops.concat([cos_list_normal, cos_list_extra], 0)
            sin_list = array_ops.concat([sin_list_normal, sin_list_extra], 0)
            diag_list_0.append(cos_list)
            off_list_0.append(sin_list)

        diag_vec = array_ops.stack(diag_list_0, 0)
        off_vec = array_ops.stack(off_list_0, 0)

    else:
        # Layers alternate between sub-layer 0 (capacity_a rows) and
        # sub-layer 1 (capacity_b rows).
        capacity_b = capacity // 2
        capacity_a = capacity - capacity_b

        hidden_size_a = hidden_size // 2
        hidden_size_b = (hidden_size - 1) // 2

        # Whether each sub-layer's row needs one trailing identity entry to
        # reach `hidden_size` columns.
        pad_tail_0 = hidden_size_a * 2 != hidden_size
        pad_tail_1 = hidden_size_b * 2 != hidden_size - 1

        params_theta_0 = vs.get_variable(
            prefix + "theta_0", [capacity_a, hidden_size_a],
            initializer=theta_phi_initializer)
        cos_theta_0 = array_ops.reshape(
            math_ops.cos(params_theta_0), [capacity_a, -1, 1])
        sin_theta_0 = array_ops.reshape(
            math_ops.sin(params_theta_0), [capacity_a, -1, 1])

        params_theta_1 = vs.get_variable(
            prefix + "theta_1", [capacity_b, hidden_size_b],
            initializer=theta_phi_initializer)
        cos_theta_1 = array_ops.reshape(
            math_ops.cos(params_theta_1), [capacity_b, -1, 1])
        sin_theta_1 = array_ops.reshape(
            math_ops.sin(params_theta_1), [capacity_b, -1, 1])

        if comp:
            params_phi_0 = vs.get_variable(
                prefix + "phi_0", [capacity_a, hidden_size_a],
                initializer=theta_phi_initializer)
            cos_phi_0 = array_ops.reshape(
                math_ops.cos(params_phi_0), [capacity_a, -1, 1])
            sin_phi_0 = array_ops.reshape(
                math_ops.sin(params_phi_0), [capacity_a, -1, 1])

            cos_list_0_re = array_ops.reshape(
                array_ops.concat(
                    [cos_theta_0, math_ops.multiply(cos_theta_0, cos_phi_0)],
                    2), [capacity_a, -1])
            cos_list_0_im = array_ops.reshape(
                array_ops.concat(
                    [array_ops.zeros_like(cos_theta_0),
                     math_ops.multiply(cos_theta_0, sin_phi_0)],
                    2), [capacity_a, -1])
            if pad_tail_0:
                cos_list_0_re = array_ops.concat(
                    [cos_list_0_re, tf.ones([capacity_a, 1])], 1)
                cos_list_0_im = array_ops.concat(
                    [cos_list_0_im, tf.zeros([capacity_a, 1])], 1)
            cos_list_0 = math_ops.complex(cos_list_0_re, cos_list_0_im)

            sin_list_0_re = array_ops.reshape(
                array_ops.concat(
                    [sin_theta_0, -math_ops.multiply(sin_theta_0, cos_phi_0)],
                    2), [capacity_a, -1])
            sin_list_0_im = array_ops.reshape(
                array_ops.concat(
                    [array_ops.zeros_like(sin_theta_0),
                     -math_ops.multiply(sin_theta_0, sin_phi_0)],
                    2), [capacity_a, -1])
            if pad_tail_0:
                sin_list_0_re = array_ops.concat(
                    [sin_list_0_re, tf.zeros([capacity_a, 1])], 1)
                sin_list_0_im = array_ops.concat(
                    [sin_list_0_im, tf.zeros([capacity_a, 1])], 1)
            sin_list_0 = math_ops.complex(sin_list_0_re, sin_list_0_im)

            params_phi_1 = vs.get_variable(
                prefix + "phi_1", [capacity_b, hidden_size_b],
                initializer=theta_phi_initializer)
            cos_phi_1 = array_ops.reshape(
                math_ops.cos(params_phi_1), [capacity_b, -1, 1])
            sin_phi_1 = array_ops.reshape(
                math_ops.sin(params_phi_1), [capacity_b, -1, 1])

            # Sub-layer 1 rows always get a leading identity entry
            # (cos=1, sin=0) in front of the paired entries.
            cos_list_1_re = array_ops.reshape(
                array_ops.concat(
                    [cos_theta_1, math_ops.multiply(cos_theta_1, cos_phi_1)],
                    2), [capacity_b, -1])
            cos_list_1_re = array_ops.concat(
                [tf.ones((capacity_b, 1)), cos_list_1_re], 1)
            cos_list_1_im = array_ops.reshape(
                array_ops.concat(
                    [array_ops.zeros_like(cos_theta_1),
                     math_ops.multiply(cos_theta_1, sin_phi_1)],
                    2), [capacity_b, -1])
            cos_list_1_im = array_ops.concat(
                [tf.zeros((capacity_b, 1)), cos_list_1_im], 1)
            if pad_tail_1:
                cos_list_1_re = array_ops.concat(
                    [cos_list_1_re, tf.ones([capacity_b, 1])], 1)
                cos_list_1_im = array_ops.concat(
                    [cos_list_1_im, tf.zeros([capacity_b, 1])], 1)
            cos_list_1 = math_ops.complex(cos_list_1_re, cos_list_1_im)

            sin_list_1_re = array_ops.reshape(
                array_ops.concat(
                    [sin_theta_1, -math_ops.multiply(sin_theta_1, cos_phi_1)],
                    2), [capacity_b, -1])
            sin_list_1_re = array_ops.concat(
                [tf.zeros((capacity_b, 1)), sin_list_1_re], 1)
            sin_list_1_im = array_ops.reshape(
                array_ops.concat(
                    [array_ops.zeros_like(sin_theta_1),
                     -math_ops.multiply(sin_theta_1, sin_phi_1)],
                    2), [capacity_b, -1])
            sin_list_1_im = array_ops.concat(
                [tf.zeros((capacity_b, 1)), sin_list_1_im], 1)
            if pad_tail_1:
                sin_list_1_re = array_ops.concat(
                    [sin_list_1_re, tf.zeros([capacity_b, 1])], 1)
                sin_list_1_im = array_ops.concat(
                    [sin_list_1_im, tf.zeros([capacity_b, 1])], 1)
            sin_list_1 = math_ops.complex(sin_list_1_re, sin_list_1_im)
        else:
            cos_list_0 = array_ops.reshape(
                array_ops.concat([cos_theta_0, cos_theta_0], 2),
                [capacity_a, -1])
            sin_list_0 = array_ops.reshape(
                array_ops.concat([sin_theta_0, -sin_theta_0], 2),
                [capacity_a, -1])
            if pad_tail_0:
                cos_list_0 = array_ops.concat(
                    [cos_list_0, tf.ones([capacity_a, 1])], 1)
                sin_list_0 = array_ops.concat(
                    [sin_list_0, tf.zeros([capacity_a, 1])], 1)

            cos_list_1 = array_ops.reshape(
                array_ops.concat([cos_theta_1, cos_theta_1], 2),
                [capacity_b, -1])
            cos_list_1 = array_ops.concat(
                [tf.ones((capacity_b, 1)), cos_list_1], 1)
            sin_list_1 = array_ops.reshape(
                array_ops.concat([sin_theta_1, -sin_theta_1], 2),
                [capacity_b, -1])
            sin_list_1 = array_ops.concat(
                [tf.zeros((capacity_b, 1)), sin_list_1], 1)
            if pad_tail_1:
                # Identity padding for the trailing unpaired entry: cos=1,
                # sin=0, consistent with the complex branch and with the
                # leading pad just above. A zero cos pad here would be the
                # only non-identity pad in the function.
                cos_list_1 = array_ops.concat(
                    [cos_list_1, tf.ones([capacity_b, 1])], 1)
                sin_list_1 = array_ops.concat(
                    [sin_list_1, tf.zeros([capacity_b, 1])], 1)

        if capacity_b != capacity_a:
            # Odd capacity: append one dummy row so both sub-layer lists have
            # capacity_a rows and can be interleaved by the reshape below.
            # The dummy row ends up last and is sliced off afterwards.
            if comp:
                cos_list_1 = array_ops.concat(
                    [cos_list_1,
                     math_ops.complex(tf.zeros([1, hidden_size]),
                                      tf.zeros([1, hidden_size]))], 0)
                sin_list_1 = array_ops.concat(
                    [sin_list_1,
                     math_ops.complex(tf.zeros([1, hidden_size]),
                                      tf.zeros([1, hidden_size]))], 0)
            else:
                cos_list_1 = array_ops.concat(
                    [cos_list_1, tf.zeros([1, hidden_size])], 0)
                sin_list_1 = array_ops.concat(
                    [sin_list_1, tf.zeros([1, hidden_size])], 0)

        # Concatenating along axis 1 and reshaping interleaves the rows:
        # sub-layer 0 row, sub-layer 1 row, sub-layer 0 row, ...
        diag_vec = tf.reshape(
            tf.concat([cos_list_0, cos_list_1], 1),
            [capacity_a * 2, hidden_size])
        off_vec = tf.reshape(
            tf.concat([sin_list_0, sin_list_1], 1),
            [capacity_a * 2, hidden_size])

        if capacity_b != capacity_a:
            diag_vec = tf.slice(diag_vec, [0, 0], [capacity, hidden_size])
            off_vec = tf.slice(off_vec, [0, 0], [capacity, hidden_size])

    def _toTensorArray(elems):
        """Unstack `elems` along axis 0 into a re-readable TensorArray."""
        elems = ops.convert_to_tensor(elems)
        n = array_ops.shape(elems)[0]
        elems_ta = tensor_array_ops.TensorArray(
            dtype=elems.dtype, size=n, dynamic_size=False, infer_shape=True,
            clear_after_read=False)
        elems_ta = elems_ta.unstack(elems)
        return elems_ta

    diag_vec = _toTensorArray(diag_vec)
    off_vec = _toTensorArray(off_vec)
    if comp:
        omega = vs.get_variable(
            prefix + "omega", [hidden_size], initializer=theta_phi_initializer)
        diag = math_ops.complex(math_ops.cos(omega), math_ops.sin(omega))
    else:
        diag = None

    return diag_vec, off_vec, diag, capacity
def _ExtractVolumePatchesGrad(op, grad):
    """Gradient for the ExtractVolumePatches op.

    Builds a sparse 0/1 matrix that maps every input position
    (plane, row, col) to the output patch slots it was copied into, then
    multiplies that matrix with the flattened incoming gradient.

    The spatial dimensions of the op's input and output are read from the
    static shape and used in Python arithmetic; batch size and channel count
    are read dynamically.

    Args:
      op: The forward op; its first input, first output and the `ksizes`,
        `strides` and `padding` attrs are used.
      grad: Gradient with respect to the op's output.

    Returns:
      A one-element list holding the gradient with respect to the op's input.
    """
    # Only the spatial dims are taken from the static shape. Batch and channel
    # sizes come from the dynamic shape below.
    _, planes_in, rows_in, cols_in, _ = [
        dim.value for dim in op.inputs[0].shape.dims
    ]
    input_bphwc = array_ops.shape(op.inputs[0])
    batch_size = input_bphwc[0]
    channels = input_bphwc[4]

    # Create indices matrix for input tensor.
    # Note that 0 is preserved for padding location,
    # so indices for input run from 1 to planes_in * rows_in * cols_in.
    input_indices_num = 1 + planes_in * rows_in * cols_in
    input_idx = array_ops.reshape(
        math_ops.range(1, input_indices_num, dtype=ops.dtypes.int64),
        (1, planes_in, rows_in, cols_in, 1))
    # Run the forward op on the index volume to learn which input index
    # lands in which output slot.
    input_idx_patched = gen_array_ops.extract_volume_patches(
        input_idx, op.get_attr("ksizes"), op.get_attr("strides"),
        op.get_attr("padding"))

    # Create indices matrix for output tensor.
    _, planes_out, rows_out, cols_out, _ = [
        dim.value for dim in op.outputs[0].shape.dims
    ]
    _, ksize_p, ksize_r, ksize_c, _ = op.get_attr("ksizes")
    # Indices for output start from 0.
    prc_indices_num = planes_out * rows_out * cols_out
    output_indices_num = prc_indices_num * ksize_p * ksize_r * ksize_c
    output_idx = array_ops.reshape(
        math_ops.range(output_indices_num, dtype=ops.dtypes.int64),
        (1, planes_out, rows_out, cols_out, ksize_p * ksize_r * ksize_c))

    # Construct mapping table for indices: (input -> output).
    idx_matrix = array_ops.concat([
        array_ops.expand_dims(input_idx_patched, axis=-1),
        array_ops.expand_dims(output_idx, axis=-1)
    ], axis=-1)
    idx_map = array_ops.reshape(idx_matrix, (-1, 2))

    sp_shape = (input_indices_num, output_indices_num)
    sp_mat_full = sparse_tensor.SparseTensor(
        idx_map, array_ops.ones([output_indices_num], dtype=grad.dtype),
        sp_shape)
    # Remove all padding locations [0, :].
    sp_mat = sparse_ops.sparse_slice(
        sp_mat_full, (1, 0), (input_indices_num - 1, output_indices_num))

    # Move batch and channels to the trailing axes so that each row of
    # grad_flat corresponds to one output slot.
    grad_expanded = array_ops.transpose(
        array_ops.reshape(
            _IndexedSlicesToTensorNoWarning(grad),
            (batch_size, planes_out, rows_out, cols_out, ksize_p, ksize_r,
             ksize_c, channels)),
        (1, 2, 3, 4, 5, 6, 0, 7))
    grad_flat = array_ops.reshape(grad_expanded, (-1, batch_size * channels))

    jac = sparse_ops.sparse_tensor_dense_matmul(sp_mat, grad_flat)

    grad_out = array_ops.reshape(
        jac, (planes_in, rows_in, cols_in, batch_size, channels))
    # Restore the (batch, planes, rows, cols, channels) layout.
    grad_out = array_ops.transpose(grad_out, (3, 0, 1, 2, 4))

    return [grad_out]
def _ReshapeToInput(op, grad):
    """Return `grad` reshaped to match the shape of the op's first input."""
    dense_grad = _IndexedSlicesToTensorNoWarning(grad)
    target_shape = array_ops.shape(op.inputs[0])
    return array_ops.reshape(dense_grad, target_shape)
def loss(x):
    """Squared difference between the scalar product `x @ kernel` and 1.

    `kernel` is a free variable resolved from the surrounding scope. The
    matmul result is reshaped to a scalar, so it must hold exactly one
    element.
    """
    product = gen_math_ops.mat_mul(x, kernel)
    scalar_product = array_ops.reshape(product, [])
    residual = scalar_product - array_ops.identity(1.)
    return residual * residual
def _GatherV2Grad(op, grad):
    """Gradient for GatherV2 op.

    Args:
      op: The GatherV2 op. Its inputs are read as `(params, indices, axis)`
        and its `batch_dims` attr is used.
      grad: Gradient with respect to the op's output.

    Returns:
      A three-element list `[params_grad, None, None]`. When `axis` is
      statically known to be 0, `params_grad` is an `IndexedSlices`;
      otherwise it is the result of `_BatchGatherGrad`, transposed back to
      the layout of `params`.
    """
    # params can be large, so colocate the shape calculation with it.
    #
    # params can be very large for sparse model, array_ops.shape raises
    # exception on the Windows platform when any dimension is larger than
    # int32. params_shape is not used in optimizer apply_sparse gradients,
    # so it's fine to convert it back to int32 regardless of truncation.
    params = op.inputs[0]
    with ops.colocate_with(params):
        params_shape = array_ops.shape(params, out_type=ops.dtypes.int64)
        params_shape = math_ops.cast(params_shape, dtypes.int32)

    indices = op.inputs[1]
    # Total number of indices, as a length-1 vector usable in shape concats.
    indices_size = array_ops.expand_dims(array_ops.size(indices), 0)
    axis = op.inputs[2]
    # None unless the axis value can be determined at graph-construction time.
    axis_static = tensor_util.constant_value(axis)
    batch_dims = int(op.get_attr("batch_dims"))

    # A negative batch_dims counts from the end of the indices' rank.
    if batch_dims < 0:
        batch_dims += indices.shape.ndims

    # For axis 0 gathers, build an appropriately shaped IndexedSlices.
    if axis_static == 0:
        if context.executing_eagerly():
            # In eager mode the tail-shape slice is pinned to the CPU.
            with ops.device("/cpu:0"):
                params_tail_shape = array_ops.identity(params_shape)[1:]
        else:
            params_tail_shape = params_shape[1:]
        # One gradient row per gathered index: [num_indices] + params.shape[1:].
        values_shape = array_ops.concat([indices_size, params_tail_shape], 0)
        values = array_ops.reshape(
            _IndexedSlicesToTensorNoWarning(grad), values_shape)
        indices = array_ops.reshape(indices, indices_size)
        params_grad = ops.IndexedSlices(values, indices, params_shape)
    else:
        # Handle axis by transposing the axis dimension to be the first non-batch
        # dimension, compute the gradient and transpose the result back.
        outer_shape = params_shape[:axis]
        inner_shape = params_shape[axis:][1:]
        # Collapse all gathered positions into a single -1 dimension at `axis`.
        values_shape = array_ops.concat([outer_shape, [-1], inner_shape], 0)

        values_dims = array_ops.size(values_shape)
        axis_dims = array_ops.size(outer_shape)

        # Dimension index ranges: [0, batch_dims), [batch_dims, axis) and
        # (axis, rank).
        outer_batches_indices = math_ops.range(batch_dims)
        batch_axis_indices = math_ops.range(batch_dims, axis_dims)
        inner_axes_indices = math_ops.range(axis_dims + 1, values_dims)

        values = array_ops.reshape(
            _IndexedSlicesToTensorNoWarning(grad), values_shape)

        # Move values[axis] up to values[batch_dims]
        transpose_dims = array_ops.concat([
            outer_batches_indices, [axis_dims], batch_axis_indices,
            inner_axes_indices
        ], 0)
        values_transpose = array_ops.transpose(values, transpose_dims)

        params_grad = _BatchGatherGrad(params_shape, values_transpose, indices,
                                       batch_dims, params_shape[axis])

        # Inverts the above transpose by moving dimension batch_dims back to its
        # original position.
        invert_transpose_dims = array_ops.concat([
            outer_batches_indices, batch_axis_indices + 1, [batch_dims],
            inner_axes_indices
        ], 0)
        params_grad = array_ops.transpose(params_grad, invert_transpose_dims)

    # No gradient is returned for the indices and axis inputs.
    return [params_grad, None, None]
def _vec(x):
    """Flatten the last two dimensions of `x` column by column.

    The trailing matrix is transposed and then collapsed into one axis, so
    its columns end up laid out one after another. Leading dimensions are
    preserved.
    """
    transposed = array_ops.matrix_transpose(x)
    leading_shape = array_ops.shape(x)[:-2]
    flattened_shape = array_ops.concat([leading_shape, [-1]], axis=0)
    return array_ops.reshape(transposed, flattened_shape)
def histogram_fixed_width_bins(values,
                               value_range,
                               nbins=100,
                               dtype=dtypes.int32,
                               name=None):
    """Bins the given values for use in a histogram.

    Given the tensor `values`, this operation returns a `Tensor` of the same
    shape holding, for each element of `values`, the index of the histogram
    bin it falls into. The bins are equal width and determined by the
    arguments `value_range` and `nbins`.

    Args:
      values: Numeric `Tensor`.
      value_range: Shape [2] `Tensor` of same `dtype` as `values`.
        values <= value_range[0] will be mapped to hist[0],
        values >= value_range[1] will be mapped to hist[-1].
      nbins: Scalar `int32 Tensor`. Number of histogram bins.
      dtype: dtype of the returned bin indices.
      name: A name for this operation (defaults to
        'histogram_fixed_width_bins').

    Returns:
      A `Tensor` of type `dtype` holding the indices of the binned values,
      whose shape matches `values`.

    Examples:

    ```python
    # Bins will be:  (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
    nbins = 5
    value_range = [0.0, 5.0]
    new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]

    indices = histogram_fixed_width_bins(new_values, value_range, nbins=5)
    # Evaluating `indices` yields [0, 0, 1, 2, 4, 4]
    ```
    """
    with ops.name_scope(name, 'histogram_fixed_width_bins',
                        [values, value_range, nbins]):
        values = ops.convert_to_tensor(values, name='values')
        # Remember the original shape; binning is done on a flat view.
        shape = array_ops.shape(values)

        values = array_ops.reshape(values, [-1])
        value_range = ops.convert_to_tensor(value_range, name='value_range')
        nbins = ops.convert_to_tensor(nbins, dtype=dtypes.int32, name='nbins')
        nbins_float = math_ops.cast(nbins, values.dtype)

        # Map tensor values that fall within value_range to [0, 1].
        scaled_values = math_ops.truediv(
            values - value_range[0],
            value_range[1] - value_range[0],
            name='scaled_values')

        # map tensor values within the open interval value_range to
        # {0,.., nbins-1}, values outside the open interval will be zero or
        # less, or nbins or more.
        indices = math_ops.floor(nbins_float * scaled_values, name='indices')

        # Clip edge cases (e.g. value = value_range[1]) or "outliers", then
        # cast to the caller-requested dtype rather than a fixed int32.
        indices = math_ops.cast(
            clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtype)
        return array_ops.reshape(indices, shape)