def undo(full_val_and_sub_val):
    """Undo the slices.

    Writes the rows of `sub_val` back into a tensor shaped like `full_val` at
    the positions listed in `grad_idx`. The write is expressed as a masked
    blend (a "fake" scatter_update) so that gradients are defined.

    `var_shape` and `grad_idx` are read from the enclosing scope.

    Args:
      full_val_and_sub_val: A `(full_val, sub_val)` pair. `full_val` must be
        rank 2 and is blended with tensors of shape
        `[n_words * n_word_feat, n_feat]`. `sub_val` must have a static
        rank-2 shape whose last dimension is `n_feat`.

    Returns:
      `full_val` with the entries selected by `grad_idx` replaced by the
      corresponding values of `sub_val`.

    Raises:
      NotImplementedError: If `full_val` is not rank 2.
    """
    # Tuple parameters (`def undo((a, b))`) were removed in Python 3
    # (PEP 3113); callers still pass one pair, which is unpacked here.
    full_val, sub_val = full_val_and_sub_val

    # tf.shape(t) is a 1-D tensor with one entry per dimension of t, so its
    # static length is the rank of `full_val`.
    if tf.shape(full_val).shape.as_list()[0] != 2:
        raise NotImplementedError(
            "TODO(lmetz) other than this is not implemented.")

    n_words, n_word_feat = var_shape
    _, n_feat = sub_val.shape.as_list()
    n_active = tf.size(grad_idx)

    # One row per active word, all of its features flattened together.
    in_shape_form = tf.reshape(sub_val, [n_active, n_word_feat * n_feat])
    per_word_shape = [n_words, n_word_feat * n_feat]

    # NOTE(review): tf.scatter_nd sums duplicate indices, so the blend below
    # assumes `grad_idx` contains no repeated entries -- TODO confirm.
    scatter_idx = tf.reshape(tf.to_int32(grad_idx), [-1, 1])
    scattered = tf.scatter_nd(scatter_idx, in_shape_form, shape=per_word_shape)
    mask = tf.scatter_nd(
        scatter_idx, tf.ones_like(in_shape_form), shape=per_word_shape)

    # put back into the flat format
    scattered = tf.reshape(scattered, [n_words * n_word_feat, n_feat])
    mask = tf.reshape(mask, [n_words * n_word_feat, n_feat])

    # this is the update part / fake scatter_update but with gradients.
    return full_val * (1 - mask) + scattered * mask
def scatter_update(self, sequence, updates, positions):
    """Write `updates` into `sequence` at per-example positions.

    Args:
      sequence: A [batch_size, seq_len] or [batch_size, seq_len, depth] tensor
        of dtype float32 or int32.
      updates: A tensor that is reshaped to [batch_size * n_positions, depth]
        (depth is 1 when `sequence` is rank 2).
      positions: A [batch_size, n_positions] tensor of indices along seq_len.

    Returns:
      A tuple `(updated_sequence, updates_mask)`. `updated_sequence` has the
      shape of `sequence`, with the entries at `positions` replaced by the
      matching `updates`. Writes to index 0 are ignored. Values written to a
      repeated position are summed and then divided by the number of writes
      (floor division for int32). `updates_mask` is an int32
      [batch_size, seq_len] tensor that is 1 where an entry was replaced.
    """
    dims = self.get_shape_list(sequence, expected_rank=[2, 3])
    has_depth = len(dims) == 3
    if has_depth:
        B, L, D = dims
    else:
        B, L = dims
        D = 1
        # Work on a rank-3 view; the extra axis is squeezed before returning.
        sequence = tf.expand_dims(sequence, -1)
    N = self.get_shape_list(positions)[1]

    # Offset every row's positions so they address a flattened [B * L] axis.
    row_offsets = tf.expand_dims(L * tf.range(B), -1)
    flat_index = tf.reshape(positions + row_offsets, [-1, 1])
    dense_updates = tf.scatter_nd(
        flat_index, tf.reshape(updates, [-1, D]), [B * L, D])
    dense_updates = tf.reshape(dense_updates, [B, L, D])

    # Count how many writes landed on each position.
    hit_counts = tf.scatter_nd(
        flat_index, tf.ones([B * N], tf.int32), [B * L])
    hit_counts = tf.reshape(hit_counts, [B, L])
    all_but_first = tf.concat(
        [tf.zeros((B, 1), tf.int32), tf.ones((B, L - 1), tf.int32)], -1)
    hit_counts = hit_counts * all_but_first
    hit_counts_3d = tf.expand_dims(hit_counts, -1)

    # scatter_nd summed duplicate writes; divide by the count to undo that.
    if sequence.dtype == tf.float32:
        hit_counts_3d = tf.cast(hit_counts_3d, tf.float32)
        dense_updates = dense_updates / tf.maximum(1.0, hit_counts_3d)
    else:
        assert sequence.dtype == tf.int32
        dense_updates = tf.math.floordiv(
            dense_updates, tf.maximum(1, hit_counts_3d))
    updates_mask = tf.minimum(hit_counts, 1)
    mask_3d = tf.minimum(hit_counts_3d, 1)

    updated_sequence = (1 - mask_3d) * sequence + mask_3d * dense_updates
    if has_depth:
        return updated_sequence, updates_mask
    return tf.squeeze(updated_sequence, -1), updates_mask
def cross_replica_concat(tensor):
    """Concatenate a tensor across TPU cores along its first dimension.

    Args:
      tensor: Input Tensor with a fully defined static shape whose first
        dimension is the batch dimension.

    Returns:
      The concatenated Tensor, with the batch dimension multiplied by the
      number of TPU shards. When the TPU context reports no shard count the
      input is returned unchanged.
    """
    replica_count = tpu_function.get_tpu_context().number_of_shards
    if replica_count is None:
        # Not running on TPU: nothing to gather.
        return tensor

    # Place the local values in this replica's slot of a
    # [num_replicas, local_batch_size, ...] tensor; other slots stay zero.
    scattered_tensor = tf.scatter_nd(
        indices=[[local_tpu_replica_id()]],
        updates=[tensor],
        shape=[replica_count] + tensor.shape.as_list())
    # Summing across replicas fills every slot on every replica.
    summed = tf.tpu.cross_replica_sum(scattered_tensor)
    # Merge the replica and batch axes: [num_replicas * local_batch_size, ...]
    trailing_dims = scattered_tensor.shape.as_list()[2:]
    return tf.reshape(summed, [-1] + trailing_dims)
def batch_segment_sum_embeddings(long_embeddings: tf.Tensor,
                                 long_word_idx: tf.Tensor,
                                 long_input_mask: tf.Tensor) -> tf.Tensor:
    """Sums wordpiece embeddings into per-word embeddings.

    Args:
      long_embeddings: <float32>[batch_size, long_max_length, hidden_size]
        Tensor of wordpiece embeddings.
      long_word_idx: <int32>[batch_size, long_max_length] Tensor giving, for
        each wordpiece, the index of the word it belongs to. Entries for
        padding may be any integer in [0, long_max_length); they contribute
        nothing because padding embeddings are zeroed first.
      long_input_mask: <int32>[batch_size, long_max_length] Tensor with `1`
        for present wordpieces and `0` for padding.

    Returns:
      <float32>[batch_size, long_max_length, hidden_size] Tensor in which
      position `w` of each example holds the sum of the embeddings of all
      wordpieces mapped to word `w`. Positions no wordpiece maps to are 0.
    """
    # Zero out padding embeddings so they add nothing wherever they land.
    presence = tf.cast(long_input_mask, dtype=long_embeddings.dtype)
    long_embeddings *= presence[:, :, tf.newaxis]

    # Pair every wordpiece with its example index to form
    # [example, word] scatter coordinates.
    num_examples = tf.shape(long_embeddings)[0]
    example_column = tf.range(num_examples)[:, tf.newaxis]
    example_idx = tf.broadcast_to(example_column, shape=tf.shape(long_word_idx))
    word_coords = tf.stack([example_idx, long_word_idx], axis=-1)

    # scatter_nd adds together all updates that share a coordinate.
    return tf.scatter_nd(
        indices=word_coords,
        updates=long_embeddings,
        shape=tf.shape(long_embeddings))
def batch_skew(vec, batch_size=None):
    """Builds the skew-symmetric matrix of every 3-vector in a batch.

    For a row `(x, y, z)` the result is
        [[ 0, -z,  y],
         [ z,  0, -x],
         [-y,  x,  0]]

    Args:
      vec: N x 3 tensor.
      batch_size: Optional int N. When omitted it is taken from the static
        shape of `vec`, falling back to the dynamic shape if that is unknown.

    Returns:
      N x 3 x 3 tensor holding one skew-symmetric matrix per row of `vec`.
    """
    with tf.name_scope("batch_skew", values=[vec]):
        if batch_size is None:
            batch_size = vec.shape.as_list()[0]
        if batch_size is None:
            # Static batch dimension unknown: use the runtime value instead of
            # passing None on to tf.range.
            batch_size = tf.shape(vec)[0]

        # Flat positions of the six off-diagonal entries of a 3x3 matrix.
        col_inds = tf.constant([1, 2, 3, 5, 6, 7])
        # Shift those positions by 9 per batch element.
        indices = tf.reshape(
            tf.reshape(tf.range(0, batch_size) * 9, [-1, 1]) + col_inds,
            [-1, 1])
        # Values in the same order as `col_inds`.
        updates = tf.reshape(
            tf.stack([
                -vec[:, 2], vec[:, 1], vec[:, 2],
                -vec[:, 0], -vec[:, 1], vec[:, 0]
            ], axis=1),
            [-1])
        out_shape = [batch_size * 9]
        res = tf.scatter_nd(indices, updates, out_shape)
        res = tf.reshape(res, [batch_size, 3, 3])

        return res
def compute_p2LFA(self, P_stencil, n, grid_size):
    """Accumulates a complex per-frequency matrix from the stencils `P_stencil`.

    Args:
      P_stencil: Tensor indexed as `P_stencil[:, ic, jc, k, m]` with a static
        leading (batch) dimension; `ic`, `jc` range over `grid_size // 2` and
        `k`, `m` over 3. Presumably the prolongation stencils -- verify
        against the caller.
      n: Integer used to build the frequencies `2 * pi * i / n`.
      grid_size: Fine-grid block size; the coarse block is `grid_size // 2`.

    Returns:
      A complex128 tensor of shape
      `(len(theta_y), len(theta_x), batch_size, grid_size ** 2,
      (grid_size // 2) ** 2)`.
    """
    batch_size = P_stencil.get_shape().as_list()[0]
    # K is indexed below as K[i, j, k, m] and cast to int; presumably it maps a
    # fine-grid point and stencil offset to a flat row index -- TODO confirm.
    K = self.map_2_to_1(grid_size=grid_size)
    pi = np.pi
    # theta_x and theta_y are built from the same range, so they are equal.
    theta_x = np.array(([i * 2 * pi / n for i in range(-n // (grid_size * 2) + 1, n // (grid_size * 2) + 1)]))
    theta_y = np.array([i * 2 * pi / n for i in range(-n // (grid_size * 2) + 1, n // (grid_size * 2) + 1)])
    num_modes = theta_x.shape[0]

    # 3x3 grids of the offsets -1, 0, 1 in each direction.
    X, Y = np.meshgrid(np.arange(-1, 2), np.arange(-1, 2))
    with tf.device(self.device):
        P = tf.zeros((len(theta_y), len(theta_x), batch_size, grid_size ** 2, (grid_size // 2) ** 2), dtype=tf.complex128)
        # modes[y, x] is the 3x3 array exp(-1j * (theta_x[x] * X + theta_y[y] * Y)).
        modes = np.array([[np.exp(-1j * (tx * X + ty * Y)) for tx in theta_x] for ty in theta_y])
        # Prepend a batch axis: (batch_size, len(theta_y), len(theta_x), 3, 3).
        fourier_component = tf.to_complex128(np.tile(modes, (batch_size, 1, 1, 1, 1)))
        for ic in range(grid_size // 2):
            i = 2 * ic  # ic is the index on the coarse grid, and i is the index on the fine grid
            for jc in range(grid_size // 2):
                j = 2 * jc  # jc is the index on the coarse grid, and j is the index on the fine grid
                # Flat coarse-grid index, used as the column of P.
                J = int(grid_size // 2 * jc + ic)
                for k in range(3):
                    for m in range(3):
                        # Flat row of P for stencil entry (k, m) at fine point (i, j).
                        I = int(K[i, j, k, m])
                        # Scale the (k, m) Fourier factor by each batch element's
                        # stencil coefficient, then move the batch axis last.
                        a = fourier_component[:, :, :, k, m] * tf.reshape(P_stencil[:, ic, jc, k, m], (-1, 1, 1))
                        a = tf.transpose(a, (1, 2, 0))
                        # The scatter_nd term is a tensor of ones at the positions
                        # returned by self.idx_array (presumably entry (I, J) for
                        # every mode pair and batch element -- verify); multiplying
                        # by `a` broadcast over the last two axes adds the
                        # contribution there.
                        # NOTE(review): `a` is ordered (theta_y, theta_x, batch) but
                        # reshaped with theta_x's length first; harmless only
                        # because both lengths are equal.
                        P = P + tf.to_complex128(
                            tf.scatter_nd(indices=tf.constant(self.idx_array((I, J, int(batch_size), num_modes))),
                                          updates=tf.ones(batch_size * (num_modes ** 2)),
                                          shape=tf.constant([num_modes, num_modes, batch_size, grid_size ** 2, (grid_size // 2) ** 2]))) \
                            * tf.reshape(a, (theta_x.shape[0], theta_y.shape[0], batch_size, 1, 1))
    return P
def cross_replica_concat(tensor, num_replicas, name=None):
    """Concatenate `tensor` across TPU cores along the first dimension.

    Args:
      tensor: Tensor to concatenate; its static shape must be fully defined.
      num_replicas: Number of TPU cores.
      name: Optional name for the op scope.

    Returns:
      Tensor of the same rank as `tensor` whose first dimension is
      `num_replicas` times larger.
    """
    this_replica = xla.replica_id()

    with tf.compat.v1.name_scope(name, 'tpu_cross_replica_concat'):
        # Add an outermost replica axis. Only this replica's slot holds the
        # local values; every other slot is zero.
        stacked_shape = [num_replicas] + tensor.shape.as_list()
        ext_tensor = tf.scatter_nd(
            indices=[[this_replica]], updates=[tensor], shape=stacked_shape)

        # Each slot is non-zero on exactly one replica, so a cross-replica sum
        # reconstructs the full stacked tensor everywhere.
        ext_tensor = tf.compat.v1.tpu.cross_replica_sum(ext_tensor)

        # Fold the replica axis into the first dimension. The leading -1 also
        # handles scalar input, for which there are no trailing dimensions.
        trailing_dims = ext_tensor.shape.as_list()[2:]
        return tf.reshape(ext_tensor, [-1] + trailing_dims)
def f2():
    """Sums the classification loss over the hardest negative boxes.

    Reads `neg_class_loss_all`, `n_negative_keep`, `batch_size`, `n_boxes` and
    `classification_loss` from the enclosing scope.

    Returns:
      Tensor of shape (batch_size,) with the classification loss summed over
      the `n_negative_keep` boxes that have the highest negative-class loss
      across the whole batch.
    """
    # Flatten to shape (batch_size * n_boxes,) so top_k ranks every box in the
    # batch together. A high loss does not necessarily mean the box was
    # misclassified, only that it is among the costliest negatives.
    flat_neg_loss = tf.reshape(neg_class_loss_all, [-1])

    # Only the positions are needed, and their order is irrelevant.
    _, top_idx = tf.nn.top_k(flat_neg_loss, n_negative_keep, sorted=False)

    # Turn the selected positions into a 0/1 mask of shape
    # (batch_size * n_boxes,) ...
    keep_flat = tf.scatter_nd(
        tf.expand_dims(top_idx, axis=1),
        updates=tf.ones_like(top_idx, dtype=tf.int32),
        shape=tf.shape(flat_neg_loss))
    # ... and fold it back to (batch_size, n_boxes).
    keep_mask = tf.to_float(tf.reshape(keep_flat, [batch_size, n_boxes]))

    # Mask out every other box and reduce to one value per example.
    return tf.reduce_sum(classification_loss * keep_mask, axis=-1)
def cutout(x, toss, ratio=(1, 2)):
    """Zeroes a randomly placed rectangle in each image of a batch.

    Args:
      x: Rank-4 float tensor; dimensions 1 and 2 are treated as the image
        height and width, and the mask is broadcast over dimension 3.
      toss: Float tensor with one value per example (it is reshaped to
        [-1, 1, 1]). It scales the cutout, so 0 leaves an example untouched
        and 1 applies the cutout fully.
      ratio: Pair `(numerator, denominator)`; the rectangle measures
        `image_size * numerator // denominator` along each axis.

    Returns:
      A tensor shaped like `x` with the selected rectangle set to zero.
    """
    batch_size = tf.shape(x)[0]
    image_size = tf.shape(x)[1:3]
    cutout_size = image_size * ratio[0] // ratio[1]

    # One random rectangle centre per example along each axis.
    offset_x = tf.random.uniform(
        [batch_size, 1, 1],
        maxval=image_size[0] + (1 - cutout_size[0] % 2),
        dtype=tf.int32)
    offset_y = tf.random.uniform(
        [batch_size, 1, 1],
        maxval=image_size[1] + (1 - cutout_size[1] % 2),
        dtype=tf.int32)

    # Coordinates of every pixel in the rectangle, as [batch, row, col].
    grid_batch, grid_x, grid_y = tf.meshgrid(
        tf.range(batch_size, dtype=tf.int32),
        tf.range(cutout_size[0], dtype=tf.int32),
        tf.range(cutout_size[1], dtype=tf.int32),
        indexing='ij')
    cutout_grid = tf.stack([
        grid_batch,
        grid_x + offset_x - cutout_size[0] // 2,
        grid_y + offset_y - cutout_size[1] // 2
    ], axis=-1)

    # Clamp coordinates that fall outside the image onto its border.
    mask_shape = tf.stack([batch_size, image_size[0], image_size[1]])
    cutout_grid = tf.maximum(cutout_grid, 0)
    cutout_grid = tf.minimum(
        cutout_grid, tf.reshape(mask_shape - 1, [1, 1, 1, 3]))

    # Clamping can send several coordinates to the same pixel, and scatter_nd
    # sums duplicates, so values may exceed 1; the outer maximum keeps the
    # final mask non-negative.
    covered = tf.scatter_nd(
        cutout_grid,
        tf.ones([batch_size, cutout_size[0], cutout_size[1]],
                dtype=tf.float32),
        mask_shape)
    mask = tf.maximum(1 - tf.reshape(toss, [-1, 1, 1]) * covered, 0)

    x = x * tf.expand_dims(mask, axis=3)
    return x
def tpu_cross_replica_concat(tensor, tpu_context=None):
    """Concatenate `tensor` across TPU cores along the first dimension.

    Args:
      tensor: Tensor to concatenate; its static shape must be fully defined.
      tpu_context: A `TPUContext`. If not set, CPU execution is assumed.

    Returns:
      Tensor of the same rank as `tensor` whose first dimension is
      `num_replicas` times larger. The input is returned unchanged when there
      is no TPU context or at most one replica.
    """
    single_replica = tpu_context is None or tpu_context.num_replicas <= 1
    if single_replica:
        return tensor

    num_replicas = tpu_context.num_replicas

    with tf.name_scope("tpu_cross_replica_concat"):
        # Add an outermost replica axis holding the local values in this
        # replica's slot and zeros in all the others.
        stacked_shape = [num_replicas] + tensor.shape.as_list()
        ext_tensor = tf.scatter_nd(
            indices=[[xla.replica_id()]],
            updates=[tensor],
            shape=stacked_shape,
        )

        # Each slot is populated on exactly one replica, so summing across
        # replicas yields the complete stacked tensor on all of them.
        ext_tensor = tf.tpu.cross_replica_sum(ext_tensor)

        # Collapse the replica axis into the first dimension; the leading -1
        # also copes with scalar input.
        trailing_dims = ext_tensor.shape.as_list()[2:]
        return tf.reshape(ext_tensor, [-1] + trailing_dims)
def compute_dense_matrix(self, stencils, batch_size, grid_size):
    """Scatters stencil entries into a dense batch of square matrices.

    Args:
      stencils: Tensor the entries are gathered from.
      batch_size: Number of matrices to build.
      grid_size: Grid side length; each matrix is
        `grid_size ** 2` x `grid_size ** 2`.

    Returns:
      Tensor of shape `(batch_size, grid_size ** 2, grid_size ** 2)`.
    """
    with tf.device(self.device):
        # The helper returns matching destination (scatter) and source
        # (gather) index sets.
        scatter_idx, gather_idx = self.get_indices_compute_A(
            (batch_size, grid_size))
        entries = tf.gather_nd(params=stencils, indices=gather_idx)
        dense = tf.scatter_nd(
            indices=scatter_idx,
            updates=entries,
            shape=(batch_size, grid_size ** 2, grid_size ** 2))
    return dense
def _cic_paint(mesh, neighboor_coords, kernel, shift, name=None):
    """
    Paints particles on a 3D mesh.

    Parameters:
    -----------
    mesh: tensor (batch_size, nc, nc, nc)
      Input 3D mesh tensor

    neighboor_coords: tensor reshaped to (-1, 8, 4)
      Integer mesh coordinates, 8 per particle with 4 components each.

    kernel: tensor reshaped to (-1, 8)
      Weight deposited at each of the 8 coordinates.

    shift: array of 4 values
      Offset added to every coordinate. It is reshaped to [1, 1, 4], so it
      presumably needs a leading batch entry before x, y, z -- TODO confirm.

    Returns:
    --------
    mesh with the scattered weights added.
    """
    with tf.name_scope(name, "cic_update", [mesh, neighboor_coords, kernel]):
        dyn_shape = tf.shape(mesh)
        batch_size = dyn_shape[0]
        nx, ny, nz = dyn_shape[-3], dyn_shape[-2], dyn_shape[-1]

        # TODO: Assert shift shape
        coords = tf.reshape(neighboor_coords, (-1, 8, 4))
        offset = tf.reshape(tf.constant(shift), [1, 1, 4])
        coords = coords + offset

        # scatter_nd sums the weights of all entries that hit the same cell.
        weights = tf.reshape(kernel, (-1, 8))
        painted = tf.scatter_nd(coords, weights, [batch_size, nx, ny, nz])
        return mesh + tf.reshape(painted, mesh.shape)
def _unsparsify(x):
    """Converts a `tf.IndexedSlices` gradient into a dense tensor.

    Args:
      x: Any value. Only `tf.IndexedSlices` instances are converted.

    Returns:
      `x` unchanged when it is not a `tf.IndexedSlices`; otherwise a dense
      tensor of shape `x.dense_shape` with `x.values` scattered at `x.indices`.

    Raises:
      ValueError: If `x` is a `tf.IndexedSlices` without a dense shape.
    """
    if not isinstance(x, tf.IndexedSlices):
        return x
    # Raise explicitly rather than assert: asserts are stripped under
    # `python -O`, which would let a None shape reach tf.scatter_nd.
    if x.dense_shape is None:
        raise ValueError(
            "memory_saving_gradients encountered sparse gradients of unknown "
            "shape")
    # Append trailing axes until the indices have the same rank as the values.
    indices = x.indices
    while indices.shape.ndims < x.values.shape.ndims:
        indices = tf.expand_dims(indices, -1)
    return tf.scatter_nd(indices, x.values, x.dense_shape)
def infer_step(i, recent_output, recent_logits, cache, decoding_stats):
    """Runs one decoding step and records its result at position `i`.

    Reads `features` and `self` from the enclosing scope.

    Args:
      i: Current decode step, also used as the position written to.
      recent_output: Rank-4 tensor of samples so far, indexed as
        [batch, position, 0, 0].
      recent_logits: Logits so far, indexed as [batch, position, ...].
      cache: Decoding cache, forwarded to `self.sample` and returned.
      decoding_stats: Forwarded to `self.sample` and returned.

    Returns:
      Tuple `(i + 1, samples, logits, cache, decoding_stats)`.
    """
    step_features = features.copy()
    step_features["targets"] = recent_output
    cur_sample, cur_logit = self.sample(
        step_features,
        decode_step=i,
        cache=cache,
        decoding_stats=decoding_stats)

    pos = i
    batch_ids = range(self.batch_size)

    # Build tensors that hold the new values at position `pos` and zeros
    # elsewhere, then add them on. NOTE(review): this is a plain addition, so
    # it presumably relies on position `pos` still being zero -- verify.
    sample_delta = tf.scatter_nd(
        indices=[[b, pos, 0, 0] for b in batch_ids],
        updates=cur_sample,
        shape=utils.shape_list(recent_output))
    samples = recent_output + sample_delta

    logit_delta = tf.scatter_nd(
        indices=[[b, pos] for b in batch_ids],
        updates=cur_logit,
        shape=utils.shape_list(recent_logits))
    logits = recent_logits + logit_delta

    return i + 1, samples, logits, cache, decoding_stats
def _calc_final_dist(self, vocab_dists, attn_dists):
    """Calculate the final distribution, for the pointer-generator model.

    Args:
      vocab_dists: The vocabulary distributions. List length max_dec_steps of
        (batch_size, vsize) arrays, in vocabulary-file order.
      attn_dists: The attention distributions. List length max_dec_steps of
        (batch_size, attn_len) arrays.

    Returns:
      final_dists: The final distributions. List length max_dec_steps of
        (batch_size, extended_vsize) arrays. Entries for decoder timesteps
        and examples that correspond to a [PAD] token are junk.
    """
    with tf.variable_scope('final_distribution'):
        # Weight the generation path by p_gen and the copy path by (1 - p_gen).
        gen_weighted = [
            p_gen * dist for (p_gen, dist) in zip(self.p_gens, vocab_dists)
        ]
        copy_weighted = [
            (1 - p_gen) * dist for (p_gen, dist) in zip(self.p_gens, attn_dists)
        ]

        # The extended vocabulary appends one slot per in-article OOV word,
        # sized by the maximum OOV count over the batch.
        extended_vsize = self._vocab.size() + self._max_art_oovs
        batch_size = self._hps.batch_size
        oov_padding = tf.zeros((batch_size, self._max_art_oovs))
        # list length max_dec_steps of shape (batch_size, extended_vsize)
        vocab_extended = [
            tf.concat(axis=1, values=[dist, oov_padding])
            for dist in gen_weighted
        ]

        # Project attention mass onto vocabulary entries: if encoder position
        # i has attention 0.1 and holds the word with extended-vocab id 500,
        # 0.1 is added to entry 500. tf.scatter_nd needs [batch, word_id]
        # index pairs for this.
        row_ids = tf.range(0, limit=batch_size)  # shape (batch_size)
        row_ids = tf.expand_dims(row_ids, 1)  # shape (batch_size, 1)
        attn_len = tf.shape(self._enc_batch_extend_vocab)[1]
        row_ids = tf.tile(row_ids, [1, attn_len])  # (batch_size, attn_len)
        # shape (batch_size, attn_len, 2)
        scatter_idx = tf.stack(
            (row_ids, self._enc_batch_extend_vocab), axis=2)
        out_shape = [batch_size, extended_vsize]
        # list length max_dec_steps of shape (batch_size, extended_vsize)
        copy_projected = [
            tf.scatter_nd(scatter_idx, dist, out_shape)
            for dist in copy_weighted
        ]

        # Sum the two paths for every decoder timestep.
        return [
            gen_part + copy_part
            for (gen_part, copy_part) in zip(vocab_extended, copy_projected)
        ]
def _unsparsify(x):
    """Converts a `tf.IndexedSlices` gradient into a dense tensor.

    Args:
      x: Any value. Only `tf.IndexedSlices` instances are converted.

    Returns:
      `x` unchanged when it is not a `tf.IndexedSlices`; otherwise a dense
      tensor of shape `x.dense_shape` with `x.values` scattered at `x.indices`.

    Raises:
      ValueError: If `x` is a `tf.IndexedSlices` without a dense shape.
    """
    if isinstance(x, tf.IndexedSlices):
        if x.dense_shape is None:
            raise ValueError(
                "memory_saving_gradients has sparse gradients of unknown shape.")
        # Append trailing axes until the indices match the rank of the values.
        scatter_idx = x.indices
        while scatter_idx.shape.ndims < x.values.shape.ndims:
            scatter_idx = tf.expand_dims(scatter_idx, -1)
        return tf.scatter_nd(scatter_idx, x.values, x.dense_shape)
    return x
def maybe_sample():
    """Perform scheduled sampling.

    Reads `self`, `time`, `sample_ids`, `outputs` and `base_next_inputs` from
    the enclosing scope.

    Returns:
      The next inputs: rows where `sample_ids` is true come from the
      (optionally transformed) `outputs`, the rest from `base_next_inputs`.
    """

    def _with_auxiliary_inputs(outputs_, indices=None):
        """Concatenate outputs with auxiliary inputs, if they exist."""
        aux_tas = self._auxiliary_input_tas
        if aux_tas is None:
            return outputs_

        next_time = time + 1
        aux_inputs = tf.nest.map_structure(
            lambda ta: ta.read(next_time), aux_tas)
        if indices is not None:
            # Keep only the rows that are being sampled.
            aux_inputs = tf.gather_nd(aux_inputs, indices)
        return tf.nest.map_structure(
            lambda x, y: tf.concat((x, y), -1), outputs_, aux_inputs)

    if self._next_inputs_fn is None:
        # No transform: select per row between outputs and the base inputs.
        return tf.where(
            sample_ids, _with_auxiliary_inputs(outputs), base_next_inputs)

    # Split rows into the sampled and non-sampled groups.
    where_sampling = tf.cast(tf.where(sample_ids), tf.int32)
    where_not_sampling = tf.cast(
        tf.where(tf.logical_not(sample_ids)), tf.int32)
    outputs_sampling = tf.gather_nd(outputs, where_sampling)
    inputs_not_sampling = tf.gather_nd(base_next_inputs, where_not_sampling)

    # Transform only the sampled rows.
    sampled_next_inputs = _with_auxiliary_inputs(
        self._next_inputs_fn(outputs_sampling), where_sampling)

    # The two groups are disjoint, so adding their scatters reassembles the
    # full batch.
    base_shape = tf.shape(base_next_inputs)
    from_samples = tf.scatter_nd(
        indices=where_sampling,
        updates=sampled_next_inputs,
        shape=base_shape)
    from_base = tf.scatter_nd(
        indices=where_not_sampling,
        updates=inputs_not_sampling,
        shape=base_shape)
    return from_samples + from_base
def penalize_used(logits, output, penalty=0.85):
    """Scales the logits of every token id that already appears in `output`.

    Only the first row of `output` is inspected, and the resulting mask has
    shape [1, vocab]. NOTE(review): this presumably expects a batch size of
    1 -- confirm against the caller.

    NOTE(review): the scaling is a plain multiplication, so with the default
    factor a negative logit moves towards zero (becomes more likely) rather
    than being penalized. Left as is to preserve behaviour.

    Args:
      logits: [batch, vocab] tensor with a static vocab dimension.
      output: Integer tensor of generated token ids; `output[0]` is used.
      penalty: Factor applied to the logits of used tokens.

    Returns:
      A tensor shaped like `logits` in which the entries for used token ids
      are multiplied by `penalty`.
    """
    # Distinct token ids produced so far.
    used_ids = tf.unique(output[0])[0]
    hits = tf.ones_like(used_ids, dtype=used_ids.dtype)

    # Dense 0/1 vector over the vocabulary marking the used ids.
    vocab_hits = tf.scatter_nd(
        tf.expand_dims(used_ids, 1), hits, [logits.shape[1]])
    used_mask = tf.expand_dims(tf.cast(vocab_hits, tf.bool), 0)

    return tf.compat.v1.where(used_mask, logits * penalty, logits)
def build_graph(parameters):
    """Build the scatter_nd op testing graph.

    Args:
      parameters: Dict supplying the dtype and shape of each placeholder
        under the keys `<name>_dtype` and `<name>_shape` for the names
        `indices`, `updates` and `shape`.

    Returns:
      A pair `(inputs, outputs)`: the three placeholders in the order
      indices, updates, shape, and a one-element list with the scatter_nd
      result.
    """
    make_input = tf.compat.v1.placeholder
    indices = make_input(
        dtype=parameters["indices_dtype"],
        name="indices",
        shape=parameters["indices_shape"])
    updates = make_input(
        dtype=parameters["updates_dtype"],
        name="updates",
        shape=parameters["updates_shape"])
    shape = make_input(
        dtype=parameters["shape_dtype"],
        name="shape",
        shape=parameters["shape_shape"])

    inputs = [indices, updates, shape]
    return inputs, [tf.scatter_nd(*inputs)]
def call(self, x, padding=None):
    """Return outputs of the feedforward network.

    Args:
      x: tensor with shape [batch_size, length, hidden_size]
      padding: (optional) tensor with shape [batch_size, length]. If set and
        self.allow_pad is true, positions whose padding value is not below
        1e-9 are removed from x before the dense layers and restored as
        zeros at the same locations afterwards.

    Returns:
      Output of the feedforward network.
      tensor with shape [batch_size, length, hidden_size]
    """
    if not self.allow_pad:
        padding = None
    strip_padding = padding is not None

    # Retrieve dynamically known shapes
    batch_size = tf.shape(x)[0]
    length = tf.shape(x)[1]

    if strip_padding:
        with tf.name_scope("remove_padding"):
            # Flatten padding to [batch_size*length]
            flat_padding = tf.reshape(padding, [-1])
            nonpad_ids = tf.to_int32(tf.where(flat_padding < 1e-9))

            # Flatten x to [batch_size*length, hidden_size] and keep only the
            # non-padding rows.
            x = tf.reshape(x, [-1, self.hidden_size])
            x = tf.gather_nd(x, indices=nonpad_ids)

            # Restore the static hidden size, then add a leading axis so the
            # layers receive a rank-3 input.
            x.set_shape([None, self.hidden_size])
            x = tf.expand_dims(x, axis=0)

    hidden = self.filter_dense_layer(x)
    if self.train:
        hidden = tf.nn.dropout(hidden, 1.0 - self.relu_dropout)
    output = self.output_dense_layer(hidden)

    if strip_padding:
        with tf.name_scope("re_add_padding"):
            # Put the rows back in place; padding rows come out as zeros.
            output = tf.squeeze(output, axis=0)
            output = tf.scatter_nd(
                indices=nonpad_ids,
                updates=output,
                shape=[batch_size * length, self.hidden_size]
            )
            output = tf.reshape(output, [batch_size, length, self.hidden_size])
    return output
def set_final(sequence, sequence_length, values, time_major=False):
    """Sets the final values in a batch of sequences, and clears those after.

    Args:
      sequence: Rank-3 float tensor, batch-major unless `time_major` is set.
      sequence_length: Per-example lengths.
      values: Values to place at each example's final index.
      time_major: Whether `sequence` has time as its first dimension.

    Returns:
      A tensor in the same layout as `sequence`: steps before the final one
      are kept, the final step is set from `values`, and later steps are zero.
    """
    if time_major:
        sequence_batch_major = tf.transpose(sequence, [1, 0, 2])
    else:
        sequence_batch_major = sequence

    final_index = _get_final_index(sequence_length, time_major=False)

    # 1.0 for every step strictly before the final one, 0.0 from there on.
    keep_mask = tf.sequence_mask(
        tf.maximum(0, sequence_length - 1),
        maxlen=sequence_batch_major.shape[1],
        dtype=tf.float32)
    kept = tf.expand_dims(keep_mask, axis=-1) * sequence_batch_major
    # Zeros everywhere except `values` at the final indices.
    finals = tf.scatter_nd(
        final_index, values, tf.shape(sequence_batch_major))
    sequence_batch_major = kept + finals

    if not time_major:
        return sequence_batch_major
    return tf.transpose(sequence_batch_major, [1, 0, 2])
def __call__(self, shape, dtype=None, partition_info=None):
    """Builds a kernel that is `self.gain` at its centre and zero elsewhere.

    Args:
      shape: Kernel shape with more than two dimensions, whose last two
        dimensions are equal.
      dtype: Dtype of the returned kernel.
      partition_info: Unused.

    Returns:
      A tensor of shape `shape` that holds `self.gain` times the identity
      matrix at the centre of the leading dimensions and zeros elsewhere.
    """
    del partition_info  # unused
    assert len(shape) > 2, shape

    # Place the gain at the centre of the leading dimensions, using a 1x1
    # slot for the last two.
    support = tuple(shape[:-2]) + (1, 1)
    centre = [dim // 2 for dim in support]
    gain = tf.constant([self.gain], dtype=dtype)
    kernel = tf.scatter_nd([centre], gain, support)

    assert shape[-2] == shape[-1], shape
    channels = shape[-1]
    if channels != 1:
        # Broadcast against the identity to fill the last two dimensions.
        kernel *= tf.eye(channels, dtype=dtype)
    return kernel
def _preprocess_candidate_answers(features, max_num_candidates, max_seq_length):
    """Prepares dense labels for each candidate.

    Adds `features["can_label_ids"]`, an int32
    [max_num_candidates, max_seq_length] tensor built by scattering ones at
    the (candidate, index) pairs, and removes `features["can_indexes"]`.

    Args:
      features: Dict with a sparse "can_indexes" entry (its `.values` are
        used) and a "can_sizes" entry giving the number of indexes per
        candidate. Modified in place.
      max_num_candidates: Number of rows of the dense label tensor.
      max_seq_length: Number of columns of the dense label tensor.
    """
    # Group the flat index list into one row per candidate.
    per_candidate = tf.RaggedTensor.from_row_lengths(
        features["can_indexes"].values, features["can_sizes"])
    # Candidate (row) id of every flat index.
    candidate_id = tf.ragged.row_splits_to_segment_ids(
        per_candidate.row_splits)

    coords = tf.stack([candidate_id, per_candidate.flat_values], axis=-1)
    ones = tf.ones_like(candidate_id, dtype=tf.int32)
    features["can_label_ids"] = tf.scatter_nd(
        indices=coords,
        updates=ones,
        shape=[max_num_candidates, max_seq_length])

    # Variable length tensors are not supported on TPU.
    del features["can_indexes"]
def maskedSoftmax(logits, mask):
    """
    Masked softmax over dim 1

    Only the entries selected by `mask` take part in the softmax; all other
    entries of the result are zero.

    :param logits: (N, L)
    :param mask: (N, L)
    :return: probabilities (N, L)
    """
    # Express the selected logits as a sparse tensor so the softmax only
    # normalizes over them.
    selected_idx = tf.where(mask)
    selected_logits = tf.gather_nd(logits, selected_idx)
    full_shape = tf.cast(tf.shape(logits), tf.int64)
    sparse_probs = tf.sparse_softmax(
        tf.SparseTensor(selected_idx, selected_logits, full_shape))

    # Scatter back to dense form and restore the static shape.
    dense_probs = tf.scatter_nd(
        sparse_probs.indices, sparse_probs.values, sparse_probs.dense_shape)
    dense_probs.set_shape(logits.shape)
    return dense_probs
def render_by_scatter(size, points, colors=None, gt_presence=None):
    """Renders points by using tf.scatter_nd.

    Args:
      size: Spatial dimensions of the output canvas.
      points: [batch_size, n_points, ...] tensor with static leading
        dimensions; the last dimension holds the coordinates, which are cast
        to int32 and used as indices into `size`.
      colors: Optional per-point colors with a trailing dimension of 3.
        Defaults to ones.
      gt_presence: Optional per-point weights multiplied into the colors.

    Returns:
      Tensor of shape [batch_size] + size + [3]. Colors of points that fall
      on the same cell are summed.
    """
    leading_dims = points.shape[:-1].as_list()
    if colors is None:
        colors = tf.ones(leading_dims + [3], dtype=tf.float32)

    if gt_presence is not None:
        colors *= tf.cast(tf.expand_dims(gt_presence, -1), colors.dtype)

    batch_size, n_points = points.shape[:-1].as_list()
    canvas_shape = [batch_size] + list(size) + [3]

    # Prefix every point with its example index to get scatter coordinates.
    batch_idx = tf.reshape(tf.range(batch_size), [batch_size, 1, 1])
    batch_idx = snt.TileByDim([1], [n_points])(batch_idx)
    coords = tf.concat([batch_idx, tf.cast(points, tf.int32)], -1)

    return tf.scatter_nd(coords, colors, canvas_shape)
def max_scoring_span(start_scores, end_scores, max_length, no_answer_bias=0):
    """Compute max scoring span, using the sum of start and end scores.

    Args:
      start_scores: <float32> [batch_size, seq_len]
      end_scores: <float32> [batch_size, seq_len]
      max_length: <int32> Max answer length.
      no_answer_bias: <float32> Log-odds threshold for "no-answer" selection.
        I.e. if log p(span=i,j)/p(span=NULL) > no_answer_bias, then select
        i, j as the span, and NULL otherwise.

    Returns:
      start: <int32> [batch_size]
      end: <int32> [batch_size]
      span_scores: <float32> [batch_size] Score of the selected span.
    """
    # Create sparse tensor of size [seq_len].
    seq_len = tensor_utils.shape(start_scores, -1)
    no_answer_bias = tf.scatter_nd([[0]], [no_answer_bias], [seq_len])
    no_answer_bias = tf.cast(no_answer_bias, tf.float32)

    # Apply bias to CLS token logits. Half goes to the start score and half
    # to the end score, so the (0, 0) span receives the full bias.
    no_answer_bias = tf.div(no_answer_bias, 2)
    start_scores += tf.expand_dims(no_answer_bias, 0)
    end_scores += tf.expand_dims(no_answer_bias, 0)

    # Compute outer sum, and mask to be upper triangular.
    # This gives a matrix of start[i] + end[j] scores, where j >= i.
    scores = tf.expand_dims(start_scores, 2) + tf.expand_dims(end_scores, 1)
    # mask is 1 for entries outside the band 0 <= j - i <= max_length - 1.
    mask = (1 - tf.matrix_band_part(tf.ones_like(scores), 0, max_length - 1))
    # Subtract a large value so that invalid spans can never win the argmax.
    # The previous penalty of 1e-4 was too small to exclude them.
    scores -= mask * 1e4

    def map_fn(inputs):
        flattened = tf.reshape(inputs, [-1])
        argmax = tf.argmax(flattened, output_type=tf.int32)
        indices = tensor_utils.unravel_index_2d(argmax, inputs.shape)
        score = flattened[argmax]
        return indices, score

    # Return i, j indices of max-scoring entry.
    with tf.device("/cpu"):
        endpoints, span_scores = tf.map_fn(
            fn=map_fn, elems=scores, dtype=(tf.int32, tf.float32))
    start = endpoints[:, 0]
    end = endpoints[:, 1]

    return start, end, span_scores
def masked_apply(tensor, op, mask, set_outside_zero=True):
    """
    Apply the function op to tensor only at locations indicated by mask.
    The locations outside the mask are either zeroed or left at their
    original value, depending on set_outside_zero.

    :param tensor: The tensor on which op is applied.
    :param op: The operation. It receives the selected entries of tensor and
        must return a result of the same shape.
    :param mask: The boolean mask.
    :param set_outside_zero: If True, set the locations outside the mask to
        zero, otherwise keep original values of tensor.
    :return: Tensor with applied function.
    """
    # Apply op to the selected entries only.
    transformed = op(tf.boolean_mask(tensor, mask))

    # Scatter the results back; unselected locations come out as zero.
    locations = tf.to_int32(tf.where(mask))
    scattered = tf.scatter_nd(locations, transformed, tf.shape(tensor))

    if set_outside_zero:
        return scattered
    return tf.where(mask, scattered, tensor)
def model_fun(features, labels, mode, params):
    """Estimator model function summing per-atom network outputs.

    One `BPAtomicNN` is built per atom type; the outputs are scattered (and
    thereby summed) into a tensor shaped like `labels`.

    NOTE(review): the prediction shape is taken from `labels` before the
    PREDICT branch, so this presumably requires labels in PREDICT mode too --
    verify against the input function.

    Args:
      features: Dict with, for every atom type `t`, the entries `'<t>_input'`
        (network input) and `'<t>_indices'` (scatter indices).
      labels: Target tensor; also defines the shape of the predictions.
      mode: A `tf.estimator.ModeKeys` value.
      params: Dict with the parallel lists 'atom_types', 'layers', 'offsets'
        and 'act_funs'.

    Returns:
      A `tf.estimator.EstimatorSpec` for the requested mode.
    """
    atom_types = params['atom_types']
    atomic_contributions = {}
    per_type_settings = zip(atom_types, params['layers'], params['offsets'],
                            params['act_funs'])
    for (t, lays, offs, acts) in per_type_settings:
        with _tf.variable_scope('{}_ANN'.format(t), reuse=_tf.AUTO_REUSE):
            atomic_contributions[t] = BPAtomicNN(
                features['%s_input' % t], lays, offs, acts)

    # Concatenate all atom types, then let scatter_nd add up the
    # contributions that share an index.
    scatter_indices = _tf.concat(
        [features['%s_indices' % t] for t in atom_types], 0)
    flat_outputs = _tf.concat(
        [_tf.reshape(atomic_contributions[t].output, [-1])
         for t in atom_types], 0)
    predicted_energies = _tf.scatter_nd(
        scatter_indices, flat_outputs, _tf.shape(labels), name='E_prediction')

    if mode == _tf.estimator.ModeKeys.PREDICT:
        return _tf.estimator.EstimatorSpec(
            mode, predictions={'energies': predicted_energies})

    # Compute loss.
    loss = _tf.losses.mean_squared_error(
        labels=labels, predictions=predicted_energies)
    rmse = _tf.metrics.root_mean_squared_error(labels, predicted_energies)
    metrics = {'rmse': rmse}
    # rmse is a (value, update_op) pair; the summary tracks the update op.
    _tf.summary.scalar('rmse', rmse[1])

    if mode == _tf.estimator.ModeKeys.EVAL:
        return _tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops=metrics)

    assert mode == _tf.estimator.ModeKeys.TRAIN
    optimizer = _tf.train.AdagradOptimizer(learning_rate=0.1)
    train_op = optimizer.minimize(
        loss, global_step=_tf.train.get_global_step())
    return _tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
def _initialize_vars(self):
    """Sets up the training graph.

    Creates, as attributes of `self`: the `global_step` variable, the `input`
    placeholder, the `encoded` activations, the `decoded` reconstruction, the
    squared-error `loss`, the `optimizer_op` training op and a `saver`.

    Between encoding and decoding, each hidden unit is zeroed for all but `k`
    examples of the batch, with `k = int(self.sparsity * self.batch_size)`.

    NOTE(review): the placeholder leaves the batch dimension open, but the
    mask is built with `self.batch_size` columns, so fed batches presumably
    must contain exactly `self.batch_size` rows -- TODO confirm.
    """
    with tf.variable_scope(self.name) as scope:
        self.global_step = tf.get_variable(
            'global_step', shape=[], initializer=tf.zeros_initializer())
        self.input = tf.placeholder(tf.float32, shape=[None, self.input_dim])

        # All encoder layers but the last keep the input width; the last one
        # maps to `hidden_units`.
        current = self.input
        for i in range(self.encode_layers - 1):
            current = self._relu_layer(current, self.input_dim, self.input_dim, i)
        self.encoded = self._relu_layer(current, self.input_dim, self.hidden_units, self.encode_layers - 1)

        # Make batch size the last dimension (for use with tf.nn.top_k)
        encoded_t = tf.transpose(self.encoded)

        # Compute the indices corresponding to the top k activations for each
        # neuron in the final encoder layer
        k = int(self.sparsity * self.batch_size)
        _, top_indices = tf.nn.top_k(encoded_t, k=k, sorted=False)

        # Transform top_indices, which contains rows of column indices, into
        # indices, a list of [row, column] pairs (for use with tf.scatter_nd)
        top_k_unstacked = tf.unstack(top_indices, axis=1)
        row_indices = [tf.range(self.hidden_units) for _ in range(k)]
        # _interleave presumably alternates row and column tensors so that the
        # reshape below yields [row, column] pairs -- verify against its
        # definition.
        combined_columns = tf.transpose(
            tf.stack(_interleave(row_indices, top_k_unstacked)))
        indices = tf.reshape(combined_columns, [-1, 2])

        # Apply sparsity constraint
        updates = tf.ones(self.hidden_units * k)
        shape = tf.constant([self.hidden_units, self.batch_size])
        mask = tf.scatter_nd(indices, updates, shape)
        # The mask is [hidden_units, batch_size]; transpose it to line up with
        # `self.encoded`.
        sparse_encoded = self.encoded * tf.transpose(mask)

        self.decoded = self._decode_layer(sparse_encoded)

        # Sum of squared reconstruction errors.
        self.loss = tf.reduce_sum(tf.square(self.decoded - self.input))
        self.optimizer_op = self.optimizer(self.learning_rate).minimize(
            self.loss, self.global_step)

        self.saver = tf.train.Saver(tf.global_variables())
def nucleus_sampling(self, logits):
    """Nucleus sampling.

    Pushes the logits of every token outside the top-p nucleus down by the
    largest value of the logits' dtype. The threshold p is read from
    `self.hparams.nucleus_sampling`.

    Args:
      logits: [batch, vocab] tensor with a fully defined static shape.

    Returns:
      A tensor shaped like `logits` with the out-of-nucleus entries
      suppressed.
    """
    p = self.hparams.nucleus_sampling
    tf.logging.info("Nucleus sampling top_p = {}".format(p))

    # Rank tokens by descending logit and accumulate their probabilities.
    sort_indices = tf.argsort(logits, axis=-1, direction="DESCENDING")
    sorted_probs = tf.gather(
        tf.nn.softmax(logits), sort_indices, batch_dims=1)
    # Exclusive cumsum: the top candidate starts at 0 and is therefore never
    # masked, so at least one token always survives.
    mass_before = tf.cumsum(sorted_probs, axis=-1, exclusive=True)
    drop_sorted = tf.cast(tf.greater(mass_before, p), logits.dtype)

    # Map the mask from sorted order back to vocabulary order.
    batch_size, vocab_size = logits.shape[0], logits.shape[1]
    batch_indices = tf.tile(
        tf.expand_dims(tf.range(batch_size), axis=-1), [1, vocab_size])
    top_p_mask = tf.scatter_nd(
        tf.stack([batch_indices, sort_indices], axis=-1),
        drop_sorted,
        logits.shape)

    return logits - top_p_mask * logits.dtype.max