Beispiel #1
0
    def undo(full_and_sub):
        """Undo the slices: scatter `sub_val` back into the layout of `full_val`.

        The argument is a single `(full_val, sub_val)` pair. It is unpacked in
        the body because tuple parameters in a `def` signature are not valid
        Python 3 syntax; callers still pass one pair.

        Reads `var_shape` and `grad_idx` from the enclosing scope.

        Args:
          full_and_sub: pair `(full_val, sub_val)`. `full_val` must be rank 2.
            `sub_val` must be rank 2 and is reshaped to
            `[size(grad_idx), n_word_feat * n_feat]`.

        Returns:
          A tensor equal to `full_val` outside the rows addressed by
          `grad_idx` and equal to the scattered `sub_val` inside them.

        Raises:
          NotImplementedError: if `full_val` is not rank 2.
        """
        full_val, sub_val = full_and_sub

        # tf.shape(full_val) is a 1-D tensor whose static length is the rank.
        if tf.shape(full_val).shape.as_list()[0] != 2:
            raise NotImplementedError(
                "TODO(lmetz) other than this is not implemented.")
        n_words, n_word_feat = var_shape
        _, n_feat = sub_val.shape.as_list()
        n_active = tf.size(grad_idx)

        # One row per active word, all of its features flattened together.
        in_shape_form = tf.reshape(sub_val, [n_active, n_word_feat * n_feat])

        per_word_shape = [n_words, n_word_feat * n_feat]
        scatter_idx = tf.reshape(tf.to_int32(grad_idx), [-1, 1])

        scattered = tf.scatter_nd(scatter_idx,
                                  in_shape_form,
                                  shape=per_word_shape)
        mask = tf.scatter_nd(scatter_idx,
                             tf.ones_like(in_shape_form),
                             shape=per_word_shape)

        # put back into the flat format
        flat_shape = [n_words * n_word_feat, n_feat]
        scattered = tf.reshape(scattered, flat_shape)
        mask = tf.reshape(mask, flat_shape)

        # this is the update part / fake scatter_update but with gradients.
        return full_val * (1 - mask) + scattered * mask
Beispiel #2
0
    def scatter_update(self, sequence, updates, positions):
        """Write `updates` into `sequence` at the given `positions`.

        Args:
          sequence: A [batch_size, seq_len] or [batch_size, seq_len, depth]
            tensor (rank is checked through `self.get_shape_list`).
          updates: Values to write; reshaped to [-1, depth] and scattered with
            one row per entry of `positions`.
          positions: A [batch_size, n_positions] tensor of indices along the
            seq_len axis.

        Returns:
          A tuple of two tensors. The first has the shape of `sequence`, with
          the elements at `positions` replaced by the matching `updates`.
          Writes to index 0 are ignored. Values landing on a duplicated
          position are summed by the scatter and then divided by the number of
          hits (floor division for int32). The second is a
          [batch_size, seq_len] int32 mask of the positions that were updated.
        """
        seq_shape = self.get_shape_list(sequence, expected_rank=[2, 3])
        has_depth = len(seq_shape) == 3
        if has_depth:
            B, L, D = seq_shape
        else:
            B, L = seq_shape
            D = 1
            sequence = tf.expand_dims(sequence, -1)
        N = self.get_shape_list(positions)[1]

        # Offset every row's positions so they address the flattened
        # [B * L] axis.
        row_offsets = tf.expand_dims(L * tf.range(B), -1)
        scatter_idx = tf.reshape(positions + row_offsets, [-1, 1])
        scattered = tf.scatter_nd(scatter_idx,
                                  tf.reshape(updates, [-1, D]),
                                  [B * L, D])
        scattered = tf.reshape(scattered, [B, L, D])

        # Count how many updates landed on each position.
        hit_counts = tf.scatter_nd(scatter_idx,
                                   tf.ones([B * N], tf.int32),
                                   [B * L])
        hit_counts = tf.reshape(hit_counts, [B, L])
        # Column 0 is forced to zero so that index 0 is never updated.
        keep_non_first = tf.concat(
            [tf.zeros((B, 1), tf.int32),
             tf.ones((B, L - 1), tf.int32)], -1)
        hit_counts *= keep_non_first
        hit_counts_3d = tf.expand_dims(hit_counts, -1)

        # account for duplicate positions
        if sequence.dtype == tf.float32:
            hit_counts_3d = tf.cast(hit_counts_3d, tf.float32)
            scattered /= tf.maximum(1.0, hit_counts_3d)
        else:
            assert sequence.dtype == tf.int32
            scattered = tf.math.floordiv(scattered,
                                         tf.maximum(1, hit_counts_3d))
        updates_mask = tf.minimum(hit_counts, 1)
        mask_3d = tf.minimum(hit_counts_3d, 1)

        updated_sequence = (1 - mask_3d) * sequence + mask_3d * scattered
        if not has_depth:
            updated_sequence = tf.squeeze(updated_sequence, -1)

        return updated_sequence, updates_mask
Beispiel #3
0
def cross_replica_concat(tensor):
  """Concatenates `tensor` across TPU cores along its first dimension.

  The first dimension of `tensor` is treated as the batch dimension.

  Args:
    tensor: Input Tensor to gather from every TPU core.

  Returns:
    The concatenated Tensor, with the batch dimension multiplied by the number
    of TPU cores. When the TPU context reports no shard count the input is
    returned unchanged.
  """
  replica_count = tpu_function.get_tpu_context().number_of_shards
  if replica_count is None:
    # This is a no op if not running on TPU
    return tensor

  # Put the local tensor in this replica's slot of a zero tensor of shape
  # [num_replicas, local_batch_size, ...].
  slotted_shape = [replica_count] + tensor.shape.as_list()
  slotted = tf.scatter_nd(
      indices=[[local_tpu_replica_id()]],
      updates=[tensor],
      shape=slotted_shape)
  summed = tf.tpu.cross_replica_sum(slotted)
  # Collapse to [num_replicas * local_batch_size, ...].
  trailing_dims = slotted.shape.as_list()[2:]
  return tf.reshape(summed, [-1] + trailing_dims)
def batch_segment_sum_embeddings(long_embeddings: tf.Tensor,
                                 long_word_idx: tf.Tensor,
                                 long_input_mask: tf.Tensor) -> tf.Tensor:
  """Adds up wordpiece embeddings into one embedding per word.

  Args:
    long_embeddings: <float32>[batch_size, long_max_length, hidden_size] Tensor
      of contextual wordpiece embeddings.
    long_word_idx: <int32>[batch_size, long_max_length] Tensor giving, for each
      wordpiece, the index of the word it belongs to. Entries for padding
      tokens may be any integer in [0, long_max_length); they contribute
      nothing because their embeddings are zeroed first.
    long_input_mask: <int32>[batch_size, long_max_length] Tensor with `1` for
      present wordpieces and `0` for padding.

  Returns:
    <float32>[batch_size, long_max_length, hidden_size] Tensor in which row
    `w` of each example is the sum of the embeddings of the wordpieces mapped
    to word `w`. Rows that receive no wordpiece are 0.
  """
  # Zero out padding embeddings.
  wordpiece_mask = tf.cast(long_input_mask, dtype=long_embeddings.dtype)
  masked_embeddings = long_embeddings * wordpiece_mask[:, :, tf.newaxis]

  # Pair every wordpiece with its example index so the scatter addresses
  # (example, word).
  batch_ids = tf.range(tf.shape(masked_embeddings)[0])[:, tf.newaxis]
  batch_ids = tf.broadcast_to(batch_ids, shape=tf.shape(long_word_idx))
  word_coords = tf.stack([batch_ids, long_word_idx], axis=-1)

  # scatter_nd accumulates duplicates, which performs the per-word sum.
  return tf.scatter_nd(
      indices=word_coords,
      updates=masked_embeddings,
      shape=tf.shape(masked_embeddings))
def batch_skew(vec, batch_size=None):
    """Build the skew-symmetric matrix of every 3-vector in a batch.

    Args:
      vec: N x 3 tensor.
      batch_size: int N. When None it is read from the static shape of `vec`.

    Returns:
      N x 3 x 3 tensor; entry n is
      [[0, -v2, v1], [v2, 0, -v0], [-v1, v0, 0]] for v = vec[n].
    """
    with tf.name_scope("batch_skew", values=[vec]):
        if batch_size is None:
            batch_size = vec.shape.as_list()[0]
        # Flat positions of the six off-diagonal entries of a 3x3 matrix.
        col_inds = tf.constant([1, 2, 3, 5, 6, 7])
        # Shift by 9 per example to address the flattened [N * 9] output.
        indices = tf.reshape(
            tf.reshape(tf.range(0, batch_size) * 9, [-1, 1]) + col_inds,
            [-1, 1])
        # Values in the same order as col_inds.
        updates = tf.reshape(
            tf.stack([
                -vec[:, 2], vec[:, 1], vec[:, 2], -vec[:, 0], -vec[:, 1],
                vec[:, 0]
            ],
                     axis=1), [-1])
        out_shape = [batch_size * 9]
        res = tf.scatter_nd(indices, updates, out_shape)
        res = tf.reshape(res, [batch_size, 3, 3])

        return res
Beispiel #6
0
    def compute_p2LFA(self, P_stencil, n, grid_size):
        """Accumulate a complex tensor `P` from 3x3 stencils, one slice per Fourier mode.

        For every coarse-grid point (ic, jc) and every stencil entry (k, m),
        the stencil value is multiplied by exp(-1j * (tx * X + ty * Y)) for
        each frequency pair and added into `P` at the (I, J) position chosen
        by `self.map_2_to_1` and `self.idx_array`.

        Args:
          P_stencil: tensor indexed as [batch, ic, jc, k, m] with k, m in
            range(3); the batch size is read from its static shape.
          n: integer used to build the frequencies i * 2 * pi / n.
          grid_size: fine-grid size; the coarse grid has grid_size // 2 points
            per axis.

        Returns:
          complex128 tensor of shape
          (num_modes, num_modes, batch_size, grid_size ** 2,
          (grid_size // 2) ** 2).
        """
        batch_size = P_stencil.get_shape().as_list()[0]
        # NOTE(review): K is indexed below as K[i, j, k, m] and cast to int;
        # presumably it maps a fine-grid point plus stencil offset to a flat
        # row index. Confirm against map_2_to_1.
        K = self.map_2_to_1(grid_size=grid_size)
        pi = np.pi
        # theta_x and theta_y are built from the same range, so they hold the
        # same frequencies.
        theta_x = np.array(([i * 2 * pi / n for i in range(-n // (grid_size * 2) + 1, n // (grid_size * 2) + 1)]))
        theta_y = np.array([i * 2 * pi / n for i in range(-n // (grid_size * 2) + 1, n // (grid_size * 2) + 1)])
        num_modes = theta_x.shape[0]

        # Offsets -1, 0, 1 along each axis of the 3x3 stencil.
        X, Y = np.meshgrid(np.arange(-1, 2), np.arange(-1, 2))
        with tf.device(self.device):
            P = tf.zeros((len(theta_y), len(theta_x), batch_size, grid_size ** 2, (grid_size // 2) ** 2),
                         dtype=tf.complex128)
            # modes[y, x] is the 3x3 array of phase factors for (theta_x[x], theta_y[y]).
            modes = np.array([[np.exp(-1j * (tx * X + ty * Y)) for tx in theta_x] for ty in theta_y])
            # Tiling with five repetition counts prepends a batch axis:
            # (batch_size, len(theta_y), len(theta_x), 3, 3).
            fourier_component = tf.to_complex128(np.tile(modes, (batch_size, 1, 1, 1, 1)))
            for ic in range(grid_size // 2):
                i = 2 * ic  # ic is the index on the coarse grid, and i is the index on the fine grid
                for jc in range(grid_size // 2):
                    j = 2 * jc  # jc is the index on the coarse grid, and j is the index on the fine grid
                    # Flat index of the coarse point (ic, jc).
                    J = int(grid_size // 2 * jc + ic)
                    for k in range(3):
                        for m in range(3):
                            I = int(K[i, j, k, m])
                            # Phase factor times stencil weight, per batch element and mode.
                            a = fourier_component[:, :, :, k, m] * tf.reshape(P_stencil[:, ic, jc, k, m], (-1, 1, 1))
                            # Move the batch axis last to match P's leading (mode, mode, batch) axes.
                            a = tf.transpose(a, (1, 2, 0))

                            # The scatter of ones acts as a selector for the entries listed by
                            # idx_array; multiplying by `a` (broadcast over the last two axes)
                            # places the values there.
                            P = P + tf.to_complex128(
                                tf.scatter_nd(indices=tf.constant(self.idx_array((I, J, int(batch_size), num_modes))),
                                              updates=tf.ones(batch_size * (num_modes ** 2)),
                                              shape=tf.constant([num_modes, num_modes, batch_size, grid_size ** 2,
                                                                 (grid_size // 2) ** 2]))) \
                                * tf.reshape(a, (theta_x.shape[0], theta_y.shape[0], batch_size, 1, 1))
            return P
Beispiel #7
0
def cross_replica_concat(tensor, num_replicas, name=None):
    """Gather `tensor` from all TPU cores and concatenate along dimension 0.

    Args:
      tensor: tensor to concatenate.
      num_replicas: Number of TPU cores.
      name: A name for the op.

    Returns:
      Tensor of the same rank as `tensor` with first dimension `num_replicas`
      times larger.
    """
    replica_id = xla.replica_id()

    with tf.compat.v1.name_scope(name, 'tpu_cross_replica_concat'):
        # Add an outermost replica axis. Only this replica's slot holds data;
        # every other slot is zero.
        padded_shape = [num_replicas] + tensor.shape.as_list()
        gathered = tf.scatter_nd(indices=[[replica_id]],
                                 updates=[tensor],
                                 shape=padded_shape)

        # Each slot is non-zero on exactly one replica, so summing across
        # replicas yields the complete tensor everywhere.
        gathered = tf.compat.v1.tpu.cross_replica_sum(gathered)

        # Merge the replica axis into the first dimension. The leading -1
        # also covers scalar input.
        trailing_dims = gathered.shape.as_list()[2:]
        return tf.reshape(gathered, [-1] + trailing_dims)
Beispiel #8
0
        def f2():
            """Sum the classification loss over the hardest `n_negative_keep` negatives.

            Reads `neg_class_loss_all`, `n_negative_keep`, `batch_size`,
            `n_boxes` and `classification_loss` from the enclosing scope.
            Selecting by highest loss does not imply the model predicted the
            wrong class for those boxes.

            Returns:
              Tensor of shape (batch_size,).
            """
            # Flatten to (batch_size * n_boxes,) so top_k ranks across the whole batch.
            flat_neg_loss = tf.reshape(neg_class_loss_all, [-1])
            # Sorted output is not needed, hence sorted=False.
            _, top_indices = tf.nn.top_k(flat_neg_loss, n_negative_keep, False)
            # 0/1 mask over the flat boxes marking the selected negatives.
            keep_flat = tf.scatter_nd(
                tf.expand_dims(top_indices, axis=1),
                updates=tf.ones_like(top_indices, dtype=tf.int32),
                shape=tf.shape(flat_neg_loss))
            # Back to (batch_size, n_boxes), as float for the multiplication.
            keep_mask = tf.to_float(
                tf.reshape(keep_flat, [batch_size, n_boxes]))
            # Keep only the selected boxes' losses and sum per example.
            return tf.reduce_sum(classification_loss * keep_mask, axis=-1)
Beispiel #9
0
def cutout(x, toss, ratio=(1, 2)):
    """Multiply each image by a mask that zeroes a randomly placed rectangle.

    Args:
      x: batch of images indexed as [batch, height, width, channels]; the
        mask is expanded on axis 3 before the multiplication.
      toss: per-example multiplier, reshaped to [-1, 1, 1] and applied to the
        rectangle before it is subtracted from 1. Presumably a 0/1 float
        flag choosing which examples get a cutout; confirm with the caller.
      ratio: pair (numerator, denominator). The rectangle size is
        image_size * ratio[0] // ratio[1] along each spatial axis. Only
        indexed, never mutated; the default is a tuple to avoid a shared
        mutable default argument.

    Returns:
      `x` multiplied by the mask, same shape as `x`.
    """
    batch_size = tf.shape(x)[0]
    image_size = tf.shape(x)[1:3]
    cutout_size = image_size * ratio[0] // ratio[1]
    # Random centre of the rectangle for every example.
    offset_x = tf.random.uniform([tf.shape(x)[0], 1, 1],
                                 maxval=image_size[0] +
                                 (1 - cutout_size[0] % 2),
                                 dtype=tf.int32)
    offset_y = tf.random.uniform([tf.shape(x)[0], 1, 1],
                                 maxval=image_size[1] +
                                 (1 - cutout_size[1] % 2),
                                 dtype=tf.int32)
    grid_batch, grid_x, grid_y = tf.meshgrid(tf.range(batch_size,
                                                      dtype=tf.int32),
                                             tf.range(cutout_size[0],
                                                      dtype=tf.int32),
                                             tf.range(cutout_size[1],
                                                      dtype=tf.int32),
                                             indexing='ij')
    # (batch, row, col) coordinates of every pixel inside the rectangle.
    cutout_grid = tf.stack([
        grid_batch, grid_x + offset_x - cutout_size[0] // 2,
        grid_y + offset_y - cutout_size[1] // 2
    ],
                           axis=-1)
    mask_shape = tf.stack([batch_size, image_size[0], image_size[1]])
    # Clamp coordinates that fall outside the image onto its border.
    cutout_grid = tf.maximum(cutout_grid, 0)
    cutout_grid = tf.minimum(cutout_grid,
                             tf.reshape(mask_shape - 1, [1, 1, 1, 3]))
    # Clamped coordinates can collide and scatter_nd sums them, so the outer
    # maximum keeps the mask from going negative.
    mask = tf.maximum(
        1 - tf.reshape(toss, [-1, 1, 1]) * tf.scatter_nd(
            cutout_grid,
            tf.ones([batch_size, cutout_size[0], cutout_size[1]],
                    dtype=tf.float32), mask_shape), 0)
    x = x * tf.expand_dims(mask, axis=3)
    return x
Beispiel #10
0
def tpu_cross_replica_concat(tensor, tpu_context=None):
    """Gather `tensor` from all TPU cores and concatenate along dimension 0.

    Args:
      tensor: tensor to concatenate.
      tpu_context: A `TPUContext`. If not set, CPU execution is assumed.

    Returns:
      Tensor of the same rank as `tensor` with first dimension `num_replicas`
      times larger. The input is returned unchanged when there is no context
      or at most one replica.
    """
    single_replica = tpu_context is None or tpu_context.num_replicas <= 1
    if single_replica:
        return tensor

    replica_count = tpu_context.num_replicas

    with tf.name_scope("tpu_cross_replica_concat"):
        # Add an outermost replica axis. Only this replica's slot holds data;
        # every other slot is zero.
        padded_shape = [replica_count] + tensor.shape.as_list()
        gathered = tf.scatter_nd(
            indices=[[xla.replica_id()]],
            updates=[tensor],
            shape=padded_shape,
        )

        # Each slot is non-zero on exactly one replica, so summing across
        # replicas yields the complete tensor everywhere.
        gathered = tf.tpu.cross_replica_sum(gathered)

        # Merge the replica axis into the first dimension. The leading -1
        # also covers scalar input.
        trailing_dims = gathered.shape.as_list()[2:]
        return tf.reshape(gathered, [-1] + trailing_dims)
Beispiel #11
0
 def compute_dense_matrix(self, stencils, batch_size, grid_size):
     """Scatter stencil entries into dense per-example matrices.

     The gather and scatter indices both come from
     `self.get_indices_compute_A`.

     Returns:
       Tensor of shape (batch_size, grid_size ** 2, grid_size ** 2).
     """
     with tf.device(self.device):
         scatter_idx, gather_idx = self.get_indices_compute_A((batch_size, grid_size))
         # Pick the stencil values in the order the scatter indices expect.
         stencil_values = tf.gather_nd(params=stencils, indices=gather_idx)
         dense_shape = (batch_size, grid_size ** 2, grid_size ** 2)
         return tf.scatter_nd(indices=scatter_idx,
                              updates=stencil_values,
                              shape=dense_shape)
Beispiel #12
0
def _cic_paint(mesh, neighboor_coords, kernel, shift, name=None):
    """Paints particles on a 3D mesh.

    Parameters:
    -----------
    mesh: tensor (batch_size, nc, nc, nc)
      Input 3D mesh tensor

    neighboor_coords: tensor reshaped to (-1, 8, 4); each entry is a 4-vector
      index into the mesh

    kernel: tensor reshaped to (-1, 8); the weight added at each neighbour

    shift: array of coordinate shifting, reshaped to [1, 1, 4] and added to
      every neighbour coordinate

    Returns the mesh with the scattered weights added.
    """
    with tf.name_scope(name, "cic_update", [mesh, neighboor_coords, kernel]):
        mesh_dims = tf.shape(mesh)
        batch_size = mesh_dims[0]
        nx, ny, nz = mesh_dims[-3], mesh_dims[-2], mesh_dims[-1]

        # TODO: Assert shift shape
        coords = tf.reshape(neighboor_coords, (-1, 8, 4))
        coords = coords + tf.reshape(tf.constant(shift), [1, 1, 4])

        weights = tf.reshape(kernel, (-1, 8))
        # scatter_nd accumulates the weights of particles sharing a cell.
        painted = tf.scatter_nd(coords, weights, [batch_size, nx, ny, nz])

        return mesh + tf.reshape(painted, mesh.shape)
Beispiel #13
0
 def _unsparsify(x):
     """Convert a `tf.IndexedSlices` into a dense tensor; pass anything else through.

     Args:
       x: a `tf.IndexedSlices`, or any other value.

     Returns:
       `x` itself when it is not an `IndexedSlices`; otherwise a dense tensor
       of shape `x.dense_shape` holding `x.values` at `x.indices`.

     Raises:
       AssertionError: if `x` is an `IndexedSlices` with no `dense_shape`.
     """
     if not isinstance(x, tf.IndexedSlices):
         return x
     # Raised explicitly rather than with `assert`, so the check survives
     # `python -O`. The exception type and message are unchanged.
     if x.dense_shape is None:
         raise AssertionError(
             "memory_saving_gradients encountered sparse gradients of unknown shape")
     indices = x.indices
     # Add trailing axes until indices has the same rank as values.
     while indices.shape.ndims < x.values.shape.ndims:
         indices = tf.expand_dims(indices, -1)
     return tf.scatter_nd(indices, x.values, x.dense_shape)
 def infer_step(i, recent_output, recent_logits, cache, decoding_stats):
     """Run one decoding step and add its sample and logit at position `i`.

     Reads `features`, `self` and `utils` from the enclosing scope.

     Returns:
       Tuple (i + 1, samples, logits, cache, decoding_stats), where `samples`
       and `logits` are the inputs plus this step's values scattered at
       position `i` for every batch element.
     """
     step_features = features.copy()
     step_features["targets"] = recent_output
     cur_sample, cur_logit = self.sample(step_features,
                                         decode_step=i,
                                         cache=cache,
                                         decoding_stats=decoding_stats)
     batch_ids = range(self.batch_size)
     # Zero tensors carrying only this step's values, added onto the running
     # outputs.
     sample_delta = tf.scatter_nd(
         indices=[[b, i, 0, 0] for b in batch_ids],
         updates=cur_sample,
         shape=utils.shape_list(recent_output))
     logit_delta = tf.scatter_nd(
         indices=[[b, i] for b in batch_ids],
         updates=cur_logit,
         shape=utils.shape_list(recent_logits))
     samples = recent_output + sample_delta
     logits = recent_logits + logit_delta
     return i + 1, samples, logits, cache, decoding_stats
Beispiel #15
0
    def _calc_final_dist(self, vocab_dists, attn_dists):
        """Combine vocabulary and attention distributions (pointer-generator).

        Args:
          vocab_dists: The vocabulary distributions. List length max_dec_steps
            of (batch_size, vsize) arrays, in vocabulary-file order.
          attn_dists: The attention distributions. List length max_dec_steps
            of (batch_size, attn_len) arrays.

        Returns:
          final_dists: The final distributions. List length max_dec_steps of
            (batch_size, extended_vsize) arrays. Entries for decoder steps and
            examples that correspond to a [PAD] token are junk and should be
            ignored.
        """
        with tf.variable_scope('final_distribution'):
            batch_size = self._hps.batch_size

            # Weight vocab dists by p_gen and attention dists by (1 - p_gen).
            weighted_vocab = [
                p_gen * dist for p_gen, dist in zip(self.p_gens, vocab_dists)
            ]
            weighted_attn = [
                (1 - p_gen) * dist
                for p_gen, dist in zip(self.p_gens, attn_dists)
            ]

            # Pad each vocab dist with zeros for the in-article OOV words;
            # extended_vsize is the maximum (over the batch) extended
            # vocabulary size.
            extended_vsize = self._vocab.size() + self._max_art_oovs
            oov_padding = tf.zeros((batch_size, self._max_art_oovs))
            vocab_extended = [
                tf.concat(axis=1, values=[dist, oov_padding])
                for dist in weighted_vocab
            ]  # each (batch_size, extended_vsize)

            # Project attention mass onto vocabulary entries: if a_i = 0.1
            # and the i-th encoder word has id 500, add 0.1 to entry 500.
            # scatter_nd does this, summing repeated words.
            attn_len = tf.shape(self._enc_batch_extend_vocab)[1]
            batch_ids = tf.expand_dims(tf.range(0, limit=batch_size),
                                       1)  # (batch_size, 1)
            batch_ids = tf.tile(batch_ids,
                                [1, attn_len])  # (batch_size, attn_len)
            scatter_idx = tf.stack(
                (batch_ids, self._enc_batch_extend_vocab),
                axis=2)  # (batch_size, attn_len, 2)
            target_shape = [batch_size, extended_vsize]
            attn_projected = [
                tf.scatter_nd(scatter_idx, dist, target_shape)
                for dist in weighted_attn
            ]  # each (batch_size, extended_vsize)

            # Sum the generation and copy distributions per decoder step.
            return [
                gen_dist + copy_dist
                for gen_dist, copy_dist in zip(vocab_extended, attn_projected)
            ]
    def _unsparsify(x):
      """Densify a `tf.IndexedSlices`; return any other value unchanged.

      Raises:
        ValueError: if the slices carry no `dense_shape`.
      """
      if isinstance(x, tf.IndexedSlices):
        if x.dense_shape is None:
          raise ValueError(
              "memory_saving_gradients has sparse gradients of unknown shape.")

        # Append trailing axes until the indices match the rank of the values.
        scatter_idx = x.indices
        missing_axes = x.values.shape.ndims - scatter_idx.shape.ndims
        for _ in range(missing_axes):
          scatter_idx = tf.expand_dims(scatter_idx, -1)
        return tf.scatter_nd(scatter_idx, x.values, x.dense_shape)
      return x
Beispiel #17
0
            def maybe_sample():
                """Build the next inputs, mixing sampled outputs with the base inputs.

                Reads `time`, `outputs`, `sample_ids` and `base_next_inputs`
                from the enclosing scope.
                """
                def maybe_concatenate_auxiliary_inputs(outputs_, indices=None):
                    """Append next-step auxiliary inputs to `outputs_` when any are configured."""
                    if self._auxiliary_input_tas is None:
                        return outputs_

                    read_at = time + 1
                    aux_inputs = tf.nest.map_structure(
                        lambda ta: ta.read(read_at),
                        self._auxiliary_input_tas)
                    # Restrict to the selected batch entries when asked.
                    if indices is not None:
                        aux_inputs = tf.gather_nd(aux_inputs, indices)
                    return tf.nest.map_structure(
                        lambda x, y: tf.concat((x, y), -1), outputs_,
                        aux_inputs)

                # Without a transformation function a plain select is enough.
                if self._next_inputs_fn is None:
                    return tf.where(
                        sample_ids,
                        maybe_concatenate_auxiliary_inputs(outputs),
                        base_next_inputs)

                sampling_idx = tf.cast(tf.where(sample_ids), tf.int32)
                keep_idx = tf.cast(tf.where(tf.logical_not(sample_ids)),
                                   tf.int32)
                sampled_outputs = tf.gather_nd(outputs, sampling_idx)
                kept_inputs = tf.gather_nd(base_next_inputs, keep_idx)
                # Only the sampled entries go through the next-inputs function.
                sampled_next_inputs = maybe_concatenate_auxiliary_inputs(
                    self._next_inputs_fn(sampled_outputs), sampling_idx)

                # The two index sets are disjoint, so adding the two scatters
                # reassembles the full batch.
                full_shape = tf.shape(base_next_inputs)
                from_samples = tf.scatter_nd(indices=sampling_idx,
                                             updates=sampled_next_inputs,
                                             shape=full_shape)
                from_base = tf.scatter_nd(indices=keep_idx,
                                          updates=kept_inputs,
                                          shape=full_shape)
                return from_samples + from_base
Beispiel #18
0
def penalize_used(logits, output):
    """Scale by 0.85 the logits of every id that already appears in `output[0]`.

    Args:
      logits: tensor whose second dimension is indexed by the ids in
        `output`; its static size sets the mask length.
      output: tensor of ids; only its first row is inspected.

    Returns:
      Tensor shaped like `logits`, with the entries for the used ids
      multiplied by 0.85 and all others unchanged.
    """
    # NOTE(review): multiplying a negative logit by 0.85 raises it, so this
    # only penalizes positive logits. Confirm that is intended.
    used_ids = tf.unique(output[0])[0]
    # Ids are unique, so the scatter gives a 0/1 vector over the id axis.
    used_flags = tf.scatter_nd(
        tf.expand_dims(used_ids, 1),
        tf.ones_like(used_ids, dtype=used_ids.dtype),
        [logits.shape[1]])

    used_mask = tf.expand_dims(tf.cast(used_flags, tf.bool), 0)

    return tf.compat.v1.where(used_mask, logits * 0.85, logits)
Beispiel #19
0
 def build_graph(parameters):
     """Build the scatter_nd op testing graph.

     Returns:
       A pair (inputs, outputs): the three placeholders
       [indices, updates, shape] and a one-element list holding the
       scatter_nd result.
     """
     make_placeholder = tf.compat.v1.placeholder
     indices = make_placeholder(dtype=parameters["indices_dtype"],
                                name="indices",
                                shape=parameters["indices_shape"])
     updates = make_placeholder(dtype=parameters["updates_dtype"],
                                name="updates",
                                shape=parameters["updates_shape"])
     shape = make_placeholder(dtype=parameters["shape_dtype"],
                              name="shape",
                              shape=parameters["shape_shape"])
     inputs = [indices, updates, shape]
     outputs = [tf.scatter_nd(indices, updates, shape)]
     return inputs, outputs
Beispiel #20
0
  def call(self, x, padding=None):
    """Apply the feedforward network, optionally skipping padded positions.

    Args:
      x: tensor with shape [batch_size, length, hidden_size]
      padding: (optional) tensor with shape [batch_size, length]. When given
        and self.allow_pad is set, positions whose padding value is not below
        1e-9 are removed before the dense layers and restored as zeros in the
        output at the same locations.

    Returns:
      Output of the feedforward network.
      tensor with shape [batch_size, length, hidden_size]
    """
    if not self.allow_pad:
      padding = None
    strip_padding = padding is not None

    # Retrieve dynamically known shapes
    batch_size = tf.shape(x)[0]
    length = tf.shape(x)[1]

    if strip_padding:
      with tf.name_scope("remove_padding"):
        # Flatten padding to [batch_size*length]
        flat_padding = tf.reshape(padding, [-1])

        nonpad_ids = tf.to_int32(tf.where(flat_padding < 1e-9))

        # Flatten x to [batch_size*length, hidden_size] and keep only the
        # non-padded rows.
        x = tf.reshape(x, [-1, self.hidden_size])
        x = tf.gather_nd(x, indices=nonpad_ids)

        # Restore the static hidden size, then add a leading axis so the
        # dense layers see a 3-D input.
        x.set_shape([None, self.hidden_size])
        x = tf.expand_dims(x, axis=0)

    hidden = self.filter_dense_layer(x)
    if self.train:
      hidden = tf.nn.dropout(hidden, 1.0 - self.relu_dropout)
    output = self.output_dense_layer(hidden)

    if strip_padding:
      with tf.name_scope("re_add_padding"):
        output = tf.squeeze(output, axis=0)
        # Rows not listed in nonpad_ids stay zero.
        output = tf.scatter_nd(
            indices=nonpad_ids,
            updates=output,
            shape=[batch_size * length, self.hidden_size]
        )
        output = tf.reshape(output, [batch_size, length, self.hidden_size])
    return output
Beispiel #21
0
def set_final(sequence, sequence_length, values, time_major=False):
    """Sets the final values in a batch of sequences, and clears those after.

    Steps before the final one are kept; the final step receives `values`
    and everything after it is zero. The work is done batch-major, and the
    result is transposed back when `time_major` is set.
    """
    if time_major:
        batch_major = tf.transpose(sequence, [1, 0, 2])
    else:
        batch_major = sequence

    final_index = _get_final_index(sequence_length, time_major=False)
    # 1.0 for every step strictly before the final one, 0.0 from there on.
    keep_mask = tf.sequence_mask(tf.maximum(0, sequence_length - 1),
                                 maxlen=batch_major.shape[1],
                                 dtype=tf.float32)
    kept_steps = tf.expand_dims(keep_mask, axis=-1) * batch_major
    final_steps = tf.scatter_nd(final_index, values, tf.shape(batch_major))
    result = kept_steps + final_steps

    if not time_major:
        return result
    return tf.transpose(result, [1, 0, 2])
Beispiel #22
0
    def __call__(self, shape, dtype=None, partition_info=None):
        """Build a kernel that is `gain` times identity at the centre tap.

        All leading dimensions of `shape` are treated as the kernel support.
        The centre position holds `self.gain`, every other position is zero,
        and the result is multiplied by an identity matrix over the last two
        dimensions unless they have size 1.

        Args:
          shape: kernel shape with more than two dimensions; the last two
            must be equal.
          dtype: dtype passed to the constant and identity tensors.
          partition_info: ignored.
        """
        del partition_info  # unused
        assert len(shape) > 2, shape

        support = tuple(shape[:-2]) + (1, 1)
        centre = [extent // 2 for extent in support]
        gain_value = tf.constant([self.gain], dtype=dtype)
        kernel = tf.scatter_nd([centre], gain_value, support)

        assert shape[-2] == shape[-1], shape
        if shape[-1] == 1:
            return kernel
        # Broadcasts the centre tap across the channel diagonal.
        return kernel * tf.eye(shape[-1], dtype=dtype)
Beispiel #23
0
def _preprocess_candidate_answers(features, max_num_candidates,
                                  max_seq_length):
    """Prepares dense labels for each candidate.

    Adds `features["can_label_ids"]`, a
    [max_num_candidates, max_seq_length] int32 tensor built by scattering a 1
    at every (candidate, index) pair, and removes `features["can_indexes"]`.
    `features` is modified in place.
    """
    # Group the flat index values into one ragged row per candidate.
    per_candidate = tf.RaggedTensor.from_row_lengths(
        features["can_indexes"].values, features["can_sizes"])
    # Row number of every flat value, i.e. which candidate it belongs to.
    candidate_id = tf.ragged.row_splits_to_segment_ids(
        per_candidate.row_splits)
    coords = tf.stack([candidate_id, per_candidate.flat_values], axis=-1)
    ones = tf.ones_like(candidate_id, dtype=tf.int32)
    features["can_label_ids"] = tf.scatter_nd(
        indices=coords,
        updates=ones,
        shape=[max_num_candidates, max_seq_length])
    # Variable length tensors are not supported on TPU.
    del features["can_indexes"]
Beispiel #24
0
def maskedSoftmax(logits, mask):
    """
    Masked softmax over dim 1. Positions where the mask is False get
    probability 0.
    :param logits: (N, L)
    :param mask: (N, L)
    :return: probabilities (N, L)
    """
    active = tf.where(mask)
    # Wrap the unmasked logits in a SparseTensor so the softmax only sees them.
    sparse_logits = tf.SparseTensor(active,
                                    tf.gather_nd(logits, active),
                                    tf.cast(tf.shape(logits), tf.int64))
    sparse_probs = tf.sparse_softmax(sparse_logits)
    # Back to dense form; masked positions stay zero.
    dense_probs = tf.scatter_nd(sparse_probs.indices, sparse_probs.values,
                                sparse_probs.dense_shape)
    dense_probs.set_shape(logits.shape)
    return dense_probs
Beispiel #25
0
def render_by_scatter(size, points, colors=None, gt_presence=None):
    """Renders points into an image batch by using tf.scatter_nd.

    Args:
      size: spatial size of the output; the result has shape
        [batch_size] + list(size) + [3].
      points: tensor whose leading two static dimensions are
        (batch_size, n_points); its last axis holds coordinates, which are
        cast to int32.
      colors: optional per-point colours; defaults to ones with 3 channels.
      gt_presence: optional per-point multiplier applied to the colours.

    Returns:
      Tensor with the colours accumulated at the point coordinates.
    """
    leading_dims = points.shape[:-1].as_list()

    if colors is None:
        colors = tf.ones(leading_dims + [3], dtype=tf.float32)

    if gt_presence is not None:
        presence = tf.cast(tf.expand_dims(gt_presence, -1), colors.dtype)
        colors *= presence

    batch_size, n_points = leading_dims
    canvas_shape = [batch_size] + list(size) + [3]
    # Prefix every point with the index of its batch element.
    batch_idx = tf.reshape(tf.range(batch_size), [batch_size, 1, 1])
    batch_idx = snt.TileByDim([1], [n_points])(batch_idx)
    pixel_idx = tf.concat([batch_idx, tf.cast(points, tf.int32)], -1)

    return tf.scatter_nd(pixel_idx, colors, canvas_shape)
def max_scoring_span(start_scores, end_scores, max_length, no_answer_bias=0):
    """Compute max scoring span, using the sum of start and end scores.

    Args:
      start_scores: <float32> [batch_size, seq_len]
      end_scores: <float32> [batch_size, seq_len]
      max_length: <int32> Max answer length.
      no_answer_bias: <float32> Log-odds threshold for "no-answer" selection.
        I.e. if log p(span=i,j)/p(span=NULL) > no_answer_bias, then select
        i, j as the span, and NULL otherwise.

    Returns:
      start: <int32> [batch_size]
      end: <int32> [batch_size]
      span_scores: <float32> [batch_size] score of the selected span.
    """
    # Create sparse tensor of size [seq_len].
    seq_len = tensor_utils.shape(start_scores, -1)
    no_answer_bias = tf.scatter_nd([[0]], [no_answer_bias], [seq_len])
    no_answer_bias = tf.cast(no_answer_bias, tf.float32)

    # Apply bias to CLS token logits. Half goes to each of start and end so
    # the (0, 0) span receives the full bias.
    no_answer_bias = tf.div(no_answer_bias, 2)
    start_scores += tf.expand_dims(no_answer_bias, 0)
    end_scores += tf.expand_dims(no_answer_bias, 0)

    # Compute outer sum, and mask to the band 0 <= j - i < max_length.
    # This gives a matrix of start[i] + end[j] scores, where j >= i.
    scores = tf.expand_dims(start_scores, 2) + tf.expand_dims(end_scores, 1)
    mask = (1 - tf.matrix_band_part(tf.ones_like(scores), 0, max_length - 1))
    # The penalty must be large enough that an out-of-band span can never
    # win the argmax. The previous value of 1e-4 left such spans effectively
    # unmasked.
    scores -= mask * 1e4

    def map_fn(inputs):
        flattened = tf.reshape(inputs, [-1])
        argmax = tf.argmax(flattened, output_type=tf.int32)
        indices = tensor_utils.unravel_index_2d(argmax, inputs.shape)
        score = flattened[argmax]
        return indices, score

    # Return i, j indices of max-scoring entry.
    with tf.device("/cpu"):
        endpoints, span_scores = tf.map_fn(fn=map_fn,
                                           elems=scores,
                                           dtype=(tf.int32, tf.float32))
    start = endpoints[:, 0]
    end = endpoints[:, 1]

    return start, end, span_scores
Beispiel #27
0
def masked_apply(tensor, op, mask, set_outside_zero=True):
    """
    Run op on the entries of tensor selected by mask and scatter the results back.

    The selected entries are gathered with tf.boolean_mask, passed through op, and written to their
    original positions in a tensor that has the shape of the input. Positions outside the mask are
    zero in that scattered tensor; when set_outside_zero is False they are replaced by the original
    values of tensor instead.
    :param tensor: The tensor on which op is applied.
    :param op: The operation; called once with the gathered entries. Its output is scattered back
        to the masked positions, so it is expected to yield one update per selected entry.
    :param mask: The boolean mask.
    :param set_outside_zero: If True, leave locations outside the mask at zero, otherwise keep the
        original values of tensor there.
    :return: Tensor with applied function.
    """
    selected_values = tf.boolean_mask(tensor, mask)
    transformed = op(selected_values)
    # Coordinates of the True entries, in the order boolean_mask gathered them.
    positions = tf.to_int32(tf.where(mask))
    scattered = tf.scatter_nd(positions, transformed, tf.shape(tensor))
    if set_outside_zero:
        return scattered
    return tf.where(mask, scattered, tensor)
Beispiel #28
0
    def model_fun(features, labels, mode, params):
        """Estimator model function summing per-atom-type network outputs.

        For every atom type listed in ``params['atom_types']`` a ``BPAtomicNN``
        is built on ``features['<type>_input']`` inside its own variable
        scope. The flattened outputs of all networks are scattered, using
        ``features['<type>_indices']``, into a tensor shaped like ``labels``.

        Args:
            features: dict holding ``'<type>_input'`` and ``'<type>_indices'``
                entries for every atom type.
            labels: reference energies; also defines the prediction shape.
            mode: one of the ``tf.estimator.ModeKeys`` values.
            params: dict with the keys ``'atom_types'``, ``'layers'``,
                ``'offsets'`` and ``'act_funs'`` (zipped together, one entry
                per atom type).

        Returns:
            An ``EstimatorSpec`` suited to ``mode``.
        """
        species = params['atom_types']
        per_type_settings = zip(species, params['layers'], params['offsets'],
                                params['act_funs'])

        # One network per atom type, each in its own reusable scope.
        networks = {}
        for (t, lays, offs, acts) in per_type_settings:
            with _tf.variable_scope('{}_ANN'.format(t), reuse=_tf.AUTO_REUSE):
                networks[t] = BPAtomicNN(features['%s_input' % t], lays, offs,
                                         acts)

        # Scatter every atomic contribution to its target index; scatter_nd
        # sums updates that share an index.
        # NOTE(review): labels is used for the shape before the PREDICT
        # return below - confirm callers provide labels in predict mode.
        scatter_indices = _tf.concat(
            [features['%s_indices' % t] for t in species], 0)
        flat_contributions = _tf.concat(
            [_tf.reshape(networks[t].output, [-1]) for t in species], 0)
        predicted_energies = _tf.scatter_nd(scatter_indices,
                                            flat_contributions,
                                            _tf.shape(labels),
                                            name='E_prediction')

        if mode == _tf.estimator.ModeKeys.PREDICT:
            return _tf.estimator.EstimatorSpec(
                mode, predictions={'energies': predicted_energies})

        # Compute loss.
        loss = _tf.losses.mean_squared_error(labels=labels,
                                             predictions=predicted_energies)

        rmse = _tf.metrics.root_mean_squared_error(labels, predicted_energies)
        # rmse is a (value, update_op) pair; the summary tracks the update op.
        _tf.summary.scalar('rmse', rmse[1])

        if mode == _tf.estimator.ModeKeys.EVAL:
            return _tf.estimator.EstimatorSpec(mode,
                                               loss=loss,
                                               eval_metric_ops={'rmse': rmse})

        assert mode == _tf.estimator.ModeKeys.TRAIN
        train_op = _tf.train.AdagradOptimizer(learning_rate=0.1).minimize(
            loss, global_step=_tf.train.get_global_step())
        return _tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
Beispiel #29
0
    def _initialize_vars(self):
        """Build the training graph and store its handles on ``self``.

        Sets ``global_step``, ``input``, ``encoded``, ``decoded``, ``loss``,
        ``optimizer_op`` and ``saver``. The encoder output is multiplied by a
        0/1 mask that keeps, for every hidden unit, only the ``k`` largest
        activations across the batch, where
        ``k = int(self.sparsity * self.batch_size)``.
        """
        with tf.variable_scope(self.name):
            self.global_step = tf.get_variable(
                'global_step', shape=[], initializer=tf.zeros_initializer())
            self.input = tf.placeholder(tf.float32,
                                        shape=[None, self.input_dim])

        # Encoder: every layer but the last keeps the input width; the last
        # one projects to hidden_units.
        final_layer = self.encode_layers - 1
        activations = self.input
        for layer_idx in range(final_layer):
            activations = self._relu_layer(activations, self.input_dim,
                                           self.input_dim, layer_idx)
        self.encoded = self._relu_layer(activations, self.input_dim,
                                        self.hidden_units, final_layer)

        # tf.nn.top_k works along the last axis, so put the batch there and
        # pick the k strongest batch entries for each hidden unit.
        k = int(self.sparsity * self.batch_size)
        _, top_indices = tf.nn.top_k(tf.transpose(self.encoded),
                                     k=k,
                                     sorted=False)

        # Turn the per-unit column indices into [row, column] pairs for
        # tf.scatter_nd.
        # NOTE(review): relies on _interleave alternating the row and column
        # tensors - confirm against its definition.
        column_slices = tf.unstack(top_indices, axis=1)
        row_slices = [tf.range(self.hidden_units) for _ in range(k)]
        paired = tf.stack(_interleave(row_slices, column_slices))
        indices = tf.reshape(tf.transpose(paired), [-1, 2])

        # Apply sparsity constraint: ones at the selected positions, zeros
        # elsewhere, transposed back to match self.encoded.
        mask = tf.scatter_nd(
            indices,
            tf.ones(self.hidden_units * k),
            tf.constant([self.hidden_units, self.batch_size]))
        self.decoded = self._decode_layer(self.encoded * tf.transpose(mask))

        # Summed squared reconstruction error.
        reconstruction_error = self.decoded - self.input
        self.loss = tf.reduce_sum(tf.square(reconstruction_error))
        self.optimizer_op = self.optimizer(self.learning_rate).minimize(
            self.loss, self.global_step)

        self.saver = tf.train.Saver(tf.global_variables())
 def nucleus_sampling(self, logits):
     """Mask logits that fall outside the top-p (nucleus) probability mass.

     Candidates are ranked by descending logit. A candidate is masked when
     the probability mass of the candidates ranked before it already exceeds
     ``self.hparams.nucleus_sampling``. Masked entries have
     ``logits.dtype.max`` subtracted from them; all others are returned
     unchanged.

     Args:
       logits: rank-2 tensor with a statically known shape (``logits.shape``
         is used for tiling and as the scatter output shape).

     Returns:
       A tensor shaped like ``logits`` with the masked entries pushed down.
     """
     top_p = self.hparams.nucleus_sampling
     tf.logging.info("Nucleus sampling top_p = {}".format(top_p))

     ranking = tf.argsort(logits, axis=-1, direction="DESCENDING")
     ranked_probs = tf.gather(tf.nn.softmax(logits), ranking, batch_dims=1)
     # Exclusive cumsum is the mass *before* each candidate, so it is 0 for
     # the top-ranked one, which therefore always survives.
     mass_before = tf.cumsum(ranked_probs, axis=-1, exclusive=True)
     drop_ranked = tf.cast(tf.greater(mass_before, top_p), logits.dtype)

     # Pair every ranked index with its batch row so the mask can be
     # scattered from ranked order back to the original positions.
     row_ids = tf.expand_dims(tf.range(logits.shape[0]), axis=-1)
     batch_indices = tf.tile(row_ids, [1, logits.shape[1]])
     coordinates = tf.stack([batch_indices, ranking], axis=-1)
     top_p_mask = tf.scatter_nd(coordinates, drop_ranked, logits.shape)

     return logits - top_p_mask * logits.dtype.max