Code example #1
 def _apply_sparse(self, cache):
   """"""
   
   x_tm1, g_t, idxs = cache['x_tm1'], cache['g_t'], cache['idxs']
   idxs, idxs_ = tf.unique(idxs)
   g_t_ = tf.unsorted_segment_sum(g_t, idxs_, tf.size(idxs))
   updates = cache['updates']
   
   if self.mu > 0:
     m_t, t_m = self._sparse_moving_average(x_tm1, idxs, g_t_, 'm', beta=self.mu)
     m_t_ = tf.gather(m_t, idxs)
     m_bar_t_ = (1-self.gamma) * m_t_ + self.gamma * g_t_
     updates.extend([m_t, t_m])
   else:
     m_bar_t_ = g_t_
   
   if self.nu > 0:
     v_t, t_v = self._sparse_moving_average(x_tm1, idxs, g_t_**2, 'v', beta=self.nu)
     v_t_ = tf.gather(v_t, idxs)
     v_bar_t_ = tf.sqrt(v_t_ + self.epsilon)
     updates.extend([v_t, t_v])
   else:
     v_bar_t_ = 1
   
   s_t_ = self.learning_rate * m_bar_t_ / v_bar_t_
   cache['s_t'] = s_t_
   cache['g_t'] = g_t_
   cache['idxs'] = idxs
   return cache
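
The tf.unique / tf.unsorted_segment_sum pair at the top of _apply_sparse merges gradient slices that hit the same row before any moving average is touched. A minimal standalone sketch of that pattern (toy values, not from the original project; assumes a TF1-style API such as tensorflow.compat.v1):

import tensorflow.compat.v1 as tf

idxs = tf.constant([5, 1, 5])                    # two gradient slices hit row 5
g_t = tf.constant([[1.0], [2.0], [0.5]])
uniq, positions = tf.unique(idxs)                # uniq = [5, 1], positions = [0, 1, 0]
g_t_ = tf.unsorted_segment_sum(g_t, positions, tf.size(uniq))
# g_t_ -> [[1.5], [2.0]]: the two updates to row 5 are summed into one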
Code example #2
    def _grad_variance(self):
        """Estimate of gradient Variance.

    Returns:
      C_t ops.
    """
        grad_var_ops = []
        tensor_to_avg = []
        for t, g in zip(self._vars, self._grad):
            if isinstance(g, tf.IndexedSlices):
                tensor_to_avg.append(
                    tf.reshape(tf.unsorted_segment_sum(g.values, g.indices,
                                                       g.dense_shape[0]),
                               shape=t.get_shape()))
            else:
                tensor_to_avg.append(g)
        avg_op = self._moving_averager.apply(tensor_to_avg)
        grad_var_ops.append(avg_op)
        with tf.control_dependencies([avg_op]):
            self._grad_avg = [
                self._moving_averager.average(val) for val in tensor_to_avg
            ]
            self._grad_avg_squared = [tf.square(val) for val in self._grad_avg]

        # Compute Variance
        self._grad_var = tf.maximum(
            tf.constant(1e-6, dtype=self._grad_norm_squared_avg.dtype),
            self._grad_norm_squared_avg -
            tf.add_n([tf.reduce_sum(val) for val in self._grad_avg_squared]))
        if self._sparsity_debias:
            self._grad_var *= self._sparsity_avg
        return grad_var_ops  # C_t
Code example #3
File: sgd_optimizer.py  Project: Anat37/Parser-v1
 def _apply_sparse(self, cache):
   """"""
   
   g_t, idxs = cache['g_t'], cache['idxs']
   idxs, idxs_ = tf.unique(idxs)
   g_t_ = tf.unsorted_segment_sum(g_t, idxs_, tf.size(idxs))
   
   cache['g_t'] = g_t_
   cache['idxs'] = idxs
   cache['s_t'] = self.learning_rate * g_t_
   
   return cache
Code example #4
def accumulate_sparse_gradients(grad):
    """Accumulates repeated indices of a sparse gradient update.

    Args:
      grad: a tf.IndexedSlices gradient

    Returns:
      grad_indices: unique indices
      grad_values: gradient values corresponding to the indices
    """

    grad_indices, grad_segments = tf.unique(grad.indices)
    grad_values = tf.unsorted_segment_sum(grad.values, grad_segments,
                                          tf.size(grad_indices))
    return grad_indices, grad_values
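
A minimal eager-mode check of accumulate_sparse_gradients (the inputs below are made up for illustration; tensorflow.compat.v1 runs eagerly under TF 2.x by default):

import tensorflow.compat.v1 as tf

grad = tf.IndexedSlices(
    values=tf.constant([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]]),
    indices=tf.constant([0, 2, 0]),
    dense_shape=tf.constant([4, 2]))
idx, vals = accumulate_sparse_gradients(grad)
# idx  -> [0, 2]
# vals -> [[4.0, 4.0], [2.0, 2.0]]   (the two rows written to index 0 are summed)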
Code example #5
File: model.py  Project: graphcore/examples
def index_softmax(values: tf.Tensor, indices: tf.Tensor,
                  n_indices: int) -> tf.Tensor:
    """Compute multiple softmax() in groups defined by indices.

    E.g.
        index_softmax([0, 0, ln(2), 2], [0, 0, 0, 1], 2)
          computes softmax([0, 0, ln(2)]) and softmax([2])
        => [0.25, 0.25, 0.5, 1.0]

    Acts over axis=0 of values.
    """
    # Run everything in float32, for stability
    dtype = values.dtype
    values = tf.cast(values, tf.float32)

    max_values = tf.reduce_max(values, axis=0, keepdims=True)
    exp_values = tf.exp(values - max_values)
    # Max(*, 1e-6) prevents a DIV0 error, caused by underflow of the sum-exp.
    sum_exp_values = tf.maximum(
        tf.unsorted_segment_sum(exp_values, indices, n_indices), 1e-6)
    return tf.cast(exp_values / tf.gather(sum_exp_values, indices), dtype)
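
The example from the docstring can be reproduced directly in eager mode (a quick check, assuming tensorflow.compat.v1 running eagerly):

import math
import tensorflow.compat.v1 as tf

out = index_softmax(tf.constant([0.0, 0.0, math.log(2.0), 2.0]),
                    tf.constant([0, 0, 0, 1]), n_indices=2)
# out -> [0.25, 0.25, 0.5, 1.0]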
Code example #6
File: model.py  Project: graphcore/examples
def transformer_conv(
    n_output: int,
    n_heads: int,
    dropout: float,
    nodes: tf.Tensor,
    edge_idx: tf.Tensor,
    edges: tf.Tensor,
) -> tf.Tensor:
    """Implementation of Graph Transformer, https://arxiv.org/abs/2009.03509.

    Matches the specification of TransformerConv in PyTorch Geometric, always using
    a "skip" projection from inputs and shared key/value projections for edges.

    Arguments:

      n_output -- output feature size

      n_heads -- number of attention heads (note: head size is given by n_output/n_heads)

      dropout -- rate parameter for attention mask (post-softmax) dropout

      nodes -- shape (n_nodes, node_feature_size), input features for each node

      edge_idx -- shape (2, n_edges), (0 <= edge_idx < n_nodes), the source and
                  destination of each edge, indexing into nodes

      edges -- shape (n_edges, edge_feature_size), input features for each edge

    Returns:

      tensor of shape (n_nodes, n_output), node features after applying a graph
      transformer (attention) layer
    """
    assert n_output % n_heads == 0, \
        "graph transformer output size should be divisible by the number of heads"
    head_size = n_output // n_heads
    n_nodes, _ = assert_shape(nodes, (None, None))
    _, n_edges = assert_shape(edge_idx, (2, None))
    assert_shape(edges, (n_edges, None))

    with tf.variable_scope("skip"):
        skip = linear(nodes, n_output)

    with tf.variable_scope("edge_shared_kv"):
        edge_kv = linear(edges, n_output, use_bias=False)

    with tf.variable_scope("node_qkv"):
        node_qkv = linear(nodes, 3 * n_output)

    with tf.variable_scope("attention"):
        q = tf.gather(node_qkv[:, :n_output], edge_idx[1])
        kv = tf.reshape(
            tf.gather(node_qkv[:, n_output:], edge_idx[0]),
            (n_edges, 2, n_output),
        )
        k, v = tf.unstack(kv + edge_kv[:, tf.newaxis, :], axis=1)
        a = tf.reduce_sum(tf.reshape(q * k, (n_edges, n_heads, head_size)),
                          -1) / (head_size**0.5)
        a = index_softmax(a, edge_idx[1], n_nodes)
        if dropout:
            a = tf.nn.dropout(a, rate=dropout)
        attention = tf.unsorted_segment_sum(
            tf.repeat(a, head_size, axis=1) * v, edge_idx[1], n_nodes)

    return skip + attention
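
transformer_conv relies on two project-local helpers, linear and assert_shape, that are not shown above. A minimal sketch of plausible implementations, inferred from the call sites (the originals in graphcore/examples may differ):

import tensorflow.compat.v1 as tf

def linear(x, n_output, use_bias=True):
    # Hypothetical dense layer; one weight matrix (and optional bias) per variable_scope.
    n_input = x.get_shape().as_list()[-1]
    w = tf.get_variable("weight", shape=(n_input, n_output),
                        initializer=tf.glorot_uniform_initializer())
    y = tf.matmul(x, w)
    if use_bias:
        y = y + tf.get_variable("bias", shape=(n_output,),
                                initializer=tf.zeros_initializer())
    return y

def assert_shape(tensor, expected):
    # Hypothetical shape check; None entries are wildcards. Returns the static shape.
    shape = tensor.get_shape().as_list()
    assert len(shape) == len(expected)
    for dim, want in zip(shape, expected):
        assert want is None or dim is None or dim == want
    return shape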
Code example #7
File: homework2-5.py  Project: shenyiting2018/ml
def data_group_avg(assignments, data):
    sum_total = tf.unsorted_segment_sum(data, assignments, 3)
    num_total = tf.unsorted_segment_sum(tf.ones_like(data), assignments, 3)
    avg_by_group = sum_total / num_total
    return avg_by_group
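
A quick eager-mode check of data_group_avg (toy data; the function hard-codes 3 clusters, so an empty cluster yields NaN from the 0/0 division):

import tensorflow.compat.v1 as tf

data = tf.constant([[1.0, 1.0], [3.0, 3.0], [10.0, 10.0], [2.0, 2.0]])
assignments = tf.constant([0, 0, 1, 0])
print(data_group_avg(assignments, data))
# -> [[ 2.  2.]
#     [10. 10.]
#     [nan nan]]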
Code example #8
def follow_mention(batch_entities,
                   relation_st_qry,
                   relation_en_qry,
                   entity_word_ids,
                   entity_word_masks,
                   ent2ment_ind,
                   ent2ment_val,
                   ment2ent_map,
                   word_emb_table,
                   word_weights,
                   mips_search_fn,
                   tf_db,
                   hidden_size,
                   mips_config,
                   qa_config,
                   is_training,
                   ensure_index=None):
  """Sparse implementation of the relation follow operation.

  Args:
    batch_entities: [batch_size, num_entities] SparseTensor of incoming entities
      and their scores.
    relation_st_qry: [batch_size, dim] Tensor representing start query vectors
      for dense retrieval.
    relation_en_qry: [batch_size, dim] Tensor representing end query vectors
      for dense retrieval.
    entity_word_ids: [num_entities, max_entity_len] Tensor holding word ids of
      each entity.
    entity_word_masks: [num_entities, max_entity_len] Tensor with masks into
      word ids above.
    ent2ment_ind: [num_entities, num_mentions] RaggedTensor mapping entities to
      mention indices which co-occur with them.
    ent2ment_val: [num_entities, num_mentions] RaggedTensor mapping entities to
      mention scores which co-occur with them.
    ment2ent_map: [num_mentions] Tensor mapping mentions to their entities.
    word_emb_table: [vocab_size, dim] Tensor of word embeddings.  (?)
    word_weights: [vocab_size, 1] Tensor of word weights.  (?)
    mips_search_fn: Function which accepts a dense query vector and returns the
      top-k indices closest to it (from the tf_db).
    tf_db: [num_mentions, 2 * dim] Tensor of mention representations.
    hidden_size: Scalar dimension of word embeddings.
    mips_config: MIPSConfig object.
    qa_config: QAConfig object.
    is_training: Boolean.
    ensure_index: [batch_size] Tensor of mention ids. Only needed if
      `is_training` is True.  (? each example only one ensure entity?)

  Returns:
    ret_mentions_ids: [batch_size, k] Tensor of retrieved mention ids.
    ret_mentions_scs: [batch_size, k] Tensor of retrieved mention scores.
    ret_entities_ids: [batch_size, k] Tensor of retrieved entities ids.
  """
  if qa_config.entity_score_threshold is not None:
    # Remove the entities which have scores lower than the threshold.
    mask = tf.greater(batch_entities.values, qa_config.entity_score_threshold)
    batch_entities = tf.sparse.retain(batch_entities, mask)
  batch_size = batch_entities.dense_shape[0]  # the batch size
  batch_ind = batch_entities.indices[:, 0]  # the list of the batch ids
  entity_ind = batch_entities.indices[:, 1]  # the list of the entity ids
  entity_scs = batch_entities.values  # the list of the scores of each entity

  # Obtain BOW embeddings for the given set of entities.
  # [NNZ, dim]  NNZ (number of non-zero entries) = len(entity_ind)
  batch_entity_emb = model_utils.entity_emb(entity_ind, entity_word_ids,
                                            entity_word_masks, word_emb_table,
                                            word_weights)
  batch_entity_emb = batch_entity_emb * tf.expand_dims(entity_scs, axis=1)
  # [batch_size, dim]
  uniq_batch_ind, uniq_idx = tf.unique(batch_ind)
  agg_emb = tf.unsorted_segment_sum(batch_entity_emb, uniq_idx,
                                    tf.shape(uniq_batch_ind)[0])
  batch_bow_emb = tf.scatter_nd(
      tf.expand_dims(uniq_batch_ind, 1), agg_emb,
      tf.stack([batch_size, hidden_size], axis=0))
  batch_bow_emb.set_shape([None, hidden_size])
  if qa_config.projection_dim is not None:
    with tf.variable_scope("projection"):
      batch_bow_emb = contrib_layers.fully_connected(
          batch_bow_emb,
          qa_config.projection_dim,
          activation_fn=tf.nn.tanh,
          reuse=tf.AUTO_REUSE,
          scope="bow_projection")
  # Each instance in a batch has only one vector as its embedding.

  # Ragged sparse search.
  # (num_batch x num_entities) * (num_entities x num_mentions)
  # [batch_size x num_mentions] sparse
  sp_mention_vec = model_utils.sparse_ragged_mul(
      batch_entities,
      ent2ment_ind,
      ent2ment_val,
      batch_size,
      mips_config.num_mentions,
      qa_config.sparse_reduce_fn,  # max or sum
      threshold=qa_config.entity_score_threshold,
      fix_values_to_one=qa_config.fix_sparse_to_one)
  if is_training and qa_config.ensure_answer_sparse:
    ensure_indices = tf.stack([tf.range(batch_size), ensure_index], axis=-1)
    sp_ensure_vec = tf.SparseTensor(
        tf.cast(ensure_indices, tf.int64),
        tf.ones([batch_size]),
        dense_shape=[batch_size, mips_config.num_mentions])
    sp_mention_vec = tf.sparse.add(sp_mention_vec, sp_ensure_vec)
    sp_mention_vec = tf.SparseTensor(
        indices=sp_mention_vec.indices,
        values=tf.minimum(1., sp_mention_vec.values),
        dense_shape=sp_mention_vec.dense_shape)

  # Dense scam search.
  # [batch_size, 2 * dim]
  # Construct query embeddings (dual encoder: [subject; relation]).
  scam_qrys = tf.concat(
      [batch_bow_emb + relation_st_qry, batch_bow_emb + relation_en_qry],
      axis=1)
  with tf.device("/cpu:0"):
    # [batch_size, num_neighbors]
    _, ret_mention_ids = mips_search_fn(scam_qrys)
    if is_training and qa_config.ensure_answer_dense:
      ret_mention_ids = model_utils.ensure_values_in_mat(
          ret_mention_ids, ensure_index, tf.int32)
    # [batch_size, num_neighbors, 2 * dim]
    ret_mention_emb = tf.gather(tf_db, ret_mention_ids)

  if qa_config.l2_normalize_db:
    ret_mention_emb = tf.nn.l2_normalize(ret_mention_emb, axis=2)
  # [batch_size, 1, num_neighbors]
  ret_mention_scs = tf.matmul(
      tf.expand_dims(scam_qrys, 1), ret_mention_emb, transpose_b=True)
  # [batch_size, num_neighbors]
  ret_mention_scs = tf.squeeze(ret_mention_scs, 1)
  # [batch_size, num_mentions] sparse
  dense_mention_vec = model_utils.convert_search_to_vector(
      ret_mention_scs, ret_mention_ids, tf.cast(batch_size, tf.int32),
      mips_config.num_neighbors, mips_config.num_mentions)

  # Combine sparse and dense search.
  if (is_training and qa_config.train_with_sparse) or (
      (not is_training) and qa_config.predict_with_sparse):
    # [batch_size, num_mentions] sparse
    if qa_config.sparse_strategy == "dense_first":
      ret_mention_vec = model_utils.sp_sp_matmul(dense_mention_vec,
                                                 sp_mention_vec)
    elif qa_config.sparse_strategy == "sparse_first":
      with tf.device("/cpu:0"):
        ret_mention_vec = model_utils.rescore_sparse(sp_mention_vec, tf_db,
                                                     scam_qrys)
    else:
      raise ValueError("Unrecognized sparse_strategy %s" %
                       qa_config.sparse_strategy)
  else:
    # [batch_size, num_mentions] sparse
    ret_mention_vec = dense_mention_vec

  # Get entity scores and ids.
  # [batch_size, num_entities] sparse
  entity_indices = tf.cast(
      tf.gather(ment2ent_map, ret_mention_vec.indices[:, 1]), tf.int64)
  ret_entity_vec = tf.SparseTensor(
      indices=tf.concat(
          [ret_mention_vec.indices[:, 0:1],
           tf.expand_dims(entity_indices, 1)],
          axis=1),
      values=ret_mention_vec.values,
      dense_shape=[batch_size, qa_config.num_entities])

  return ret_entity_vec, ret_mention_vec, dense_mention_vec, sp_mention_vec
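
The per-example aggregation inside follow_mention (tf.unique over the batch indices, tf.unsorted_segment_sum of the entity embeddings, then tf.scatter_nd back to a dense [batch_size, dim] tensor) can be exercised in isolation. A toy sketch with assumed shapes, not from the original codebase:

import tensorflow.compat.v1 as tf

batch_ind = tf.constant([0, 0, 2])      # maps each retained entity to its example
entity_emb = tf.constant([[1.0, 0.0], [0.0, 1.0], [2.0, 2.0]])
batch_size, hidden_size = 3, 2

uniq_batch_ind, uniq_idx = tf.unique(batch_ind)
agg_emb = tf.unsorted_segment_sum(entity_emb, uniq_idx, tf.shape(uniq_batch_ind)[0])
dense_emb = tf.scatter_nd(tf.expand_dims(uniq_batch_ind, 1), agg_emb,
                          tf.stack([batch_size, hidden_size]))
# dense_emb -> [[1., 1.], [0., 0.], [2., 2.]]  (example 1 contributes no entities)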
Code example #9
def random_spans_noise_mask(length=200,
                            noise_density=0.15,
                            mean_noise_span_length=3.0):
    """Noise mask consisting of random spans of noise tokens.
    The number of noise tokens and the number of noise spans and non-noise spans
    are determined deterministically as follows:
    num_noise_tokens = round(length * noise_density)
    num_nonnoise_spans = num_noise_spans = round(
    num_noise_tokens / mean_noise_span_length)
    Spans alternate between non-noise and noise, beginning with non-noise.
    Subject to the above restrictions, all masks are equally likely.
    Args:
    length: an int32 scalar (length of the incoming token sequence)
    noise_density: a float - approximate density of output mask
    mean_noise_span_length: a number
    Returns:
    a boolean tensor with shape [length]
    """
    orig_length = length
    # increase length to avoid degeneracy
    length = tf.maximum(length, 2)

    def to_int(x):
        return tf.cast(x, tf.int32)

    def to_float(x):
        return tf.cast(x, tf.float32)

    num_noise_tokens = to_int(tf.round(to_float(length) * noise_density))
    # avoid degeneracy by ensuring positive numbers of noise and nonnoise tokens.
    num_noise_tokens = tf.minimum(tf.maximum(num_noise_tokens, 1), length - 1)
    num_noise_spans = to_int(
        tf.round(to_float(num_noise_tokens) / mean_noise_span_length))
    # avoid degeneracy by ensuring positive number of noise spans
    num_noise_spans = tf.maximum(num_noise_spans, 1)
    num_nonnoise_tokens = length - num_noise_tokens

    # pick the lengths of the noise spans and the non-noise spans
    def _random_segmentation(num_items, num_segments):
        """Partition a sequence of items randomly into non-empty segments.
        Args:
        num_items: an integer scalar > 0
        num_segments: an integer scalar in [1, num_items]
        Returns:
        a Tensor with shape [num_segments] containing positive integers that add
        up to num_items
        """
        first_in_segment = tf.pad(
            tf.random.shuffle(
                to_int(tf.range(num_items - 1) < num_segments - 1), seed=123),
            [[1, 0]])
        segment_id = tf.cumsum(first_in_segment)
        segment_length = tf.segment_sum(tf.ones_like(segment_id), segment_id)
        return segment_length

    noise_span_lengths = _random_segmentation(num_noise_tokens,
                                              num_noise_spans)
    nonnoise_span_lengths = _random_segmentation(num_nonnoise_tokens,
                                                 num_noise_spans)
    interleaved_span_lengths = tf.reshape(
        tf.stack([nonnoise_span_lengths, noise_span_lengths], axis=1),
        [num_noise_spans * 2])
    span_starts = tf.cumsum(interleaved_span_lengths)[:-1]
    span_start_indicator = tf.unsorted_segment_sum(tf.ones_like(span_starts),
                                                   span_starts, length)
    span_num = tf.cumsum(span_start_indicator)
    is_noise = tf.equal(span_num % 2, 1)
    return is_noise[:orig_length].numpy()
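
A quick eager-mode call (parameters chosen for illustration; a T5-style pipeline would pass the real sequence length):

import tensorflow.compat.v1 as tf

mask = random_spans_noise_mask(length=20, noise_density=0.3,
                               mean_noise_span_length=2.0)
print(mask)   # boolean array of shape (20,), 6 True entries spread over 3 noise spans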