def coarse_to_fine_pruning(top_span_emb, top_span_mention_scores, c,
                           mention_doc_ids, dropout):
    k = util.shape(top_span_emb, 0)
    top_span_range = tf.range(k)  # [k]
    antecedent_offsets = tf.expand_dims(top_span_range, 1) - tf.expand_dims(
        top_span_range, 0)  # [k, k]
    antecedents_mask = antecedent_offsets >= 1  # [k, k]
    antecedents = tf.maximum(antecedent_offsets, 0)  # [k, k]
    target_doc_ids = tf.expand_dims(mention_doc_ids, 1)  # [k, 1]
    antecedent_doc_ids = tf.gather(mention_doc_ids, antecedents)  # [k, k]
    # A pair is only valid when both spans belong to the same document.
    antecedents_mask = tf.logical_and(
        tf.equal(target_doc_ids, antecedent_doc_ids), antecedents_mask)  # [k, k]
    fast_antecedent_scores = tf.expand_dims(
        top_span_mention_scores, 1) + tf.expand_dims(
            top_span_mention_scores, 0)  # [k, k]
    # [k, k]; the mask cannot be applied at the end: invalid pairs must be
    # pushed to -inf before top-k sorts the scores.
    fast_antecedent_scores += tf.log(tf.to_float(antecedents_mask))
    fast_antecedent_scores += get_fast_antecedent_scores(
        top_span_emb, dropout)  # [k, k]
    _, top_antecedents = tf.nn.top_k(fast_antecedent_scores, c,
                                     sorted=False)  # [k, c]
    top_antecedents_mask = util.batch_gather(antecedents_mask,
                                             top_antecedents)  # [k, c]
    top_antecedents_mask = tf.squeeze(top_antecedents_mask, -1)
    top_fast_antecedent_scores = util.batch_gather(fast_antecedent_scores,
                                                   top_antecedents)  # [k, c]
    top_fast_antecedent_scores = tf.squeeze(top_fast_antecedent_scores, -1)
    top_antecedent_offsets = util.batch_gather(antecedent_offsets,
                                               top_antecedents)  # [k, c]
    top_antecedent_offsets = tf.squeeze(top_antecedent_offsets, -1)
    return top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets
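# The pruning functions in this section call get_fast_antecedent_scores for a
# cheap bilinear compatibility term, but the helper itself is not shown. A
# minimal sketch of the usual implementation (a dropout-regularized source
# projection followed by a dot product, effectively emb_i * W * emb_j) might
# look like the following; the module-level signature with an explicit
# `dropout` argument and the scope name "src_projection" are assumptions.
def get_fast_antecedent_scores(top_span_emb, dropout):
    with tf.variable_scope("src_projection"):
        source_top_span_emb = tf.nn.dropout(
            util.projection(top_span_emb, util.shape(top_span_emb, -1)),
            dropout)  # [k, emb]
    target_top_span_emb = tf.nn.dropout(top_span_emb, dropout)  # [k, emb]
    # [k, k]; entry (i, j) scores span j as an antecedent of span i.
    return tf.matmul(source_top_span_emb, target_top_span_emb, transpose_b=True)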
def coarse_to_fine_pruning(self, top_span_emb, top_span_mention_scores, c):
    k = util.shape(top_span_emb, 0)
    top_span_range = tf.range(k)  # [k]
    antecedent_offsets = tf.expand_dims(top_span_range, 1) - tf.expand_dims(
        top_span_range, 0)  # [k, k]
    antecedents_mask = antecedent_offsets >= 1  # [k, k]
    fast_antecedent_scores = tf.expand_dims(
        top_span_mention_scores, 1) + tf.expand_dims(
            top_span_mention_scores, 0)  # [k, k]
    fast_antecedent_scores += tf.log(tf.to_float(antecedents_mask))  # [k, k]
    fast_antecedent_scores += self.get_fast_antecedent_scores(top_span_emb)  # [k, k]
    _, top_antecedents = tf.nn.top_k(fast_antecedent_scores, c,
                                     sorted=False)  # [k, c]
    top_antecedents_mask = util.batch_gather(antecedents_mask,
                                             top_antecedents)  # [k, c]
    top_fast_antecedent_scores = util.batch_gather(fast_antecedent_scores,
                                                   top_antecedents)  # [k, c]
    top_antecedent_offsets = util.batch_gather(antecedent_offsets,
                                               top_antecedents)  # [k, c]
    return top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets
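# Every example here relies on util.batch_gather to pick, for each row i of a
# [k, k] matrix, the entries at the column indices in top_antecedents[i]. A
# minimal self-contained sketch of how such a helper can be written in TF1
# (flatten, offset each row's indices, then one tf.gather) is below; the
# exact repo versions differ, e.g. some keep a trailing size-1 dimension that
# callers squeeze away, as in the first example above.
def shape(x, dim):
    # Static dimension if known at graph-build time, else dynamic.
    return x.get_shape()[dim].value or tf.shape(x)[dim]

def batch_gather(emb, indices):
    batch_size = shape(emb, 0)
    seqlen = shape(emb, 1)
    emb_size = shape(emb, 2) if len(emb.get_shape()) > 2 else 1
    flattened_emb = tf.reshape(
        emb, [batch_size * seqlen, emb_size])  # [batch_size * seqlen, emb]
    # Shift row i's indices by i * seqlen so they address the flattened tensor.
    offset = tf.expand_dims(tf.range(batch_size) * seqlen, 1)  # [batch_size, 1]
    gathered = tf.gather(flattened_emb,
                         indices + offset)  # [batch_size, num_indices, emb]
    if len(emb.get_shape()) == 2:
        gathered = tf.squeeze(gathered, 2)  # [batch_size, num_indices]
    return gathered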
def coarse_to_fine_pruning(self, top_span_emb, top_span_mention_scores, c):
    k = util.shape(top_span_emb, 0)
    top_span_range = tf.range(k)  # [k]
    antecedent_offsets = tf.expand_dims(top_span_range, 1) - tf.expand_dims(
        top_span_range, 0)  # [k, k]
    antecedents_mask = antecedent_offsets >= 1  # [k, k]
    fast_antecedent_scores = tf.expand_dims(
        top_span_mention_scores, 1) + tf.expand_dims(
            top_span_mention_scores, 0)  # [k, k]
    fast_antecedent_scores += tf.log(tf.to_float(antecedents_mask))  # [k, k]
    fast_antecedent_scores += self.get_fast_antecedent_scores(top_span_emb)  # [k, k]
    if self.config['use_prior']:
        antecedent_distance_buckets = self.bucket_distance(
            antecedent_offsets)  # [k, k]
        distance_scores = util.projection(
            tf.nn.dropout(
                tf.get_variable(
                    "antecedent_distance_emb",
                    [10, self.config["feature_size"]],
                    initializer=tf.truncated_normal_initializer(stddev=0.02)),
                self.dropout),
            1,
            initializer=tf.truncated_normal_initializer(stddev=0.02))  # [10, 1]
        antecedent_distance_scores = tf.gather(
            tf.squeeze(distance_scores, 1), antecedent_distance_buckets)  # [k, k]
        fast_antecedent_scores += antecedent_distance_scores
        # Only defined when the distance prior is enabled; keeping these
        # assignments inside the branch avoids a NameError otherwise.
        self.antecedent_distance_buckets = antecedent_distance_buckets
        self.antecedent_distance_scores = antecedent_distance_scores
    _, top_antecedents = tf.nn.top_k(fast_antecedent_scores, c,
                                     sorted=True)  # [k, c]
    top_antecedents_mask = util.batch_gather(antecedents_mask,
                                             top_antecedents)  # [k, c]
    top_fast_antecedent_scores = util.batch_gather(fast_antecedent_scores,
                                                   top_antecedents)  # [k, c]
    top_antecedent_offsets = util.batch_gather(antecedent_offsets,
                                               top_antecedents)  # [k, c]
    self.top_antecedents_idx = top_antecedents
    self.top_antecedents_mask = top_antecedents_mask
    self.top_fast_antecedent_scores = top_fast_antecedent_scores
    self.top_antecedent_offsets = top_antecedent_offsets
    self.fast_antecedent_scores = fast_antecedent_scores
    return top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets
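# bucket_distance is not shown in these examples. A common implementation in
# the e2e-coref line of models maps distances into 10 semi-logscale buckets
# [0, 1, 2, 3, 4, 5-7, 8-15, 16-31, 32-63, 64+], which is why the
# "antecedent_distance_emb" variable above has exactly 10 rows. A sketch,
# assuming that bucketing scheme:
import math

def bucket_distance(distances):
    # Identity buckets for distances <= 4, log2-spaced buckets beyond.
    logspace_idx = tf.to_int32(
        tf.floor(tf.log(tf.to_float(distances)) / math.log(2))) + 3
    use_identity = tf.to_int32(distances <= 4)
    combined_idx = use_identity * distances + (1 - use_identity) * logspace_idx
    return tf.clip_by_value(combined_idx, 0, 9)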
def coarse_pruning(self, top_span_emb, top_span_mention_scores, c):
    """For the top-k candidate spans, keep the top-c antecedents per span.
    The fast antecedent score is composed of:
    1. each span's mention score;
    2. the bilinear score emb_i * W * emb_j;
    3. a mask so that each span only considers preceding spans as antecedents;
    4. a score for the span-antecedent distance, mapped through an embedding.
    """
    k = util.shape(top_span_emb, 0)  # num_candidates
    top_span_range = tf.range(k)  # [num_candidates]
    # antecedent_offsets: [num_candidates, num_candidates], the distance
    # between every pair of spans (how many spans apart they are).
    antecedent_offsets = tf.expand_dims(top_span_range, 1) - tf.expand_dims(
        top_span_range, 0)  # [k, k]
    antecedents_mask = antecedent_offsets >= 1  # [k, k]
    fast_antecedent_scores = tf.expand_dims(
        top_span_mention_scores, 1) + tf.expand_dims(
            top_span_mention_scores, 0)
    fast_antecedent_scores += tf.log(tf.to_float(antecedents_mask))  # [k, k]
    fast_antecedent_scores += self.get_fast_antecedent_scores(top_span_emb)  # [k, k]
    if self.config['use_prior']:
        antecedent_distance_buckets = self.bucket_distance(
            antecedent_offsets)  # [k, k]
        distance_scores = util.projection(
            tf.nn.dropout(
                tf.get_variable(
                    "antecedent_distance_emb",
                    [10, self.config["feature_size"]],
                    initializer=tf.truncated_normal_initializer(stddev=0.02)),
                self.dropout),
            1,
            initializer=tf.truncated_normal_initializer(stddev=0.02))  # [10, 1]
        antecedent_distance_scores = tf.gather(
            tf.squeeze(distance_scores, 1), antecedent_distance_buckets)  # [k, k]
        fast_antecedent_scores += antecedent_distance_scores
    # Take the c antecedents with the highest fast score; top_antecedents
    # holds the span index of each selected antecedent.
    _, top_antecedents = tf.nn.top_k(fast_antecedent_scores, c,
                                     sorted=False)  # [k, c]
    top_antecedents_mask = util.batch_gather(
        antecedents_mask, top_antecedents)  # [k, c] mask for each pair
    top_fast_antecedent_scores = util.batch_gather(
        fast_antecedent_scores, top_antecedents)  # [k, c] score for each pair
    top_antecedent_offsets = util.batch_gather(
        antecedent_offsets, top_antecedents)  # [k, c] offset for each pair
    return top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets
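# All four pruning variants above build the antecedent structure from the same
# broadcasting trick. A small NumPy illustration (hypothetical values, k=4) of
# what antecedent_offsets and antecedents_mask contain:
import numpy as np

r = np.arange(4)
offsets = r[:, None] - r[None, :]
# offsets == [[ 0, -1, -2, -3],
#             [ 1,  0, -1, -2],
#             [ 2,  1,  0, -1],
#             [ 3,  2,  1,  0]]
mask = offsets >= 1
# mask is strictly lower-triangular: span j is a valid antecedent of span i
# only when j comes strictly before i, and adding log(mask) drives every
# invalid pair's score to -inf before the top-k selection.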
def _allocation(self, usage):
    r"""Computes allocation by sorting `usage`.

    This corresponds to the value a = a_t[\phi_t[j]] in the paper.

    Args:
      usage: tensor of shape `[batch_size, memory_size]` indicating current
          memory usage. This is equal to u_t in the paper when we only have
          one write head, but for multiple write heads, one should update the
          usage while iterating through the write heads to take into account
          the allocation returned by this function.

    Returns:
      Tensor of shape `[batch_size, memory_size]` corresponding to allocation.
    """
    with tf.name_scope('allocation'):
        # Ensure values are not too small prior to cumprod.
        usage = _EPSILON + (1 - _EPSILON) * usage

        nonusage = 1 - usage
        sorted_nonusage, indices = tf.nn.top_k(
            nonusage, k=self._memory_size, name='sort')
        sorted_usage = 1 - sorted_nonusage
        prod_sorted_usage = tf.cumprod(sorted_usage, axis=1, exclusive=True)
        sorted_allocation = sorted_nonusage * prod_sorted_usage
        inverse_indices = util.batch_invert_permutation(indices)

        # This final line "unsorts" sorted_allocation, so that the indexing
        # corresponds to the original indexing of `usage`.
        return util.batch_gather(sorted_allocation, inverse_indices)
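# util.batch_invert_permutation is the batched counterpart of TensorFlow's
# tf.invert_permutation. The DNC repo ships its own version; a minimal sketch
# that simply maps the built-in 1-D op over the batch dimension would be:
def batch_invert_permutation(permutations):
    # For each row p, produces q such that q[p[i]] = i.
    return tf.map_fn(tf.invert_permutation, permutations)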
def test(self):
    values = np.array([[3, 1, 4, 1],
                       [5, 9, 2, 6],
                       [5, 3, 5, 7]])
    indices = np.array([[1, 2, 0, 3],
                        [3, 0, 1, 2],
                        [0, 2, 1, 3]])
    target = np.array([[1, 4, 3, 1],
                       [6, 5, 9, 2],
                       [5, 5, 3, 7]])
    result = util.batch_gather(tf.constant(values), tf.constant(indices))
    with self.test_session():
        result = result.eval()
        self.assertAllEqual(target, result)
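# The expected output in this test matches what NumPy computes row-wise; the
# snippet below is an independent check of the same gather semantics:
import numpy as np

values = np.array([[3, 1, 4, 1], [5, 9, 2, 6], [5, 3, 5, 7]])
indices = np.array([[1, 2, 0, 3], [3, 0, 1, 2], [0, 2, 1, 3]])
print(np.take_along_axis(values, indices, axis=1))
# [[1 4 3 1]
#  [6 5 9 2]
#  [5 5 3 7]]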