Example #1
  def get_mention_scores(self, span_emb, span_starts, span_ends):
    with tf.variable_scope("mention_scores"):
      span_scores = util.ffnn(span_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout) # [k, 1]
    if self.config['use_prior']:
      span_width_emb = tf.get_variable("span_width_prior_embeddings", [self.config["max_span_width"], self.config["feature_size"]], initializer=tf.truncated_normal_initializer(stddev=0.02)) # [W, emb]
      span_width_index = span_ends - span_starts # [NC]
      with tf.variable_scope("width_scores"):
        width_scores = util.ffnn(span_width_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout) # [W, 1]
      width_scores = tf.gather(width_scores, span_width_index)
      span_scores += width_scores
    return span_scores
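Every example on this page builds its scores with util.ffnn and reads tensor sizes with util.shape, but the util module itself is not reproduced here. Below is a minimal sketch of what these two helpers typically look like in the e2e-coref family of codebases these snippets come from (TensorFlow 1.x assumed); treat the exact signatures and variable names as assumptions rather than the repository's verbatim code.

import tensorflow as tf

def shape(x, dim):
  # Return the static size of dimension `dim` if it is known, otherwise the dynamic size.
  return x.get_shape()[dim].value or tf.shape(x)[dim]

def ffnn(inputs, num_hidden_layers, hidden_size, output_size, dropout):
  # Feed-forward scorer: ReLU hidden layers followed by a linear output layer.
  # 3D inputs are flattened to 2D for the matmuls and reshaped back at the end.
  if len(inputs.get_shape()) == 3:
    batch, seqlen, emb = shape(inputs, 0), shape(inputs, 1), shape(inputs, 2)
    current = tf.reshape(inputs, [batch * seqlen, emb])
  else:
    current = inputs
  for i in range(num_hidden_layers):
    w = tf.get_variable("hidden_weights_{}".format(i), [shape(current, 1), hidden_size])
    b = tf.get_variable("hidden_bias_{}".format(i), [hidden_size])
    current = tf.nn.relu(tf.nn.xw_plus_b(current, w, b))
    if dropout is not None:
      current = tf.nn.dropout(current, dropout)  # `dropout` is a keep probability here
  w = tf.get_variable("output_weights", [shape(current, 1), output_size])
  b = tf.get_variable("output_bias", [output_size])
  outputs = tf.nn.xw_plus_b(current, w, b)
  if len(inputs.get_shape()) == 3:
    outputs = tf.reshape(outputs, [batch, seqlen, output_size])
  return outputs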
Example #2
  def get_slow_antecedent_scores(self, top_span_emb, top_antecedents, top_antecedent_emb, top_antecedent_offsets, top_span_speaker_ids, genre_emb):
    k = util.shape(top_span_emb, 0)
    c = util.shape(top_antecedents, 1)

    feature_emb_list = []

    if self.config["use_metadata"]:
      top_antecedent_speaker_ids = tf.gather(top_span_speaker_ids, top_antecedents) # [k, c]
      same_speaker = tf.equal(tf.expand_dims(top_span_speaker_ids, 1), top_antecedent_speaker_ids) # [k, c]
      speaker_pair_emb = tf.gather(tf.get_variable("same_speaker_emb", [2, self.config["feature_size"]]), tf.to_int32(same_speaker)) # [k, c, emb]
      feature_emb_list.append(speaker_pair_emb)

      tiled_genre_emb = tf.tile(tf.expand_dims(tf.expand_dims(genre_emb, 0), 0), [k, c, 1]) # [k, c, emb]
      feature_emb_list.append(tiled_genre_emb)

    if self.config["use_features"]:
      antecedent_distance_buckets = self.bucket_distance(top_antecedent_offsets) # [k, c]
      antecedent_distance_emb = tf.gather(tf.get_variable("antecedent_distance_emb", [10, self.config["feature_size"]]), antecedent_distance_buckets) # [k, c]
      feature_emb_list.append(antecedent_distance_emb)

    feature_emb = tf.concat(feature_emb_list, 2) # [k, c, emb]
    feature_emb = tf.nn.dropout(feature_emb, self.dropout) # [k, c, emb]

    target_emb = tf.expand_dims(top_span_emb, 1) # [k, 1, emb]
    similarity_emb = top_antecedent_emb * target_emb # [k, c, emb]
    target_emb = tf.tile(target_emb, [1, c, 1]) # [k, c, emb]

    pair_emb = tf.concat([target_emb, top_antecedent_emb, similarity_emb, feature_emb], 2) # [k, c, emb]

    with tf.variable_scope("slow_antecedent_scores"):
      slow_antecedent_scores = util.ffnn(pair_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout) # [k, c, 1]
    slow_antecedent_scores = tf.squeeze(slow_antecedent_scores, 2) # [k, c]
    return slow_antecedent_scores # [k, c]
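Examples #2, #3, #7, #9, and #11 turn raw antecedent offsets into one of ten buckets via bucket_distance before looking up antecedent_distance_emb. A sketch of that helper, following the semi-logscale scheme used in these coreference codebases (exact buckets for distances 0-4, then 5-7, 8-15, 16-31, 32-63, 64+); TF1 assumed:

import math
import tensorflow as tf

def bucket_distance(distances):
  # Map each distance to one of 10 buckets: identity for 0..4, log2-spaced above, clipped to [0, 9].
  logspace_idx = tf.to_int32(tf.floor(tf.log(tf.to_float(distances)) / math.log(2))) + 3
  use_identity = tf.to_int32(distances <= 4)
  combined_idx = use_identity * distances + (1 - use_identity) * logspace_idx
  return tf.clip_by_value(combined_idx, 0, 9)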
Example #3
def get_antecedent_scores(top_span_emb, top_span_mention_scores, antecedents, config, dropout, top_fast_antecedent_scores, top_antecedent_offsets):
  k = util.shape(top_span_emb, 0)
  max_antecedents = util.shape(antecedents, 1)
  feature_emb_list = []


  if config["use_features"]:
    # target_indices = tf.range(k)  # [k]
    # antecedent_distance = tf.expand_dims(target_indices, 1) - antecedents  # [k, max_ant]
    # antecedent_distance_buckets = bucket_distance(antecedent_distance)  # [k, max_ant]
    antecedent_distance_buckets = bucket_distance(top_antecedent_offsets)
    with tf.variable_scope("features"):
      antecedent_distance_emb = tf.gather(
          tf.get_variable("antecedent_distance_emb", [10, config["feature_size"]]),
          antecedent_distance_buckets)  # [k, max_ant]
    feature_emb_list.append(antecedent_distance_emb)

  feature_emb = tf.concat(feature_emb_list, 2)  # [k, max_ant, emb]
  feature_emb = tf.nn.dropout(feature_emb, dropout)  # [k, max_ant, emb]

  antecedent_emb = tf.gather(top_span_emb, antecedents)  # [k, max_ant, emb]
  target_emb = tf.expand_dims(top_span_emb, 1)  # [k, 1, emb]
  similarity_emb = antecedent_emb * target_emb  # [k, max_ant, emb]
  target_emb = tf.tile(target_emb, [1, max_antecedents, 1])  # [k, max_ant, emb]
  pair_emb = tf.concat([target_emb, antecedent_emb, similarity_emb, feature_emb], 2)  # [k, max_ant, emb]
  with tf.variable_scope("antecedent_scores"):
    antecedent_scores = util.ffnn(pair_emb, config["ffnn_depth"], config["ffnn_size"], 1,
                                  dropout)  # [k, max_ant, 1]
    antecedent_scores = tf.squeeze(antecedent_scores, 2)  # [k, max_ant]
  # antecedent_scores += tf.expand_dims(top_span_mention_scores, 1) + tf.gather(
  #     top_span_mention_scores, antecedents)  # [k, max_ant]
  antecedent_scores += top_fast_antecedent_scores
  return antecedent_scores, antecedent_emb, pair_emb  # [k, max_ant]
Example #4
    def get_feature_attention_score(self, tmp_feature_emb,
                                    tmp_candidate_embedding, tmp_name):
        k = util.shape(tmp_feature_emb, 0)  # tmp_feature_emb: [k, c, feature_size]
        c = util.shape(tmp_feature_emb, 1)
        tmp_feature_size = util.shape(tmp_feature_emb, 2)
        tmp_emb_size = util.shape(tmp_candidate_embedding, 2)
        overall_emb = tf.concat([tmp_candidate_embedding, tmp_feature_emb],
                                2)  # [k, c, feature_size+embedding_size]

        repeated_emb = tf.tile(
            tf.expand_dims(overall_emb, 1),
            [1, c, 1, 1])  # [k, c, c, feature_size+embedding_size]
        tiled_emb = tf.tile(
            tf.expand_dims(overall_emb, 2),
            [1, 1, c, 1])  # [k, c, c, feature_size+embedding_size]

        final_feature = tf.concat(
            [repeated_emb, tiled_emb, repeated_emb * tiled_emb],
            3)  # [k, c, c, (feature_size+embedding_size)*3]
        final_feature = tf.reshape(
            final_feature, [k, c * c, (tmp_feature_size + tmp_emb_size) * 3])
        with tf.variable_scope(tmp_name):
            feature_attention_scores = util.ffnn(final_feature,
                                                 self.config["ffnn_depth"],
                                                 self.config["ffnn_size"], 1,
                                                 self.dropout)  # [k, c*c, 1]
        feature_attention_scores = tf.reshape(feature_attention_scores,
                                              [k, c, c, 1])
        return feature_attention_scores
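The expand_dims/tile pattern above (repeated in Example #8 below) enumerates every ordered pair of candidates so that the FFNN can score all pairwise interactions at once. A tiny NumPy check of that pairing trick, with made-up sizes k=1, c=3, d=2:

import numpy as np

overall = np.arange(6).reshape(1, 3, 2)                          # [k, c, d]
repeated = np.tile(overall[:, None, :, :], (1, 3, 1, 1))         # [k, c, c, d]; repeated[i, a, b] == overall[i, b]
tiled = np.tile(overall[:, :, None, :], (1, 1, 3, 1))            # [k, c, c, d]; tiled[i, a, b] == overall[i, a]
pairs = np.concatenate([repeated, tiled, repeated * tiled], -1)  # [k, c, c, 3*d]: every ordered (a, b) pair
assert pairs.shape == (1, 3, 3, 6)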
Example #5
  def get_antecedent_scores(self, mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb, mention_ner_ids):
    num_mentions = util.shape(mention_emb, 0)
    max_antecedents = util.shape(antecedents, 1)

    feature_emb_list = []

    if self.config["use_metadata"]:
      antecedent_speaker_ids = tf.gather(mention_speaker_ids, antecedents) # [num_mentions, max_ant]
      same_speaker = tf.equal(tf.expand_dims(mention_speaker_ids, 1), antecedent_speaker_ids) # [num_mentions, max_ant]
      speaker_pair_emb = tf.gather(tf.get_variable("same_speaker_emb", [2, self.config["feature_size"]]), tf.to_int32(same_speaker)) # [num_mentions, max_ant, emb]
      feature_emb_list.append(speaker_pair_emb)

      # tile is duplicating data [a b c d] --> [a b c d a b c d]
      tiled_genre_emb = tf.tile(tf.expand_dims(tf.expand_dims(genre_emb, 0), 0), [num_mentions, max_antecedents, 1]) # [num_mentions, max_ant, emb]
      feature_emb_list.append(tiled_genre_emb)

    if self.config["use_features"]:
      target_indices = tf.range(num_mentions) # [num_mentions]
      mention_distance = tf.expand_dims(target_indices, 1) - antecedents # [num_mentions, max_ant]
      mention_distance_bins = coref_ops.distance_bins(mention_distance) # [num_mentions, max_ant]
      mention_distance_bins.set_shape([None, None])
      mention_distance_emb = tf.gather(tf.get_variable("mention_distance_emb", [10, self.config["feature_size"]]), mention_distance_bins) # [num_mentions, max_ant]
      feature_emb_list.append(mention_distance_emb)

    if self.config["use_ner_phi"]:
      antecedent_ner_ids = tf.gather(mention_ner_ids, antecedents)
      same_ner = tf.equal(tf.expand_dims(mention_ner_ids, 1), antecedent_ner_ids)
      ner_pair_emb = tf.gather(tf.get_variable("same_ner_emb", [2, self.config["feature_size"]]), tf.to_int32(same_ner))
      feature_emb_list.append(ner_pair_emb)

    # phi(i, j)
    feature_emb = tf.concat(feature_emb_list, 2) # [num_mentions, max_ant, emb]
    feature_emb = tf.nn.dropout(feature_emb, self.dropout) # [num_mentions, max_ant, emb]

    # g_i
    antecedent_emb = tf.gather(mention_emb, antecedents) # [num_mentions, max_ant, emb]
    
    # g_j 
    target_emb_tiled = tf.tile(tf.expand_dims(mention_emb, 1), [1, max_antecedents, 1]) # [num_mentions, max_ant, emb]
    
    # g_i . g_j
    similarity_emb = antecedent_emb * target_emb_tiled # [num_mentions, max_ant, emb]

    # [g_i, g_j, g_i . g_j, phi(i, j)]
    pair_emb = tf.concat([target_emb_tiled, antecedent_emb, similarity_emb, feature_emb], 2) # [num_mentions, max_ant, emb]

    with tf.variable_scope("iteration"):
      with tf.variable_scope("antecedent_scoring"):
        antecedent_scores = util.ffnn(pair_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout) # [num_mentions, max_ant, 1]
    antecedent_scores = tf.squeeze(antecedent_scores, 2) # [num_mentions, max_ant]

    antecedent_mask = tf.log(tf.sequence_mask(antecedents_len, max_antecedents, dtype=tf.float32)) # [num_mentions, max_ant]
    antecedent_scores += antecedent_mask # [num_mentions, max_ant]

    antecedent_scores += tf.expand_dims(mention_scores, 1) + tf.gather(mention_scores, antecedents) # [num_mentions, max_ant]
    antecedent_scores = tf.concat([tf.zeros([util.shape(mention_scores, 0), 1]), antecedent_scores], 1) # [num_mentions, max_ant + 1]
    return antecedent_scores, pair_emb # [num_mentions, max_ant + 1]
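The returned [num_mentions, max_ant + 1] matrix (a zero-scored dummy "no antecedent" column prepended to the masked pairwise scores) is normally fed into a marginal log-likelihood over all gold antecedents. A sketch of that loss as it appears in e2e-coref-style trainers; the name softmax_loss and the boolean antecedent_labels argument are assumptions, not something shown on this page:

import tensorflow as tf

def softmax_loss(antecedent_scores, antecedent_labels):
  # antecedent_scores: [num_mentions, max_ant + 1]; antecedent_labels: boolean, same shape.
  gold_scores = antecedent_scores + tf.log(tf.to_float(antecedent_labels))  # -inf where not a gold antecedent
  marginalized_gold_scores = tf.reduce_logsumexp(gold_scores, [1])  # [num_mentions]
  log_norm = tf.reduce_logsumexp(antecedent_scores, [1])            # [num_mentions]
  return log_norm - marginalized_gold_scores                        # [num_mentions]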
Example #6
def get_unary_scores(span_emb, config, dropout, num_labels = 1, name="span_scores"):
  """Compute span score with FFNN(span embedding).
  Args:
    span_emb: Tensor of [num_sentences, num_spans, emb].
  """
  with tf.variable_scope(name):
    scores = util.ffnn(span_emb, config["ffnn_depth"], config["ffnn_size"], num_labels,
                       dropout)  # [num_sentences, num_spans, num_labels] or [k, num_labels]
  if num_labels == 1:
    scores = tf.squeeze(scores, -1)  # [num_sentences, num_spans] or [k]
  return scores
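A hypothetical call site for get_unary_scores, with an illustrative config and a keep-probability placeholder standing in for dropout (all of the names and sizes below are made up for the example):

config = {"ffnn_depth": 1, "ffnn_size": 150}
dropout = tf.placeholder_with_default(1.0, [])              # keep probability
span_emb = tf.placeholder(tf.float32, [None, None, 400])    # [num_sentences, num_spans, emb]
mention_scores = get_unary_scores(span_emb, config, dropout, num_labels=1, name="mention_scores")
# mention_scores: [num_sentences, num_spans] after the final squeeze.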
Example #7
  def get_slow_antecedent_scores(self, top_span_emb, top_antecedents, top_antecedent_emb, top_antecedent_offsets, top_span_speaker_ids, genre_emb, top_scene_emb, top_antecedent_scene_emb, top_span_genders, top_span_fpronouns):
    k = util.shape(top_span_emb, 0)
    c = util.shape(top_antecedents, 1)

    feature_emb_list = []

    if self.config["use_metadata"]:
      top_antecedent_speaker_ids = tf.gather(top_span_speaker_ids, top_antecedents) # [k, c]
      same_speaker = tf.equal(tf.expand_dims(top_span_speaker_ids, 1), top_antecedent_speaker_ids) # [k, c]
      speaker_pair_emb = tf.gather(tf.get_variable("same_speaker_emb", [2, self.config["feature_size"]]), tf.to_int32(same_speaker)) # [k, c, emb]
      feature_emb_list.append(speaker_pair_emb)

      top_antecedent_genders = tf.gather(top_span_genders, top_antecedents)
      same_gender = ((tf.expand_dims(top_span_genders,1) * top_antecedent_genders) >= 0)
      same_gender_emb = tf.gather(tf.get_variable("same_gender_emb", [2, self.config["feature_size"]]), tf.to_int32(same_gender))
      feature_emb_list.append(same_gender_emb)

      top_antecedent_fpronouns = tf.gather(top_span_fpronouns, top_antecedents) # [k, c]
      fpronoun_count = tf.add(tf.expand_dims(top_span_fpronouns, 1), top_antecedent_fpronouns) # [k, c]
      no_same_speaker = tf.to_int32(tf.logical_not(tf.equal(tf.expand_dims(top_span_speaker_ids, 1), top_antecedent_speaker_ids))) # [k, c]
      same_speaker_and_fp = (tf.add(fpronoun_count,no_same_speaker) < 3)
      same_speaker_and_fp_emb = tf.gather(tf.get_variable("same_speaker_and_fp_emb", [2, self.config["feature_size"]]), tf.to_int32(same_speaker_and_fp))
      feature_emb_list.append(same_speaker_and_fp_emb)

      #tiled_genre_emb = tf.tile(tf.expand_dims(tf.expand_dims(genre_emb, 0), 0), [k, c, 1]) # [k, c, emb]
      #feature_emb_list.append(tiled_genre_emb)

    if self.config["use_features"]:
      antecedent_distance_buckets = self.bucket_distance(top_antecedent_offsets) # [k, c]
      antecedent_distance_emb = tf.gather(tf.get_variable("antecedent_distance_emb", [10, self.config["feature_size"]]), antecedent_distance_buckets) # [k, c]
      feature_emb_list.append(antecedent_distance_emb)

    feature_emb = tf.concat(feature_emb_list, 2) # [k, c, emb]
    feature_emb = tf.nn.dropout(feature_emb, self.dropout) # [k, c, emb]

    target_emb = tf.expand_dims(top_span_emb, 1) # [k, 1, emb]
    similarity_emb = top_antecedent_emb * target_emb # [k, c, emb]
    target_emb = tf.tile(target_emb, [1, c, 1]) # [k, c, emb]

    target_scene_emb = tf.expand_dims(top_scene_emb, 1) # [k, 1, emb-scene]
    target_scene_emb = tf.tile(target_scene_emb, [1, c, 1]) # [k, c, emb]

    if (self.config['use_video']):
      pair_emb = tf.concat([target_scene_emb, top_antecedent_scene_emb, target_emb, top_antecedent_emb, similarity_emb, feature_emb], 2) # [k, c, emb]
    else:
      pair_emb = tf.concat([target_emb, top_antecedent_emb, similarity_emb, feature_emb], 2) # [k, c, emb]

    with tf.variable_scope("slow_antecedent_scores"):
      slow_antecedent_scores = util.ffnn(pair_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout) # [k, c, 1]
    slow_antecedent_scores = tf.squeeze(slow_antecedent_scores, 2) # [k, c]
    return slow_antecedent_scores # [k, c]
Example #8
    def get_feature_score(self, tmp_feature_emb, tmp_feature_name):
        k = util.shape(tmp_feature_emb, 0)
        c = util.shape(tmp_feature_emb, 1)
        repeated_feature_emb = tf.tile(tf.expand_dims(tmp_feature_emb, 1), [1, c, 1, 1])  # [k, c, c, feature_size]
        tiled_feature_emb = tf.tile(tf.expand_dims(tmp_feature_emb, 2), [1, 1, c, 1])  # [k, c, c, feature_size]

        final_feature = tf.concat([repeated_feature_emb, tiled_feature_emb, repeated_feature_emb * tiled_feature_emb],
                                  3)  # [k, c, c, feature_size*3]
        final_feature = tf.reshape(final_feature,
                                   [k, c * c, self.config["feature_size"] * 3])  # [k, c*c, feature_size*3]

        with tf.variable_scope(tmp_feature_name):
            tmp_feature_scores = util.ffnn(final_feature, self.config["ffnn_depth"], self.config["ffnn_size"], 1,
                                           self.dropout)  # [k, c*c, 1]
            tmp_feature_scores = tf.reshape(tmp_feature_scores, [k, c, c, 1])  # [k, c, c]
        return tmp_feature_scores
Example #9
    def get_coreference_score(self, candidate_NPs_emb, pronoun_emb, candidate_NPs_speaker_ids, pronoun_speaker_id,
                              candidate_NP_offsets, pronoun_offsets, number_features, plurality_features):
        k = util.shape(candidate_NPs_emb, 0)
        c = util.shape(candidate_NPs_emb, 1)

        feature_emb_list = []

        if self.config["use_metadata"]:
            same_speaker = tf.equal(candidate_NPs_speaker_ids, tf.tile(pronoun_speaker_id, [1, c]))  # [k, c]
            speaker_pair_emb = tf.gather(tf.get_variable("same_speaker_emb", [2, self.config["feature_size"]]),
                                         tf.to_int32(same_speaker))  # [k, c, emb]
            feature_emb_list.append(speaker_pair_emb)

        if self.config["use_features"]:
            antecedent_distance_buckets = self.bucket_distance(
                tf.nn.relu(tf.tile(pronoun_offsets, [1, c]) - candidate_NP_offsets))  # [k, c]
            antecedent_distance_emb = tf.gather(
                tf.get_variable("antecedent_distance_emb", [10, self.config["feature_size"]]),
                antecedent_distance_buckets)  # [k, c, emb]
            feature_emb_list.append(antecedent_distance_emb)

        if self.config['knowledge_as_feature']:
            number_emb = tf.gather(tf.get_variable("number_emb", [2, self.config["feature_size"]]),
                                   number_features)  # [k, c, feature_size]
            plurality_emb = tf.gather(tf.get_variable("plurality_emb", [2, self.config["feature_size"]]),
                                      plurality_features)  # [k, c, feature_size]
            feature_emb_list.append(number_emb)
            feature_emb_list.append(plurality_emb)

        feature_emb = tf.concat(feature_emb_list, 2)  # [k, c, emb]  [?, ?, 40]
        feature_emb = tf.nn.dropout(feature_emb, self.dropout)  # [k, c, emb]

        target_emb = tf.tile(pronoun_emb, [1, c, 1])  # [k, c, emb]
        similarity_emb = candidate_NPs_emb * target_emb  # [k, c, emb]

        # pair = [pronoun_emb, candidate_emb, pronoun_emb * candidate_emb, features]
        pair_emb = tf.concat([target_emb, candidate_NPs_emb, similarity_emb, feature_emb], 2)  # [k, c, emb]

        with tf.variable_scope("slow_antecedent_scores"):
            slow_antecedent_scores = util.ffnn(pair_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1,
                                               self.dropout)  # [k, c, 1]
        slow_antecedent_scores = tf.squeeze(slow_antecedent_scores, 2)  # [k, c]
        return slow_antecedent_scores  # [k, c]
Example #10
  def get_antecedent_scores(self, mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb):
    num_mentions = util.shape(mention_emb, 0)
    max_antecedents = util.shape(antecedents, 1)

    feature_emb_list = []

    if self.config["use_metadata"]:
      antecedent_speaker_ids = tf.gather(mention_speaker_ids, antecedents) # [num_mentions, max_ant]
      same_speaker = tf.equal(tf.expand_dims(mention_speaker_ids, 1), antecedent_speaker_ids) # [num_mentions, max_ant]
      speaker_pair_emb = tf.gather(tf.get_variable("same_speaker_emb", [2, self.config["feature_size"]]), tf.to_int32(same_speaker)) # [num_mentions, max_ant, emb]
      feature_emb_list.append(speaker_pair_emb)

      tiled_genre_emb = tf.tile(tf.expand_dims(tf.expand_dims(genre_emb, 0), 0), [num_mentions, max_antecedents, 1]) # [num_mentions, max_ant, emb]
      feature_emb_list.append(tiled_genre_emb)

    if self.config["use_features"]:
      target_indices = tf.range(num_mentions) # [num_mentions]
      mention_distance = tf.expand_dims(target_indices, 1) - antecedents # [num_mentions, max_ant]
      mention_distance_bins = coref_ops.distance_bins(mention_distance) # [num_mentions, max_ant]
      mention_distance_bins.set_shape([None, None])
      mention_distance_emb = tf.gather(tf.get_variable("mention_distance_emb", [10, self.config["feature_size"]]), mention_distance_bins) # [num_mentions, max_ant]
      feature_emb_list.append(mention_distance_emb)

    feature_emb = tf.concat(feature_emb_list, 2) # [num_mentions, max_ant, emb]
    feature_emb = tf.nn.dropout(feature_emb, self.dropout) # [num_mentions, max_ant, emb]

    antecedent_emb = tf.gather(mention_emb, antecedents) # [num_mentions, max_ant, emb]
    target_emb_tiled = tf.tile(tf.expand_dims(mention_emb, 1), [1, max_antecedents, 1]) # [num_mentions, max_ant, emb]
    similarity_emb = antecedent_emb * target_emb_tiled # [num_mentions, max_ant, emb]

    pair_emb = tf.concat([target_emb_tiled, antecedent_emb, similarity_emb, feature_emb], 2) # [num_mentions, max_ant, emb]

    with tf.variable_scope("iteration"):
      with tf.variable_scope("antecedent_scoring"):
        antecedent_scores = util.ffnn(pair_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout) # [num_mentions, max_ant, 1]
    antecedent_scores = tf.squeeze(antecedent_scores, 2) # [num_mentions, max_ant]

    antecedent_mask = tf.log(tf.sequence_mask(antecedents_len, max_antecedents, dtype=tf.float32)) # [num_mentions, max_ant]
    antecedent_scores += antecedent_mask # [num_mentions, max_ant]

    antecedent_scores += tf.expand_dims(mention_scores, 1) + tf.gather(mention_scores, antecedents) # [num_mentions, max_ant]
    antecedent_scores = tf.concat([tf.zeros([util.shape(mention_scores, 0), 1]), antecedent_scores], 1) # [num_mentions, max_ant + 1]
    return antecedent_scores  # [num_mentions, max_ant + 1]
Example #11
  def get_slow_antecedent_scores(self, top_span_emb, top_antecedents, top_antecedent_emb, top_antecedent_offsets, top_span_speaker_ids, genre_emb, segment_distance=None):
    k = util.shape(top_span_emb, 0)
    c = util.shape(top_antecedents, 1)

    feature_emb_list = []

    if self.config["use_metadata"]:
      top_antecedent_speaker_ids = tf.gather(top_span_speaker_ids, top_antecedents) # [k, c]
      same_speaker = tf.equal(tf.expand_dims(top_span_speaker_ids, 1), top_antecedent_speaker_ids) # [k, c]
      speaker_pair_emb = tf.gather(tf.get_variable("same_speaker_emb", [2, self.config["feature_size"]], initializer=tf.truncated_normal_initializer(stddev=0.02)), tf.to_int32(same_speaker)) # [k, c, emb]
      feature_emb_list.append(speaker_pair_emb)

      tiled_genre_emb = tf.tile(tf.expand_dims(tf.expand_dims(genre_emb, 0), 0), [k, c, 1]) # [k, c, emb]
      feature_emb_list.append(tiled_genre_emb)

    if self.config["use_features"]:
      antecedent_distance_buckets = self.bucket_distance(top_antecedent_offsets) # [k, c]
      antecedent_distance_emb = tf.gather(tf.get_variable("antecedent_distance_emb", [10, self.config["feature_size"]], initializer=tf.truncated_normal_initializer(stddev=0.02)), antecedent_distance_buckets) # [k, c]
      feature_emb_list.append(antecedent_distance_emb)
    if segment_distance is not None:
      with tf.variable_scope('segment_distance', reuse=tf.AUTO_REUSE):
        segment_distance_emb = tf.gather(tf.get_variable("segment_distance_embeddings", [self.config['max_training_sentences'], self.config["feature_size"]], initializer=tf.truncated_normal_initializer(stddev=0.02)), segment_distance) # [k, emb]
      feature_emb_list.append(segment_distance_emb)

    feature_emb = tf.concat(feature_emb_list, 2) # [k, c, emb]
    feature_emb = tf.nn.dropout(feature_emb, self.dropout) # [k, c, emb]

    target_emb = tf.expand_dims(top_span_emb, 1) # [k, 1, emb]
    similarity_emb = top_antecedent_emb * target_emb # [k, c, emb]
    target_emb = tf.tile(target_emb, [1, c, 1]) # [k, c, emb]

    pair_emb = tf.concat([target_emb, top_antecedent_emb, similarity_emb, feature_emb], 2) # [k, c, emb]

    with tf.variable_scope("slow_antecedent_scores"):
      slow_antecedent_scores = util.ffnn(pair_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout) # [k, c, 1]
    slow_antecedent_scores = tf.squeeze(slow_antecedent_scores, 2) # [k, c]
    return slow_antecedent_scores # [k, c]
Example #12
    def get_antecedent_scores(self, mention_emb, mention_scores, antecedents,
                              antecedents_len, mention_starts, mention_ends,
                              mention_speaker_ids, genre_emb, text_emb,
                              text_outputs, context_pre_starts,
                              context_pos_ends):
        num_mentions = util.shape(mention_emb, 0)
        max_antecedents = util.shape(antecedents, 1)

        feature_emb_list = []

        if self.config["use_metadata"]:
            antecedent_speaker_ids = tf.gather(
                mention_speaker_ids, antecedents)  # [num_mentions, max_ant]
            same_speaker = tf.equal(
                tf.expand_dims(mention_speaker_ids, 1),
                antecedent_speaker_ids)  # [num_mentions, max_ant]
            speaker_pair_emb = tf.gather(
                tf.get_variable("same_speaker_emb",
                                [2, self.config["feature_size"]]),
                tf.to_int32(same_speaker))  # [num_mentions, max_ant, emb]
            feature_emb_list.append(speaker_pair_emb)

            tiled_genre_emb = tf.tile(
                tf.expand_dims(tf.expand_dims(genre_emb, 0), 0),
                [num_mentions, max_antecedents, 1
                 ])  # [num_mentions, max_ant, emb]
            feature_emb_list.append(tiled_genre_emb)

        if self.config["use_features"]:
            target_indices = tf.range(num_mentions)  # [num_mentions]
            mention_distance = tf.expand_dims(
                target_indices, 1) - antecedents  # [num_mentions, max_ant]
            mention_distance_bins = coref_ops.distance_bins(
                mention_distance)  # [num_mentions, max_ant]
            mention_distance_bins.set_shape([None, None])
            mention_distance_emb = tf.gather(
                tf.get_variable("mention_distance_emb",
                                [10, self.config["feature_size"]]),
                mention_distance_bins)  # [num_mentions, max_ant]
            feature_emb_list.append(mention_distance_emb)

        feature_emb = tf.concat(feature_emb_list,
                                2)  # [num_mentions, max_ant, emb]
        feature_emb = tf.nn.dropout(
            feature_emb, self.dropout)  # [num_mentions, max_ant, emb]

        ########### Context Embeddings #################

        context_pre_ends = mention_starts - 1
        context_pos_starts = mention_ends + 1

        context_pre_width = mention_starts - context_pre_starts
        context_pos_width = context_pos_ends - mention_ends

        context_start_emb = tf.gather(text_outputs, context_pre_starts)
        context_end_emb = tf.gather(text_outputs, context_pos_ends)

        context_output = tf.concat([context_start_emb, context_end_emb], 1)
        context_output = tf.tile(tf.expand_dims(context_output, 1),
                                 [1, self.config["max_context_width"], 1])

        mention_output = tf.tile(tf.expand_dims(mention_emb, 1),
                                 [1, self.config["max_context_width"], 1])

        # context_width = 1 + context_ends - context_starts
        context_pre_indices = tf.expand_dims(
            tf.range(
                self.config["max_context_width"] / 2), 0) + tf.expand_dims(
                    context_pre_starts, 1)  # [num_mentions, max_mention_width]
        context_pre_indices = tf.minimum(
            util.shape(text_outputs, 0) - 1,
            context_pre_indices)  # [num_mentions, max_mention_width]
        context_pre_mask = tf.expand_dims(
            tf.sequence_mask(context_pre_width,
                             self.config["max_context_width"] / 2,
                             dtype=tf.float32),
            2)  # [num_mentions, max_mention_width, 1]

        context_pos_indices = tf.expand_dims(
            tf.range(
                self.config["max_context_width"] / 2), 0) + tf.expand_dims(
                    context_pos_starts, 1)  # [num_mentions, max_mention_width]
        context_pos_indices = tf.minimum(
            util.shape(text_outputs, 0) - 1,
            context_pos_indices)  # [num_mentions, max_mention_width]
        context_pos_mask = tf.expand_dims(
            tf.sequence_mask(context_pos_width,
                             self.config["max_context_width"] / 2,
                             dtype=tf.float32),
            2)  # [num_mentions, max_mention_width, 1]

        context_indices = tf.concat([context_pre_indices, context_pos_indices],
                                    1)
        context_mask = tf.concat([context_pre_mask, context_pos_mask], 1)

        context_glove_emb = tf.gather(text_emb, context_indices)

        context_att_score = util.projection_name(
            tf.concat([context_glove_emb, context_output, mention_output], 2),
            1, "context_att")

        context_attention = tf.nn.softmax(
            context_att_score + tf.log(context_mask),
            dim=1)  # [num_mentions, max_mention_width, 1]

        context_emb = tf.reduce_sum(context_attention * context_glove_emb,
                                    1)  # [num_mentions, emb]

        mention_emb = tf.concat([context_emb, mention_emb], 1)

        ################################################

        antecedent_emb = tf.gather(mention_emb,
                                   antecedents)  # [num_mentions, max_ant, emb]
        self.mention_emb_shape = tf.shape(mention_emb)
        self.mention_start_shape = tf.shape(antecedents)
        target_emb_tiled = tf.tile(
            tf.expand_dims(mention_emb, 1),
            [1, max_antecedents, 1])  # [num_mentions, max_ant, emb]
        similarity_emb = antecedent_emb * target_emb_tiled  # [num_mentions, max_ant, emb]

        pair_emb = tf.concat(
            [target_emb_tiled, antecedent_emb, similarity_emb, feature_emb],
            2)  # [num_mentions, max_ant, emb]

        with tf.variable_scope("iteration"):
            with tf.variable_scope("antecedent_scoring"):
                antecedent_scores = util.ffnn(
                    pair_emb, self.config["ffnn_depth"],
                    self.config["ffnn_size"], 1,
                    self.dropout)  # [num_mentions, max_ant, 1]
        antecedent_scores = tf.squeeze(antecedent_scores,
                                       2)  # [num_mentions, max_ant]

        antecedent_mask = tf.log(
            tf.sequence_mask(antecedents_len,
                             max_antecedents,
                             dtype=tf.float32))  # [num_mentions, max_ant]
        antecedent_scores += antecedent_mask  # [num_mentions, max_ant]

        antecedent_scores += tf.expand_dims(mention_scores, 1) + tf.gather(
            mention_scores, antecedents)  # [num_mentions, max_ant]
        antecedent_scores = tf.concat(
            [tf.zeros([util.shape(mention_scores, 0), 1]), antecedent_scores],
            1)  # [num_mentions, max_ant + 1]
        return antecedent_scores  # [num_mentions, max_ant + 1]
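This example (and the co-attention example further down) also calls util.projection_name, which is not shown on this page. By analogy with the util.projection helper in related codebases, a reasonable reading is a named linear layer over the last dimension; the sketch below is an assumption about that helper, not the repository's actual code:

import tensorflow as tf

def projection_name(inputs, output_size, name):
  # Assumed helper: a per-position linear projection with its own variable scope,
  # so each attention head ("context_att", "c_a_window", ...) gets distinct weights.
  input_size = inputs.get_shape()[-1].value
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    w = tf.get_variable("weights", [input_size, output_size])
    b = tf.get_variable("bias", [output_size])
  return tf.tensordot(inputs, w, 1) + b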
Example #13
  def get_mention_scores(self, mention_emb):
    with tf.variable_scope("mention_scores"):
      return util.ffnn(mention_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout) # [num_mentions, 1]
Example #14
    def get_mention_proposal_and_loss(self,
                                      input_ids,
                                      input_mask,
                                      text_len,
                                      speaker_ids,
                                      genre,
                                      is_training,
                                      gold_starts,
                                      gold_ends,
                                      cluster_ids,
                                      sentence_map,
                                      span_mention=None):
        """get mention proposals"""

        start_end_loss_mask = tf.cast(
            tf.where(tf.cast(
                tf.math.greater_equal(input_ids, tf.zeros_like(input_ids)),
                tf.bool),
                     x=tf.ones_like(input_ids),
                     y=tf.zeros_like(input_ids)), tf.float32)
        input_ids = tf.where(tf.cast(
            tf.math.greater_equal(input_ids, tf.zeros_like(input_ids)),
            tf.bool),
                             x=input_ids,
                             y=tf.zeros_like(input_ids))
        input_mask = tf.where(tf.cast(
            tf.math.greater_equal(input_mask, tf.zeros_like(input_mask)),
            tf.bool),
                              x=input_mask,
                              y=tf.zeros_like(input_mask))
        text_len = tf.where(tf.cast(
            tf.math.greater_equal(text_len, tf.zeros_like(text_len)), tf.bool),
                            x=text_len,
                            y=tf.zeros_like(text_len))
        speaker_ids = tf.where(tf.cast(
            tf.math.greater_equal(speaker_ids, tf.zeros_like(speaker_ids)),
            tf.bool),
                               x=speaker_ids,
                               y=tf.zeros_like(speaker_ids))
        gold_starts = tf.where(tf.cast(
            tf.math.greater_equal(gold_starts, tf.zeros_like(gold_starts)),
            tf.bool),
                               x=gold_starts,
                               y=tf.zeros_like(gold_starts))
        gold_ends = tf.where(tf.cast(
            tf.math.greater_equal(gold_ends, tf.zeros_like(gold_ends)),
            tf.bool),
                             x=gold_ends,
                             y=tf.zeros_like(gold_ends))
        cluster_ids = tf.where(tf.cast(
            tf.math.greater_equal(cluster_ids, tf.zeros_like(cluster_ids)),
            tf.bool),
                               x=cluster_ids,
                               y=tf.zeros_like(cluster_ids))
        sentence_map = tf.where(tf.cast(
            tf.math.greater_equal(sentence_map, tf.zeros_like(sentence_map)),
            tf.bool),
                                x=sentence_map,
                                y=tf.zeros_like(sentence_map))
        span_mention = tf.where(tf.cast(
            tf.math.greater_equal(span_mention, tf.zeros_like(span_mention)),
            tf.bool),
                                x=span_mention,
                                y=tf.zeros_like(span_mention))
        span_mention_loss_mask = tf.cast(
            tf.where(tf.cast(
                tf.math.greater_equal(span_mention,
                                      tf.zeros_like(span_mention)), tf.bool),
                     x=tf.ones_like(span_mention),
                     y=tf.zeros_like(span_mention)), tf.float32)
        # span

        # gold_starts -> [1, 3, 5, 8, -1, -1, -1, -1] -> [1, 3, 5, 8, 0, 0, 0, 0]

        input_ids = tf.reshape(input_ids,
                               [-1, self.config["max_segment_len"]
                                ])  # (max_train_sent, max_segment_len)
        input_mask = tf.reshape(input_mask,
                                [-1, self.config["max_segment_len"]
                                 ])  # (max_train_sent, max_segment_len)
        text_len = tf.reshape(text_len, [-1])  # (max_train_sent)
        speaker_ids = tf.reshape(speaker_ids,
                                 [-1, self.config["max_segment_len"]
                                  ])  # (max_train_sent, max_segment_len)
        sentence_map = tf.reshape(sentence_map,
                                  [-1])  # (max_train_sent * max_segment_len)
        cluster_ids = tf.reshape(cluster_ids,
                                 [-1])  # (max_train_sent * max_segment_len)
        gold_starts = tf.reshape(gold_starts,
                                 [-1])  # (max_train_sent * max_segment_len)
        gold_ends = tf.reshape(gold_ends,
                               [-1])  # (max_train_sent * max_segment_len)
        span_mention = tf.reshape(span_mention, [
            self.config["max_training_sentences"],
            self.config["max_segment_len"] * self.config["max_segment_len"]
        ])
        # span_mention : (max_train_sent, max_segment_len, max_segment_len)

        model = modeling.BertModel(config=self.bert_config,
                                   is_training=is_training,
                                   input_ids=input_ids,
                                   input_mask=input_mask,
                                   use_one_hot_embeddings=False,
                                   scope='bert')

        self.dropout = self.get_dropout(self.config["dropout_rate"],
                                        is_training)
        mention_doc = model.get_sequence_output(
        )  # (max_train_sent, max_segment_len, hidden)
        mention_doc = self.flatten_emb_by_sentence(
            mention_doc, input_mask
        )  # (max_train_sent, max_segment_len, emb) -> (max_train_sent * max_segment_len, e): keep embeddings of valid tokens only
        num_words = util.shape(mention_doc,
                               0)  # max_train_sent * max_segment_len

        seg_mention_doc = tf.reshape(mention_doc, [
            self.config["max_training_sentences"],
            self.config["max_segment_len"], -1
        ])  # (max_train_sent, max_segment_len, embed)
        start_seg_mention_doc = tf.stack(
            [seg_mention_doc] * self.config["max_segment_len"], axis=1
        )  # (max_train_sent, 1, max_segment_len, embed) -> (max_train_sent, max_segment_len, max_segment_len, embed)
        end_seg_mention_doc = tf.stack(
            [
                seg_mention_doc,
            ] * self.config["max_segment_len"], axis=2
        )  # (max_train_sent, max_segment_len, 1, embed) -> (max_train_sent, max_segment_len, max_segment_len, embed)
        span_mention_doc = tf.concat(
            [start_seg_mention_doc, end_seg_mention_doc], axis=-1
        )  # (max_train_sent, max_segment_len, max_segment_len, embed * 2)
        span_mention_doc = tf.reshape(span_mention_doc,
                                      (self.config["max_training_sentences"] *
                                       self.config["max_segment_len"] *
                                       self.config["max_segment_len"], -1))
        # # (max_train_sent * max_segment_len * max_segment_len, embed * 2)

        with tf.variable_scope("span_scores",
                               reuse=tf.AUTO_REUSE):  # [k, 1] score for each candidate span
            span_scores = util.ffnn(
                span_mention_doc, self.config["ffnn_depth"],
                self.config["ffnn_size"] * 2, 1,
                self.dropout)  # (max_train_sent, max_segment_len, 1)
        with tf.variable_scope("start_scores",
                               reuse=tf.AUTO_REUSE):  # [k, 1] score for each candidate start
            start_scores = util.ffnn(
                mention_doc, self.config["ffnn_depth"],
                self.config["ffnn_size"], 1,
                self.dropout)  # (max_train_sent, max_segment_len, 1)
        with tf.variable_scope("end_scores",
                               reuse=tf.AUTO_REUSE):  # [k, 1] score for each candidate end
            end_scores = util.ffnn(
                mention_doc, self.config["ffnn_depth"],
                self.config["ffnn_size"], 1,
                self.dropout)  # (max_train_sent, max_segment_len, 1)

        gold_start_label = tf.reshape(gold_starts, [-1, 1])
        # gold_starts -> [1, 3, 5, 8, -1, -1, -1, -1]
        start_value = tf.reshape(tf.ones_like(gold_starts), [-1])
        start_shape = tf.constant([
            self.config["max_training_sentences"] *
            self.config["max_segment_len"]
        ])
        gold_start_label = tf.cast(
            tf.scatter_nd(gold_start_label, start_value, start_shape),
            tf.int32)
        # gold_start_label = tf.boolean_mask(gold_start_label, tf.reshape(input_mask, [-1]))

        gold_end_label = tf.reshape(gold_ends, [-1, 1])
        end_value = tf.reshape(tf.ones_like(gold_ends), [-1])
        end_shape = tf.constant([
            self.config["max_training_sentences"] *
            self.config["max_segment_len"]
        ])
        gold_end_label = tf.cast(
            tf.scatter_nd(gold_end_label, end_value, end_shape), tf.int32)
        # gold_end_label = tf.boolean_mask(gold_end_label, tf.reshape(input_mask, [-1]))
        start_scores = tf.cast(tf.reshape(tf.sigmoid(start_scores), [-1]),
                               tf.float32)
        end_scores = tf.cast(tf.reshape(tf.sigmoid(end_scores), [-1]),
                             tf.float32)
        span_scores = tf.cast(tf.reshape(tf.sigmoid(span_scores), [-1]),
                              tf.float32)
        # span_mention = tf.cast(span_mention, tf.float32)

        start_scores = tf.stack([(1 - start_scores), start_scores], axis=-1)
        end_scores = tf.stack([(1 - end_scores), end_scores], axis=-1)
        span_scores = tf.stack([(1 - span_scores), span_scores], axis=-1)

        gold_start_label = tf.cast(
            tf.one_hot(tf.reshape(gold_start_label, [-1]), 2, axis=-1),
            tf.float32)
        gold_end_label = tf.cast(
            tf.one_hot(tf.reshape(gold_end_label, [-1]), 2, axis=-1),
            tf.float32)
        span_mention = tf.cast(
            tf.one_hot(tf.reshape(span_mention, [-1]), 2, axis=-1), tf.float32)

        start_end_loss_mask = tf.reshape(start_end_loss_mask, [-1])
        start_loss = self.bce_loss(y_pred=start_scores,
                                   y_true=gold_start_label)
        end_loss = self.bce_loss(y_pred=end_scores, y_true=gold_end_label)
        span_loss = self.bce_loss(y_pred=span_scores, y_true=span_mention)

        start_loss = tf.reduce_mean(
            tf.multiply(start_loss, tf.cast(start_end_loss_mask, tf.float32)))
        end_loss = tf.reduce_mean(
            tf.multiply(end_loss, tf.cast(start_end_loss_mask, tf.float32)))
        span_loss = tf.reduce_mean(
            tf.multiply(span_loss, tf.cast(span_mention_loss_mask,
                                           tf.float32)))

        if span_mention is None:
            loss = self.config["start_ratio"] * start_loss + self.config[
                "end_ratio"] * end_loss
            return loss, start_scores, end_scores
        else:
            loss = self.config["start_ratio"] * start_loss + self.config[
                "end_ratio"] * end_loss + self.config[
                    "mention_ratio"] * span_loss
            return loss, start_scores, end_scores, span_scores
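get_mention_proposal_and_loss above delegates to self.bce_loss, which is not reproduced on this page. Given that both the scores and the labels are stacked into two-class [P(negative), P(positive)] form just before the call, a matching helper would be an elementwise cross-entropy along the last axis; the following is purely an assumption about that missing method:

    def bce_loss(self, y_pred, y_true):
        # Assumed helper: elementwise cross-entropy between two-class probability vectors,
        # where y_pred and y_true are both [..., 2]; masking and averaging happen in the caller.
        eps = 1e-8
        return -tf.reduce_sum(y_true * tf.log(y_pred + eps), axis=-1)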
Example #15
  def get_context_antecedent_scores(self,
                                    mention_emb,
                                    mention_scores,
                                    antecedents,
                                    antecedents_len,
                                    mention_starts,
                                    mention_ends,
                                    mention_speaker_ids,
                                    genre_emb,
                                    context_starts,
                                    context_ends,
                                    text_outputs,
                                    text_emb):
    num_mentions = util.shape(mention_emb, 0)
    max_antecedents = util.shape(antecedents, 1)

    self.num_words = tf.shape(text_outputs)
    self.num_mentions = num_mentions

    feature_emb_list = []

    if self.config["use_metadata"]:
      antecedent_speaker_ids = tf.gather(mention_speaker_ids, antecedents) # [num_mentions, max_ant]
      same_speaker = tf.equal(tf.expand_dims(mention_speaker_ids, 1), antecedent_speaker_ids) # [num_mentions, max_ant]
      speaker_pair_emb = tf.gather(tf.get_variable("same_speaker_emb", [2, self.config["feature_size"]]), tf.to_int32(same_speaker)) # [num_mentions, max_ant, emb]
      feature_emb_list.append(speaker_pair_emb)

      tiled_genre_emb = tf.tile(tf.expand_dims(tf.expand_dims(genre_emb, 0), 0), [num_mentions, max_antecedents, 1]) # [num_mentions, max_ant, emb]
      feature_emb_list.append(tiled_genre_emb)

    if self.config["use_features"]:
      target_indices = tf.range(num_mentions) # [num_mentions]
      mention_distance = tf.expand_dims(target_indices, 1) - antecedents # [num_mentions, max_ant]
      mention_distance_bins = coref_ops.distance_bins(mention_distance) # [num_mentions, max_ant]
      mention_distance_bins.set_shape([None, None])
      mention_distance_emb = tf.gather(tf.get_variable("mention_distance_emb", [10, self.config["feature_size"]]), mention_distance_bins) # [num_mentions, max_ant]
      feature_emb_list.append(mention_distance_emb)

    feature_emb = tf.concat(feature_emb_list, 2) # [num_mentions, max_ant, emb]
    feature_emb = tf.nn.dropout(feature_emb, self.dropout) # [num_mentions, max_ant, emb]


    #############################
    #
    # Get matrix for co-attention
    #
    #############################
    

    ####### Mention Level #######
    
    mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
    mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]

    mention_features = tf.concat([mention_start_emb, mention_end_emb], 1)
    
    mention_width = 1 + mention_ends - mention_starts # [num_mentions]
    mention_indices = tf.expand_dims(tf.range(self.config["max_mention_width"]), 0) + tf.expand_dims(mention_starts, 1) # [num_mentions, max_mention_width]
    mention_indices = tf.minimum(util.shape(text_outputs, 0) - 1, mention_indices) # [num_mentions, max_mention_width]
    mention_mask = tf.expand_dims(tf.sequence_mask(mention_width, self.config["max_mention_width"], dtype=tf.float32), 2) # [num_mentions, max_mention_width, 1]

    antecedent_indices = tf.gather(mention_indices, antecedents)
    antecedent_mask = tf.gather(mention_mask, antecedents)
    antecedent_indices_emb = tf.gather(text_outputs, antecedent_indices)

    target_indices = tf.tile(tf.expand_dims(mention_indices, 1), [1, max_antecedents, 1])
    target_mask = tf.tile(tf.expand_dims(mention_mask, 1), [1, max_antecedents, 1, 1])
    target_indices_emb = tf.gather(text_outputs, target_indices)


    ####### Context Level #######

    context_start_emb = tf.gather(text_outputs, context_starts)
    context_end_emb = tf.gather(text_outputs, context_ends)

    context_width = 1 + context_ends - context_starts
    context_indices = tf.expand_dims(tf.range(self.config["max_context_width"]), 0) + tf.expand_dims(context_starts, 1) # [num_mentions, max_mention_width]
    context_indices = tf.minimum(util.shape(text_outputs, 0) - 1, context_indices) # [num_mentions, max_mention_width]
    context_mask = tf.expand_dims(tf.sequence_mask(context_width, self.config["max_context_width"], dtype=tf.float32), 2) # [num_mentions, max_mention_width, 1]

    antecedent_context_indices = tf.gather(context_indices, antecedents)
    antecedent_context_mask = tf.gather(context_mask, antecedents)
    antecedent_context_indices_emb = tf.gather(text_outputs, antecedent_context_indices)

    target_context_indices = tf.tile(tf.expand_dims(context_indices, 1), [1, max_antecedents, 1])
    target_context_mask = tf.tile(tf.expand_dims(context_mask, 1), [1, max_antecedents, 1, 1])
    target_context_indices_emb = tf.gather(text_outputs, target_context_indices)


    #### Initial Embeddings #####
    
    antecedent_emb = tf.gather(mention_emb, antecedents) # [num_mentions, max_ant, emb]
    target_emb_tiled = tf.tile(tf.expand_dims(mention_emb, 1), [1, max_antecedents, 1]) # [num_mentions, max_ant, emb]
    
    context_emb = tf.concat([context_start_emb, context_end_emb], 1)

    antecedent_context_emb = tf.gather(context_emb, antecedents) # [num_mentions, max_ant, emb]
    target_context_emb_tiled = tf.tile(tf.expand_dims(context_emb, 1), [1, max_antecedents, 1]) # [num_mentions, max_ant, emb]

    similarity_emb = antecedent_emb * target_emb_tiled # [num_mentions, max_ant, emb]
    

    #############################
    #
    # Calculate Co-attention
    #
    #############################


    ###### C_a Attention ########

    window_emb = tf.concat([antecedent_emb, target_emb_tiled, target_context_emb_tiled], 2)
    window_scores = util.projection_name(window_emb, 100, 'c_a_window')
    window_scores = tf.tile(tf.expand_dims(window_scores, 2), [1, 1, self.config['max_context_width'], 1])

    target_scores = util.projection_name(antecedent_context_indices_emb, 100, 'c_a_target')

    temp_scores = util.projection_name(window_scores + target_scores, 1, 'att_score')

    temp_att = tf.nn.softmax(temp_scores + tf.log(antecedent_context_mask), dim=2) # [num_mentions, max_mention_width, 1]
    antecedent_context_emb = tf.reduce_sum(temp_att * tf.gather(text_emb, antecedent_context_indices), 2)


    ###### C_t Attention ########

    window_emb = tf.concat([antecedent_emb, target_emb_tiled, antecedent_context_emb], 2)
    window_scores = util.projection_name(window_emb, 100, 'c_t_window')
    window_scores = tf.tile(tf.expand_dims(window_scores, 2), [1, 1, self.config['max_context_width'], 1])

    target_scores = util.projection_name(target_context_indices_emb, 100, 'c_t_target')

    temp_scores = util.projection_name(window_scores + target_scores, 1, 'att_score') 

    temp_att = tf.nn.softmax(temp_scores + tf.log(target_context_mask), dim=2) # [num_mentions, max_mention_width, 1]
    target_context_emb_tiled = tf.reduce_sum(temp_att * tf.gather(text_emb, target_context_indices), 2)

    
    ###### M_t Attention ########

    window_emb = tf.concat([antecedent_emb, antecedent_context_emb, target_context_emb_tiled], 2)
    window_scores = util.projection_name(window_emb, 100, 'm_t_window')
    window_scores = tf.tile(tf.expand_dims(window_scores, 2), [1, 1, self.config['max_mention_width'], 1])

    target_scores = util.projection_name(target_indices_emb, 100, 'm_t_target')

    temp_scores = util.projection_name(window_scores + target_scores, 1, 'att_score')
    
    temp_att = tf.nn.softmax(temp_scores + tf.log(target_mask), dim=2) # [num_mentions, max_mention_width, 1]
    target_emb_tiled = tf.reduce_sum(temp_att * tf.gather(text_emb, target_indices), 2)


    ###### M_a Attention ########

    window_emb = tf.concat([target_emb_tiled, target_context_emb_tiled, antecedent_context_emb], 2)
    window_scores = util.projection_name(window_emb, 100, 'm_a_window')
    window_scores = tf.tile(tf.expand_dims(window_scores, 2), [1, 1, self.config['max_mention_width'], 1])

    target_scores = util.projection_name(antecedent_indices_emb, 100, 'm_a_target')

    temp_scores = util.projection_name(window_scores + target_scores, 1, 'att_score')
  
    temp_att = tf.nn.softmax(temp_scores + tf.log(antecedent_mask), dim=2) # [num_mentions, max_mention_width, 1]
    antecedent_emb = tf.reduce_sum(temp_att * tf.gather(text_emb, antecedent_indices), 2)
    

    #############################
    #
    # Calculate Pair Embeddings
    #
    #############################

    antecedent_feature = tf.gather(mention_features, antecedents) # [num_mentions, max_ant, emb]
    target_feature = tf.tile(tf.expand_dims(mention_features, 1), [1, max_antecedents, 1]) # [num_mentions, max_ant, emb]
    # similarity_emb = antecedent_emb * target_emb_tiled # [num_mentions, max_ant, emb]

    # pair_emb = tf.concat([target_emb_tiled_1, antecedent_emb_1, similarity_emb, feature_emb], 2) # [num_mentions, max_ant, emb]

    pair_emb = tf.concat([
                          target_feature,
                          target_emb_tiled,
                          antecedent_feature,
                          antecedent_emb,
                          antecedent_context_emb,
                          target_context_emb_tiled,
                          similarity_emb,
                          feature_emb], 2)

    '''
    pair_emb = tf.nn.relu(util.projection_name(target_emb_tiled, self.config['ffnn_size'], 'comp_mt') +\
                util.projection_name(antecedent_emb, self.config['ffnn_size'], 'comp_ma') +\
                util.projection_name(antecedent_context_emb_1, self.config['ffnn_size'], 'comp_ca') +\
                util.projection_name(target_context_emb_tiled_1, self.config['ffnn_size'], 'comp_ct') +\
                util.projection_name(similarity_emb, self.config['ffnn_size'], 'comp_sim') +\
                util.projection_name(feature_emb, self.config['ffnn_size'], 'comp_feature'))
    '''

    #############################

    with tf.variable_scope("iteration"):
      with tf.variable_scope("antecedent_scoring"):
        antecedent_scores = util.ffnn(pair_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout) # [num_mentions, max_ant, 1]
    antecedent_scores = tf.squeeze(antecedent_scores, 2) # [num_mentions, max_ant]

    antecedent_mask = tf.log(tf.sequence_mask(antecedents_len, max_antecedents, dtype=tf.float32)) # [num_mentions, max_ant]
    antecedent_scores += antecedent_mask # [num_mentions, max_ant]

    antecedent_scores += tf.expand_dims(mention_scores, 1) + tf.gather(mention_scores, antecedents) # [num_mentions, max_ant]
    antecedent_scores = tf.concat([tf.zeros([util.shape(mention_scores, 0), 1]), antecedent_scores], 1) # [num_mentions, max_ant + 1]
    return antecedent_scores  # [num_mentions, max_ant + 1]
Example #16
  def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb, lm_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids, scene_emb, genders, fpronouns):
    self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
    self.lexical_dropout = self.get_dropout(self.config["lexical_dropout_rate"], is_training)
    self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training)

    num_sentences = tf.shape(context_word_emb)[0]
    max_sentence_length = tf.shape(context_word_emb)[1]

    context_emb_list = [context_word_emb]
    head_emb_list = [head_word_emb]

    if self.config["char_embedding_size"] > 0:
      char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
      flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
      flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
      aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
      context_emb_list.append(aggregated_char_emb)
      head_emb_list.append(aggregated_char_emb)

    if not self.lm_file:
      elmo_module = hub.Module("https://tfhub.dev/google/elmo/2")
      lm_embeddings = elmo_module(
          inputs={"tokens": tokens, "sequence_len": text_len},
          signature="tokens", as_dict=True)
      word_emb = lm_embeddings["word_emb"]  # [num_sentences, max_sentence_length, 512]
      lm_emb = tf.stack([tf.concat([word_emb, word_emb], -1),
                         lm_embeddings["lstm_outputs1"],
                         lm_embeddings["lstm_outputs2"]], -1)  # [num_sentences, max_sentence_length, 1024, 3]
    lm_emb_size = util.shape(lm_emb, 2)
    lm_num_layers = util.shape(lm_emb, 3)
    with tf.variable_scope("lm_aggregation"):
      self.lm_weights = tf.nn.softmax(tf.get_variable("lm_scores", [lm_num_layers], initializer=tf.constant_initializer(0.0)))
      self.lm_scaling = tf.get_variable("lm_scaling", [], initializer=tf.constant_initializer(1.0))
    flattened_lm_emb = tf.reshape(lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
    flattened_aggregated_lm_emb = tf.matmul(flattened_lm_emb, tf.expand_dims(self.lm_weights, 1)) # [num_sentences * max_sentence_length * emb, 1]
    aggregated_lm_emb = tf.reshape(flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size])
    aggregated_lm_emb *= self.lm_scaling
    context_emb_list.append(aggregated_lm_emb)

    context_emb = tf.concat(context_emb_list, 2) # [num_sentences, max_sentence_length, emb]
    head_emb = tf.concat(head_emb_list, 2) # [num_sentences, max_sentence_length, emb]
    context_emb = tf.nn.dropout(context_emb, self.lexical_dropout) # [num_sentences, max_sentence_length, emb]
    head_emb = tf.nn.dropout(head_emb, self.lexical_dropout) # [num_sentences, max_sentence_length, emb]

    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length) # [num_sentence, max_sentence_length]

    context_outputs = self.lstm_contextualize(context_emb, text_len, text_len_mask) # [num_words, emb]
    num_words = util.shape(context_outputs, 0)

    genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]

    sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
    flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
    flattened_head_emb = self.flatten_emb_by_sentence(head_emb, text_len_mask) # [num_words]

    candidate_starts = tf.tile(tf.expand_dims(tf.range(num_words), 1), [1, self.max_span_width]) # [num_words, max_span_width]
    candidate_ends = candidate_starts + tf.expand_dims(tf.range(self.max_span_width), 0) # [num_words, max_span_width]
    
    #debug
    prev_can_st = candidate_starts
    prev_can_ends = candidate_ends
    #debug

    candidate_start_sentence_indices = tf.gather(flattened_sentence_indices, candidate_starts) # [num_words, max_span_width]
    candidate_end_sentence_indices = tf.gather(flattened_sentence_indices, tf.minimum(candidate_ends, num_words - 1)) # [num_words, max_span_width]
    candidate_mask = tf.logical_and(candidate_ends < num_words, tf.equal(candidate_start_sentence_indices, candidate_end_sentence_indices)) # [num_words, max_span_width]
    flattened_candidate_mask = tf.reshape(candidate_mask, [-1]) # [num_words * max_span_width]
    candidate_starts = tf.boolean_mask(tf.reshape(candidate_starts, [-1]), flattened_candidate_mask) # [num_candidates]
    candidate_ends = tf.boolean_mask(tf.reshape(candidate_ends, [-1]), flattened_candidate_mask) # [num_candidates]

    combined_candidate_st = candidate_starts*10000 + candidate_ends
    combined_gold_st = gold_starts*10000 + gold_ends
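    # Pack each (start, end) pair into a single integer (valid while documents are shorter
    # than 10,000 tokens) so gold spans can be matched against candidates with set operations:
    # the two setdiff1d calls below recover the candidate indices of the gold mentions.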

    _, non_top_span_list = tf.setdiff1d(combined_candidate_st, combined_gold_st) # [num_candidates - num_gold_mentions]
    whole_candidate_indices_list = tf.range(util.shape(candidate_starts,0)) # [num_candidates]
    gold_span_indices, _ = tf.setdiff1d(whole_candidate_indices_list, non_top_span_list) #[num_gold_mentions]


    candidate_sentence_indices = tf.boolean_mask(tf.reshape(candidate_start_sentence_indices, [-1]), flattened_candidate_mask) # [num_candidates]

    candidate_cluster_ids = self.get_candidate_labels(candidate_starts, candidate_ends, gold_starts, gold_ends, cluster_ids) # [num_candidates]

    candidate_span_emb = self.get_span_emb(flattened_head_emb, context_outputs, candidate_starts, candidate_ends) # [num_candidates, emb]


    # Video scene embedding
    ffnn_scene_emb = util.ffnn(scene_emb, num_hidden_layers=self.config["ffnn_depth"], hidden_size=400, output_size=128, dropout=self.dropout) # [num_words, 128]
    candidate_scene_emb = self.get_scene_emb(ffnn_scene_emb, candidate_starts) # [num_candidates, 128]
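    # Scene features per candidate; get_scene_emb presumably indexes the projected
    # scene embeddings by each candidate's start position.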

    '''
    #Comment : This part calculates mention scores and prunes mentions.
    #It is not used for this task because mention boundaries are given.

    candidate_mention_scores =  self.get_mention_scores(candidate_span_emb) # [k, 1]
    candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [k]

    k = tf.to_int32(tf.floor(tf.to_float(tf.shape(context_outputs)[0]) * self.config["top_span_ratio"]))
    top_span_indices = coref_ops.extract_spans(tf.expand_dims(candidate_mention_scores, 0),
                                               tf.expand_dims(candidate_starts, 0),
                                               tf.expand_dims(candidate_ends, 0),
                                               tf.expand_dims(k, 0),
                                               util.shape(context_outputs, 0),
                                               True) # [1, k]
    top_span_indices.set_shape([1, None])
    top_span_indices = tf.squeeze(top_span_indices, 0) # [k]
    '''

    ######## Only Using Gold Span Indices #####
    k = tf.to_int32(util.shape(gold_span_indices,0))
    top_span_indices = gold_span_indices
    ############

    top_span_starts = tf.gather(candidate_starts, top_span_indices) # [k]
    top_span_ends = tf.gather(candidate_ends, top_span_indices) # [k]
    top_span_emb = tf.gather(candidate_span_emb, top_span_indices) # [k, emb]
    top_scene_emb = tf.gather(candidate_scene_emb, top_span_indices) # [k, emb-scene]

    top_span_cluster_ids = tf.gather(candidate_cluster_ids, top_span_indices) # [k]
    #top_span_mention_scores = tf.gather(candidate_mention_scores, top_span_indices) # [k]
    top_span_sentence_indices = tf.gather(candidate_sentence_indices, top_span_indices) # [k]
    top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts) # [k]
    top_span_genders = tf.gather(genders, top_span_ends)
    top_span_fpronouns = tf.gather(fpronouns, top_span_ends)

    # k : total number of candidate spans (M in the paper)
    # c : number of antecedents considered per span (K in the paper)
    c = tf.minimum(self.config["max_top_antecedents"], k)

    if self.config["coarse_to_fine"]:
      top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(top_span_emb, top_span_mention_scores, c)
    else:
      #top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(top_span_emb, top_span_mention_scores, c)
      top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_prnuing_wo_mention_score(top_span_emb, c)

    dummy_scores = tf.zeros([k, 1]) # [k, 1]
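    # Higher-order refinement: each iteration attends over the current antecedent
    # distribution and gates the attended embedding back into the span representation.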
    for i in range(self.config["coref_depth"]):
      with tf.variable_scope("coref_layer", reuse=(i > 0)):
        top_antecedent_emb = tf.gather(top_span_emb, top_antecedents) # [k, c, emb]
        top_antecedent_scene_emb = tf.gather(top_scene_emb, top_antecedents) # [k, c, emb-scene]
        top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(top_span_emb, top_antecedents, top_antecedent_emb, top_antecedent_offsets, top_span_speaker_ids, genre_emb, top_scene_emb, top_antecedent_scene_emb, top_span_genders, top_span_fpronouns) # [k, c]
        top_antecedent_weights = tf.nn.softmax(tf.concat([dummy_scores, top_antecedent_scores], 1)) # [k, c + 1]
        top_antecedent_emb = tf.concat([tf.expand_dims(top_span_emb, 1), top_antecedent_emb], 1) # [k, c + 1, emb]
        attended_span_emb = tf.reduce_sum(tf.expand_dims(top_antecedent_weights, 2) * top_antecedent_emb, 1) # [k, emb]
        with tf.variable_scope("f"):
          f = tf.sigmoid(util.projection(tf.concat([top_span_emb, attended_span_emb], 1), util.shape(top_span_emb, -1))) # [k, emb]
          top_span_emb = f * attended_span_emb + (1 - f) * top_span_emb # [k, emb]

    top_antecedent_scores = tf.concat([dummy_scores, top_antecedent_scores], 1) # [k, c + 1]

    top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids, top_antecedents) # [k, c]
    top_antecedent_cluster_ids += tf.to_int32(tf.log(tf.to_float(top_antecedents_mask))) # [k, c]
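    # log(0) = -inf for masked antecedents; the int32 cast turns it into a large negative
    # cluster id, so masked slots can never be counted as coreferent.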
    same_cluster_indicator = tf.equal(top_antecedent_cluster_ids, tf.expand_dims(top_span_cluster_ids, 1)) # [k, c]
    non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0, 1) # [k, 1]
    pairwise_labels = tf.logical_and(same_cluster_indicator, non_dummy_indicator) # [k, c]
    dummy_labels = tf.logical_not(tf.reduce_any(pairwise_labels, 1, keepdims=True)) # [k, 1]
    top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels], 1) # [k, c + 1]

    top_antecedent_prob = tf.nn.softmax(top_antecedent_scores, 1) # [k, c + 1]
    if (self.config["use_gender_logic_rule"]):
      top_antecedent_prob_with_logic = self.project_logic_rule(top_antecedent_prob, top_span_genders, top_span_fpronouns, top_span_speaker_ids, top_antecedents, k)
      '''
      marginal_prob = tf.reduce_sum(top_antecedent_prob*tf.to_float(top_antecedent_labels),axis=1)
      gold_loss = -1 * tf.reduce_sum(tf.log(marginal_prob))
      top_antecedent_scores = top_antecedent_prob      
      '''
      origin_loss = self.softmax_loss(top_antecedent_scores, top_antecedent_labels) # [k]
      origin_loss = tf.reduce_sum(origin_loss)

      # cross_entropy : -1 * ground_truth * log(prediction)
      #teacher_loss = tf.reduce_min(tf.nn.softmax_cross_entropy_with_logits(labels=top_antecedent_prob_with_logic, logits=top_antecedent_scores))
      teacher_loss = tf.reduce_sum(-tf.reduce_sum(top_antecedent_prob_with_logic * tf.log(top_antecedent_prob + 1e-10), reduction_indices=[1]))

      pi = tf.minimum(self.config["logic_rule_pi_zero"], 1.0 - tf.pow(self.config["logic_rule_imitation_alpha"], tf.to_float(self.global_step)+1.0)) 
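      # pi grows from roughly (1 - alpha) toward logic_rule_pi_zero over training,
      # gradually shifting weight from the original loss to the teacher (rule-projected) loss.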

      # For Validation Loss
      marginal_prob = tf.reduce_sum(top_antecedent_prob_with_logic*tf.to_float(top_antecedent_labels),axis=1)
      validation_loss = -1 * tf.reduce_sum(tf.log(marginal_prob))
      
      #loss = teacher_loss + origin_loss
      loss = tf.where(is_training, pi*teacher_loss + (1.0-pi)*origin_loss, validation_loss)

      top_antecedent_scores = top_antecedent_prob_with_logic
    else:
      loss = self.softmax_loss(top_antecedent_scores, top_antecedent_labels) # [k]
      loss = tf.reduce_sum(loss) # []
      teacher_loss = loss
      origin_loss = loss

    return [candidate_starts, candidate_ends, top_span_starts, top_span_ends, top_antecedents, top_antecedent_scores, teacher_loss, origin_loss], loss
Exemple #17
0
    def get_predictions_and_loss(self, context_word_emb, head_word_emb, lm_emb,
                                 char_index, text_len, speaker_ids, genre,
                                 is_training, gold_starts, gold_ends,
                                 cluster_ids, bridging_ante_cids, is_status,
                                 us_mask):
        self.dropout = self.get_dropout(self.config["dropout_rate"],
                                        is_training)
        self.lexical_dropout = self.get_dropout(
            self.config["lexical_dropout_rate"], is_training)
        self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"],
                                             is_training)

        num_sentences = tf.shape(context_word_emb)[0]
        max_sentence_length = tf.shape(context_word_emb)[1]

        context_emb_list = [context_word_emb]
        head_emb_list = [head_word_emb]

        if self.config["char_embedding_size"] > 0:
            char_emb = tf.gather(
                tf.get_variable(
                    "char_embeddings",
                    [len(self.char_dict), self.config["char_embedding_size"]]),
                char_index
            )  # [num_sentences, max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [
                num_sentences * max_sentence_length,
                util.shape(char_emb, 2),
                util.shape(char_emb, 3)
            ])  # [num_sentences * max_sentence_length, max_word_length, emb]
            flattened_aggregated_char_emb = util.cnn(
                flattened_char_emb, self.config["filter_widths"],
                self.config["filter_size"]
            )  # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
                num_sentences, max_sentence_length,
                util.shape(flattened_aggregated_char_emb, 1)
            ])  # [num_sentences, max_sentence_length, emb]
            context_emb_list.append(aggregated_char_emb)
            head_emb_list.append(aggregated_char_emb)

        if self.lm_file:
            lm_emb_size = util.shape(lm_emb, 2)
            lm_num_layers = util.shape(lm_emb, 3)
            with tf.variable_scope("lm_aggregation"):
                self.lm_weights = tf.nn.softmax(
                    tf.get_variable("lm_scores", [lm_num_layers],
                                    initializer=tf.constant_initializer(0.0)))
                self.lm_scaling = tf.get_variable(
                    "lm_scaling", [], initializer=tf.constant_initializer(1.0))
            flattened_lm_emb = tf.reshape(lm_emb, [
                num_sentences * max_sentence_length * lm_emb_size,
                lm_num_layers
            ])
            flattened_aggregated_lm_emb = tf.matmul(
                flattened_lm_emb, tf.expand_dims(
                    self.lm_weights,
                    1))  # [num_sentences * max_sentence_length * emb, 1]
            aggregated_lm_emb = tf.reshape(
                flattened_aggregated_lm_emb,
                [num_sentences, max_sentence_length, lm_emb_size])
            aggregated_lm_emb *= self.lm_scaling
            context_emb_list.append(aggregated_lm_emb)

        context_emb = tf.concat(context_emb_list,
                                2)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.concat(head_emb_list,
                             2)  # [num_sentences, max_sentence_length, emb]
        context_emb = tf.nn.dropout(
            context_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.nn.dropout(
            head_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

        text_len_mask = tf.sequence_mask(
            text_len,
            maxlen=max_sentence_length)  # [num_sentences, max_sentence_length]

        context_outputs = self.lstm_contextualize(
            context_emb, text_len, text_len_mask)  # [num_words, emb]

        genre_emb = tf.gather(
            tf.get_variable("genre_embeddings",
                            [len(self.genres), self.config["feature_size"]]),
            genre)  # [emb]

        flattened_head_emb = self.flatten_emb_by_sentence(
            head_emb, text_len_mask)  # [num_words]

        top_span_starts = gold_starts
        top_span_ends = gold_ends
        top_span_cluster_ids = cluster_ids

        top_span_emb = self.get_span_emb(flattened_head_emb, context_outputs,
                                         top_span_starts, top_span_ends)
        top_span_mention_scores = tf.zeros_like(gold_starts,
                                                dtype=tf.float32)  # [k]
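        # Mention boundaries are taken from the gold annotation, so mention scores
        # are fixed to zero instead of being predicted.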

        top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts)
        top_span_bridging_ante_cids = bridging_ante_cids
        top_us_mask = us_mask
        top_is_status = is_status

        k = util.shape(top_span_starts, 0)

        c = tf.minimum(self.config["max_top_antecedents"], k)

        top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(
            top_span_emb, top_span_mention_scores, c)

        top_antecedent_emb = tf.gather(top_span_emb,
                                       top_antecedents)  # [k, c, emb]

        pair_emb = self.get_pair_embeddings(top_span_emb, top_antecedents,
                                            top_antecedent_emb,
                                            top_antecedent_offsets,
                                            top_span_speaker_ids,
                                            genre_emb)  # [k, c,emb]

        top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids,
                                               top_antecedents)  # [k, c]
        top_antecedent_cluster_ids += tf.to_int32(
            tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]

        shared_depth = 0
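        # Optionally share the first ffnn layers between the bridging and coreference
        # scorers; each task keeps its own remaining layers and output projection.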
        if self.config["shared_depth"] > 0:
            flattened_pair_emb = tf.reshape(
                pair_emb, [k * c, util.shape(pair_emb, 2)])
            shared_depth = min(self.config["shared_depth"],
                               self.config["ffnn_depth"])
            for i in range(shared_depth):
                hidden_weights = tf.get_variable(
                    "shared_hidden_weights_{}".format(i), [
                        util.shape(flattened_pair_emb, 1),
                        self.config["ffnn_size"]
                    ])
                hidden_bias = tf.get_variable(
                    "shared_hidden_bias_{}".format(i),
                    [self.config["ffnn_size"]])
                flattened_pair_emb = tf.nn.relu(
                    tf.nn.xw_plus_b(flattened_pair_emb, hidden_weights,
                                    hidden_bias))
                flattened_pair_emb = tf.nn.dropout(flattened_pair_emb,
                                                   self.dropout)
            pair_emb = tf.reshape(flattened_pair_emb,
                                  [k, c, self.config["ffnn_size"]])

        ante_score_list = []
        pairwise_label_list = []
        dummy_scores = tf.zeros([k, 1])  # [k, 1]
        ante_score_list.append(dummy_scores)

        with tf.variable_scope("slow_bridging_scores"):
            slow_bridging_scores = util.ffnn(
                pair_emb, self.config["ffnn_depth"] - shared_depth,
                self.config["ffnn_size"], 1, self.dropout)  # [k, c, 1]
            slow_bridging_scores = tf.squeeze(slow_bridging_scores,
                                              2)  # [k, c]
            top_bridging_scores = slow_bridging_scores + top_fast_antecedent_scores
            ante_score_list.append(top_bridging_scores)

        bridging_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                              tf.expand_dims(
                                                  top_span_bridging_ante_cids,
                                                  1))  # [k, c]
        non_dummy_bridging_indicator = tf.expand_dims(
            top_span_bridging_ante_cids > 0, 1)  # [k, 1]

        bridging_pairwise_labels = tf.logical_and(
            bridging_cluster_indicator, non_dummy_bridging_indicator)  # [k, c]
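        # bridging_pairwise_labels marks pairs whose antecedent lies in the anaphor's
        # gold bridging cluster (dummy/zero cluster ids are excluded).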
        pairwise_label_list.append(bridging_pairwise_labels)

        if self.config["train_with_coref"]:
            with tf.variable_scope("slow_coreference_scores"):
                slow_coref_scores = util.ffnn(
                    pair_emb, self.config["ffnn_depth"] - shared_depth,
                    self.config["ffnn_size"], 1, self.dropout)  # [k, c, 1]
                slow_coref_scores = tf.squeeze(slow_coref_scores, 2)  # [k, c]
                top_coref_scores = slow_coref_scores + top_fast_antecedent_scores
                ante_score_list.append(top_coref_scores)

            coref_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                               tf.expand_dims(
                                                   top_span_cluster_ids,
                                                   1))  # [k,c]

            non_dummy_coref_indicator = tf.expand_dims(
                top_span_cluster_ids > 0, 1)  # [k,1]

            coref_pairwise_labels = tf.logical_and(
                coref_cluster_indicator, non_dummy_coref_indicator)  # [k,c]
            pairwise_label_list.append(coref_pairwise_labels)

        top_antecedent_scores = tf.concat(ante_score_list,
                                          1)  # [k, c + 1] or [k, 2*c+1]
        pairwise_labels = tf.concat(pairwise_label_list, 1)  # [k,c] or [k,2*c]

        top_antecedent_scores = tf.boolean_mask(top_antecedent_scores,
                                                top_us_mask)
        pairwise_labels = tf.boolean_mask(pairwise_labels, top_us_mask)

        dummy_labels = tf.logical_not(
            tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
        pairwise_labels = tf.concat([dummy_labels, pairwise_labels],
                                    1)  # [k, c + 1] or [k,2*c+1]

        loss = self.softmax_loss(top_antecedent_scores, pairwise_labels)
        loss = tf.reduce_sum(loss)

        if self.config["use_gold_bridging_anaphora"]:
            bridging_mask = tf.equal(top_is_status, 2)  # bridging
            top_span_starts = tf.boolean_mask(top_span_starts, bridging_mask)
            top_span_ends = tf.boolean_mask(top_span_ends, bridging_mask)
            top_antecedents = tf.boolean_mask(top_antecedents, bridging_mask)
            top_antecedent_scores_output = tf.boolean_mask(
                top_bridging_scores, bridging_mask)
        elif self.config["remove_coref_anaphora"]:
            bridging_mask = tf.not_equal(top_is_status, 1)  # DO
            top_span_starts = tf.boolean_mask(top_span_starts, bridging_mask)
            top_span_ends = tf.boolean_mask(top_span_ends, bridging_mask)
            top_antecedents = tf.boolean_mask(top_antecedents, bridging_mask)
            top_antecedent_scores_output = tf.boolean_mask(
                tf.concat([dummy_scores, top_bridging_scores], 1),
                bridging_mask)
        else:
            top_antecedent_scores_output = top_antecedent_scores

        return [
            top_span_starts, top_span_ends, top_span_cluster_ids,
            top_antecedents, top_antecedent_scores_output
        ], loss
    def get_antecedent_scores(self, mention_emb, mention_scores, antecedents,
                              antecedents_len, mention_starts, mention_ends,
                              mention_speaker_ids, genre_emb):
        num_mentions = util.shape(mention_emb, 0)
        max_antecedents = util.shape(antecedents, 1)

        feature_emb_list = []

        if self.config["use_metadata"]:
            antecedent_speaker_ids = tf.gather(
                mention_speaker_ids, antecedents)  # [num_mentions, max_ant]
            same_speaker = tf.equal(
                tf.expand_dims(mention_speaker_ids, 1),
                antecedent_speaker_ids)  # [num_mentions, max_ant]
            same_speaker_emb = tf.get_variable(
                "same_speaker_emb",
                [2, self.config["feature_size"]])  # xavier/glorot init by default
            speaker_pair_emb = tf.gather(
                same_speaker_emb,
                tf.to_int32(same_speaker))  # [num_mentions, max_ant, emb]
            feature_emb_list.append(speaker_pair_emb)

            tiled_genre_emb = tf.tile(
                tf.expand_dims(tf.expand_dims(genre_emb, 0), 0),
                [num_mentions, max_antecedents,
                 1])  # [num_mentions, max_ant, emb]
            feature_emb_list.append(tiled_genre_emb)

        if self.config["use_features"]:
            target_indices = tf.range(num_mentions)  # [num_mentions]
            mention_distance = tf.expand_dims(
                target_indices, 1) - antecedents  # [num_mentions, max_ant]
            mention_distance_bins = coref_ops.cofer_kernels_distance_bins(
                mention_distance)  # [num_mentions, max_ant]
            mention_distance_bins.set_shape([None, None])
            mention_distance_emb = tf.get_variable(
                "mention_distance_emb",
                [10, self.config["feature_size"]])  # xavier/glorot init by default
            mention_distance_emb = tf.gather(
                mention_distance_emb,
                mention_distance_bins)  # [num_mentions, max_ant, emb]
            feature_emb_list.append(mention_distance_emb)

        feature_emb = tf.concat(feature_emb_list,
                                2)  # [num_mentions, max_ant, emb]
        feature_emb = tf.nn.dropout(feature_emb,
                                    self.dropout)  # [num_mentions, max_ant, emb]

        antecedent_emb = tf.gather(mention_emb,
                                   antecedents)  # [num_mentions, max_ant, emb]
        target_emb_tiled = tf.tile(
            tf.expand_dims(mention_emb, 1),
            [1, max_antecedents, 1])  # [num_mentions, max_ant, emb]
        similarity_emb = antecedent_emb * target_emb_tiled  # [num_mentions, max_ant, emb]

        pair_emb = tf.concat(
            [target_emb_tiled, antecedent_emb, similarity_emb, feature_emb],
            2)  # [num_mentions, max_ant, emb]

        # with tf.variable_scope("iteration"):
        #     with tf.variable_scope("antecedent_scoring"):
        antecedent_scores = util.ffnn(
            pair_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1,
            self.dropout)  # [num_mentions, max_ant, 1]
        antecedent_scores = tf.squeeze(antecedent_scores,
                                       2)  # [num_mentions, max_ant]

        antecedent_mask = tf.log(
            tf.sequence_mask(antecedents_len,
                             max_antecedents,
                             dtype=tf.float32))  # [num_mentions, max_ant]
        antecedent_scores += antecedent_mask  # [num_mentions, max_ant]

        antecedent_scores += tf.expand_dims(mention_scores, 1) + tf.gather(
            mention_scores, antecedents)  # [num_mentions, max_ant]
        antecedent_scores = tf.concat(
            [tf.zeros([util.shape(mention_scores, 0), 1]), antecedent_scores],
            1)  # [num_mentions, max_ant + 1]
        return antecedent_scores  # [num_mentions, max_ant + 1]
Exemple #19
0
 def get_mention_scores(self, span_emb):
     with tf.variable_scope("mention_scores"):
         return util.ffnn(span_emb, self.config["ffnn_depth"],
                          self.config["ffnn_size"], 1,
                          self.dropout)  # [k, 1]
Exemple #20
0
    def get_predictions_and_loss(self, context_word_emb, head_word_emb, lm_emb,
                                 char_index, text_len, is_training,
                                 gold_starts, gold_ends, antecedents,
                                 antecedents_len, anaphors, gold_labels):
        self.dropout = self.get_dropout(self.config["dropout_rate"],
                                        is_training)
        self.lexical_dropout = self.get_dropout(
            self.config["lexical_dropout_rate"], is_training)
        self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"],
                                             is_training)

        num_sentences = tf.shape(context_word_emb)[0]
        max_sentence_length = tf.shape(context_word_emb)[1]

        context_emb_list = [context_word_emb]
        head_emb_list = [head_word_emb]

        if self.config["char_embedding_size"] > 0:
            char_emb = tf.gather(
                tf.get_variable(
                    "char_embeddings",
                    [len(self.char_dict), self.config["char_embedding_size"]]),
                char_index
            )  # [num_sentences, max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [
                num_sentences * max_sentence_length,
                util.shape(char_emb, 2),
                util.shape(char_emb, 3)
            ])  # [num_sentences * max_sentence_length, max_word_length, emb]
            flattened_aggregated_char_emb = util.cnn(
                flattened_char_emb, self.config["filter_widths"],
                self.config["filter_size"]
            )  # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
                num_sentences, max_sentence_length,
                util.shape(flattened_aggregated_char_emb, 1)
            ])  # [num_sentences, max_sentence_length, emb]
            context_emb_list.append(aggregated_char_emb)
            head_emb_list.append(aggregated_char_emb)

        if self.lm_file:
            lm_emb_size = util.shape(lm_emb, 2)
            lm_num_layers = util.shape(lm_emb, 3)
            with tf.variable_scope("lm_aggregation"):
                self.lm_weights = tf.nn.softmax(
                    tf.get_variable("lm_scores", [lm_num_layers],
                                    initializer=tf.constant_initializer(0.0)))
                self.lm_scaling = tf.get_variable(
                    "lm_scaling", [], initializer=tf.constant_initializer(1.0))
            flattened_lm_emb = tf.reshape(lm_emb, [
                num_sentences * max_sentence_length * lm_emb_size,
                lm_num_layers
            ])
            flattened_aggregated_lm_emb = tf.matmul(
                flattened_lm_emb, tf.expand_dims(
                    self.lm_weights,
                    1))  # [num_sentences * max_sentence_length * emb, 1]
            aggregated_lm_emb = tf.reshape(
                flattened_aggregated_lm_emb,
                [num_sentences, max_sentence_length, lm_emb_size])
            aggregated_lm_emb *= self.lm_scaling
            context_emb_list.append(aggregated_lm_emb)

        context_emb = tf.concat(context_emb_list,
                                2)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.concat(head_emb_list,
                             2)  # [num_sentences, max_sentence_length, emb]
        context_emb = tf.nn.dropout(
            context_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.nn.dropout(
            head_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

        text_len_mask = tf.sequence_mask(
            text_len,
            maxlen=max_sentence_length)  # [num_sentences, max_sentence_length]

        context_outputs = self.lstm_contextualize(
            context_emb, text_len, text_len_mask)  # [num_words, emb]

        flattened_head_emb = self.flatten_emb_by_sentence(
            head_emb, text_len_mask)  # [num_words]

        mention_emb = self.get_span_emb(flattened_head_emb, context_outputs,
                                        gold_starts, gold_ends)

        k = util.shape(antecedents, 0)
        c = util.shape(antecedents, 1)

        anaphor_emb = tf.gather(mention_emb, anaphors)  #[k,emb]
        antecedent_emb = tf.gather(mention_emb, antecedents)  # [k, c, emb]

        pair_emb = self.get_pair_embeddings(anaphor_emb, antecedents,
                                            antecedent_emb)  # [k, c,emb]

        with tf.variable_scope("plural_scores"):
            plural_scores = util.ffnn(pair_emb, self.config["ffnn_depth"],
                                      self.config["ffnn_size"], 1,
                                      self.dropout)  # [k, c, 1]
            plural_scores = tf.squeeze(plural_scores, 2)  # [k, c]
            plural_scores = plural_scores + tf.log(
                tf.sequence_mask(antecedents_len, c, tf.float32))
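            # Adding the log of the mask sends scores of padded antecedent slots to -inf,
            # so they receive zero probability after the softmax.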

        dummy_scores = tf.zeros([k, 1])
        dummy_labels = tf.logical_not(
            tf.reduce_any(gold_labels, 1, keepdims=True))  # [k, 1]

        plural_scores_with_dummy = tf.concat([dummy_scores, plural_scores], 1)
        gold_labels_with_dummy = tf.concat([dummy_labels, gold_labels], 1)

        loss = self.softmax_loss(plural_scores_with_dummy,
                                 gold_labels_with_dummy)
        loss = tf.reduce_sum(loss)

        return [plural_scores, antecedents_len, anaphors], loss