def get_mention_scores(self, span_emb, span_starts, span_ends):
  with tf.variable_scope("mention_scores"):
    span_scores = util.ffnn(span_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout)  # [k, 1]
  if self.config['use_prior']:
    span_width_emb = tf.get_variable("span_width_prior_embeddings", [self.config["max_span_width"], self.config["feature_size"]], initializer=tf.truncated_normal_initializer(stddev=0.02))  # [W, emb]
    span_width_index = span_ends - span_starts  # [NC]
    with tf.variable_scope("width_scores"):
      width_scores = util.ffnn(span_width_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout)  # [W, 1]
    width_scores = tf.gather(width_scores, span_width_index)
    span_scores += width_scores
  return span_scores
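# Note: every scorer in this file goes through util.ffnn(inputs, num_hidden_layers, hidden_size,
# output_size, dropout) and util.shape(x, dim). Those helpers are not part of this excerpt; the
# sketch below is an assumed implementation consistent with how they are called above (it reshapes
# 3-D inputs such as [k, c, emb] to 2-D before the dense layers), not necessarily the original util.py.
import tensorflow as tf

def shape(x, dim):
  """Static dimension if known, otherwise the dynamic one."""
  return x.get_shape()[dim].value or tf.shape(x)[dim]

def ffnn(inputs, num_hidden_layers, hidden_size, output_size, dropout=None):
  """Feed-forward scorer: ReLU hidden layers followed by a linear output layer."""
  if len(inputs.get_shape()) > 2:
    current_inputs = tf.reshape(inputs, [-1, shape(inputs, -1)])
  else:
    current_inputs = inputs
  for i in range(num_hidden_layers):
    hidden_weights = tf.get_variable("hidden_weights_{}".format(i), [shape(current_inputs, 1), hidden_size])
    hidden_bias = tf.get_variable("hidden_bias_{}".format(i), [hidden_size])
    current_inputs = tf.nn.relu(tf.nn.xw_plus_b(current_inputs, hidden_weights, hidden_bias))
    if dropout is not None:
      current_inputs = tf.nn.dropout(current_inputs, dropout)
  output_weights = tf.get_variable("output_weights", [shape(current_inputs, 1), output_size])
  output_bias = tf.get_variable("output_bias", [output_size])
  outputs = tf.nn.xw_plus_b(current_inputs, output_weights, output_bias)
  if len(inputs.get_shape()) > 2:
    outputs = tf.reshape(outputs, [shape(inputs, 0), shape(inputs, 1), output_size])
  return outputs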
def get_slow_antecedent_scores(self, top_span_emb, top_antecedents, top_antecedent_emb, top_antecedent_offsets, top_span_speaker_ids, genre_emb):
  k = util.shape(top_span_emb, 0)
  c = util.shape(top_antecedents, 1)
  feature_emb_list = []
  if self.config["use_metadata"]:
    top_antecedent_speaker_ids = tf.gather(top_span_speaker_ids, top_antecedents)  # [k, c]
    same_speaker = tf.equal(tf.expand_dims(top_span_speaker_ids, 1), top_antecedent_speaker_ids)  # [k, c]
    speaker_pair_emb = tf.gather(tf.get_variable("same_speaker_emb", [2, self.config["feature_size"]]), tf.to_int32(same_speaker))  # [k, c, emb]
    feature_emb_list.append(speaker_pair_emb)
    tiled_genre_emb = tf.tile(tf.expand_dims(tf.expand_dims(genre_emb, 0), 0), [k, c, 1])  # [k, c, emb]
    feature_emb_list.append(tiled_genre_emb)
  if self.config["use_features"]:
    antecedent_distance_buckets = self.bucket_distance(top_antecedent_offsets)  # [k, c]
    antecedent_distance_emb = tf.gather(tf.get_variable("antecedent_distance_emb", [10, self.config["feature_size"]]), antecedent_distance_buckets)  # [k, c]
    feature_emb_list.append(antecedent_distance_emb)
  feature_emb = tf.concat(feature_emb_list, 2)  # [k, c, emb]
  feature_emb = tf.nn.dropout(feature_emb, self.dropout)  # [k, c, emb]
  target_emb = tf.expand_dims(top_span_emb, 1)  # [k, 1, emb]
  similarity_emb = top_antecedent_emb * target_emb  # [k, c, emb]
  target_emb = tf.tile(target_emb, [1, c, 1])  # [k, c, emb]
  pair_emb = tf.concat([target_emb, top_antecedent_emb, similarity_emb, feature_emb], 2)  # [k, c, emb]
  with tf.variable_scope("slow_antecedent_scores"):
    slow_antecedent_scores = util.ffnn(pair_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout)  # [k, c, 1]
  slow_antecedent_scores = tf.squeeze(slow_antecedent_scores, 2)  # [k, c]
  return slow_antecedent_scores  # [k, c]
def get_antecedent_scores(top_span_emb, top_span_mention_scores, antecedents, config, dropout, top_fast_antecedent_scores, top_antecedent_offsets):
  k = util.shape(top_span_emb, 0)
  max_antecedents = util.shape(antecedents, 1)
  feature_emb_list = []
  if config["use_features"]:
    # target_indices = tf.range(k)  # [k]
    # antecedent_distance = tf.expand_dims(target_indices, 1) - antecedents  # [k, max_ant]
    # antecedent_distance_buckets = bucket_distance(antecedent_distance)  # [k, max_ant]
    antecedent_distance_buckets = bucket_distance(top_antecedent_offsets)
    with tf.variable_scope("features"):
      antecedent_distance_emb = tf.gather(
          tf.get_variable("antecedent_distance_emb", [10, config["feature_size"]]),
          antecedent_distance_buckets)  # [k, max_ant]
    feature_emb_list.append(antecedent_distance_emb)
  feature_emb = tf.concat(feature_emb_list, 2)  # [k, max_ant, emb]
  feature_emb = tf.nn.dropout(feature_emb, dropout)  # [k, max_ant, emb]
  antecedent_emb = tf.gather(top_span_emb, antecedents)  # [k, max_ant, emb]
  target_emb = tf.expand_dims(top_span_emb, 1)  # [k, 1, emb]
  similarity_emb = antecedent_emb * target_emb  # [k, max_ant, emb]
  target_emb = tf.tile(target_emb, [1, max_antecedents, 1])  # [k, max_ant, emb]
  pair_emb = tf.concat([target_emb, antecedent_emb, similarity_emb, feature_emb], 2)  # [k, max_ant, emb]
  with tf.variable_scope("antecedent_scores"):
    antecedent_scores = util.ffnn(pair_emb, config["ffnn_depth"], config["ffnn_size"], 1, dropout)  # [k, max_ant, 1]
    antecedent_scores = tf.squeeze(antecedent_scores, 2)  # [k, max_ant]
  # antecedent_scores += tf.expand_dims(top_span_mention_scores, 1) + tf.gather(
  #     top_span_mention_scores, antecedents)  # [k, max_ant]
  antecedent_scores += top_fast_antecedent_scores
  return antecedent_scores, antecedent_emb, pair_emb  # [k, max_ant]
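# Several scorers above and below bucket the antecedent offset into the 10 bins used to index
# "antecedent_distance_emb". The bucketing helper itself is not included in this excerpt; the
# sketch below assumes the usual semi-logscale scheme with buckets
# [0, 1, 2, 3, 4, 5-7, 8-15, 16-31, 32-63, 64+].
import math
import tensorflow as tf

def bucket_distance(distances):
  """Map non-negative distances to 10 semi-logscale buckets (assumed scheme)."""
  logspace_idx = tf.to_int32(tf.floor(tf.log(tf.to_float(distances)) / math.log(2))) + 3
  use_identity = tf.to_int32(distances <= 4)
  combined_idx = use_identity * distances + (1 - use_identity) * logspace_idx
  return tf.clip_by_value(combined_idx, 0, 9)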
def get_feature_attention_score(self, tmp_feature_emb, tmp_candidate_embedding, tmp_name):
  k = util.shape(tmp_feature_emb, 0)  # tmp_feature_emb: [k, c, feature_size]
  c = util.shape(tmp_feature_emb, 1)
  tmp_feature_size = util.shape(tmp_feature_emb, 2)
  tmp_emb_size = util.shape(tmp_candidate_embedding, 2)
  overall_emb = tf.concat([tmp_candidate_embedding, tmp_feature_emb], 2)  # [k, c, feature_size+embedding_size]
  repeated_emb = tf.tile(tf.expand_dims(overall_emb, 1), [1, c, 1, 1])  # [k, c, c, feature_size+embedding_size]
  tiled_emb = tf.tile(tf.expand_dims(overall_emb, 2), [1, 1, c, 1])  # [k, c, c, feature_size+embedding_size]
  final_feature = tf.concat([repeated_emb, tiled_emb, repeated_emb * tiled_emb], 3)  # [k, c, c, (feature_size+embedding_size)*3]
  final_feature = tf.reshape(final_feature, [k, c * c, (tmp_feature_size + tmp_emb_size) * 3])
  with tf.variable_scope(tmp_name):
    feature_attention_scores = util.ffnn(final_feature, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout)  # [k, c*c, 1]
  feature_attention_scores = tf.reshape(feature_attention_scores, [k, c, c, 1])
  return feature_attention_scores
def get_antecedent_scores(self, mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb, mention_ner_ids):
  num_mentions = util.shape(mention_emb, 0)
  max_antecedents = util.shape(antecedents, 1)
  feature_emb_list = []
  if self.config["use_metadata"]:
    antecedent_speaker_ids = tf.gather(mention_speaker_ids, antecedents)  # [num_mentions, max_ant]
    same_speaker = tf.equal(tf.expand_dims(mention_speaker_ids, 1), antecedent_speaker_ids)  # [num_mentions, max_ant]
    speaker_pair_emb = tf.gather(tf.get_variable("same_speaker_emb", [2, self.config["feature_size"]]), tf.to_int32(same_speaker))  # [num_mentions, max_ant, emb]
    feature_emb_list.append(speaker_pair_emb)
    # tile duplicates the data: [a b c d] --> [a b c d a b c d]
    tiled_genre_emb = tf.tile(tf.expand_dims(tf.expand_dims(genre_emb, 0), 0), [num_mentions, max_antecedents, 1])  # [num_mentions, max_ant, emb]
    feature_emb_list.append(tiled_genre_emb)
  if self.config["use_features"]:
    target_indices = tf.range(num_mentions)  # [num_mentions]
    mention_distance = tf.expand_dims(target_indices, 1) - antecedents  # [num_mentions, max_ant]
    mention_distance_bins = coref_ops.distance_bins(mention_distance)  # [num_mentions, max_ant]
    mention_distance_bins.set_shape([None, None])
    mention_distance_emb = tf.gather(tf.get_variable("mention_distance_emb", [10, self.config["feature_size"]]), mention_distance_bins)  # [num_mentions, max_ant]
    feature_emb_list.append(mention_distance_emb)
  if self.config["use_ner_phi"]:
    antecedent_ner_ids = tf.gather(mention_ner_ids, antecedents)
    same_ner = tf.equal(tf.expand_dims(mention_ner_ids, 1), antecedent_ner_ids)
    ner_pair_emb = tf.gather(tf.get_variable("same_ner_emb", [2, self.config["feature_size"]]), tf.to_int32(same_ner))
    feature_emb_list.append(ner_pair_emb)

  # phi(i, j)
  feature_emb = tf.concat(feature_emb_list, 2)  # [num_mentions, max_ant, emb]
  feature_emb = tf.nn.dropout(feature_emb, self.dropout)  # [num_mentions, max_ant, emb]
  # g_i
  antecedent_emb = tf.gather(mention_emb, antecedents)  # [num_mentions, max_ant, emb]
  # g_j
  target_emb_tiled = tf.tile(tf.expand_dims(mention_emb, 1), [1, max_antecedents, 1])  # [num_mentions, max_ant, emb]
  # g_i . g_j
  similarity_emb = antecedent_emb * target_emb_tiled  # [num_mentions, max_ant, emb]
  # [g_i, g_j, g_i . g_j, phi(i, j)]
  pair_emb = tf.concat([target_emb_tiled, antecedent_emb, similarity_emb, feature_emb], 2)  # [num_mentions, max_ant, emb]

  with tf.variable_scope("iteration"):
    with tf.variable_scope("antecedent_scoring"):
      antecedent_scores = util.ffnn(pair_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout)  # [num_mentions, max_ant, 1]
  antecedent_scores = tf.squeeze(antecedent_scores, 2)  # [num_mentions, max_ant]
  antecedent_mask = tf.log(tf.sequence_mask(antecedents_len, max_antecedents, dtype=tf.float32))  # [num_mentions, max_ant]
  antecedent_scores += antecedent_mask  # [num_mentions, max_ant]
  antecedent_scores += tf.expand_dims(mention_scores, 1) + tf.gather(mention_scores, antecedents)  # [num_mentions, max_ant]
  antecedent_scores = tf.concat([tf.zeros([util.shape(mention_scores, 0), 1]), antecedent_scores], 1)  # [num_mentions, max_ant + 1]
  return antecedent_scores, pair_emb  # [num_mentions, max_ant + 1]
def get_unary_scores(span_emb, config, dropout, num_labels=1, name="span_scores"):
  """Compute span score with FFNN(span embedding).

  Args:
    span_emb: Tensor of [num_sentences, num_spans, emb].
  """
  with tf.variable_scope(name):
    scores = util.ffnn(span_emb, config["ffnn_depth"], config["ffnn_size"], num_labels,
                       dropout)  # [num_sentences, num_spans, num_labels] or [k, num_labels]
  if num_labels == 1:
    scores = tf.squeeze(scores, -1)  # [num_sentences, num_spans] or [k]
  return scores
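# A hypothetical call site for get_unary_scores (candidate_span_emb, config and dropout are assumed
# to exist in the caller); with num_labels=1 the trailing dimension is squeezed away, so flattened
# candidate spans come back as a [k] score vector:
candidate_mention_scores = get_unary_scores(
    candidate_span_emb, config, dropout, num_labels=1, name="mention_scores")  # [k]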
def get_slow_antecedent_scores(self, top_span_emb, top_antecedents, top_antecedent_emb, top_antecedent_offsets, top_span_speaker_ids, genre_emb, top_scene_emb, top_antecedent_scene_emb, top_span_genders, top_span_fpronouns):
  k = util.shape(top_span_emb, 0)
  c = util.shape(top_antecedents, 1)
  feature_emb_list = []
  if self.config["use_metadata"]:
    top_antecedent_speaker_ids = tf.gather(top_span_speaker_ids, top_antecedents)  # [k, c]
    same_speaker = tf.equal(tf.expand_dims(top_span_speaker_ids, 1), top_antecedent_speaker_ids)  # [k, c]
    speaker_pair_emb = tf.gather(tf.get_variable("same_speaker_emb", [2, self.config["feature_size"]]), tf.to_int32(same_speaker))  # [k, c, emb]
    feature_emb_list.append(speaker_pair_emb)

    top_antecedent_genders = tf.gather(top_span_genders, top_antecedents)
    same_gender = ((tf.expand_dims(top_span_genders, 1) * top_antecedent_genders) >= 0)
    same_gender_emb = tf.gather(tf.get_variable("same_gender_emb", [2, self.config["feature_size"]]), tf.to_int32(same_gender))
    feature_emb_list.append(same_gender_emb)

    top_antecedent_fpronouns = tf.gather(top_span_fpronouns, top_antecedents)  # [k, c]
    fpronoun_count = tf.add(tf.expand_dims(top_span_fpronouns, 1), top_antecedent_fpronouns)  # [k, c]
    no_same_speaker = tf.to_int32(tf.logical_not(tf.equal(tf.expand_dims(top_span_speaker_ids, 1), top_antecedent_speaker_ids)))  # [k, c]
    same_speaker_and_fp = (tf.add(fpronoun_count, no_same_speaker) < 3)
    same_speaker_and_fp_emb = tf.gather(tf.get_variable("same_speaker_and_fp_emb", [2, self.config["feature_size"]]), tf.to_int32(same_speaker_and_fp))
    feature_emb_list.append(same_speaker_and_fp_emb)

    # tiled_genre_emb = tf.tile(tf.expand_dims(tf.expand_dims(genre_emb, 0), 0), [k, c, 1])  # [k, c, emb]
    # feature_emb_list.append(tiled_genre_emb)
  if self.config["use_features"]:
    antecedent_distance_buckets = self.bucket_distance(top_antecedent_offsets)  # [k, c]
    antecedent_distance_emb = tf.gather(tf.get_variable("antecedent_distance_emb", [10, self.config["feature_size"]]), antecedent_distance_buckets)  # [k, c]
    feature_emb_list.append(antecedent_distance_emb)
  feature_emb = tf.concat(feature_emb_list, 2)  # [k, c, emb]
  feature_emb = tf.nn.dropout(feature_emb, self.dropout)  # [k, c, emb]
  target_emb = tf.expand_dims(top_span_emb, 1)  # [k, 1, emb]
  similarity_emb = top_antecedent_emb * target_emb  # [k, c, emb]
  target_emb = tf.tile(target_emb, [1, c, 1])  # [k, c, emb]
  target_scene_emb = tf.expand_dims(top_scene_emb, 1)  # [k, 1, emb-scene]
  target_scene_emb = tf.tile(target_scene_emb, [1, c, 1])  # [k, c, emb-scene]
  if self.config['use_video']:
    pair_emb = tf.concat([target_scene_emb, top_antecedent_scene_emb, target_emb, top_antecedent_emb, similarity_emb, feature_emb], 2)  # [k, c, emb]
  else:
    pair_emb = tf.concat([target_emb, top_antecedent_emb, similarity_emb, feature_emb], 2)  # [k, c, emb]
  with tf.variable_scope("slow_antecedent_scores"):
    slow_antecedent_scores = util.ffnn(pair_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout)  # [k, c, 1]
  slow_antecedent_scores = tf.squeeze(slow_antecedent_scores, 2)  # [k, c]
  return slow_antecedent_scores  # [k, c]
def get_feature_score(self, tmp_feature_emb, tmp_feature_name):
  k = util.shape(tmp_feature_emb, 0)
  c = util.shape(tmp_feature_emb, 1)
  repeated_feature_emb = tf.tile(tf.expand_dims(tmp_feature_emb, 1), [1, c, 1, 1])  # [k, c, c, feature_size]
  tiled_feature_emb = tf.tile(tf.expand_dims(tmp_feature_emb, 2), [1, 1, c, 1])  # [k, c, c, feature_size]
  final_feature = tf.concat([repeated_feature_emb, tiled_feature_emb, repeated_feature_emb * tiled_feature_emb], 3)  # [k, c, c, feature_size*3]
  final_feature = tf.reshape(final_feature, [k, c * c, self.config["feature_size"] * 3])  # [k, c*c, feature_size*3]
  with tf.variable_scope(tmp_feature_name):
    tmp_feature_scores = util.ffnn(final_feature, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout)  # [k, c*c, 1]
  tmp_feature_scores = tf.reshape(tmp_feature_scores, [k, c, c, 1])  # [k, c, c, 1]
  return tmp_feature_scores
def get_coreference_score(self, candidate_NPs_emb, pronoun_emb, candidate_NPs_speaker_ids, pronoun_speaker_id, candidate_NP_offsets, pronoun_offsets, number_features, plurality_features):
  k = util.shape(candidate_NPs_emb, 0)
  c = util.shape(candidate_NPs_emb, 1)
  feature_emb_list = []
  if self.config["use_metadata"]:
    same_speaker = tf.equal(candidate_NPs_speaker_ids, tf.tile(pronoun_speaker_id, [1, c]))  # [k, c]
    speaker_pair_emb = tf.gather(tf.get_variable("same_speaker_emb", [2, self.config["feature_size"]]), tf.to_int32(same_speaker))  # [k, c, emb]
    feature_emb_list.append(speaker_pair_emb)
  if self.config["use_features"]:
    # Bucketed distance between the pronoun and each candidate NP; the distance is computed
    # from the token offsets (not the speaker ids).
    antecedent_distance_buckets = self.bucket_distance(
        tf.nn.relu(tf.tile(pronoun_offsets, [1, c]) - candidate_NP_offsets))  # [k, c]
    antecedent_distance_emb = tf.gather(
        tf.get_variable("antecedent_distance_emb", [10, self.config["feature_size"]]),
        antecedent_distance_buckets)  # [k, c, emb]
    feature_emb_list.append(antecedent_distance_emb)
  if self.config['knowledge_as_feature']:
    number_emb = tf.gather(tf.get_variable("number_emb", [2, self.config["feature_size"]]), number_features)  # [k, c, feature_size]
    plurality_emb = tf.gather(tf.get_variable("plurality_emb", [2, self.config["feature_size"]]), plurality_features)  # [k, c, feature_size]
    feature_emb_list.append(number_emb)
    feature_emb_list.append(plurality_emb)
  feature_emb = tf.concat(feature_emb_list, 2)  # [k, c, emb]
  feature_emb = tf.nn.dropout(feature_emb, self.dropout)  # [k, c, emb]
  target_emb = tf.tile(pronoun_emb, [1, c, 1])  # [k, c, emb]
  similarity_emb = candidate_NPs_emb * target_emb  # [k, c, emb]
  # [pronoun_emb, candidate_emb, pronoun_emb * candidate_emb, features]
  pair_emb = tf.concat([target_emb, candidate_NPs_emb, similarity_emb, feature_emb], 2)  # [k, c, emb]
  with tf.variable_scope("slow_antecedent_scores"):
    slow_antecedent_scores = util.ffnn(pair_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout)  # [k, c, 1]
  slow_antecedent_scores = tf.squeeze(slow_antecedent_scores, 2)  # [k, c]
  return slow_antecedent_scores  # [k, c]
def get_antecedent_scores(self, mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb):
  num_mentions = util.shape(mention_emb, 0)
  max_antecedents = util.shape(antecedents, 1)
  feature_emb_list = []
  if self.config["use_metadata"]:
    antecedent_speaker_ids = tf.gather(mention_speaker_ids, antecedents)  # [num_mentions, max_ant]
    same_speaker = tf.equal(tf.expand_dims(mention_speaker_ids, 1), antecedent_speaker_ids)  # [num_mentions, max_ant]
    speaker_pair_emb = tf.gather(tf.get_variable("same_speaker_emb", [2, self.config["feature_size"]]), tf.to_int32(same_speaker))  # [num_mentions, max_ant, emb]
    feature_emb_list.append(speaker_pair_emb)
    tiled_genre_emb = tf.tile(tf.expand_dims(tf.expand_dims(genre_emb, 0), 0), [num_mentions, max_antecedents, 1])  # [num_mentions, max_ant, emb]
    feature_emb_list.append(tiled_genre_emb)
  if self.config["use_features"]:
    target_indices = tf.range(num_mentions)  # [num_mentions]
    mention_distance = tf.expand_dims(target_indices, 1) - antecedents  # [num_mentions, max_ant]
    mention_distance_bins = coref_ops.distance_bins(mention_distance)  # [num_mentions, max_ant]
    mention_distance_bins.set_shape([None, None])
    mention_distance_emb = tf.gather(tf.get_variable("mention_distance_emb", [10, self.config["feature_size"]]), mention_distance_bins)  # [num_mentions, max_ant]
    feature_emb_list.append(mention_distance_emb)
  feature_emb = tf.concat(feature_emb_list, 2)  # [num_mentions, max_ant, emb]
  feature_emb = tf.nn.dropout(feature_emb, self.dropout)  # [num_mentions, max_ant, emb]
  antecedent_emb = tf.gather(mention_emb, antecedents)  # [num_mentions, max_ant, emb]
  target_emb_tiled = tf.tile(tf.expand_dims(mention_emb, 1), [1, max_antecedents, 1])  # [num_mentions, max_ant, emb]
  similarity_emb = antecedent_emb * target_emb_tiled  # [num_mentions, max_ant, emb]
  pair_emb = tf.concat([target_emb_tiled, antecedent_emb, similarity_emb, feature_emb], 2)  # [num_mentions, max_ant, emb]
  with tf.variable_scope("iteration"):
    with tf.variable_scope("antecedent_scoring"):
      antecedent_scores = util.ffnn(pair_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout)  # [num_mentions, max_ant, 1]
  antecedent_scores = tf.squeeze(antecedent_scores, 2)  # [num_mentions, max_ant]
  antecedent_mask = tf.log(tf.sequence_mask(antecedents_len, max_antecedents, dtype=tf.float32))  # [num_mentions, max_ant]
  antecedent_scores += antecedent_mask  # [num_mentions, max_ant]
  antecedent_scores += tf.expand_dims(mention_scores, 1) + tf.gather(mention_scores, antecedents)  # [num_mentions, max_ant]
  antecedent_scores = tf.concat([tf.zeros([util.shape(mention_scores, 0), 1]), antecedent_scores], 1)  # [num_mentions, max_ant + 1]
  return antecedent_scores  # [num_mentions, max_ant + 1]
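# The [num_mentions, max_ant + 1] score matrix returned above (dummy antecedent in column 0) is
# typically fed into a marginal log-likelihood over all correct antecedents; self.softmax_loss is
# called with exactly these arguments further below but is not defined in this excerpt. A sketch of
# the standard formulation (assumed implementation) is:
import tensorflow as tf

def softmax_loss(antecedent_scores, antecedent_labels):
  """Negative marginal log-likelihood: log-sum-exp over gold antecedents minus the full log-partition."""
  gold_scores = antecedent_scores + tf.log(tf.to_float(antecedent_labels))  # [k, max_ant + 1]
  marginalized_gold_scores = tf.reduce_logsumexp(gold_scores, [1])  # [k]
  log_norm = tf.reduce_logsumexp(antecedent_scores, [1])  # [k]
  return log_norm - marginalized_gold_scores  # [k]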
def get_slow_antecedent_scores(self, top_span_emb, top_antecedents, top_antecedent_emb, top_antecedent_offsets, top_span_speaker_ids, genre_emb, segment_distance=None):
  k = util.shape(top_span_emb, 0)
  c = util.shape(top_antecedents, 1)
  feature_emb_list = []
  if self.config["use_metadata"]:
    top_antecedent_speaker_ids = tf.gather(top_span_speaker_ids, top_antecedents)  # [k, c]
    same_speaker = tf.equal(tf.expand_dims(top_span_speaker_ids, 1), top_antecedent_speaker_ids)  # [k, c]
    speaker_pair_emb = tf.gather(tf.get_variable("same_speaker_emb", [2, self.config["feature_size"]], initializer=tf.truncated_normal_initializer(stddev=0.02)), tf.to_int32(same_speaker))  # [k, c, emb]
    feature_emb_list.append(speaker_pair_emb)
    tiled_genre_emb = tf.tile(tf.expand_dims(tf.expand_dims(genre_emb, 0), 0), [k, c, 1])  # [k, c, emb]
    feature_emb_list.append(tiled_genre_emb)
  if self.config["use_features"]:
    antecedent_distance_buckets = self.bucket_distance(top_antecedent_offsets)  # [k, c]
    antecedent_distance_emb = tf.gather(tf.get_variable("antecedent_distance_emb", [10, self.config["feature_size"]], initializer=tf.truncated_normal_initializer(stddev=0.02)), antecedent_distance_buckets)  # [k, c]
    feature_emb_list.append(antecedent_distance_emb)
  if segment_distance is not None:
    with tf.variable_scope('segment_distance', reuse=tf.AUTO_REUSE):
      segment_distance_emb = tf.gather(tf.get_variable("segment_distance_embeddings", [self.config['max_training_sentences'], self.config["feature_size"]], initializer=tf.truncated_normal_initializer(stddev=0.02)), segment_distance)  # [k, emb]
    feature_emb_list.append(segment_distance_emb)
  feature_emb = tf.concat(feature_emb_list, 2)  # [k, c, emb]
  feature_emb = tf.nn.dropout(feature_emb, self.dropout)  # [k, c, emb]
  target_emb = tf.expand_dims(top_span_emb, 1)  # [k, 1, emb]
  similarity_emb = top_antecedent_emb * target_emb  # [k, c, emb]
  target_emb = tf.tile(target_emb, [1, c, 1])  # [k, c, emb]
  pair_emb = tf.concat([target_emb, top_antecedent_emb, similarity_emb, feature_emb], 2)  # [k, c, emb]
  with tf.variable_scope("slow_antecedent_scores"):
    slow_antecedent_scores = util.ffnn(pair_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout)  # [k, c, 1]
  slow_antecedent_scores = tf.squeeze(slow_antecedent_scores, 2)  # [k, c]
  return slow_antecedent_scores  # [k, c]
def get_antecedent_scores(self, mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb, text_emb, text_outputs, context_pre_starts, context_pos_ends):
  num_mentions = util.shape(mention_emb, 0)
  max_antecedents = util.shape(antecedents, 1)
  feature_emb_list = []
  if self.config["use_metadata"]:
    antecedent_speaker_ids = tf.gather(mention_speaker_ids, antecedents)  # [num_mentions, max_ant]
    same_speaker = tf.equal(tf.expand_dims(mention_speaker_ids, 1), antecedent_speaker_ids)  # [num_mentions, max_ant]
    speaker_pair_emb = tf.gather(tf.get_variable("same_speaker_emb", [2, self.config["feature_size"]]), tf.to_int32(same_speaker))  # [num_mentions, max_ant, emb]
    feature_emb_list.append(speaker_pair_emb)
    tiled_genre_emb = tf.tile(tf.expand_dims(tf.expand_dims(genre_emb, 0), 0), [num_mentions, max_antecedents, 1])  # [num_mentions, max_ant, emb]
    feature_emb_list.append(tiled_genre_emb)
  if self.config["use_features"]:
    target_indices = tf.range(num_mentions)  # [num_mentions]
    mention_distance = tf.expand_dims(target_indices, 1) - antecedents  # [num_mentions, max_ant]
    mention_distance_bins = coref_ops.distance_bins(mention_distance)  # [num_mentions, max_ant]
    mention_distance_bins.set_shape([None, None])
    mention_distance_emb = tf.gather(tf.get_variable("mention_distance_emb", [10, self.config["feature_size"]]), mention_distance_bins)  # [num_mentions, max_ant]
    feature_emb_list.append(mention_distance_emb)
  feature_emb = tf.concat(feature_emb_list, 2)  # [num_mentions, max_ant, emb]
  feature_emb = tf.nn.dropout(feature_emb, self.dropout)  # [num_mentions, max_ant, emb]

  ########### Context Embeddings #################
  context_pre_ends = mention_starts - 1
  context_pos_starts = mention_ends + 1
  context_pre_width = mention_starts - context_pre_starts
  context_pos_width = context_pos_ends - mention_ends
  context_start_emb = tf.gather(text_outputs, context_pre_starts)
  context_end_emb = tf.gather(text_outputs, context_pos_ends)
  context_output = tf.concat([context_start_emb, context_end_emb], 1)
  context_output = tf.tile(tf.expand_dims(context_output, 1), [1, self.config["max_context_width"], 1])
  mention_output = tf.tile(tf.expand_dims(mention_emb, 1), [1, self.config["max_context_width"], 1])
  # context_width = 1 + context_ends - context_starts
  # Integer division so tf.range receives an int for the half-window size.
  context_pre_indices = tf.expand_dims(tf.range(self.config["max_context_width"] // 2), 0) + tf.expand_dims(context_pre_starts, 1)  # [num_mentions, max_context_width / 2]
  context_pre_indices = tf.minimum(util.shape(text_outputs, 0) - 1, context_pre_indices)  # [num_mentions, max_context_width / 2]
  context_pre_mask = tf.expand_dims(tf.sequence_mask(context_pre_width, self.config["max_context_width"] // 2, dtype=tf.float32), 2)  # [num_mentions, max_context_width / 2, 1]
  context_pos_indices = tf.expand_dims(tf.range(self.config["max_context_width"] // 2), 0) + tf.expand_dims(context_pos_starts, 1)  # [num_mentions, max_context_width / 2]
  context_pos_indices = tf.minimum(util.shape(text_outputs, 0) - 1, context_pos_indices)  # [num_mentions, max_context_width / 2]
  context_pos_mask = tf.expand_dims(tf.sequence_mask(context_pos_width, self.config["max_context_width"] // 2, dtype=tf.float32), 2)  # [num_mentions, max_context_width / 2, 1]
  context_indices = tf.concat([context_pre_indices, context_pos_indices], 1)
  context_mask = tf.concat([context_pre_mask, context_pos_mask], 1)
  context_glove_emb = tf.gather(text_emb, context_indices)
  context_att_score = util.projection_name(tf.concat([context_glove_emb, context_output, mention_output], 2), 1, "context_att")
  context_attention = tf.nn.softmax(context_att_score + tf.log(context_mask), dim=1)  # [num_mentions, max_context_width, 1]
  context_emb = tf.reduce_sum(context_attention * context_glove_emb, 1)  # [num_mentions, emb]
  mention_emb = tf.concat([context_emb, mention_emb], 1)
  ################################################

  antecedent_emb = tf.gather(mention_emb, antecedents)  # [num_mentions, max_ant, emb]
  self.mention_emb_shape = tf.shape(mention_emb)
  self.mention_start_shape = tf.shape(antecedents)
  target_emb_tiled = tf.tile(tf.expand_dims(mention_emb, 1), [1, max_antecedents, 1])  # [num_mentions, max_ant, emb]
  similarity_emb = antecedent_emb * target_emb_tiled  # [num_mentions, max_ant, emb]
  pair_emb = tf.concat([target_emb_tiled, antecedent_emb, similarity_emb, feature_emb], 2)  # [num_mentions, max_ant, emb]

  with tf.variable_scope("iteration"):
    with tf.variable_scope("antecedent_scoring"):
      antecedent_scores = util.ffnn(pair_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout)  # [num_mentions, max_ant, 1]
  antecedent_scores = tf.squeeze(antecedent_scores, 2)  # [num_mentions, max_ant]
  antecedent_mask = tf.log(tf.sequence_mask(antecedents_len, max_antecedents, dtype=tf.float32))  # [num_mentions, max_ant]
  antecedent_scores += antecedent_mask  # [num_mentions, max_ant]
  antecedent_scores += tf.expand_dims(mention_scores, 1) + tf.gather(mention_scores, antecedents)  # [num_mentions, max_ant]
  antecedent_scores = tf.concat([tf.zeros([util.shape(mention_scores, 0), 1]), antecedent_scores], 1)  # [num_mentions, max_ant + 1]
  return antecedent_scores  # [num_mentions, max_ant + 1]
def get_mention_scores(self, mention_emb):
  with tf.variable_scope("mention_scores"):
    return util.ffnn(mention_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout)  # [num_mentions, 1]
def get_mention_proposal_and_loss(self, input_ids, input_mask, text_len, speaker_ids, genre, is_training,
                                  gold_starts, gold_ends, cluster_ids, sentence_map, span_mention=None):
  """Get mention proposals."""
  # Padded positions are marked with -1. Build loss masks from them, then clamp the padded
  # values back to 0 so the tensors can safely be used as indices,
  # e.g. gold_starts -> [1, 3, 5, 8, -1, -1, -1, -1] -> [1, 3, 5, 8, 0, 0, 0, 0].
  start_end_loss_mask = tf.cast(tf.where(tf.cast(tf.math.greater_equal(input_ids, tf.zeros_like(input_ids)), tf.bool),
                                         x=tf.ones_like(input_ids), y=tf.zeros_like(input_ids)), tf.float32)
  input_ids = tf.where(tf.cast(tf.math.greater_equal(input_ids, tf.zeros_like(input_ids)), tf.bool),
                       x=input_ids, y=tf.zeros_like(input_ids))
  input_mask = tf.where(tf.cast(tf.math.greater_equal(input_mask, tf.zeros_like(input_mask)), tf.bool),
                        x=input_mask, y=tf.zeros_like(input_mask))
  text_len = tf.where(tf.cast(tf.math.greater_equal(text_len, tf.zeros_like(text_len)), tf.bool),
                      x=text_len, y=tf.zeros_like(text_len))
  speaker_ids = tf.where(tf.cast(tf.math.greater_equal(speaker_ids, tf.zeros_like(speaker_ids)), tf.bool),
                         x=speaker_ids, y=tf.zeros_like(speaker_ids))
  gold_starts = tf.where(tf.cast(tf.math.greater_equal(gold_starts, tf.zeros_like(gold_starts)), tf.bool),
                         x=gold_starts, y=tf.zeros_like(gold_starts))
  gold_ends = tf.where(tf.cast(tf.math.greater_equal(gold_ends, tf.zeros_like(gold_ends)), tf.bool),
                       x=gold_ends, y=tf.zeros_like(gold_ends))
  cluster_ids = tf.where(tf.cast(tf.math.greater_equal(cluster_ids, tf.zeros_like(cluster_ids)), tf.bool),
                         x=cluster_ids, y=tf.zeros_like(cluster_ids))
  sentence_map = tf.where(tf.cast(tf.math.greater_equal(sentence_map, tf.zeros_like(sentence_map)), tf.bool),
                          x=sentence_map, y=tf.zeros_like(sentence_map))
  span_mention = tf.where(tf.cast(tf.math.greater_equal(span_mention, tf.zeros_like(span_mention)), tf.bool),
                          x=span_mention, y=tf.zeros_like(span_mention))
  span_mention_loss_mask = tf.cast(tf.where(tf.cast(tf.math.greater_equal(span_mention, tf.zeros_like(span_mention)), tf.bool),
                                            x=tf.ones_like(span_mention), y=tf.zeros_like(span_mention)), tf.float32)

  input_ids = tf.reshape(input_ids, [-1, self.config["max_segment_len"]])  # (max_train_sent, max_segment_len)
  input_mask = tf.reshape(input_mask, [-1, self.config["max_segment_len"]])  # (max_train_sent, max_segment_len)
  text_len = tf.reshape(text_len, [-1])  # (max_train_sent)
  speaker_ids = tf.reshape(speaker_ids, [-1, self.config["max_segment_len"]])  # (max_train_sent, max_segment_len)
  sentence_map = tf.reshape(sentence_map, [-1])  # (max_train_sent * max_segment_len)
  cluster_ids = tf.reshape(cluster_ids, [-1])  # (max_train_sent * max_segment_len)
  gold_starts = tf.reshape(gold_starts, [-1])  # (max_train_sent * max_segment_len)
  gold_ends = tf.reshape(gold_ends, [-1])  # (max_train_sent * max_segment_len)
  span_mention = tf.reshape(span_mention, [self.config["max_training_sentences"],
                                           self.config["max_segment_len"] * self.config["max_segment_len"]])
  # span_mention: (max_train_sent, max_segment_len * max_segment_len)

  model = modeling.BertModel(config=self.bert_config, is_training=is_training, input_ids=input_ids,
                             input_mask=input_mask, use_one_hot_embeddings=False, scope='bert')
  self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
  mention_doc = model.get_sequence_output()  # (max_train_sent, max_segment_len, hidden)
  mention_doc = self.flatten_emb_by_sentence(mention_doc, input_mask)  # keep only valid tokens: (max_train_sent * max_segment_len, emb)
  num_words = util.shape(mention_doc, 0)  # max_train_sent * max_segment_len

  seg_mention_doc = tf.reshape(mention_doc, [self.config["max_training_sentences"], self.config["max_segment_len"], -1])  # (max_train_sent, max_segment_len, embed)
  start_seg_mention_doc = tf.stack([seg_mention_doc] * self.config["max_segment_len"], axis=1)  # (max_train_sent, max_segment_len, max_segment_len, embed)
  end_seg_mention_doc = tf.stack([seg_mention_doc] * self.config["max_segment_len"], axis=2)  # (max_train_sent, max_segment_len, max_segment_len, embed)
  span_mention_doc = tf.concat([start_seg_mention_doc, end_seg_mention_doc], axis=-1)  # (max_train_sent, max_segment_len, max_segment_len, embed * 2)
  span_mention_doc = tf.reshape(span_mention_doc,
                                (self.config["max_training_sentences"] * self.config["max_segment_len"] * self.config["max_segment_len"], -1))
  # (max_train_sent * max_segment_len * max_segment_len, embed * 2)

  with tf.variable_scope("span_scores", reuse=tf.AUTO_REUSE):
    # score of each candidate span
    span_scores = util.ffnn(span_mention_doc, self.config["ffnn_depth"], self.config["ffnn_size"] * 2, 1, self.dropout)  # (max_train_sent * max_segment_len * max_segment_len, 1)
  with tf.variable_scope("start_scores", reuse=tf.AUTO_REUSE):
    # score of each candidate start token
    start_scores = util.ffnn(mention_doc, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout)  # (max_train_sent * max_segment_len, 1)
  with tf.variable_scope("end_scores", reuse=tf.AUTO_REUSE):
    # score of each candidate end token
    end_scores = util.ffnn(mention_doc, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout)  # (max_train_sent * max_segment_len, 1)

  gold_start_label = tf.reshape(gold_starts, [-1, 1])  # gold_starts -> [1, 3, 5, 8, -1, -1, -1, -1]
  start_value = tf.reshape(tf.ones_like(gold_starts), [-1])
  start_shape = tf.constant([self.config["max_training_sentences"] * self.config["max_segment_len"]])
  gold_start_label = tf.cast(tf.scatter_nd(gold_start_label, start_value, start_shape), tf.int32)
  # gold_start_label = tf.boolean_mask(gold_start_label, tf.reshape(input_mask, [-1]))
  gold_end_label = tf.reshape(gold_ends, [-1, 1])
  end_value = tf.reshape(tf.ones_like(gold_ends), [-1])
  end_shape = tf.constant([self.config["max_training_sentences"] * self.config["max_segment_len"]])
  gold_end_label = tf.cast(tf.scatter_nd(gold_end_label, end_value, end_shape), tf.int32)
  # gold_end_label = tf.boolean_mask(gold_end_label, tf.reshape(input_mask, [-1]))

  start_scores = tf.cast(tf.reshape(tf.sigmoid(start_scores), [-1]), tf.float32)
  end_scores = tf.cast(tf.reshape(tf.sigmoid(end_scores), [-1]), tf.float32)
  span_scores = tf.cast(tf.reshape(tf.sigmoid(span_scores), [-1]), tf.float32)
  # span_mention = tf.cast(span_mention, tf.float32)
  start_scores = tf.stack([(1 - start_scores), start_scores], axis=-1)
  end_scores = tf.stack([(1 - end_scores), end_scores], axis=-1)
  span_scores = tf.stack([(1 - span_scores), span_scores], axis=-1)
  gold_start_label = tf.cast(tf.one_hot(tf.reshape(gold_start_label, [-1]), 2, axis=-1), tf.float32)
  gold_end_label = tf.cast(tf.one_hot(tf.reshape(gold_end_label, [-1]), 2, axis=-1), tf.float32)
  span_mention = tf.cast(tf.one_hot(tf.reshape(span_mention, [-1]), 2, axis=-1), tf.float32)

  start_end_loss_mask = tf.reshape(start_end_loss_mask, [-1])
  start_loss = self.bce_loss(y_pred=start_scores, y_true=gold_start_label)
  end_loss = self.bce_loss(y_pred=end_scores, y_true=gold_end_label)
  span_loss = self.bce_loss(y_pred=span_scores, y_true=span_mention)
  start_loss = tf.reduce_mean(tf.multiply(start_loss, tf.cast(start_end_loss_mask, tf.float32)))
  end_loss = tf.reduce_mean(tf.multiply(end_loss, tf.cast(start_end_loss_mask, tf.float32)))
  span_loss = tf.reduce_mean(tf.multiply(span_loss, tf.cast(span_mention_loss_mask, tf.float32)))

  if span_mention is None:
    loss = self.config["start_ratio"] * start_loss + self.config["end_ratio"] * end_loss
    return loss, start_scores, end_scores
  else:
    loss = self.config["start_ratio"] * start_loss + self.config["end_ratio"] * end_loss + self.config["mention_ratio"] * span_loss
    return loss, start_scores, end_scores, span_scores
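# The bce_loss helper used above is not part of this excerpt. Since both the scores and the labels
# are stacked into a two-class [1 - p, p] / one-hot form, a plausible sketch (an assumption, not the
# original helper) is an elementwise cross-entropy that returns one loss value per position:
import tensorflow as tf

def bce_loss(y_pred, y_true):
  """Elementwise binary cross-entropy over stacked [1 - p, p] predictions and one-hot targets."""
  return -tf.reduce_sum(y_true * tf.log(y_pred + 1e-10), axis=-1)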
def get_context_antecedent_scores(self, mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb, context_starts, context_ends, text_outputs, text_emb):
  num_mentions = util.shape(mention_emb, 0)
  max_antecedents = util.shape(antecedents, 1)
  self.num_words = tf.shape(text_outputs)
  self.num_mentions = num_mentions

  feature_emb_list = []
  if self.config["use_metadata"]:
    antecedent_speaker_ids = tf.gather(mention_speaker_ids, antecedents)  # [num_mentions, max_ant]
    same_speaker = tf.equal(tf.expand_dims(mention_speaker_ids, 1), antecedent_speaker_ids)  # [num_mentions, max_ant]
    speaker_pair_emb = tf.gather(tf.get_variable("same_speaker_emb", [2, self.config["feature_size"]]), tf.to_int32(same_speaker))  # [num_mentions, max_ant, emb]
    feature_emb_list.append(speaker_pair_emb)
    tiled_genre_emb = tf.tile(tf.expand_dims(tf.expand_dims(genre_emb, 0), 0), [num_mentions, max_antecedents, 1])  # [num_mentions, max_ant, emb]
    feature_emb_list.append(tiled_genre_emb)
  if self.config["use_features"]:
    target_indices = tf.range(num_mentions)  # [num_mentions]
    mention_distance = tf.expand_dims(target_indices, 1) - antecedents  # [num_mentions, max_ant]
    mention_distance_bins = coref_ops.distance_bins(mention_distance)  # [num_mentions, max_ant]
    mention_distance_bins.set_shape([None, None])
    mention_distance_emb = tf.gather(tf.get_variable("mention_distance_emb", [10, self.config["feature_size"]]), mention_distance_bins)  # [num_mentions, max_ant]
    feature_emb_list.append(mention_distance_emb)
  feature_emb = tf.concat(feature_emb_list, 2)  # [num_mentions, max_ant, emb]
  feature_emb = tf.nn.dropout(feature_emb, self.dropout)  # [num_mentions, max_ant, emb]

  #############################
  #
  # Get matrix for co-attention
  #
  #############################

  ####### Mention Level #######
  mention_start_emb = tf.gather(text_outputs, mention_starts)  # [num_mentions, emb]
  mention_end_emb = tf.gather(text_outputs, mention_ends)  # [num_mentions, emb]
  mention_features = tf.concat([mention_start_emb, mention_end_emb], 1)
  mention_width = 1 + mention_ends - mention_starts  # [num_mentions]
  mention_indices = tf.expand_dims(tf.range(self.config["max_mention_width"]), 0) + tf.expand_dims(mention_starts, 1)  # [num_mentions, max_mention_width]
  mention_indices = tf.minimum(util.shape(text_outputs, 0) - 1, mention_indices)  # [num_mentions, max_mention_width]
  mention_mask = tf.expand_dims(tf.sequence_mask(mention_width, self.config["max_mention_width"], dtype=tf.float32), 2)  # [num_mentions, max_mention_width, 1]

  antecedent_indices = tf.gather(mention_indices, antecedents)
  antecedent_mask = tf.gather(mention_mask, antecedents)
  antecedent_indices_emb = tf.gather(text_outputs, antecedent_indices)
  target_indices = tf.tile(tf.expand_dims(mention_indices, 1), [1, max_antecedents, 1])
  target_mask = tf.tile(tf.expand_dims(mention_mask, 1), [1, max_antecedents, 1, 1])
  target_indices_emb = tf.gather(text_outputs, target_indices)

  ####### Context Level #######
  context_start_emb = tf.gather(text_outputs, context_starts)
  context_end_emb = tf.gather(text_outputs, context_ends)
  context_width = 1 + context_ends - context_starts
  context_indices = tf.expand_dims(tf.range(self.config["max_context_width"]), 0) + tf.expand_dims(context_starts, 1)  # [num_mentions, max_context_width]
  context_indices = tf.minimum(util.shape(text_outputs, 0) - 1, context_indices)  # [num_mentions, max_context_width]
  context_mask = tf.expand_dims(tf.sequence_mask(context_width, self.config["max_context_width"], dtype=tf.float32), 2)  # [num_mentions, max_context_width, 1]

  antecedent_context_indices = tf.gather(context_indices, antecedents)
  antecedent_context_mask = tf.gather(context_mask, antecedents)
  antecedent_context_indices_emb = tf.gather(text_outputs, antecedent_context_indices)
  target_context_indices = tf.tile(tf.expand_dims(context_indices, 1), [1, max_antecedents, 1])
  target_context_mask = tf.tile(tf.expand_dims(context_mask, 1), [1, max_antecedents, 1, 1])
  target_context_indices_emb = tf.gather(text_outputs, target_context_indices)

  #### Initial Embeddings #####
  antecedent_emb = tf.gather(mention_emb, antecedents)  # [num_mentions, max_ant, emb]
  target_emb_tiled = tf.tile(tf.expand_dims(mention_emb, 1), [1, max_antecedents, 1])  # [num_mentions, max_ant, emb]
  context_emb = tf.concat([context_start_emb, context_end_emb], 1)
  antecedent_context_emb = tf.gather(context_emb, antecedents)  # [num_mentions, max_ant, emb]
  target_context_emb_tiled = tf.tile(tf.expand_dims(context_emb, 1), [1, max_antecedents, 1])  # [num_mentions, max_ant, emb]
  similarity_emb = antecedent_emb * target_emb_tiled  # [num_mentions, max_ant, emb]

  #############################
  #
  # Calculate Co-attention
  #
  #############################

  ###### C_a Attention ########
  window_emb = tf.concat([antecedent_emb, target_emb_tiled, target_context_emb_tiled], 2)
  window_scores = util.projection_name(window_emb, 100, 'c_a_window')
  window_scores = tf.tile(tf.expand_dims(window_scores, 2), [1, 1, self.config['max_context_width'], 1])
  target_scores = util.projection_name(antecedent_context_indices_emb, 100, 'c_a_target')
  temp_scores = util.projection_name(window_scores + target_scores, 1, 'att_score')
  temp_att = tf.nn.softmax(temp_scores + tf.log(antecedent_context_mask), dim=2)  # [num_mentions, max_ant, max_context_width, 1]
  antecedent_context_emb = tf.reduce_sum(temp_att * tf.gather(text_emb, antecedent_context_indices), 2)

  ###### C_t Attention ########
  window_emb = tf.concat([antecedent_emb, target_emb_tiled, antecedent_context_emb], 2)
  window_scores = util.projection_name(window_emb, 100, 'c_t_window')
  window_scores = tf.tile(tf.expand_dims(window_scores, 2), [1, 1, self.config['max_context_width'], 1])
  target_scores = util.projection_name(target_context_indices_emb, 100, 'c_t_target')
  temp_scores = util.projection_name(window_scores + target_scores, 1, 'att_score')
  temp_att = tf.nn.softmax(temp_scores + tf.log(target_context_mask), dim=2)  # [num_mentions, max_ant, max_context_width, 1]
  target_context_emb_tiled = tf.reduce_sum(temp_att * tf.gather(text_emb, target_context_indices), 2)

  ###### M_t Attention ########
  window_emb = tf.concat([antecedent_emb, antecedent_context_emb, target_context_emb_tiled], 2)
  window_scores = util.projection_name(window_emb, 100, 'm_t_window')
  window_scores = tf.tile(tf.expand_dims(window_scores, 2), [1, 1, self.config['max_mention_width'], 1])
  target_scores = util.projection_name(target_indices_emb, 100, 'm_t_target')
  temp_scores = util.projection_name(window_scores + target_scores, 1, 'att_score')
  temp_att = tf.nn.softmax(temp_scores + tf.log(target_mask), dim=2)  # [num_mentions, max_ant, max_mention_width, 1]
  target_emb_tiled = tf.reduce_sum(temp_att * tf.gather(text_emb, target_indices), 2)

  ###### M_a Attention ########
  window_emb = tf.concat([target_emb_tiled, target_context_emb_tiled, antecedent_context_emb], 2)
  window_scores = util.projection_name(window_emb, 100, 'm_a_window')
  window_scores = tf.tile(tf.expand_dims(window_scores, 2), [1, 1, self.config['max_mention_width'], 1])
  target_scores = util.projection_name(antecedent_indices_emb, 100, 'm_a_target')
  temp_scores = util.projection_name(window_scores + target_scores, 1, 'att_score')
  temp_att = tf.nn.softmax(temp_scores + tf.log(antecedent_mask), dim=2)  # [num_mentions, max_ant, max_mention_width, 1]
  antecedent_emb = tf.reduce_sum(temp_att * tf.gather(text_emb, antecedent_indices), 2)

  #############################
  #
  # Calculate Pair Embeddings
  #
  #############################
  antecedent_feature = tf.gather(mention_features, antecedents)  # [num_mentions, max_ant, emb]
  target_feature = tf.tile(tf.expand_dims(mention_features, 1), [1, max_antecedents, 1])  # [num_mentions, max_ant, emb]
  # similarity_emb = antecedent_emb * target_emb_tiled  # [num_mentions, max_ant, emb]
  # pair_emb = tf.concat([target_emb_tiled_1, antecedent_emb_1, similarity_emb, feature_emb], 2)  # [num_mentions, max_ant, emb]
  pair_emb = tf.concat([target_feature, target_emb_tiled, antecedent_feature, antecedent_emb,
                        antecedent_context_emb, target_context_emb_tiled, similarity_emb, feature_emb], 2)
  '''
  pair_emb = tf.nn.relu(util.projection_name(target_emb_tiled, self.config['ffnn_size'], 'comp_mt') +
                        util.projection_name(antecedent_emb, self.config['ffnn_size'], 'comp_ma') +
                        util.projection_name(antecedent_context_emb_1, self.config['ffnn_size'], 'comp_ca') +
                        util.projection_name(target_context_emb_tiled_1, self.config['ffnn_size'], 'comp_ct') +
                        util.projection_name(similarity_emb, self.config['ffnn_size'], 'comp_sim') +
                        util.projection_name(feature_emb, self.config['ffnn_size'], 'comp_feature'))
  '''
  #############################

  with tf.variable_scope("iteration"):
    with tf.variable_scope("antecedent_scoring"):
      antecedent_scores = util.ffnn(pair_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout)  # [num_mentions, max_ant, 1]
  antecedent_scores = tf.squeeze(antecedent_scores, 2)  # [num_mentions, max_ant]
  antecedent_mask = tf.log(tf.sequence_mask(antecedents_len, max_antecedents, dtype=tf.float32))  # [num_mentions, max_ant]
  antecedent_scores += antecedent_mask  # [num_mentions, max_ant]
  antecedent_scores += tf.expand_dims(mention_scores, 1) + tf.gather(mention_scores, antecedents)  # [num_mentions, max_ant]
  antecedent_scores = tf.concat([tf.zeros([util.shape(mention_scores, 0), 1]), antecedent_scores], 1)  # [num_mentions, max_ant + 1]
  return antecedent_scores  # [num_mentions, max_ant + 1]
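# util.projection_name appears only in the two attention variants above and is not defined in this
# excerpt. A sketch of a named linear projection with that call signature (an assumption; reuse is
# enabled since the 'att_score' scope is entered several times) could look like this:
import tensorflow as tf

def projection_name(inputs, output_size, name):
  """Linear projection over the last dimension, under a named (reusable) variable scope."""
  input_size = inputs.get_shape()[-1].value
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    weights = tf.get_variable("projection_weights", [input_size, output_size])
    bias = tf.get_variable("projection_bias", [output_size])
  flat = tf.reshape(inputs, [-1, input_size])
  outputs = tf.nn.xw_plus_b(flat, weights, bias)
  output_shape = tf.concat([tf.shape(inputs)[:-1], [output_size]], axis=0)
  return tf.reshape(outputs, output_shape)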
def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb, lm_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids, scene_emb, genders, fpronouns):
  self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
  self.lexical_dropout = self.get_dropout(self.config["lexical_dropout_rate"], is_training)
  self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training)

  num_sentences = tf.shape(context_word_emb)[0]
  max_sentence_length = tf.shape(context_word_emb)[1]

  context_emb_list = [context_word_emb]
  head_emb_list = [head_word_emb]

  if self.config["char_embedding_size"] > 0:
    char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index)  # [num_sentences, max_sentence_length, max_word_length, emb]
    flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)])  # [num_sentences * max_sentence_length, max_word_length, emb]
    flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"])  # [num_sentences * max_sentence_length, emb]
    aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)])  # [num_sentences, max_sentence_length, emb]
    context_emb_list.append(aggregated_char_emb)
    head_emb_list.append(aggregated_char_emb)

  if not self.lm_file:
    elmo_module = hub.Module("https://tfhub.dev/google/elmo/2")
    lm_embeddings = elmo_module(inputs={"tokens": tokens, "sequence_len": text_len}, signature="tokens", as_dict=True)
    word_emb = lm_embeddings["word_emb"]  # [num_sentences, max_sentence_length, 512]
    lm_emb = tf.stack([tf.concat([word_emb, word_emb], -1), lm_embeddings["lstm_outputs1"], lm_embeddings["lstm_outputs2"]], -1)  # [num_sentences, max_sentence_length, 1024, 3]
  lm_emb_size = util.shape(lm_emb, 2)
  lm_num_layers = util.shape(lm_emb, 3)
  with tf.variable_scope("lm_aggregation"):
    self.lm_weights = tf.nn.softmax(tf.get_variable("lm_scores", [lm_num_layers], initializer=tf.constant_initializer(0.0)))
    self.lm_scaling = tf.get_variable("lm_scaling", [], initializer=tf.constant_initializer(1.0))
  flattened_lm_emb = tf.reshape(lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
  flattened_aggregated_lm_emb = tf.matmul(flattened_lm_emb, tf.expand_dims(self.lm_weights, 1))  # [num_sentences * max_sentence_length * emb, 1]
  aggregated_lm_emb = tf.reshape(flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size])
  aggregated_lm_emb *= self.lm_scaling
  context_emb_list.append(aggregated_lm_emb)

  context_emb = tf.concat(context_emb_list, 2)  # [num_sentences, max_sentence_length, emb]
  head_emb = tf.concat(head_emb_list, 2)  # [num_sentences, max_sentence_length, emb]
  context_emb = tf.nn.dropout(context_emb, self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
  head_emb = tf.nn.dropout(head_emb, self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

  text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)  # [num_sentences, max_sentence_length]

  context_outputs = self.lstm_contextualize(context_emb, text_len, text_len_mask)  # [num_words, emb]
  num_words = util.shape(context_outputs, 0)

  genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre)  # [emb]

  sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length])  # [num_sentences, max_sentence_length]
  flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask)  # [num_words]
  flattened_head_emb = self.flatten_emb_by_sentence(head_emb, text_len_mask)  # [num_words]

  candidate_starts = tf.tile(tf.expand_dims(tf.range(num_words), 1), [1, self.max_span_width])  # [num_words, max_span_width]
  candidate_ends = candidate_starts + tf.expand_dims(tf.range(self.max_span_width), 0)  # [num_words, max_span_width]

  # debug
  prev_can_st = candidate_starts
  prev_can_ends = candidate_ends
  # debug

  candidate_start_sentence_indices = tf.gather(flattened_sentence_indices, candidate_starts)  # [num_words, max_span_width]
  candidate_end_sentence_indices = tf.gather(flattened_sentence_indices, tf.minimum(candidate_ends, num_words - 1))  # [num_words, max_span_width]
  candidate_mask = tf.logical_and(candidate_ends < num_words, tf.equal(candidate_start_sentence_indices, candidate_end_sentence_indices))  # [num_words, max_span_width]
  flattened_candidate_mask = tf.reshape(candidate_mask, [-1])  # [num_words * max_span_width]

  candidate_starts = tf.boolean_mask(tf.reshape(candidate_starts, [-1]), flattened_candidate_mask)  # [num_candidates]
  candidate_ends = tf.boolean_mask(tf.reshape(candidate_ends, [-1]), flattened_candidate_mask)  # [num_candidates]

  combined_candidate_st = candidate_starts * 10000 + candidate_ends
  combined_gold_st = gold_starts * 10000 + gold_ends
  _, non_top_span_list = tf.setdiff1d(combined_candidate_st, combined_gold_st)  # [num_candidates - num_gold_mentions]
  whole_candidate_indices_list = tf.range(util.shape(candidate_starts, 0))  # [num_candidates]
  gold_span_indices, _ = tf.setdiff1d(whole_candidate_indices_list, non_top_span_list)  # [num_gold_mentions]

  candidate_sentence_indices = tf.boolean_mask(tf.reshape(candidate_start_sentence_indices, [-1]), flattened_candidate_mask)  # [num_candidates]

  candidate_cluster_ids = self.get_candidate_labels(candidate_starts, candidate_ends, gold_starts, gold_ends, cluster_ids)  # [num_candidates]
  candidate_span_emb = self.get_span_emb(flattened_head_emb, context_outputs, candidate_starts, candidate_ends)  # [num_candidates, emb]

  # Video scene embeddings
  ffnn_scene_emb = util.ffnn(scene_emb, num_hidden_layers=self.config["ffnn_depth"], hidden_size=400, output_size=128, dropout=self.dropout)  # [num_words, 128]
  candidate_scene_emb = self.get_scene_emb(ffnn_scene_emb, candidate_starts)  # [num_candidates, 128]

  '''
  # Comment: this part computes mention scores and prunes mentions.
  # It is not used for this task, because mention boundaries are given.
  candidate_mention_scores = self.get_mention_scores(candidate_span_emb)  # [k, 1]
  candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1)  # [k]
  k = tf.to_int32(tf.floor(tf.to_float(tf.shape(context_outputs)[0]) * self.config["top_span_ratio"]))
  top_span_indices = coref_ops.extract_spans(tf.expand_dims(candidate_mention_scores, 0),
                                             tf.expand_dims(candidate_starts, 0),
                                             tf.expand_dims(candidate_ends, 0),
                                             tf.expand_dims(k, 0),
                                             util.shape(context_outputs, 0),
                                             True)  # [1, k]
  top_span_indices.set_shape([1, None])
  top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]
  '''

  ######## Only using gold span indices ########
  k = tf.to_int32(util.shape(gold_span_indices, 0))
  top_span_indices = gold_span_indices
  ###############################################

  top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
  top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
  top_span_emb = tf.gather(candidate_span_emb, top_span_indices)  # [k, emb]
  top_scene_emb = tf.gather(candidate_scene_emb, top_span_indices)  # [k, emb-scene]
  top_span_cluster_ids = tf.gather(candidate_cluster_ids, top_span_indices)  # [k]
  # top_span_mention_scores = tf.gather(candidate_mention_scores, top_span_indices)  # [k]
  top_span_sentence_indices = tf.gather(candidate_sentence_indices, top_span_indices)  # [k]
  top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts)  # [k]
  top_span_genders = tf.gather(genders, top_span_ends)
  top_span_fpronouns = tf.gather(fpronouns, top_span_ends)

  # k: total number of candidate spans (M in the paper)
  # c: how many antecedents we check (K in the paper)
  c = tf.minimum(self.config["max_top_antecedents"], k)

  if self.config["coarse_to_fine"]:
    top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(top_span_emb, top_span_mention_scores, c)
  else:
    # top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(top_span_emb, top_span_mention_scores, c)
    top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_prnuing_wo_mention_score(top_span_emb, c)

  dummy_scores = tf.zeros([k, 1])  # [k, 1]
  for i in range(self.config["coref_depth"]):
    with tf.variable_scope("coref_layer", reuse=(i > 0)):
      top_antecedent_emb = tf.gather(top_span_emb, top_antecedents)  # [k, c, emb]
      top_antecedent_scene_emb = tf.gather(top_scene_emb, top_antecedents)  # [k, c, emb-scene]
      top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(top_span_emb, top_antecedents, top_antecedent_emb, top_antecedent_offsets, top_span_speaker_ids, genre_emb, top_scene_emb, top_antecedent_scene_emb, top_span_genders, top_span_fpronouns)  # [k, c]
      top_antecedent_weights = tf.nn.softmax(tf.concat([dummy_scores, top_antecedent_scores], 1))  # [k, c + 1]
      top_antecedent_emb = tf.concat([tf.expand_dims(top_span_emb, 1), top_antecedent_emb], 1)  # [k, c + 1, emb]
      attended_span_emb = tf.reduce_sum(tf.expand_dims(top_antecedent_weights, 2) * top_antecedent_emb, 1)  # [k, emb]
      with tf.variable_scope("f"):
        f = tf.sigmoid(util.projection(tf.concat([top_span_emb, attended_span_emb], 1), util.shape(top_span_emb, -1)))  # [k, emb]
        top_span_emb = f * attended_span_emb + (1 - f) * top_span_emb  # [k, emb]

  top_antecedent_scores = tf.concat([dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

  top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids, top_antecedents)  # [k, c]
  top_antecedent_cluster_ids += tf.to_int32(tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
  same_cluster_indicator = tf.equal(top_antecedent_cluster_ids, tf.expand_dims(top_span_cluster_ids, 1))  # [k, c]
  non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0, 1)  # [k, 1]
  pairwise_labels = tf.logical_and(same_cluster_indicator, non_dummy_indicator)  # [k, c]
  dummy_labels = tf.logical_not(tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
  top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels], 1)  # [k, c + 1]
  top_antecedent_prob = tf.nn.softmax(top_antecedent_scores, 1)  # [k, c + 1]

  if self.config["use_gender_logic_rule"]:
    top_antecedent_prob_with_logic = self.project_logic_rule(top_antecedent_prob, top_span_genders, top_span_fpronouns, top_span_speaker_ids, top_antecedents, k)
    '''
    marginal_prob = tf.reduce_sum(top_antecedent_prob * tf.to_float(top_antecedent_labels), axis=1)
    gold_loss = -1 * tf.reduce_sum(tf.log(marginal_prob))
    top_antecedent_scores = top_antecedent_prob
    '''
    origin_loss = self.softmax_loss(top_antecedent_scores, top_antecedent_labels)  # [k]
    origin_loss = tf.reduce_sum(origin_loss)
    # cross entropy: -1 * ground_truth * log(prediction)
    # teacher_loss = tf.reduce_min(tf.nn. (labels=top_antecedent_prob_with_logic, logits=top_antecedent_scores))
    teacher_loss = tf.reduce_sum(-tf.reduce_sum(top_antecedent_prob_with_logic * tf.log(top_antecedent_prob + 1e-10), reduction_indices=[1]))
    pi = tf.minimum(self.config["logic_rule_pi_zero"], 1.0 - tf.pow(self.config["logic_rule_imitation_alpha"], tf.to_float(self.global_step) + 1.0))
    # For validation loss
    marginal_prob = tf.reduce_sum(top_antecedent_prob_with_logic * tf.to_float(top_antecedent_labels), axis=1)
    validation_loss = -1 * tf.reduce_sum(tf.log(marginal_prob))
    # loss = teacher_loss + origin_loss
    loss = tf.where(is_training, pi * teacher_loss + (1.0 - pi) * origin_loss, validation_loss)
    top_antecedent_scores = top_antecedent_prob_with_logic
  else:
    loss = self.softmax_loss(top_antecedent_scores, top_antecedent_labels)  # [k]
    loss = tf.reduce_sum(loss)  # []
    teacher_loss = loss
    origin_loss = loss

  return [candidate_starts, candidate_ends, top_span_starts, top_span_ends, top_antecedents, top_antecedent_scores, teacher_loss, origin_loss], loss
def get_predictions_and_loss(self, context_word_emb, head_word_emb, lm_emb, char_index,
                             text_len, speaker_ids, genre, is_training, gold_starts,
                             gold_ends, cluster_ids, bridging_ante_cids, is_status, us_mask):
    self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
    self.lexical_dropout = self.get_dropout(self.config["lexical_dropout_rate"], is_training)
    self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training)

    num_sentences = tf.shape(context_word_emb)[0]
    max_sentence_length = tf.shape(context_word_emb)[1]

    context_emb_list = [context_word_emb]
    head_emb_list = [head_word_emb]
    if self.config["char_embedding_size"] > 0:
        char_emb = tf.gather(
            tf.get_variable("char_embeddings",
                            [len(self.char_dict), self.config["char_embedding_size"]]),
            char_index)  # [num_sentences, max_sentence_length, max_word_length, emb]
        flattened_char_emb = tf.reshape(char_emb, [
            num_sentences * max_sentence_length,
            util.shape(char_emb, 2),
            util.shape(char_emb, 3)
        ])  # [num_sentences * max_sentence_length, max_word_length, emb]
        flattened_aggregated_char_emb = util.cnn(
            flattened_char_emb, self.config["filter_widths"],
            self.config["filter_size"])  # [num_sentences * max_sentence_length, emb]
        aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
            num_sentences, max_sentence_length,
            util.shape(flattened_aggregated_char_emb, 1)
        ])  # [num_sentences, max_sentence_length, emb]
        context_emb_list.append(aggregated_char_emb)
        head_emb_list.append(aggregated_char_emb)

    if self.lm_file:
        lm_emb_size = util.shape(lm_emb, 2)
        lm_num_layers = util.shape(lm_emb, 3)
        with tf.variable_scope("lm_aggregation"):
            self.lm_weights = tf.nn.softmax(
                tf.get_variable("lm_scores", [lm_num_layers],
                                initializer=tf.constant_initializer(0.0)))
            self.lm_scaling = tf.get_variable("lm_scaling", [],
                                              initializer=tf.constant_initializer(1.0))
        flattened_lm_emb = tf.reshape(
            lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
        flattened_aggregated_lm_emb = tf.matmul(
            flattened_lm_emb,
            tf.expand_dims(self.lm_weights, 1))  # [num_sentences * max_sentence_length * emb, 1]
        aggregated_lm_emb = tf.reshape(flattened_aggregated_lm_emb,
                                       [num_sentences, max_sentence_length, lm_emb_size])
        aggregated_lm_emb *= self.lm_scaling
        context_emb_list.append(aggregated_lm_emb)

    context_emb = tf.concat(context_emb_list, 2)  # [num_sentences, max_sentence_length, emb]
    head_emb = tf.concat(head_emb_list, 2)  # [num_sentences, max_sentence_length, emb]
    context_emb = tf.nn.dropout(context_emb, self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
    head_emb = tf.nn.dropout(head_emb, self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)  # [num_sentences, max_sentence_length]

    context_outputs = self.lstm_contextualize(context_emb, text_len, text_len_mask)  # [num_words, emb]
    genre_emb = tf.gather(
        tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]),
        genre)  # [emb]
    flattened_head_emb = self.flatten_emb_by_sentence(head_emb, text_len_mask)  # [num_words, emb]

    top_span_starts = gold_starts
    top_span_ends = gold_ends
    top_span_cluster_ids = cluster_ids
    top_span_emb = self.get_span_emb(flattened_head_emb, context_outputs,
                                     top_span_starts, top_span_ends)
    top_span_mention_scores = tf.zeros_like(gold_starts, dtype=tf.float32)  # [k]
    top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts)
    top_span_bridging_ante_cids = bridging_ante_cids
    top_us_mask = us_mask
    top_is_status = is_status

    k = util.shape(top_span_starts, 0)
    c = tf.minimum(self.config["max_top_antecedents"], k)

    top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = \
        self.distance_pruning(top_span_emb, top_span_mention_scores, c)

    top_antecedent_emb = tf.gather(top_span_emb, top_antecedents)  # [k, c, emb]
    pair_emb = self.get_pair_embeddings(top_span_emb, top_antecedents, top_antecedent_emb,
                                        top_antecedent_offsets, top_span_speaker_ids,
                                        genre_emb)  # [k, c, emb]
    top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids, top_antecedents)  # [k, c]
    top_antecedent_cluster_ids += tf.to_int32(tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]

    shared_depth = 0
    if self.config["shared_depth"] > 0:
        flattened_pair_emb = tf.reshape(pair_emb, [k * c, util.shape(pair_emb, 2)])
        shared_depth = min(self.config["shared_depth"], self.config["ffnn_depth"])
        for i in range(shared_depth):
            hidden_weights = tf.get_variable(
                "shared_hidden_weights_{}".format(i),
                [util.shape(flattened_pair_emb, 1), self.config["ffnn_size"]])
            hidden_bias = tf.get_variable("shared_hidden_bias_{}".format(i),
                                          [self.config["ffnn_size"]])
            flattened_pair_emb = tf.nn.relu(
                tf.nn.xw_plus_b(flattened_pair_emb, hidden_weights, hidden_bias))
            flattened_pair_emb = tf.nn.dropout(flattened_pair_emb, self.dropout)
        pair_emb = tf.reshape(flattened_pair_emb, [k, c, self.config["ffnn_size"]])

    ante_score_list = []
    pairwise_label_list = []
    dummy_scores = tf.zeros([k, 1])  # [k, 1]
    ante_score_list.append(dummy_scores)

    with tf.variable_scope("slow_bridging_scores"):
        slow_bridging_scores = util.ffnn(pair_emb, self.config["ffnn_depth"] - shared_depth,
                                         self.config["ffnn_size"], 1, self.dropout)  # [k, c, 1]
        slow_bridging_scores = tf.squeeze(slow_bridging_scores, 2)  # [k, c]
    top_bridging_scores = slow_bridging_scores + top_fast_antecedent_scores
    ante_score_list.append(top_bridging_scores)

    bridging_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                          tf.expand_dims(top_span_bridging_ante_cids, 1))  # [k, c]
    non_dummy_bridging_indicator = tf.expand_dims(top_span_bridging_ante_cids > 0, 1)  # [k, 1]
    bridging_pairwise_labels = tf.logical_and(bridging_cluster_indicator,
                                              non_dummy_bridging_indicator)  # [k, c]
    pairwise_label_list.append(bridging_pairwise_labels)

    if self.config["train_with_coref"]:
        with tf.variable_scope("slow_coreference_scores"):
            slow_coref_scores = util.ffnn(pair_emb, self.config["ffnn_depth"] - shared_depth,
                                          self.config["ffnn_size"], 1, self.dropout)  # [k, c, 1]
            slow_coref_scores = tf.squeeze(slow_coref_scores, 2)  # [k, c]
        top_coref_scores = slow_coref_scores + top_fast_antecedent_scores
        ante_score_list.append(top_coref_scores)

        coref_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                           tf.expand_dims(top_span_cluster_ids, 1))  # [k, c]
        non_dummy_coref_indicator = tf.expand_dims(top_span_cluster_ids > 0, 1)  # [k, 1]
        coref_pairwise_labels = tf.logical_and(coref_cluster_indicator,
                                               non_dummy_coref_indicator)  # [k, c]
        pairwise_label_list.append(coref_pairwise_labels)

    top_antecedent_scores = tf.concat(ante_score_list, 1)  # [k, c + 1] or [k, 2*c + 1]
    pairwise_labels = tf.concat(pairwise_label_list, 1)  # [k, c] or [k, 2*c]

    top_antecedent_scores = tf.boolean_mask(top_antecedent_scores, top_us_mask)
    pairwise_labels = tf.boolean_mask(pairwise_labels, top_us_mask)

    dummy_labels = tf.logical_not(tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
    pairwise_labels = tf.concat([dummy_labels, pairwise_labels], 1)  # [k, c + 1] or [k, 2*c + 1]

    loss = self.softmax_loss(top_antecedent_scores, pairwise_labels)
    loss = tf.reduce_sum(loss)

    if self.config["use_gold_bridging_anaphora"]:
        bridging_mask = tf.equal(top_is_status, 2)  # bridging
        top_span_starts = tf.boolean_mask(top_span_starts, bridging_mask)
        top_span_ends = tf.boolean_mask(top_span_ends, bridging_mask)
        top_antecedents = tf.boolean_mask(top_antecedents, bridging_mask)
        top_antecedent_scores_output = tf.boolean_mask(top_bridging_scores, bridging_mask)
    elif self.config["remove_coref_anaphora"]:
        bridging_mask = tf.not_equal(top_is_status, 1)  # DO
        top_span_starts = tf.boolean_mask(top_span_starts, bridging_mask)
        top_span_ends = tf.boolean_mask(top_span_ends, bridging_mask)
        top_antecedents = tf.boolean_mask(top_antecedents, bridging_mask)
        top_antecedent_scores_output = tf.boolean_mask(
            tf.concat([dummy_scores, top_bridging_scores], 1), bridging_mask)
    else:
        top_antecedent_scores_output = top_antecedent_scores

    return [top_span_starts, top_span_ends, top_span_cluster_ids, top_antecedents,
            top_antecedent_scores_output], loss
def get_antecedent_scores(self, mention_emb, mention_scores, antecedents, antecedents_len,
                          mention_starts, mention_ends, mention_speaker_ids, genre_emb):
    num_mentions = util.shape(mention_emb, 0)
    max_antecedents = util.shape(antecedents, 1)

    feature_emb_list = []

    if self.config["use_metadata"]:
        antecedent_speaker_ids = tf.gather(mention_speaker_ids, antecedents)  # [num_mentions, max_ant]
        same_speaker = tf.equal(tf.expand_dims(mention_speaker_ids, 1),
                                antecedent_speaker_ids)  # [num_mentions, max_ant]
        # speaker-pair embedding table, created with tf.get_variable like the other
        # feature embeddings in this file
        speaker_pair_emb = tf.gather(
            tf.get_variable("same_speaker_emb", [2, self.config["feature_size"]]),
            tf.to_int32(same_speaker))  # [num_mentions, max_ant, emb]
        feature_emb_list.append(speaker_pair_emb)

        tiled_genre_emb = tf.tile(tf.expand_dims(tf.expand_dims(genre_emb, 0), 0),
                                  [num_mentions, max_antecedents, 1])  # [num_mentions, max_ant, emb]
        feature_emb_list.append(tiled_genre_emb)

    if self.config["use_features"]:
        target_indices = tf.range(num_mentions)  # [num_mentions]
        mention_distance = tf.expand_dims(target_indices, 1) - antecedents  # [num_mentions, max_ant]
        # distance bucketing via the same helper used elsewhere in this file
        # (replaces an unresolved custom-op call in the original listing)
        mention_distance_bins = self.bucket_distance(mention_distance)  # [num_mentions, max_ant]
        mention_distance_emb = tf.gather(
            tf.get_variable("mention_distance_emb", [10, self.config["feature_size"]]),
            mention_distance_bins)  # [num_mentions, max_ant, emb]
        feature_emb_list.append(mention_distance_emb)

    feature_emb = tf.concat(feature_emb_list, 2)  # [num_mentions, max_ant, emb]
    feature_emb = tf.nn.dropout(feature_emb, self.dropout)  # [num_mentions, max_ant, emb]

    antecedent_emb = tf.gather(mention_emb, antecedents)  # [num_mentions, max_ant, emb]
    target_emb_tiled = tf.tile(tf.expand_dims(mention_emb, 1),
                               [1, max_antecedents, 1])  # [num_mentions, max_ant, emb]
    similarity_emb = antecedent_emb * target_emb_tiled  # [num_mentions, max_ant, emb]
    pair_emb = tf.concat([target_emb_tiled, antecedent_emb, similarity_emb, feature_emb],
                         2)  # [num_mentions, max_ant, emb]

    # with tf.variable_scope("iteration"):
    # with tf.variable_scope("antecedent_scoring"):
    antecedent_scores = util.ffnn(pair_emb, self.config["ffnn_depth"],
                                  self.config["ffnn_size"], 1,
                                  self.dropout)  # [num_mentions, max_ant, 1]
    antecedent_scores = tf.squeeze(antecedent_scores, 2)  # [num_mentions, max_ant]

    antecedent_mask = tf.log(
        tf.sequence_mask(antecedents_len, max_antecedents, dtype=tf.float32))  # [num_mentions, max_ant]
    antecedent_scores += antecedent_mask  # [num_mentions, max_ant]
    antecedent_scores += tf.expand_dims(mention_scores, 1) + tf.gather(
        mention_scores, antecedents)  # [num_mentions, max_ant]
    antecedent_scores = tf.concat(
        [tf.zeros([util.shape(mention_scores, 0), 1]), antecedent_scores],
        1)  # [num_mentions, max_ant + 1]
    return antecedent_scores  # [num_mentions, max_ant + 1]
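The use_features branch turns each mention-antecedent offset into one of 10 distance buckets before an embedding lookup. The exact boundaries live in bucket_distance, which is not shown in this listing; the sketch below uses the common identity-then-log2 scheme purely as an assumption about what such a bucketing looks like.

# Hedged sketch of a 10-bucket distance scheme (the real bucket_distance may differ):
# distances 0-4 get their own buckets, larger distances share log2-spaced buckets.
import math

def bucket_distance_sketch(distance):
    if distance <= 4:
        return distance
    return min(int(math.floor(math.log(distance, 2))) + 3, 9)

print([(d, bucket_distance_sketch(d)) for d in [0, 1, 4, 5, 8, 20, 64, 500]])
# e.g. 5-7 share a bucket, 8-15 share the next, everything >= 64 lands in bucket 9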
def get_mention_scores(self, span_emb):
    with tf.variable_scope("mention_scores"):
        return util.ffnn(span_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1,
                         self.dropout)  # [k, 1]
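util.ffnn(emb, num_hidden_layers, hidden_size, output_size, dropout) is the scorer used throughout this file, but its definition is not part of this listing. Below is a minimal TF1-style sketch that matches the call signature, under the assumption that it is a plain ReLU MLP with dropout between layers; the real util.ffnn may differ in scoping, initialization, and handling of 3-D inputs.

# Assumed shape of the ffnn helper, for orientation only.
import tensorflow as tf

def ffnn_sketch(inputs, num_hidden_layers, hidden_size, output_size, dropout):
    current = inputs
    for i in range(num_hidden_layers):
        with tf.variable_scope("hidden_{}".format(i)):
            current = tf.layers.dense(current, hidden_size, activation=tf.nn.relu)
            # `dropout` is treated as a keep probability, as tf.nn.dropout expects in TF1
            current = tf.nn.dropout(current, dropout)
    with tf.variable_scope("output"):
        return tf.layers.dense(current, output_size)  # e.g. [k, 1] mention scores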
def get_predictions_and_loss(self, context_word_emb, head_word_emb, lm_emb, char_index,
                             text_len, is_training, gold_starts, gold_ends, antecedents,
                             antecedents_len, anaphors, gold_labels):
    self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
    self.lexical_dropout = self.get_dropout(self.config["lexical_dropout_rate"], is_training)
    self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training)

    num_sentences = tf.shape(context_word_emb)[0]
    max_sentence_length = tf.shape(context_word_emb)[1]

    context_emb_list = [context_word_emb]
    head_emb_list = [head_word_emb]
    if self.config["char_embedding_size"] > 0:
        char_emb = tf.gather(
            tf.get_variable("char_embeddings",
                            [len(self.char_dict), self.config["char_embedding_size"]]),
            char_index)  # [num_sentences, max_sentence_length, max_word_length, emb]
        flattened_char_emb = tf.reshape(char_emb, [
            num_sentences * max_sentence_length,
            util.shape(char_emb, 2),
            util.shape(char_emb, 3)
        ])  # [num_sentences * max_sentence_length, max_word_length, emb]
        flattened_aggregated_char_emb = util.cnn(
            flattened_char_emb, self.config["filter_widths"],
            self.config["filter_size"])  # [num_sentences * max_sentence_length, emb]
        aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
            num_sentences, max_sentence_length,
            util.shape(flattened_aggregated_char_emb, 1)
        ])  # [num_sentences, max_sentence_length, emb]
        context_emb_list.append(aggregated_char_emb)
        head_emb_list.append(aggregated_char_emb)

    if self.lm_file:
        lm_emb_size = util.shape(lm_emb, 2)
        lm_num_layers = util.shape(lm_emb, 3)
        with tf.variable_scope("lm_aggregation"):
            self.lm_weights = tf.nn.softmax(
                tf.get_variable("lm_scores", [lm_num_layers],
                                initializer=tf.constant_initializer(0.0)))
            self.lm_scaling = tf.get_variable("lm_scaling", [],
                                              initializer=tf.constant_initializer(1.0))
        flattened_lm_emb = tf.reshape(
            lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
        flattened_aggregated_lm_emb = tf.matmul(
            flattened_lm_emb,
            tf.expand_dims(self.lm_weights, 1))  # [num_sentences * max_sentence_length * emb, 1]
        aggregated_lm_emb = tf.reshape(flattened_aggregated_lm_emb,
                                       [num_sentences, max_sentence_length, lm_emb_size])
        aggregated_lm_emb *= self.lm_scaling
        context_emb_list.append(aggregated_lm_emb)

    context_emb = tf.concat(context_emb_list, 2)  # [num_sentences, max_sentence_length, emb]
    head_emb = tf.concat(head_emb_list, 2)  # [num_sentences, max_sentence_length, emb]
    context_emb = tf.nn.dropout(context_emb, self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
    head_emb = tf.nn.dropout(head_emb, self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)  # [num_sentences, max_sentence_length]

    context_outputs = self.lstm_contextualize(context_emb, text_len, text_len_mask)  # [num_words, emb]
    flattened_head_emb = self.flatten_emb_by_sentence(head_emb, text_len_mask)  # [num_words, emb]

    mention_emb = self.get_span_emb(flattened_head_emb, context_outputs, gold_starts, gold_ends)

    k = util.shape(antecedents, 0)
    c = util.shape(antecedents, 1)

    anaphor_emb = tf.gather(mention_emb, anaphors)  # [k, emb]
    antecedent_emb = tf.gather(mention_emb, antecedents)  # [k, c, emb]
    pair_emb = self.get_pair_embeddings(anaphor_emb, antecedents, antecedent_emb)  # [k, c, emb]

    with tf.variable_scope("plural_scores"):
        plural_scores = util.ffnn(pair_emb, self.config["ffnn_depth"],
                                  self.config["ffnn_size"], 1, self.dropout)  # [k, c, 1]
        plural_scores = tf.squeeze(plural_scores, 2)  # [k, c]

    plural_scores = plural_scores + tf.log(tf.sequence_mask(antecedents_len, c, tf.float32))

    dummy_scores = tf.zeros([k, 1])
    dummy_labels = tf.logical_not(tf.reduce_any(gold_labels, 1, keepdims=True))  # [k, 1]

    plural_scores_with_dummy = tf.concat([dummy_scores, plural_scores], 1)
    gold_labels_with_dummy = tf.concat([dummy_labels, gold_labels], 1)

    loss = self.softmax_loss(plural_scores_with_dummy, gold_labels_with_dummy)
    loss = tf.reduce_sum(loss)

    return [plural_scores, antecedents_len, anaphors], loss
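Both antecedent scorers above rely on the same log-mask trick: adding tf.log(tf.sequence_mask(...)) puts -inf on padded antecedent slots, so those slots receive zero probability under the softmax while the valid slots are left unchanged. A toy numpy illustration:

# Sketch only: invented scores and lengths, shown to make the masking behavior concrete.
import numpy as np

scores = np.array([[1.0, 2.0, 3.0]])          # [k=1, c=3] raw antecedent scores
antecedents_len = np.array([2])               # only the first 2 antecedent slots are real
mask = (np.arange(3)[None, :] < antecedents_len[:, None]).astype(np.float64)

with np.errstate(divide="ignore"):
    masked = scores + np.log(mask)            # padded slot becomes -inf, others unchanged
probs = np.exp(masked) / np.exp(masked).sum(axis=1, keepdims=True)
print(probs)                                  # padded slot gets probability 0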