def emb2cnn(self, emb_list):
  pronoun_embedding, name_embedding, status_embedding = emb_list
  flattened_pronoun_emb = util.cnn(pronoun_embedding, self.config["filter_widths"], self.config["emb_filter_size"], name='p_')
  flattened_name_emb = util.cnn(name_embedding, self.config["filter_widths"], self.config["emb_filter_size"], name='n_')
  flattened_status_emb = util.cnn(status_embedding, self.config["filter_widths"], self.config["emb_filter_size"], name='s_')
  return flattened_pronoun_emb, flattened_name_emb, flattened_status_emb
def emb2cnn(self, emb_list):
  pronoun_embedding, name_embedding, status_embedding = emb_list  # status_embedding is unpacked but unused in this variant
  pronoun_embedding = tf.transpose(pronoun_embedding, [1, 0, 2])  # [1, k, emb]
  name_embedding = tf.transpose(name_embedding, [1, 0, 2])
  # pronoun_embedding = tf.squeeze(pronoun_embedding, 1)
  # name_embedding = tf.squeeze(name_embedding, 1)
  flattened_pronoun_emb = util.cnn(pronoun_embedding, self.config["emb_filter_widths"], self.config["emb_filter_size"], name='p_')
  flattened_name_emb = util.cnn(name_embedding, self.config["emb_filter_widths"], self.config["emb_filter_size"], name='n_')
  return flattened_pronoun_emb, flattened_name_emb
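# Both emb2cnn variants above delegate to util.cnn. For reference, a minimal
# sketch of a max-over-time 1-D CNN in the style of the e2e-coref util.cnn;
# the `name` scoping argument is an assumption inferred from the calls above.
# Assumes `import tensorflow as tf` (TF 1.x).
def cnn(inputs, filter_sizes, num_filters, name=''):
  # inputs: [batch, seq_len, emb]; returns [batch, num_filters * len(filter_sizes)]
  input_size = inputs.get_shape()[-1].value
  outputs = []
  for i, filter_size in enumerate(filter_sizes):
    with tf.variable_scope("{}conv_{}".format(name, i)):
      w = tf.get_variable("w", [filter_size, input_size, num_filters])
      b = tf.get_variable("b", [num_filters])
    conv = tf.nn.conv1d(inputs, w, stride=1, padding="VALID")
    h = tf.nn.relu(tf.nn.bias_add(conv, b))  # [batch, seq_len - filter_size + 1, num_filters]
    outputs.append(tf.reduce_max(h, 1))      # max-over-time pooling -> [batch, num_filters]
  return tf.concat(outputs, 1)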
def get_predictions_and_loss(self, inputs):
  tokens, context_word_emb, lm_emb, char_index, text_len, is_training, gold_labels = inputs
  self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
  self.lexical_dropout = self.get_dropout(self.config["lexical_dropout_rate"], is_training)
  self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training)

  num_sentences = tf.shape(tokens)[0]
  max_sentence_length = tf.shape(tokens)[1]

  context_emb_list = []
  context_emb_list.append(context_word_emb)
  char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index)  # [num_sentences, max_sentence_length, max_word_length, emb]
  flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)])  # [num_sentences * max_sentence_length, max_word_length, emb]
  flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"])  # [num_sentences * max_sentence_length, emb]
  aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)])  # [num_sentences, max_sentence_length, emb]
  context_emb_list.append(aggregated_char_emb)

  if self.lm_file is not None:  # Only add these layers if we're using contextualized embeddings
    lm_emb_size = util.shape(lm_emb, 2)
    lm_num_layers = util.shape(lm_emb, 3)
    with tf.variable_scope("lm_aggregation"):
      self.lm_weights = tf.nn.softmax(tf.get_variable("lm_scores", [lm_num_layers], initializer=tf.constant_initializer(0.0)))
      self.lm_scaling = tf.get_variable("lm_scaling", [], initializer=tf.constant_initializer(1.0))
    flattened_lm_emb = tf.reshape(lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
    flattened_aggregated_lm_emb = tf.matmul(flattened_lm_emb, tf.expand_dims(self.lm_weights, 1))  # [num_sentences * max_sentence_length * emb, 1]
    aggregated_lm_emb = tf.reshape(flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size])
    aggregated_lm_emb *= self.lm_scaling
    context_emb_list.append(aggregated_lm_emb)

  context_emb = tf.concat(context_emb_list, 2)  # [num_sentences, max_sentence_length, emb]
  context_emb = tf.nn.dropout(context_emb, self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

  text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)  # [num_sentences, max_sentence_length]
  candidate_scores_mask = tf.logical_and(tf.expand_dims(text_len_mask, 1), tf.expand_dims(text_len_mask, 2))  # [num_sentences, max_sentence_length, max_sentence_length]
  sentence_ends_leq_starts = tf.tile(tf.expand_dims(tf.logical_not(tf.sequence_mask(tf.range(max_sentence_length), max_sentence_length)), 0), [num_sentences, 1, 1])  # [num_sentences, max_sentence_length, max_sentence_length]; keeps only spans with start <= end
  candidate_scores_mask = tf.logical_and(candidate_scores_mask, sentence_ends_leq_starts)
  flattened_candidate_scores_mask = tf.reshape(candidate_scores_mask, [-1])  # [num_sentences * max_sentence_length * max_sentence_length]

  context_outputs = self.lstm_contextualize(context_emb, text_len, self.lstm_dropout)  # [num_sentences, max_sentence_length, emb]

  with tf.variable_scope("candidate_starts_ffnn"):
    candidate_starts_emb = util.projection(context_outputs, self.config["ffnn_size"])  # [num_sentences, max_sentence_length, emb]
  with tf.variable_scope("candidate_ends_ffnn"):
    candidate_ends_emb = util.projection(context_outputs, self.config["ffnn_size"])  # [num_sentences, max_sentence_length, emb]

  candidate_ner_scores = util.bilinear_classifier(candidate_starts_emb, candidate_ends_emb, self.dropout, output_size=self.num_types + 1)  # [num_sentences, max_sentence_length, max_sentence_length, num_types + 1]
  candidate_ner_scores = tf.boolean_mask(tf.reshape(candidate_ner_scores, [-1, self.num_types + 1]), flattened_candidate_scores_mask)

  loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=gold_labels, logits=candidate_ner_scores)
  loss = tf.reduce_sum(loss)
  return candidate_ner_scores, loss
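# util.bilinear_classifier above is a biaffine span scorer in the style of
# Dozat & Manning, scoring every (start, end) pair per entity type. A minimal
# einsum-based sketch under that assumption; the real implementation may add
# bias terms or a different parameterization.
def bilinear_classifier(starts, ends, dropout, output_size):
  # starts, ends: [num_sentences, max_len, emb]
  emb = starts.get_shape()[-1].value
  starts = tf.nn.dropout(starts, dropout)
  ends = tf.nn.dropout(ends, dropout)
  u = tf.get_variable("bilinear_u", [output_size, emb, emb])
  # scores[b, i, j, t] = starts[b, i] . u[t] . ends[b, j]
  return tf.einsum("bie,tef,bjf->bijt", starts, u, ends)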
def get_predictions_and_loss(self, word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids, tag_labels, tag_seq, tag_loss_label):
  # self.gold_starts = gold_starts
  # self.gold_ends = gold_ends
  # self.cluster_ids = cluster_ids
  self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"])
  self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"])

  num_sentences = tf.shape(word_emb)[0]
  max_sentence_length = tf.shape(word_emb)[1]

  text_emb_list = [word_emb]
  if self.config["char_embedding_size"] > 0:
    char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index)  # [num_sentences, max_sentence_length, max_word_length, emb]
    flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)])  # [num_sentences * max_sentence_length, max_word_length, emb]
    flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"])  # [num_sentences * max_sentence_length, emb]
    aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)])  # [num_sentences, max_sentence_length, emb]
    text_emb_list.append(aggregated_char_emb)

  text_emb = tf.concat(text_emb_list, 2)
  text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

  text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
  text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])
  # self.text_len_mask = text_len_mask[0]

  text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
  text_outputs = tf.nn.dropout(text_outputs, self.dropout)

  genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre)  # [emb]

  sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length])  # [num_sentences, max_sentence_length]
  flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask)  # [num_words]
  flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask)  # [num_words]
  self.flattened_sentence_indices = flattened_sentence_indices

  # text_conv = tf.expand_dims(text_outputs, 0)
  text_conv = tf.expand_dims(flattened_text_emb, 0)
  text_conv = util.cnn_name(text_conv, [5], 100, 'tag_conv')[0]
  text_conv = tf.nn.dropout(text_conv, self.dropout)
  # text_lstm = self.encode_sentences_unilstm(text_conv)[0]
  # tag_prob = tf.nn.softmax(util.projection_name(text_conv, 3, 'tag_fc'), dim=1)
  tag_prob = util.projection_name(text_conv, 3, 'tag_fc')
  # tag_prob_transpose = tf.transpose(tag_prob, [1, 0])
  tag_outputs = tf.argmax(tag_prob, axis=1, output_type=tf.int32)
  tag_high = tf.reduce_max(tag_prob, axis=1)
  num_words = tf.shape(text_conv)[0]
  # self.lstm_shape = tf.shape(text_outputs)
  # self.conv_shape = tf.shape(text_conv)

  # candidate_starts, candidate_ends = coref_ops.spans(
  #     sentence_indices=flattened_sentence_indices,
  #     max_width=self.max_mention_width)
  # candidate_starts.set_shape([None])
  # candidate_ends.set_shape([None])

  mention_starts, mention_ends, mention_scores = coref_ops.memory(
      tag_seq=tag_outputs, tag_high=tag_high, num_words=1)
  mention_starts.set_shape([None])
  mention_ends.set_shape([None])
  mention_scores.set_shape([None])

  self.num_mention = tf.shape(mention_starts)[0]
  self.num_gold_mention = tf.shape(gold_starts)[0]
  self.num_words = num_words
  self.mention_starts = mention_starts
  self.gold_starts = gold_starts
  self.mention_ends = mention_ends
  self.tag_outputs = tag_outputs
  self.tag_seq = tag_seq

  mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, mention_starts, mention_ends)  # [num_candidates, emb]
  # mention_scores = tf.convert_to_tensor([self.get_mention_prob(tag_prob_transpose, mention_starts[i], mention_ends[i], num_words)
  #                                        for i in range(tf.shape(mention_starts)[0])])
  # mention_scores = tf.squeeze(self.get_mention_scores(mention_emb), 1)  # [num_mentions, 1]
  # candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1)  # [num_mentions]
  # k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"]))
  # predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores, candidate_starts, candidate_ends, k)  # ([k], [k])
  # predicted_mention_indices.set_shape([None])
  # mention_starts = tf.gather(candidate_starts, predicted_mention_indices)  # [num_mentions]
  # mention_ends = tf.gather(candidate_ends, predicted_mention_indices)  # [num_mentions]
  # mention_emb = tf.gather(candidate_mention_emb, predicted_mention_indices)  # [num_mentions, emb]
  # mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices)  # [num_mentions]
  candidate_starts = mention_starts
  candidate_ends = mention_ends

  mention_start_emb = tf.gather(text_outputs, mention_starts)  # [num_mentions, emb]
  mention_end_emb = tf.gather(text_outputs, mention_ends)  # [num_mentions, emb]
  mention_speaker_ids = tf.gather(speaker_ids, mention_starts)  # [num_mentions]

  max_antecedents = self.config["max_antecedents"]
  antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids, max_antecedents)  # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions])
  antecedents.set_shape([None, None])
  antecedent_labels.set_shape([None, None])
  antecedents_len.set_shape([None])

  antecedent_scores = self.get_antecedent_scores(mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb)  # [num_mentions, max_ant + 1]

  raw_mention_loss = self.softmax_loss(antecedent_scores, antecedent_labels)  # [num_mentions]
  raw_tagging_loss = tf.nn.softmax_cross_entropy_with_logits(logits=tag_prob, labels=tag_labels)
  mention_loss = tf.reduce_sum(raw_mention_loss)
  tagging_loss = tf.reduce_sum(tf.multiply(tf.to_float(tag_loss_label), raw_tagging_loss))  # []
  # tagging_loss = tf.reduce_sum(raw_tagging_loss)

  return [candidate_starts, candidate_ends, mention_scores, mention_starts, mention_ends,
          antecedents, antecedent_scores, tag_outputs, tag_seq], mention_loss, tagging_loss
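# self.softmax_loss, used here and in the variants below, is the marginal
# log-likelihood over all gold antecedents of each mention. A sketch matching
# the reference e2e-coref implementation:
def softmax_loss(self, antecedent_scores, antecedent_labels):
  # antecedent_scores: [num_mentions, max_ant + 1]; antecedent_labels: boolean, same shape
  gold_scores = antecedent_scores + tf.log(tf.to_float(antecedent_labels))  # -inf where not gold
  marginalized_gold_scores = tf.reduce_logsumexp(gold_scores, [1])  # [num_mentions]
  log_norm = tf.reduce_logsumexp(antecedent_scores, [1])  # [num_mentions]
  return log_norm - marginalized_gold_scores  # [num_mentions]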
def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb, lm_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids, scene_emb, genders, fpronouns):
  self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
  self.lexical_dropout = self.get_dropout(self.config["lexical_dropout_rate"], is_training)
  self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training)

  num_sentences = tf.shape(context_word_emb)[0]
  max_sentence_length = tf.shape(context_word_emb)[1]

  context_emb_list = [context_word_emb]
  head_emb_list = [head_word_emb]
  if self.config["char_embedding_size"] > 0:
    char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index)  # [num_sentences, max_sentence_length, max_word_length, emb]
    flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)])  # [num_sentences * max_sentence_length, max_word_length, emb]
    flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"])  # [num_sentences * max_sentence_length, emb]
    aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)])  # [num_sentences, max_sentence_length, emb]
    context_emb_list.append(aggregated_char_emb)
    head_emb_list.append(aggregated_char_emb)

  if not self.lm_file:
    elmo_module = hub.Module("https://tfhub.dev/google/elmo/2")
    lm_embeddings = elmo_module(
        inputs={"tokens": tokens, "sequence_len": text_len},
        signature="tokens", as_dict=True)
    word_emb = lm_embeddings["word_emb"]  # [num_sentences, max_sentence_length, 512]
    lm_emb = tf.stack([tf.concat([word_emb, word_emb], -1),
                       lm_embeddings["lstm_outputs1"],
                       lm_embeddings["lstm_outputs2"]], -1)  # [num_sentences, max_sentence_length, 1024, 3]
  lm_emb_size = util.shape(lm_emb, 2)
  lm_num_layers = util.shape(lm_emb, 3)
  with tf.variable_scope("lm_aggregation"):
    self.lm_weights = tf.nn.softmax(tf.get_variable("lm_scores", [lm_num_layers], initializer=tf.constant_initializer(0.0)))
    self.lm_scaling = tf.get_variable("lm_scaling", [], initializer=tf.constant_initializer(1.0))
  flattened_lm_emb = tf.reshape(lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
  flattened_aggregated_lm_emb = tf.matmul(flattened_lm_emb, tf.expand_dims(self.lm_weights, 1))  # [num_sentences * max_sentence_length * emb, 1]
  aggregated_lm_emb = tf.reshape(flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size])
  aggregated_lm_emb *= self.lm_scaling
  context_emb_list.append(aggregated_lm_emb)

  context_emb = tf.concat(context_emb_list, 2)  # [num_sentences, max_sentence_length, emb]
  head_emb = tf.concat(head_emb_list, 2)  # [num_sentences, max_sentence_length, emb]
  context_emb = tf.nn.dropout(context_emb, self.lexical_dropout)
  head_emb = tf.nn.dropout(head_emb, self.lexical_dropout)

  text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)  # [num_sentences, max_sentence_length]

  context_outputs = self.lstm_contextualize(context_emb, text_len, text_len_mask)  # [num_words, emb]
  num_words = util.shape(context_outputs, 0)

  genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre)  # [emb]

  sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length])  # [num_sentences, max_sentence_length]
  flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask)  # [num_words]
  flattened_head_emb = self.flatten_emb_by_sentence(head_emb, text_len_mask)  # [num_words]

  candidate_starts = tf.tile(tf.expand_dims(tf.range(num_words), 1), [1, self.max_span_width])  # [num_words, max_span_width]
  candidate_ends = candidate_starts + tf.expand_dims(tf.range(self.max_span_width), 0)  # [num_words, max_span_width]
  # debug
  prev_can_st = candidate_starts
  prev_can_ends = candidate_ends
  # debug
  candidate_start_sentence_indices = tf.gather(flattened_sentence_indices, candidate_starts)  # [num_words, max_span_width]
  candidate_end_sentence_indices = tf.gather(flattened_sentence_indices, tf.minimum(candidate_ends, num_words - 1))  # [num_words, max_span_width]
  candidate_mask = tf.logical_and(candidate_ends < num_words, tf.equal(candidate_start_sentence_indices, candidate_end_sentence_indices))  # [num_words, max_span_width]
  flattened_candidate_mask = tf.reshape(candidate_mask, [-1])  # [num_words * max_span_width]
  candidate_starts = tf.boolean_mask(tf.reshape(candidate_starts, [-1]), flattened_candidate_mask)  # [num_candidates]
  candidate_ends = tf.boolean_mask(tf.reshape(candidate_ends, [-1]), flattened_candidate_mask)  # [num_candidates]

  combined_candidate_st = candidate_starts * 10000 + candidate_ends
  combined_gold_st = gold_starts * 10000 + gold_ends
  _, non_top_span_list = tf.setdiff1d(combined_candidate_st, combined_gold_st)  # [num_candidates - num_gold_mentions]
  whole_candidate_indices_list = tf.range(util.shape(candidate_starts, 0))  # [num_candidates]
  gold_span_indices, _ = tf.setdiff1d(whole_candidate_indices_list, non_top_span_list)  # [num_gold_mentions]

  candidate_sentence_indices = tf.boolean_mask(tf.reshape(candidate_start_sentence_indices, [-1]), flattened_candidate_mask)  # [num_candidates]
  candidate_cluster_ids = self.get_candidate_labels(candidate_starts, candidate_ends, gold_starts, gold_ends, cluster_ids)  # [num_candidates]
  candidate_span_emb = self.get_span_emb(flattened_head_emb, context_outputs, candidate_starts, candidate_ends)  # [num_candidates, emb]

  # Video scene emb
  ffnn_scene_emb = util.ffnn(scene_emb, num_hidden_layers=self.config["ffnn_depth"], hidden_size=400, output_size=128, dropout=self.dropout)  # [num_words, 128]
  candidate_scene_emb = self.get_scene_emb(ffnn_scene_emb, candidate_starts)  # [num_candidates, 128]

  '''
  Comment: this part computed mention scores and pruned mentions.
  It is not used for this task, because mention boundaries are given.
  candidate_mention_scores = self.get_mention_scores(candidate_span_emb)  # [k, 1]
  candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1)  # [k]
  k = tf.to_int32(tf.floor(tf.to_float(tf.shape(context_outputs)[0]) * self.config["top_span_ratio"]))
  top_span_indices = coref_ops.extract_spans(tf.expand_dims(candidate_mention_scores, 0),
                                             tf.expand_dims(candidate_starts, 0),
                                             tf.expand_dims(candidate_ends, 0),
                                             tf.expand_dims(k, 0),
                                             util.shape(context_outputs, 0),
                                             True)  # [1, k]
  top_span_indices.set_shape([1, None])
  top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]
  '''

  ######## Only using gold span indices ########
  k = tf.to_int32(util.shape(gold_span_indices, 0))
  top_span_indices = gold_span_indices
  ##############################################

  top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
  top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
  top_span_emb = tf.gather(candidate_span_emb, top_span_indices)  # [k, emb]
  top_scene_emb = tf.gather(candidate_scene_emb, top_span_indices)  # [k, emb-scene]
  top_span_cluster_ids = tf.gather(candidate_cluster_ids, top_span_indices)  # [k]
  # top_span_mention_scores = tf.gather(candidate_mention_scores, top_span_indices)  # [k]
  top_span_sentence_indices = tf.gather(candidate_sentence_indices, top_span_indices)  # [k]
  top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts)  # [k]
  top_span_genders = tf.gather(genders, top_span_ends)
  top_span_fpronouns = tf.gather(fpronouns, top_span_ends)

  # k: total number of candidate spans (M in the paper)
  # c: how many antecedents we check (K in the paper)
  c = tf.minimum(self.config["max_top_antecedents"], k)

  if self.config["coarse_to_fine"]:
    # Bug fix: mention scoring is disabled above, so feed the pruner zero
    # scores to keep this branch runnable.
    top_span_mention_scores = tf.zeros([k])  # [k]
    top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(top_span_emb, top_span_mention_scores, c)
  else:
    # top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(top_span_emb, top_span_mention_scores, c)
    top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_prnuing_wo_mention_score(top_span_emb, c)

  dummy_scores = tf.zeros([k, 1])  # [k, 1]
  for i in range(self.config["coref_depth"]):
    with tf.variable_scope("coref_layer", reuse=(i > 0)):
      top_antecedent_emb = tf.gather(top_span_emb, top_antecedents)  # [k, c, emb]
      top_antecedent_scene_emb = tf.gather(top_scene_emb, top_antecedents)  # [k, c, emb-scene]
      top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(top_span_emb, top_antecedents, top_antecedent_emb, top_antecedent_offsets, top_span_speaker_ids, genre_emb, top_scene_emb, top_antecedent_scene_emb, top_span_genders, top_span_fpronouns)  # [k, c]
      top_antecedent_weights = tf.nn.softmax(tf.concat([dummy_scores, top_antecedent_scores], 1))  # [k, c + 1]
      top_antecedent_emb = tf.concat([tf.expand_dims(top_span_emb, 1), top_antecedent_emb], 1)  # [k, c + 1, emb]
      attended_span_emb = tf.reduce_sum(tf.expand_dims(top_antecedent_weights, 2) * top_antecedent_emb, 1)  # [k, emb]
      with tf.variable_scope("f"):
        f = tf.sigmoid(util.projection(tf.concat([top_span_emb, attended_span_emb], 1), util.shape(top_span_emb, -1)))  # [k, emb]
        top_span_emb = f * attended_span_emb + (1 - f) * top_span_emb  # [k, emb]

  top_antecedent_scores = tf.concat([dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

  top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids, top_antecedents)  # [k, c]
  top_antecedent_cluster_ids += tf.to_int32(tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
  same_cluster_indicator = tf.equal(top_antecedent_cluster_ids, tf.expand_dims(top_span_cluster_ids, 1))  # [k, c]
  non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0, 1)  # [k, 1]
  pairwise_labels = tf.logical_and(same_cluster_indicator, non_dummy_indicator)  # [k, c]
  dummy_labels = tf.logical_not(tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
  top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels], 1)  # [k, c + 1]
  top_antecedent_prob = tf.nn.softmax(top_antecedent_scores, 1)  # [k, c + 1]

  if self.config["use_gender_logic_rule"]:
    top_antecedent_prob_with_logic = self.project_logic_rule(top_antecedent_prob, top_span_genders, top_span_fpronouns, top_span_speaker_ids, top_antecedents, k)
    '''
    marginal_prob = tf.reduce_sum(top_antecedent_prob * tf.to_float(top_antecedent_labels), axis=1)
    gold_loss = -1 * tf.reduce_sum(tf.log(marginal_prob))
    top_antecedent_scores = top_antecedent_prob
    '''
    origin_loss = self.softmax_loss(top_antecedent_scores, top_antecedent_labels)  # [k]
    origin_loss = tf.reduce_sum(origin_loss)

    # cross entropy: -1 * ground_truth * log(prediction)
    # teacher_loss = tf.reduce_min(tf.nn. (labels=top_antecedent_prob_with_logic, logits=top_antecedent_scores))
    teacher_loss = tf.reduce_sum(-tf.reduce_sum(top_antecedent_prob_with_logic * tf.log(top_antecedent_prob + 1e-10), reduction_indices=[1]))
    pi = tf.minimum(self.config["logic_rule_pi_zero"], 1.0 - tf.pow(self.config["logic_rule_imitation_alpha"], tf.to_float(self.global_step) + 1.0))

    # For validation loss
    marginal_prob = tf.reduce_sum(top_antecedent_prob_with_logic * tf.to_float(top_antecedent_labels), axis=1)
    validation_loss = -1 * tf.reduce_sum(tf.log(marginal_prob))

    # loss = teacher_loss + origin_loss
    loss = tf.where(is_training, pi * teacher_loss + (1.0 - pi) * origin_loss, validation_loss)
    top_antecedent_scores = top_antecedent_prob_with_logic
  else:
    loss = self.softmax_loss(top_antecedent_scores, top_antecedent_labels)  # [k]
    loss = tf.reduce_sum(loss)  # []
    teacher_loss = loss
    origin_loss = loss

  return [candidate_starts, candidate_ends, top_span_starts, top_span_ends, top_antecedents, top_antecedent_scores, teacher_loss, origin_loss], loss
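# The pi schedule above follows the rule-distillation recipe of Hu et al.
# (2016): the weight on the teacher (rule-projected) distribution ramps up
# with the global step and saturates at pi_zero. A toy sketch; the default
# values for pi_zero and alpha are hypothetical, not taken from the config.
def pi_schedule(step, pi_zero=0.95, alpha=0.9):
  """Weight on the teacher loss at a given global step."""
  return min(pi_zero, 1.0 - alpha ** (step + 1.0))

# pi_schedule(0) == 0.1; by roughly step 30 it saturates near pi_zero.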
def get_predictions_and_loss(self, context_word_emb, head_word_emb, lm_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids, bridging_ante_cids, is_status, us_mask):
  self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
  self.lexical_dropout = self.get_dropout(self.config["lexical_dropout_rate"], is_training)
  self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training)

  num_sentences = tf.shape(context_word_emb)[0]
  max_sentence_length = tf.shape(context_word_emb)[1]

  context_emb_list = [context_word_emb]
  head_emb_list = [head_word_emb]
  if self.config["char_embedding_size"] > 0:
    char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index)  # [num_sentences, max_sentence_length, max_word_length, emb]
    flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)])  # [num_sentences * max_sentence_length, max_word_length, emb]
    flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"])  # [num_sentences * max_sentence_length, emb]
    aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)])  # [num_sentences, max_sentence_length, emb]
    context_emb_list.append(aggregated_char_emb)
    head_emb_list.append(aggregated_char_emb)

  if self.lm_file:
    lm_emb_size = util.shape(lm_emb, 2)
    lm_num_layers = util.shape(lm_emb, 3)
    with tf.variable_scope("lm_aggregation"):
      self.lm_weights = tf.nn.softmax(tf.get_variable("lm_scores", [lm_num_layers], initializer=tf.constant_initializer(0.0)))
      self.lm_scaling = tf.get_variable("lm_scaling", [], initializer=tf.constant_initializer(1.0))
    flattened_lm_emb = tf.reshape(lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
    flattened_aggregated_lm_emb = tf.matmul(flattened_lm_emb, tf.expand_dims(self.lm_weights, 1))  # [num_sentences * max_sentence_length * emb, 1]
    aggregated_lm_emb = tf.reshape(flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size])
    aggregated_lm_emb *= self.lm_scaling
    context_emb_list.append(aggregated_lm_emb)

  context_emb = tf.concat(context_emb_list, 2)  # [num_sentences, max_sentence_length, emb]
  head_emb = tf.concat(head_emb_list, 2)  # [num_sentences, max_sentence_length, emb]
  context_emb = tf.nn.dropout(context_emb, self.lexical_dropout)
  head_emb = tf.nn.dropout(head_emb, self.lexical_dropout)

  text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)  # [num_sentences, max_sentence_length]

  context_outputs = self.lstm_contextualize(context_emb, text_len, text_len_mask)  # [num_words, emb]

  genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre)  # [emb]

  flattened_head_emb = self.flatten_emb_by_sentence(head_emb, text_len_mask)  # [num_words]

  # Gold mentions are given, so they are used directly as the top spans.
  top_span_starts = gold_starts
  top_span_ends = gold_ends
  top_span_cluster_ids = cluster_ids
  top_span_emb = self.get_span_emb(flattened_head_emb, context_outputs, top_span_starts, top_span_ends)
  top_span_mention_scores = tf.zeros_like(gold_starts, dtype=tf.float32)  # [k]
  top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts)
  top_span_bridging_ante_cids = bridging_ante_cids
  top_us_mask = us_mask
  top_is_status = is_status

  k = util.shape(top_span_starts, 0)
  c = tf.minimum(self.config["max_top_antecedents"], k)

  top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(top_span_emb, top_span_mention_scores, c)

  top_antecedent_emb = tf.gather(top_span_emb, top_antecedents)  # [k, c, emb]
  pair_emb = self.get_pair_embeddings(top_span_emb, top_antecedents, top_antecedent_emb, top_antecedent_offsets, top_span_speaker_ids, genre_emb)  # [k, c, emb]

  top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids, top_antecedents)  # [k, c]
  top_antecedent_cluster_ids += tf.to_int32(tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]

  shared_depth = 0
  if self.config["shared_depth"] > 0:
    flattened_pair_emb = tf.reshape(pair_emb, [k * c, util.shape(pair_emb, 2)])
    shared_depth = min(self.config["shared_depth"], self.config["ffnn_depth"])
    for i in range(shared_depth):
      hidden_weights = tf.get_variable("shared_hidden_weights_{}".format(i), [util.shape(flattened_pair_emb, 1), self.config["ffnn_size"]])
      hidden_bias = tf.get_variable("shared_hidden_bias_{}".format(i), [self.config["ffnn_size"]])
      flattened_pair_emb = tf.nn.relu(tf.nn.xw_plus_b(flattened_pair_emb, hidden_weights, hidden_bias))
      flattened_pair_emb = tf.nn.dropout(flattened_pair_emb, self.dropout)
    pair_emb = tf.reshape(flattened_pair_emb, [k, c, self.config["ffnn_size"]])

  ante_score_list = []
  pairwise_label_list = []
  dummy_scores = tf.zeros([k, 1])  # [k, 1]
  ante_score_list.append(dummy_scores)

  with tf.variable_scope("slow_bridging_scores"):
    slow_bridging_scores = util.ffnn(pair_emb, self.config["ffnn_depth"] - shared_depth, self.config["ffnn_size"], 1, self.dropout)  # [k, c, 1]
    slow_bridging_scores = tf.squeeze(slow_bridging_scores, 2)  # [k, c]
    top_bridging_scores = slow_bridging_scores + top_fast_antecedent_scores
    ante_score_list.append(top_bridging_scores)

  bridging_cluster_indicator = tf.equal(top_antecedent_cluster_ids, tf.expand_dims(top_span_bridging_ante_cids, 1))  # [k, c]
  non_dummy_bridging_indicator = tf.expand_dims(top_span_bridging_ante_cids > 0, 1)  # [k, 1]
  bridging_pairwise_labels = tf.logical_and(bridging_cluster_indicator, non_dummy_bridging_indicator)  # [k, c]
  pairwise_label_list.append(bridging_pairwise_labels)

  if self.config["train_with_coref"]:
    with tf.variable_scope("slow_coreference_scores"):
      slow_coref_scores = util.ffnn(pair_emb, self.config["ffnn_depth"] - shared_depth, self.config["ffnn_size"], 1, self.dropout)  # [k, c, 1]
      slow_coref_scores = tf.squeeze(slow_coref_scores, 2)  # [k, c]
      top_coref_scores = slow_coref_scores + top_fast_antecedent_scores
      ante_score_list.append(top_coref_scores)
    coref_cluster_indicator = tf.equal(top_antecedent_cluster_ids, tf.expand_dims(top_span_cluster_ids, 1))  # [k, c]
    non_dummy_coref_indicator = tf.expand_dims(top_span_cluster_ids > 0, 1)  # [k, 1]
    coref_pairwise_labels = tf.logical_and(coref_cluster_indicator, non_dummy_coref_indicator)  # [k, c]
    pairwise_label_list.append(coref_pairwise_labels)

  top_antecedent_scores = tf.concat(ante_score_list, 1)  # [k, c + 1] or [k, 2*c + 1]
  pairwise_labels = tf.concat(pairwise_label_list, 1)  # [k, c] or [k, 2*c]

  top_antecedent_scores = tf.boolean_mask(top_antecedent_scores, top_us_mask)
  pairwise_labels = tf.boolean_mask(pairwise_labels, top_us_mask)
  dummy_labels = tf.logical_not(tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
  pairwise_labels = tf.concat([dummy_labels, pairwise_labels], 1)  # [k, c + 1] or [k, 2*c + 1]

  loss = self.softmax_loss(top_antecedent_scores, pairwise_labels)
  loss = tf.reduce_sum(loss)

  if self.config["use_gold_bridging_anaphora"]:
    bridging_mask = tf.equal(top_is_status, 2)  # bridging
    top_span_starts = tf.boolean_mask(top_span_starts, bridging_mask)
    top_span_ends = tf.boolean_mask(top_span_ends, bridging_mask)
    top_antecedents = tf.boolean_mask(top_antecedents, bridging_mask)
    top_antecedent_scores_output = tf.boolean_mask(top_bridging_scores, bridging_mask)
  elif self.config["remove_coref_anaphora"]:
    bridging_mask = tf.not_equal(top_is_status, 1)  # DO
    top_span_starts = tf.boolean_mask(top_span_starts, bridging_mask)
    top_span_ends = tf.boolean_mask(top_span_ends, bridging_mask)
    top_antecedents = tf.boolean_mask(top_antecedents, bridging_mask)
    top_antecedent_scores_output = tf.boolean_mask(tf.concat([dummy_scores, top_bridging_scores], 1), bridging_mask)
  else:
    top_antecedent_scores_output = top_antecedent_scores

  return [top_span_starts, top_span_ends, top_span_cluster_ids, top_antecedents, top_antecedent_scores_output], loss
def get_predictions_and_loss(self, word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids):
  self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"])
  self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"])

  num_sentences = tf.shape(word_emb)[0]
  max_sentence_length = tf.shape(word_emb)[1]

  text_emb_list = [word_emb]
  if self.config["char_embedding_size"] > 0:
    char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index)  # [num_sentences, max_sentence_length, max_word_length, emb]
    flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)])  # [num_sentences * max_sentence_length, max_word_length, emb]
    flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"])  # [num_sentences * max_sentence_length, emb]
    aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)])  # [num_sentences, max_sentence_length, emb]
    text_emb_list.append(aggregated_char_emb)

  text_emb = tf.concat(text_emb_list, 2)
  text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

  text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
  text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])

  text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
  text_outputs = tf.nn.dropout(text_outputs, self.dropout)

  genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre)  # [emb]

  sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length])  # [num_sentences, max_sentence_length]
  flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask)  # [num_words]
  flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask)  # [num_words]

  candidate_starts, candidate_ends = coref_ops.spans(
      sentence_indices=flattened_sentence_indices,
      max_width=self.max_mention_width)
  candidate_starts.set_shape([None])
  candidate_ends.set_shape([None])

  candidate_mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, candidate_starts, candidate_ends)  # [num_candidates, emb]
  candidate_mention_scores = self.get_mention_scores(candidate_mention_emb)  # [num_mentions, 1]
  candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1)  # [num_mentions]

  k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"]))
  predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores, candidate_starts, candidate_ends, k)  # ([k], [k])
  predicted_mention_indices.set_shape([None])

  mention_starts = tf.gather(candidate_starts, predicted_mention_indices)  # [num_mentions]
  mention_ends = tf.gather(candidate_ends, predicted_mention_indices)  # [num_mentions]
  mention_emb = tf.gather(candidate_mention_emb, predicted_mention_indices)  # [num_mentions, emb]
  mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices)  # [num_mentions]
  mention_start_emb = tf.gather(text_outputs, mention_starts)  # [num_mentions, emb]
  mention_end_emb = tf.gather(text_outputs, mention_ends)  # [num_mentions, emb]
  mention_speaker_ids = tf.gather(speaker_ids, mention_starts)  # [num_mentions]

  max_antecedents = self.config["max_antecedents"]
  antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids, max_antecedents)  # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions])
  antecedents.set_shape([None, None])
  antecedent_labels.set_shape([None, None])
  antecedents_len.set_shape([None])

  antecedent_scores = self.get_antecedent_scores(mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb)  # [num_mentions, max_ant + 1]

  loss = self.softmax_loss(antecedent_scores, antecedent_labels)  # [num_mentions]
  loss = tf.reduce_sum(loss)  # []

  return [candidate_starts, candidate_ends, candidate_mention_scores, mention_starts, mention_ends, antecedents, antecedent_scores], loss
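# self.get_mention_scores is a feed-forward scorer over the span embedding.
# A sketch in the style of the reference e2e-coref implementation; the scope
# name is an assumption:
def get_mention_scores(self, mention_emb):
  with tf.variable_scope("mention_scores"):
    return util.ffnn(mention_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout)  # [num_mentions, 1]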
def get_predictions_and_loss(self, word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids):
  # NOTE: this variant appears to be a PyTorch port; `tf` here is presumably
  # `import torch as tf`, with `nn` / `F` from torch.nn and torch.nn.functional.
  training_num = 0.0
  if is_training:
    training_num = 1.0
  # Set the dropout rates.
  self.dropout = 1 - (training_num * self.config["dropout_rate"])  # 0.2
  self.lexical_dropout = 1 - (training_num * self.config["lexical_dropout_rate"])  # 0.5

  # Get the tensor sizes: number of sentences and max sentence length.
  num_sentences = word_emb.shape[0]  # number of sentences to predict from
  max_sentence_length = word_emb.shape[1]  # the dataset is padded so all sentences have the same shape

  text_emb_list = [word_emb]  # 3D tensor holding the 350-dim word embeddings from GloVe and Turian

  if self.config["char_embedding_size"] > 0:  # true: 8
    temp_tensor = tf.zeros([len(self.char_dict), self.config["char_embedding_size"]])  # [115, 8]
    nn.init.xavier_uniform(temp_tensor)
    char_emb = tf.gather(temp_tensor, char_index)  # [num_sentences, max_sentence_length, max_word_length, emb]
    # An 8-dim embedding per character, per word, per sentence
    # (padded to the longest word and longest sentence).
    flattened_char_emb = char_emb.view([num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)])  # [num_sentences * max_sentence_length, max_word_length, emb]
    flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"])  # [num_sentences * max_sentence_length, emb]; character-level CNN
    aggregated_char_emb = flattened_aggregated_char_emb.view([num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)])  # [num_sentences, max_sentence_length, emb]
    text_emb_list.append(aggregated_char_emb)

  # text_emb_list now holds the 350-dim word embeddings plus 150-dim character
  # embeddings (50 filters for each of the 3/4/5 filter widths, over 8-dim
  # character vectors).
  text_emb = tf.cat(text_emb_list, 2)  # concatenated on the last dimension
  text_emb = F.dropout(text_emb, self.lexical_dropout)

  text_len_mask = self.sequence_mask(text_len, max_len=max_sentence_length)  # tf.sequence_mask(text_len, maxlen=max_sentence_length)
  text_len_mask = text_len_mask.view([num_sentences * max_sentence_length])

  text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
  text_outputs = F.dropout(text_outputs, self.dropout)

  genre_tensor = tf.zeros([len(self.genres), self.config["feature_size"]])
  nn.init.xavier_uniform(genre_tensor)
  genre_emb = tf.gather(genre_tensor, genre)  # [emb]

  sentence_indices = tf.unsqueeze(tf.range(num_sentences), 1).repeat([1, max_sentence_length])  # [num_sentences, max_sentence_length]
  flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask)  # [num_words]
  flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask)  # [num_words]

  candidate_starts, candidate_ends = coref_ops.coref_kernels_spans(
      sentence_indices=flattened_sentence_indices,
      max_width=self.max_mention_width)
  candidate_starts.set_shape([None])
  candidate_ends.set_shape([None])

  candidate_mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, candidate_starts, candidate_ends)  # [num_candidates, emb]
  candidate_mention_scores = self.get_mention_scores(candidate_mention_emb)  # [num_mentions, 1]
  candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1)  # [num_mentions]

  k = tf.floor((text_outputs.shape[0].float()) * self.config["mention_ratio"]).int()
  predicted_mention_indices = coref_ops.coref_kernels_extract_mentions(candidate_mention_scores, candidate_starts, candidate_ends, k)  # ([k], [k])
  predicted_mention_indices.set_shape([None])

  mention_starts = tf.gather(candidate_starts, predicted_mention_indices)  # [num_mentions]
  mention_ends = tf.gather(candidate_ends, predicted_mention_indices)  # [num_mentions]
  mention_emb = tf.gather(candidate_mention_emb, predicted_mention_indices)  # [num_mentions, emb]
  mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices)  # [num_mentions]
  mention_start_emb = tf.gather(text_outputs, mention_starts)  # [num_mentions, emb]
  mention_end_emb = tf.gather(text_outputs, mention_ends)  # [num_mentions, emb]
  mention_speaker_ids = tf.gather(speaker_ids, mention_starts)  # [num_mentions]

  max_antecedents = self.config["max_antecedents"]
  antecedents, antecedent_labels, antecedents_len = coref_ops.coref_kernels_antecedents(
      mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids, max_antecedents)  # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions])
  antecedents.set_shape([None, None])
  antecedent_labels.set_shape([None, None])
  antecedents_len.set_shape([None])

  antecedent_scores = self.get_antecedent_scores(mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb)  # [num_mentions, max_ant + 1]

  loss = self.softmax_loss(antecedent_scores, antecedent_labels)  # [num_mentions]
  loss = tf.sum(loss)  # []

  return [candidate_starts, candidate_ends, candidate_mention_scores, mention_starts, mention_ends, antecedents, antecedent_scores], loss
def get_predictions_and_loss(self, inputs):
  tokens, context_word_emb, head_word_emb, lm_emb, char_index, text_len, is_training, gold_starts, gold_ends = inputs
  self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
  self.lexical_dropout = self.get_dropout(self.config["lexical_dropout_rate"], is_training)
  self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training)

  num_sentences = tf.shape(context_word_emb)[0]
  max_sentence_length = tf.shape(context_word_emb)[1]

  context_emb_list = [context_word_emb]
  head_emb_list = [head_word_emb]
  if self.config["char_embedding_size"] > 0:
    char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index)  # [num_sentences, max_sentence_length, max_word_length, emb]
    flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)])  # [num_sentences * max_sentence_length, max_word_length, emb]
    flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"])  # [num_sentences * max_sentence_length, emb]
    aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)])  # [num_sentences, max_sentence_length, emb]
    context_emb_list.append(aggregated_char_emb)
    head_emb_list.append(aggregated_char_emb)

  if not self.lm_file:
    elmo_module = hub.Module("https://tfhub.dev/google/elmo/2")
    lm_embeddings = elmo_module(
        inputs={"tokens": tokens, "sequence_len": text_len},
        signature="tokens", as_dict=True)
    word_emb = lm_embeddings["word_emb"]  # [num_sentences, max_sentence_length, 512]
    lm_emb = tf.stack([tf.concat([word_emb, word_emb], -1),
                       lm_embeddings["lstm_outputs1"],
                       lm_embeddings["lstm_outputs2"]], -1)  # [num_sentences, max_sentence_length, 1024, 3]
  lm_emb_size = util.shape(lm_emb, 2)
  lm_num_layers = util.shape(lm_emb, 3)
  with tf.variable_scope("lm_aggregation"):
    self.lm_weights = tf.nn.softmax(tf.get_variable("lm_scores", [lm_num_layers], initializer=tf.constant_initializer(0.0)))
    self.lm_scaling = tf.get_variable("lm_scaling", [], initializer=tf.constant_initializer(1.0))
  flattened_lm_emb = tf.reshape(lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
  flattened_aggregated_lm_emb = tf.matmul(flattened_lm_emb, tf.expand_dims(self.lm_weights, 1))  # [num_sentences * max_sentence_length * emb, 1]
  aggregated_lm_emb = tf.reshape(flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size])
  aggregated_lm_emb *= self.lm_scaling
  context_emb_list.append(aggregated_lm_emb)

  context_emb = tf.concat(context_emb_list, 2)  # [num_sentences, max_sentence_length, emb]
  head_emb = tf.concat(head_emb_list, 2)  # [num_sentences, max_sentence_length, emb]
  context_emb = tf.nn.dropout(context_emb, self.lexical_dropout)
  head_emb = tf.nn.dropout(head_emb, self.lexical_dropout)

  text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)  # [num_sentences, max_sentence_length]

  context_outputs = self.lstm_contextualize(context_emb, text_len, text_len_mask, self.lstm_dropout)  # [num_words, emb]
  num_words = util.shape(context_outputs, 0)

  sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length])  # [num_sentences, max_sentence_length]
  flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask)  # [num_words]
  flattened_head_emb = self.flatten_emb_by_sentence(head_emb, text_len_mask)  # [num_words]

  candidate_starts = tf.tile(tf.expand_dims(tf.range(num_words), 1), [1, self.max_span_width])  # [num_words, max_span_width]
  candidate_ends = candidate_starts + tf.expand_dims(tf.range(self.max_span_width), 0)  # [num_words, max_span_width]
  candidate_start_sentence_indices = tf.gather(flattened_sentence_indices, candidate_starts)  # [num_words, max_span_width]
  candidate_end_sentence_indices = tf.gather(flattened_sentence_indices, tf.minimum(candidate_ends, num_words - 1))  # [num_words, max_span_width]
  candidate_mask = tf.logical_and(candidate_ends < num_words, tf.equal(candidate_start_sentence_indices, candidate_end_sentence_indices))  # [num_words, max_span_width]
  flattened_candidate_mask = tf.reshape(candidate_mask, [-1])  # [num_words * max_span_width]
  candidate_starts = tf.boolean_mask(tf.reshape(candidate_starts, [-1]), flattened_candidate_mask)  # [num_candidates]
  candidate_ends = tf.boolean_mask(tf.reshape(candidate_ends, [-1]), flattened_candidate_mask)  # [num_candidates]

  candidate_labels = self.get_candidate_labels(candidate_starts, candidate_ends, gold_starts, gold_ends)  # [num_candidates]
  candidate_span_emb = self.get_span_emb(flattened_head_emb, context_outputs, candidate_starts, candidate_ends)  # [num_candidates, emb]
  candidate_mention_scores = self.get_mention_scores(candidate_span_emb, self.dropout)  # [k, 1]
  candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1)  # [k]

  loss = self.sigmoid_loss(candidate_mention_scores, candidate_labels)
  top_span_starts, top_span_ends = self.get_top_mentions(num_words, candidate_starts, candidate_ends, candidate_mention_scores)
  return [top_span_starts, top_span_ends], loss
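# Unlike the antecedent-ranking variants, this model scores each candidate
# span independently, so self.sigmoid_loss is per-span binary cross-entropy.
# A minimal sketch, assuming boolean candidate_labels:
def sigmoid_loss(self, span_scores, span_labels):
  # span_scores: [num_candidates]; span_labels: boolean [num_candidates]
  loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.to_float(span_labels), logits=span_scores)
  return tf.reduce_sum(loss)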
def get_embeddings(data, sentences, text_len, context_word_emb, head_word_emb, char_index, lm_emb, lexical_dropout):
  """Build word-level representations.

  Args:
    data: LSGNData object.
    sentences: String tokens. [batch_size, max_len]
    text_len: Length of each sentence. [batch_size]
    context_word_emb: Context word embeddings.
    head_word_emb: Head word embeddings.
    char_index: Character indices.
    lm_emb: Cached contextualized embeddings.
    lexical_dropout: Scalar tensor.
  """
  num_sentences = tf.shape(context_word_emb)[0]
  max_sentence_length = tf.shape(context_word_emb)[1]
  context_emb_list = [context_word_emb]
  head_emb_list = [head_word_emb]

  # Process char embeddings for the batch of sentences.
  if data.char_embedding_size > 0:
    char_emb = tf.gather(tf.get_variable("char_embeddings", [len(data.char_dict), data.char_embedding_size]), char_index)  # [num_sentences, max_sentence_length, max_word_length, emb]
    flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)])  # [num_sentences * max_sentence_length, max_word_length, emb]
    flattened_aggregated_char_emb = util.cnn(flattened_char_emb, data.config["filter_widths"], data.config["filter_size"])  # [num_sentences * max_sentence_length, num_filters * len(filter_widths)]
    aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)])  # [num_sentences, max_sentence_length, emb]
    context_emb_list.append(aggregated_char_emb)
    head_emb_list.append(aggregated_char_emb)

  # Process LM embeddings.
  if data.lm_file or data.lm_hub:
    # Alternatively, we could initialize module/aggregation/* from here.
    with tf.variable_scope("lm_aggregation"):
      lm_weights = tf.nn.softmax(tf.get_variable("lm_scores", [data.lm_layers], initializer=tf.constant_initializer(0.0)))
      lm_scaling = tf.get_variable("lm_scaling", [], initializer=tf.constant_initializer(1.0))
    # Load lm_embeddings from hub.
    # if data.lm_hub:
    #   lm_embeddings = data.lm_hub(
    #       inputs={"tokens": sentences, "sequence_len": text_len},
    #       signature="tokens", as_dict=True)
    #   word_emb = tf.expand_dims(lm_embeddings["word_emb"], 3)  # [B, slen, 512]
    #   lm_emb = tf.concat([
    #       tf.concat([word_emb, word_emb], 2),  # [B, slen, 1024, 1]
    #       tf.expand_dims(lm_embeddings["lstm_outputs1"], 3),
    #       tf.expand_dims(lm_embeddings["lstm_outputs2"], 3)], 3)  # [B, slen, 1024, 3]
    lm_emb_size = util.shape(lm_emb, 2)  # TODO: Might not need this.
    lm_num_layers = util.shape(lm_emb, 3)
    flattened_lm_emb = tf.reshape(lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])  # [num_sentences * max_sentence_length * emb, layers]
    flattened_aggregated_lm_emb = tf.matmul(flattened_lm_emb, tf.expand_dims(lm_weights, 1))  # [num_sentences * max_sentence_length * emb, 1]
    aggregated_lm_emb = tf.reshape(flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size])
    aggregated_lm_emb *= lm_scaling
    context_emb_list.append(aggregated_lm_emb)
  else:
    lm_weights = None
    lm_scaling = None

  # Concatenate and apply dropout.
  context_emb = tf.concat(context_emb_list, 2)  # [num_sentences, max_sentence_length, concatenated emb]
  head_emb = tf.concat(head_emb_list, 2)  # [num_sentences, max_sentence_length, emb]
  context_emb = tf.nn.dropout(context_emb, lexical_dropout)
  head_emb = tf.nn.dropout(head_emb, lexical_dropout)
  return context_emb, head_emb, lm_weights, lm_scaling
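# The lm_aggregation block above is a learned scalar mix over cached LM
# layers: e = gamma * sum_l softmax(s)_l * h_l. A toy, self-contained
# illustration of the same computation; the shapes are made up.
import tensorflow as tf

lm_emb_toy = tf.random_normal([2, 3, 4, 3])  # 2 sentences, 3 tokens, 4-dim emb, 3 layers
lm_weights_toy = tf.nn.softmax(tf.zeros([3]))  # uniform layer weights at initialization
lm_scaling_toy = tf.constant(1.0)
mixed = lm_scaling_toy * tf.einsum("btel,l->bte", lm_emb_toy, lm_weights_toy)  # [2, 3, 4]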
def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb, lm_emb, char_index, text_len, speaker_ids, is_training, gold_starts, gold_ends, number_features, plurality_features, candidate_positions, pronoun_positions, status_positions, name_positions, labels, candidate_mask):
  all_k = util.shape(number_features, 0)
  all_c = util.shape(number_features, 1)

  # dropout
  self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
  self.lexical_dropout = self.get_dropout(self.config["lexical_dropout_rate"], is_training)
  self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training)

  num_sentences = tf.shape(context_word_emb)[0]  # number of sentences in the current example
  max_sentence_length = tf.shape(context_word_emb)[1]  # length of the longest sentence

  context_emb_list = [context_word_emb]
  head_emb_list = [head_word_emb]

  # character emb
  if self.config["char_embedding_size"] > 0:
    # [num_sentences, max_sentence_length, max_word_length, emb] [?, ?, ?, 8]
    value = tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]])
    char_emb = tf.gather(value, char_index)
    # [num_sentences * max_sentence_length, max_word_length, emb] [?, ?, 8]
    flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)])
    # [num_sentences * max_sentence_length, emb] [?, 150]
    flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"])
    # [num_sentences, max_sentence_length, emb] [?, ?, 150]
    aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)])
    context_emb_list.append(aggregated_char_emb)
    head_emb_list.append(aggregated_char_emb)

  lm_emb_size = util.shape(lm_emb, 2)  # 1024
  lm_num_layers = util.shape(lm_emb, 3)  # 3
  with tf.variable_scope("lm_aggregation"):
    self.lm_weights = tf.nn.softmax(tf.get_variable("lm_scores", [lm_num_layers], initializer=tf.constant_initializer(0.0)))
    self.lm_scaling = tf.get_variable("lm_scaling", [], initializer=tf.constant_initializer(1.0))
  # reshape lm_emb [?, 3]
  flattened_lm_emb = tf.reshape(lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
  # lm_emb matmul weight matrix [num_sentences * max_sentence_length * emb, 1]
  flattened_aggregated_lm_emb = tf.matmul(flattened_lm_emb, tf.expand_dims(self.lm_weights, 1))  # [?, 1]
  # lm_emb reshape [?, ?, 1024]
  aggregated_lm_emb = tf.reshape(flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size])
  aggregated_lm_emb *= self.lm_scaling

  # add elmo emb to context_emb_list
  if self.config['use_elmo']:
    context_emb_list.append(aggregated_lm_emb)

  # concatenate the context embeddings [num_sentences, max_sentence_length, emb] [?, ?, 1474]
  context_emb = tf.concat(context_emb_list, 2)
  # concatenate the head embeddings [num_sentences, max_sentence_length, emb] [?, ?, 450]
  head_emb = tf.concat(head_emb_list, 2)
  # [num_sentences, max_sentence_length, emb] [?, ?, 1474]
  context_emb = tf.nn.dropout(context_emb, self.lexical_dropout)
  # [num_sentences, max_sentence_length, emb] [?, ?, 450]
  head_emb = tf.nn.dropout(head_emb, self.lexical_dropout)

  # [num_sentences, max_sentence_length]
  text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)

  # context through the LSTM [num_words, emb] [?, 400]
  context_outputs = self.lstm_contextualize(context_emb, text_len, text_len_mask)
  num_words = util.shape(context_outputs, 0)

  # [num_words] [?, 450]
  flattened_head_emb = self.flatten_emb_by_sentence(head_emb, text_len_mask)

  top_span_starts = gold_starts
  top_span_ends = gold_ends
  # get span emb [?, 1270]
  top_span_emb = self.get_span_emb(flattened_head_emb, context_outputs, top_span_starts, top_span_ends)

  candidate_NP_embeddings = tf.gather(top_span_emb, candidate_positions)  # [k, max_candidate, embedding] [?, ?, 1270]
  pronoun_embedding = tf.gather(top_span_emb, pronoun_positions)  # [k1, embedding]
  status_embedding = tf.gather(top_span_emb, status_positions)  # [k2, embedding]
  name_embedding = tf.gather(top_span_emb, name_positions)  # [k3, embedding]
  candidate_starts = tf.gather(top_span_starts, candidate_positions)  # [k, max_candidate]
  pronoun_starts = tf.gather(top_span_starts, pronoun_positions)  # [k, 1] [?, ?]
  top_span_speaker_ids = tf.gather(speaker_ids, candidate_starts)  # [k] [?, ?]
  pronoun_speaker_id = tf.gather(speaker_ids, pronoun_starts)  # [k, 1] [?, ?]

  mention_offsets = tf.range(util.shape(top_span_emb, 0)) + 1
  candidate_NP_offsets = tf.gather(mention_offsets, candidate_positions)
  pronoun_offsets = tf.gather(mention_offsets, pronoun_positions)

  k = util.shape(pronoun_positions, 0)
  dummy_scores = tf.zeros([k, 1])  # [k, 1]

  for i in range(self.config["coref_depth"]):
    with tf.variable_scope("coref_layer", reuse=(i > 0)):
      coreference_scores = self.get_coreference_score(candidate_NP_embeddings, pronoun_embedding, top_span_speaker_ids, pronoun_speaker_id, candidate_NP_offsets, pronoun_offsets, number_features, plurality_features)  # [k, c]
      score_after_softmax = tf.nn.softmax(coreference_scores, 1)  # [k, c]
      if self.config['softmax_pruning']:
        threshold = tf.ones([all_k, all_c]) * self.config['softmax_threshold']  # [k, c]
      else:
        threshold = tf.zeros([all_k, all_c]) - tf.ones([all_k, all_c])
      ranking_mask = tf.to_float(tf.greater(score_after_softmax, threshold))  # [k, c]
      if self.config['apply_knowledge']:
        with tf.variable_scope("knowledge_layer"):
          # [k, c]
          knowledge_score, merged_score, attention_score, diagonal_mask, square_mask = self.get_knowledge_score(candidate_NP_embeddings, number_features, plurality_features, candidate_mask * ranking_mask)
          coreference_scores = coreference_scores + knowledge_score  # [k, c]
          if self.config['knowledge_pruning']:
            knowledge_score_after_softmax = tf.nn.softmax(knowledge_score, 1)  # [k, c]
            knowledge_threshold = tf.ones([all_k, all_c]) * self.config['softmax_threshold']  # [k, c]
            knowledge_ranking_mask = tf.to_float(tf.greater(knowledge_score_after_softmax, knowledge_threshold))  # [k, c]
            ranking_mask = ranking_mask * knowledge_ranking_mask

  # dummy_scores is a zero vector and needs no softmax, so run coreference_scores
  # through softmax and then concat the output with dummy_scores.
  top_antecedent_scores = tf.concat([dummy_scores, coreference_scores], 1)  # [k, c + 1]

  # labels are True where the gold label is positive and the score is above the threshold.
  labels = tf.logical_and(labels, tf.greater(score_after_softmax, threshold))

  dummy_mask_1 = tf.ones([k, 1])
  dummy_mask_0 = tf.zeros([k, 1])
  mask_for_prediction = tf.concat([dummy_mask_0, candidate_mask], 1)
  ranking_mask_for_prediction = tf.concat([dummy_mask_0, ranking_mask], 1)

  if self.config['random_sample_training']:
    random_mask = tf.greater(tf.random_uniform([all_k, all_c]), tf.ones([all_k, all_c]) * 0.3)
    labels = tf.logical_and(labels, random_mask)
    ranking_mask = ranking_mask * tf.to_float(random_mask)

  dummy_labels = tf.logical_not(tf.reduce_any(labels, 1, keepdims=True))  # [k, 1]
  top_antecedent_labels = tf.concat([dummy_labels, labels], 1)  # [k, c + 1]
  mask_for_training = tf.concat([dummy_mask_1, candidate_mask], 1)
  ranking_mask_for_training = tf.concat([dummy_mask_1, ranking_mask], 1)

  loss = self.softmax_loss(top_antecedent_scores * mask_for_training * ranking_mask_for_training, top_antecedent_labels)
  loss = tf.reduce_sum(loss)  # []

  return [top_antecedent_scores * mask_for_prediction * ranking_mask_for_prediction, score_after_softmax * candidate_mask], loss
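# A toy illustration of the softmax-threshold pruning used above; the
# threshold value here is made up, not the config's softmax_threshold.
import tensorflow as tf

toy_scores = tf.constant([[2.0, 0.5, -1.0]])
toy_probs = tf.nn.softmax(toy_scores, 1)                # ~[[0.79, 0.18, 0.04]]
toy_mask = tf.to_float(tf.greater(toy_probs, 0.15))     # [[1.0, 1.0, 0.0]]: drops low-probability candidates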
def get_predictions_and_loss(self, word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids, pos_tags, ner_tags, categories, ner_ids, cat_glove, domain_labels, l): self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"]) self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"]) num_sentences = tf.shape(word_emb)[0] max_sentence_length = tf.shape(word_emb)[1] text_emb_list = [word_emb] if self.config["char_embedding_size"] > 0: char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb] flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb] flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb] aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb] text_emb_list.append(aggregated_char_emb) if self.config["use_pos_tag"]: text_emb_list.append(pos_tags) if self.config["use_ner_g"]: text_emb_list.append(ner_tags) if self.config["use_categories"]: text_emb_list.append(categories) if self.config["use_categories_glove"]: text_emb_list.append(cat_glove) text_emb = tf.concat(text_emb_list, 2) text_emb = tf.nn.dropout(text_emb, self.lexical_dropout) text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length) text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length]) text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask) text_outputs = tf.nn.dropout(text_outputs, self.dropout) genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb] # print "------------------------------" # print "GENRE EMB" # print genre_emb.shape, genre sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length] flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words] flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask) # [num_words] candidate_starts, candidate_ends = coref_ops.spans( sentence_indices=flattened_sentence_indices, max_width=self.max_mention_width) candidate_starts.set_shape([None]) candidate_ends.set_shape([None]) # get_mention_scores call util.ffnn candidate_mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, candidate_starts, candidate_ends) # [num_candidates, emb] candidate_mention_scores = self.get_mention_scores(candidate_mention_emb) # [num_mentions, 1] candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [num_mentions] k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"])) predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores, candidate_starts, candidate_ends, k) # ([k], [k]) predicted_mention_indices.set_shape([None]) mention_starts = tf.gather(candidate_starts, predicted_mention_indices) # [num_mentions] mention_ends = tf.gather(candidate_ends, predicted_mention_indices) # [num_mentions] mention_emb = tf.gather(candidate_mention_emb, 
predicted_mention_indices) # [num_mentions, emb] mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices) # [num_mentions] mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb] mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb] mention_speaker_ids = tf.gather(speaker_ids, mention_starts) # [num_mentions] mention_ner_ids = tf.gather(ner_ids, mention_starts) max_antecedents = self.config["max_antecedents"] antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids, max_antecedents) # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions]) antecedents.set_shape([None, None]) antecedent_labels.set_shape([None, None]) antecedents_len.set_shape([None]) # get_antecedent_scores calls util.ffnn antecedent_scores, pair_emb = self.get_antecedent_scores(mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb, mention_ner_ids) # [num_mentions, max_ant + 1] # antecedent_scores are floats; antecedent_labels are booleans; softmax_loss casts the boolean labels to floats loss = self.softmax_loss(antecedent_scores, antecedent_labels) # [num_mentions] loss = tf.reduce_sum(loss) # [] print("---------------------------------------") print("PAIR EMB") print(pair_emb.shape) print("---------------------------------------") print("CANDIDATE") print(candidate_mention_emb.shape) print("---------------------------------------") # if self.config["use_dann"]: d_logits = util.dann(candidate_mention_emb, self.config["ffnn_size"], len(self.genres), self.dropout, l=l, name="1") # elif self.config["use_dann_pairwise"]: d_logits2 = util.dann(pair_emb, self.config["ffnn_size"], len(self.genres), self.dropout, l=l, name="2") print("---------------------------------------") print("D_LOGITS") print(d_logits2.shape) print("---------------------------------------") d_probs2 = tf.nn.softmax(d_logits2) d_probs = tf.nn.softmax(d_logits) # P(genre | mentions) neg_ll = -tf.log(tf.clip_by_value(d_probs, 1e-10, 1.0)) # N x 7 N = tf.shape(neg_ll)[0] # the argmax of d_probs gives the predicted domains pred_domains = tf.argmax(d_probs, 1) # tile the 1 x 7 domain labels to N x 7 tiled_domain_labels = tf.tile(tf.expand_dims(domain_labels, 0), [N, 1]) # convert the tiled labels into a form comparable to pred_domains gold_domains = tf.argmax(tiled_domain_labels, 1) # multiply neg_ll by tiled_domain_labels pairwise_loss = tf.multiply(neg_ll, tiled_domain_labels) # N x 7 pairwise_loss_reduced = tf.reduce_sum(pairwise_loss, 0) domain_loss = tf.reduce_sum(tf.divide(pairwise_loss_reduced, tf.cast(N, tf.float32))) domain_loss_reduce_mean = tf.reduce_sum(tf.reduce_mean(pairwise_loss, 0)) correct_domain_predictions = tf.equal(pred_domains, gold_domains) domain_accuracy = tf.reduce_mean(tf.cast(correct_domain_predictions, tf.float32)) values = [domain_accuracy, domain_loss_reduce_mean, pairwise_loss_reduced, N, neg_ll, d_logits2, d_logits] return [candidate_starts, candidate_ends, candidate_mention_scores, mention_starts, mention_ends, antecedents, antecedent_scores, antecedent_labels, genre], loss, domain_loss, pred_domains, values
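# util.dann above adversarially predicts the genre from mention / pair
# embeddings. A DANN layer of this kind typically wraps a gradient-reversal
# op; a minimal TF 1.x sketch of that op (an assumption -- util.dann itself
# is not shown here), where `l` is the reversal strength threaded through
# the model:
import tensorflow as tf

def flip_gradient(x, l=1.0):
    # Forward pass: x * (1 + l) - x * l == x (identity).
    # Backward pass: the stop_gradient term contributes no gradient, so dy/dx == -l.
    return tf.stop_gradient(x * (1.0 + l)) - x * l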
def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb, lm_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids): if self.config["char_embedding_size"] > 0: with tf.variable_scope("char_emb", reuse=tf.AUTO_REUSE): char_emb = tf.gather( tf.get_variable("char_embeddings", [ len(self.char_dict), self.config["char_embedding_size"] ]), char_index ) # [num_sentences, max_sentence_length, max_word_length, emb] if not self.lm_file: elmo_module = hub.Module(str(self.config["elmo_hub_path"])) lm_embeddings = elmo_module(inputs={ "tokens": tokens, "sequence_len": text_len }, signature="tokens", as_dict=True) word_emb = lm_embeddings[ "word_emb"] # [num_sentences, max_sentence_length, 512] lm_emb = tf.stack([ tf.concat([word_emb, word_emb], -1), lm_embeddings["lstm_outputs1"], lm_embeddings["lstm_outputs2"] ], -1) # [num_sentences, max_sentence_length, 1024, 3] self.dropout = self.get_dropout(self.config["dropout_rate"], is_training) self.lexical_dropout = self.get_dropout( self.config["lexical_dropout_rate"], is_training) self.lstm_dropout = self.get_dropout( self.config["lstm_dropout_rate"], is_training) num_sentences = tf.shape(context_word_emb)[0] max_sentence_length = tf.shape(context_word_emb)[1] context_emb_list = [context_word_emb] head_emb_list = [head_word_emb] if self.config["char_embedding_size"] > 0: flattened_char_emb = tf.reshape( char_emb, [ num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3) ] ) # [num_sentences * max_sentence_length, max_word_length, emb] with tf.variable_scope("char_convolution", reuse=tf.AUTO_REUSE): flattened_aggregated_char_emb = util.cnn( flattened_char_emb, self.config["filter_widths"], self.config["filter_size"] ) # [num_sentences * max_sentence_length, emb] aggregated_char_emb = tf.reshape( flattened_aggregated_char_emb, [ num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1) ]) # [num_sentences, max_sentence_length, emb] context_emb_list.append(aggregated_char_emb) head_emb_list.append(aggregated_char_emb) lm_emb_size = util.shape(lm_emb, 2) lm_num_layers = util.shape(lm_emb, 3) with tf.variable_scope("lm_aggregation", reuse=tf.AUTO_REUSE): self.lm_weights = tf.nn.softmax( tf.get_variable("lm_scores", [lm_num_layers], initializer=tf.constant_initializer(0.0))) self.lm_scaling = tf.get_variable( "lm_scaling", [], initializer=tf.constant_initializer(1.0)) flattened_lm_emb = tf.reshape(lm_emb, [ num_sentences * max_sentence_length * lm_emb_size, lm_num_layers ]) flattened_aggregated_lm_emb = tf.matmul( flattened_lm_emb, tf.expand_dims( self.lm_weights, 1)) # [num_sentences * max_sentence_length * emb, 1] aggregated_lm_emb = tf.reshape( flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size]) aggregated_lm_emb *= self.lm_scaling context_emb_list.append(aggregated_lm_emb) context_emb = tf.concat( context_emb_list, 2) # [num_sentences, max_sentence_length, emb] head_emb = tf.concat( head_emb_list, 2) # [num_sentences, max_sentence_length, emb] context_emb = tf.nn.dropout( context_emb, self.lexical_dropout ) # [num_sentences, max_sentence_length, emb] head_emb = tf.nn.dropout( head_emb, self.lexical_dropout ) # [num_sentences, max_sentence_length, emb] text_len_mask = tf.sequence_mask( text_len, maxlen=max_sentence_length ) # [num_sentence, max_sentence_length] context_outputs = self.lstm_contextualize( context_emb, text_len, text_len_mask) # [num_words, emb] num_words = util.shape(context_outputs, 0) with 
tf.variable_scope("genre_emb", reuse=tf.AUTO_REUSE): genre_emb = tf.gather( tf.get_variable( "genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb] sentence_indices = tf.tile( tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length ]) # [num_sentences, max_sentence_length] flattened_sentence_indices = self.flatten_emb_by_sentence( sentence_indices, text_len_mask) # [num_words] flattened_head_emb = self.flatten_emb_by_sentence( head_emb, text_len_mask) # [num_words] candidate_starts = tf.tile( tf.expand_dims(tf.range(num_words), 1), [1, self.max_span_width]) # [num_words, max_span_width] candidate_ends = candidate_starts + tf.expand_dims( tf.range(self.max_span_width), 0) # [num_words, max_span_width] candidate_start_sentence_indices = tf.gather( flattened_sentence_indices, candidate_starts) # [num_words, max_span_width] candidate_end_sentence_indices = tf.gather( flattened_sentence_indices, tf.minimum(candidate_ends, num_words - 1)) # [num_words, max_span_width] candidate_mask = tf.logical_and( candidate_ends < num_words, tf.equal(candidate_start_sentence_indices, candidate_end_sentence_indices) ) # [num_words, max_span_width] flattened_candidate_mask = tf.reshape( candidate_mask, [-1]) # [num_words * max_span_width] candidate_starts = tf.boolean_mask( tf.reshape(candidate_starts, [-1]), flattened_candidate_mask) # [num_candidates] candidate_ends = tf.boolean_mask( tf.reshape(candidate_ends, [-1]), flattened_candidate_mask) # [num_candidates] candidate_sentence_indices = tf.boolean_mask( tf.reshape(candidate_start_sentence_indices, [-1]), flattened_candidate_mask) # [num_candidates] candidate_cluster_ids = self.get_candidate_labels( candidate_starts, candidate_ends, gold_starts, gold_ends, cluster_ids) # [num_candidates] candidate_span_emb_orig, candidate_head_scores = self.get_span_emb( flattened_head_emb, context_outputs, candidate_starts, candidate_ends) # [num_candidates, emb] self.candidate_span_emb = candidate_span_emb_orig def compute_from_emb(candidate_span_emb): with tf.variable_scope("prediction_scope", reuse=tf.AUTO_REUSE): candidate_mention_scores = self.get_mention_scores( candidate_span_emb) # [k, 1] candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [k] k = tf.to_int32( tf.floor( tf.to_float(tf.shape(context_outputs)[0]) * self.config["top_span_ratio"])) top_span_indices = coref_ops.extract_spans( tf.expand_dims(candidate_mention_scores, 0), tf.expand_dims(candidate_starts, 0), tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0), util.shape(context_outputs, 0), True) # [1, k] top_span_indices.set_shape([1, None]) top_span_indices = tf.squeeze(top_span_indices, 0) # [k] top_span_starts = tf.gather(candidate_starts, top_span_indices) # [k] top_span_ends = tf.gather(candidate_ends, top_span_indices) # [k] top_span_emb = tf.gather(candidate_span_emb, top_span_indices) # [k, emb] top_span_cluster_ids = tf.gather(candidate_cluster_ids, top_span_indices) # [k] top_span_mention_scores = tf.gather(candidate_mention_scores, top_span_indices) # [k] top_span_sentence_indices = tf.gather( candidate_sentence_indices, top_span_indices) # [k] top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts) # [k] self.head_scores = tf.gather( candidate_head_scores, top_span_indices) # [k, max_span_width] c = tf.minimum(self.config["max_top_antecedents"], k) if self.config["coarse_to_fine"]: top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning( top_span_emb, 
top_span_mention_scores, c) else: top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning( top_span_emb, top_span_mention_scores, c) dummy_scores = tf.zeros([k, 1]) # [k, 1] for i in range(self.config["coref_depth"]): with tf.variable_scope("coref_layer", reuse=tf.AUTO_REUSE): top_antecedent_emb = tf.gather( top_span_emb, top_antecedents) # [k, c, emb] top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores( top_span_emb, top_antecedents, top_antecedent_emb, top_antecedent_offsets, top_span_speaker_ids, genre_emb) # [k, c] top_antecedent_weights = tf.nn.softmax( tf.concat([dummy_scores, top_antecedent_scores], 1)) # [k, c + 1] top_antecedent_emb = tf.concat([ tf.expand_dims(top_span_emb, 1), top_antecedent_emb ], 1) # [k, c + 1, emb] attended_span_emb = tf.reduce_sum( tf.expand_dims(top_antecedent_weights, 2) * top_antecedent_emb, 1) # [k, emb] with tf.variable_scope("f"): f = tf.sigmoid( util.projection( tf.concat( [top_span_emb, attended_span_emb], 1), util.shape(top_span_emb, -1))) # [k, emb] top_span_emb = f * attended_span_emb + ( 1 - f) * top_span_emb # [k, emb] top_antecedent_scores = tf.concat( [dummy_scores, top_antecedent_scores], 1) # [k, c + 1] top_antecedent_cluster_ids = tf.gather( top_span_cluster_ids, top_antecedents) # [k, c] top_antecedent_cluster_ids += tf.to_int32( tf.log(tf.to_float(top_antecedents_mask))) # [k, c] same_cluster_indicator = tf.equal(top_antecedent_cluster_ids, tf.expand_dims( top_span_cluster_ids, 1)) # [k, c] non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0, 1) # [k, 1] pairwise_labels = tf.logical_and(same_cluster_indicator, non_dummy_indicator) # [k, c] dummy_labels = tf.logical_not( tf.reduce_any(pairwise_labels, 1, keepdims=True)) # [k, 1] top_antecedent_labels = tf.concat( [dummy_labels, pairwise_labels], 1) # [k, c + 1] loss = self.softmax_loss(top_antecedent_scores, top_antecedent_labels) # [k] loss = tf.reduce_sum(loss) # [] return [ candidate_starts, candidate_ends, candidate_mention_scores, top_span_starts, top_span_ends, top_antecedents, top_antecedent_scores ], loss predictions, loss = compute_from_emb(candidate_span_emb_orig) """alpha = self.config['adv_lr'] num_iters = self.config['adv_num_iters'] print(tf.shape(candidate_span_emb_orig)) delta = tf.random.uniform(shape=tf.shape(candidate_span_emb_orig), minval=-1*self.config['adv_perturb_length'], maxval=self.config['adv_perturb_length']) print(tf.shape(delta)) for t in range(num_iters): _, adv_loss_t = compute_from_emb(candidate_span_emb_orig+delta) delta_grad = tf.gradients(adv_loss_t, delta, aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N) delta_grad = tf.stop_gradient(delta_grad) delta = tf.clip_by_value(delta+alpha*delta_grad[0,:,:], -1*self.config['adv_perturb_length'], self.config['adv_perturb_length']) print(tf.shape(delta))""" span_emb_grad, = tf.gradients( loss, candidate_span_emb_orig, aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N) span_emb_grad = tf.stop_gradient(span_emb_grad) def normalize_vec(x, length): return length * tf.math.l2_normalize(x) perturb_span_emb = normalize_vec(span_emb_grad, self.config['adv_perturb_length']) _, adv_loss = compute_from_emb(candidate_span_emb_orig + perturb_span_emb) # _, adv_loss = compute_from_emb(candidate_span_emb_orig+delta) return predictions, (1 - self.config['adv_alpha'] ) * loss + self.config['adv_alpha'] * adv_loss
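# A condensed sketch of the single-step adversarial perturbation applied
# above (assuming `loss_fn` re-runs the scorer on perturbed embeddings with
# shared variables, as compute_from_emb does): take the gradient of the loss
# with respect to the span embeddings, freeze it, rescale it to a fixed L2
# length, and evaluate the loss at the shifted point.
import tensorflow as tf

def fast_gradient_loss(loss_fn, emb, perturb_length):
    loss = loss_fn(emb)
    grad, = tf.gradients(loss, emb)
    grad = tf.stop_gradient(grad)                        # direction only, no second-order terms
    perturb = perturb_length * tf.math.l2_normalize(grad)
    return loss_fn(emb + perturb)                        # adversarial loss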
def get_predictions_and_loss( self, tokens, context_word_emb, head_word_emb, lm_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, number_features, gender_features, nsubj_features, dobj_features, candidate_positions, pronoun_positions, labels, candidate_mask): all_k = util.shape(number_features, 0) all_c = util.shape(number_features, 1) self.dropout = self.get_dropout(self.config["dropout_rate"], is_training) self.lexical_dropout = self.get_dropout( self.config["lexical_dropout_rate"], is_training) self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training) num_sentences = tf.shape(context_word_emb)[0] max_sentence_length = tf.shape(context_word_emb)[1] context_emb_list = [context_word_emb] head_emb_list = [head_word_emb] if self.config["char_embedding_size"] > 0: char_emb = tf.gather( tf.get_variable( "char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index ) # [num_sentences, max_sentence_length, max_word_length, emb] flattened_char_emb = tf.reshape(char_emb, [ num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3) ]) # [num_sentences * max_sentence_length, max_word_length, emb] flattened_aggregated_char_emb = util.cnn( flattened_char_emb, self.config["filter_widths"], self.config["filter_size"] ) # [num_sentences * max_sentence_length, emb] aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [ num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1) ]) # [num_sentences, max_sentence_length, emb] context_emb_list.append(aggregated_char_emb) head_emb_list.append(aggregated_char_emb) if not self.lm_file: elmo_module = hub.Module("https://tfhub.dev/google/elmo/2") lm_embeddings = elmo_module(inputs={ "tokens": tokens, "sequence_len": text_len }, signature="tokens", as_dict=True) word_emb = lm_embeddings[ "word_emb"] # [num_sentences, max_sentence_length, 512] lm_emb = tf.stack([ tf.concat([word_emb, word_emb], -1), lm_embeddings["lstm_outputs1"], lm_embeddings["lstm_outputs2"] ], -1) # [num_sentences, max_sentence_length, 1024, 3] lm_emb_size = util.shape(lm_emb, 2) lm_num_layers = util.shape(lm_emb, 3) with tf.variable_scope("lm_aggregation"): self.lm_weights = tf.nn.softmax( tf.get_variable("lm_scores", [lm_num_layers], initializer=tf.constant_initializer(0.0))) self.lm_scaling = tf.get_variable( "lm_scaling", [], initializer=tf.constant_initializer(1.0)) flattened_lm_emb = tf.reshape( lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers]) flattened_aggregated_lm_emb = tf.matmul( flattened_lm_emb, tf.expand_dims( self.lm_weights, 1)) # [num_sentences * max_sentence_length * emb, 1] aggregated_lm_emb = tf.reshape( flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size]) aggregated_lm_emb *= self.lm_scaling if self.config['use_elmo']: context_emb_list.append(aggregated_lm_emb) context_emb = tf.concat(context_emb_list, 2) # [num_sentences, max_sentence_length, emb] head_emb = tf.concat(head_emb_list, 2) # [num_sentences, max_sentence_length, emb] context_emb = tf.nn.dropout( context_emb, self.lexical_dropout) # [num_sentences, max_sentence_length, emb] head_emb = tf.nn.dropout( head_emb, self.lexical_dropout) # [num_sentences, max_sentence_length, emb] text_len_mask = tf.sequence_mask( text_len, maxlen=max_sentence_length) # [num_sentence, max_sentence_length] context_outputs = self.lstm_contextualize( context_emb, text_len, text_len_mask) # [num_words, emb] num_words = 
util.shape(context_outputs, 0) genre_emb = tf.gather( tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb] flattened_head_emb = self.flatten_emb_by_sentence( head_emb, text_len_mask) # [num_words] top_span_starts = gold_starts top_span_ends = gold_ends top_span_emb = self.get_span_emb(flattened_head_emb, context_outputs, top_span_starts, top_span_ends) candidate_NP_embeddings = tf.gather( top_span_emb, candidate_positions) # [k, max_candidate, embedding] candidate_starts = tf.gather(top_span_starts, candidate_positions) # [k, max_candidate] pronoun_starts = tf.gather(top_span_starts, pronoun_positions) # [k, 1] top_span_speaker_ids = tf.gather(speaker_ids, candidate_starts) # [k] pronoun_embedding = tf.gather(top_span_emb, pronoun_positions) # [k, embedding] pronoun_speaker_id = tf.gather(speaker_ids, pronoun_starts) # [k, 1] mention_offsets = tf.range(util.shape(top_span_emb, 0)) + 1 candidate_NP_offsets = tf.gather(mention_offsets, candidate_positions) pronoun_offsets = tf.gather(mention_offsets, pronoun_positions) k = util.shape(pronoun_positions, 0) dummy_scores = tf.zeros([k, 1]) # [k, 1] for i in range(self.config["coref_depth"]): with tf.variable_scope("coref_layer", reuse=(i > 0)): coreference_scores = self.get_coreference_score( candidate_NP_embeddings, pronoun_embedding, top_span_speaker_ids, pronoun_speaker_id, genre_emb, candidate_NP_offsets, pronoun_offsets, number_features, gender_features, nsubj_features, dobj_features) # [k, c] score_after_softmax = tf.nn.softmax(coreference_scores, 1) # [k, c] if self.config['softmax_pruning']: threshold = tf.ones( [all_k, all_c]) * self.config['softmax_threshold'] # [k, c] else: threshold = tf.zeros([all_k, all_c]) - tf.ones([all_k, all_c]) ranking_mask = tf.to_float(tf.greater(score_after_softmax, threshold)) # [k, c] # number_features = tf.boolean_mask(number_features, ranking_mask) # gender_features = tf.boolean_mask(gender_features, ranking_mask) # nsubj_features = tf.boolean_mask(nsubj_features, ranking_mask) # dobj_features = tf.boolean_mask(dobj_features, ranking_mask) # coreference_scores = tf.boolean_mask(coreference_scores, ranking_mask) # labels = tf.boolean_mask(labels, ranking_mask) if self.config['apply_knowledge']: with tf.variable_scope("knowledge_layer"): knowledge_score, merged_score, attention_score, diagonal_mask, square_mask = self.get_knowledge_score( candidate_NP_embeddings, number_features, gender_features, nsubj_features, dobj_features, candidate_mask * ranking_mask) # [k, c] coreference_scores = coreference_scores + knowledge_score # [k, c] if self.config['knowledge_pruning']: knowledge_score_after_softmax = tf.nn.softmax( knowledge_score, 1) # [k, c] knowledge_threshold = tf.ones([ all_k, all_c ]) * self.config['softmax_threshold'] # [k, c] knowledge_ranking_mask = tf.to_float( tf.greater(knowledge_score_after_softmax, knowledge_threshold)) # [k, c] ranking_mask = ranking_mask * knowledge_ranking_mask else: knowledge_score = tf.zeros([all_k, all_c]) knowledge_score_after_softmax = tf.nn.softmax(knowledge_score, 1) # [k, c] merged_score = tf.zeros([all_k, all_c]) attention_score = tf.zeros([all_k, all_c]) diagonal_mask = tf.zeros([all_k, all_c]) square_mask = tf.zeros([all_k, all_c]) top_antecedent_scores = tf.concat([dummy_scores, coreference_scores], 1) # [k, c + 1] labels = tf.logical_and(labels, tf.greater(score_after_softmax, threshold)) dummy_mask_1 = tf.ones([k, 1]) dummy_mask_0 = tf.zeros([k, 1]) mask_for_prediction = tf.concat([dummy_mask_0, 
candidate_mask], 1) ranking_mask_for_prediction = tf.concat([dummy_mask_0, ranking_mask], 1) if self.config['random_sample_training']: random_mask = tf.greater(tf.random_uniform([all_k, all_c]), tf.ones([all_k, all_c]) * 0.3) labels = tf.logical_and(labels, random_mask) ranking_mask = ranking_mask * tf.to_float(random_mask) dummy_labels = tf.logical_not(tf.reduce_any(labels, 1, keepdims=True)) # [k, 1] top_antecedent_labels = tf.concat([dummy_labels, labels], 1) # [k, c + 1] mask_for_training = tf.concat([dummy_mask_1, candidate_mask], 1) ranking_mask_for_training = tf.concat([dummy_mask_1, ranking_mask], 1) loss = self.softmax_loss( top_antecedent_scores * mask_for_training * ranking_mask_for_training, top_antecedent_labels) loss = tf.reduce_sum(loss) # [] return [ top_antecedent_scores * mask_for_prediction * ranking_mask_for_prediction, score_after_softmax * candidate_mask ], loss
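# numpy sketch of the softmax pruning used above (assumed semantics):
# candidates whose row-wise softmax score falls below softmax_threshold are
# zeroed out of the ranking mask, for both training and prediction.
import numpy as np

def softmax_pruning_mask(scores, threshold):
    # scores: [k, c] -> float mask of the same shape.
    e = np.exp(scores - scores.max(axis=1, keepdims=True))
    probs = e / e.sum(axis=1, keepdims=True)
    return (probs > threshold).astype(np.float32)

print(softmax_pruning_mask(np.array([[2.0, 0.0, -2.0]]), 0.1))  # [[1. 1. 0.]]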
def get_predictions_and_loss(self, word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids): self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"]) self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"]) num_sentences = tf.shape(word_emb)[0] max_sentence_length = tf.shape(word_emb)[1] text_emb_list = [word_emb] if self.config["char_embedding_size"] > 0: char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb] flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb] flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb] aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb] text_emb_list.append(aggregated_char_emb) text_emb = tf.concat(text_emb_list, 2) text_emb = tf.nn.dropout(text_emb, self.lexical_dropout) text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length) text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length]) self.tm_shape = tf.shape(text_len_mask) text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask) text_outputs = tf.nn.dropout(text_outputs, self.dropout) self.txt_shape = tf.shape(text_outputs) genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb] sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length] flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words] flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask) # [num_words] self.flattened_sentence_indices = flattened_sentence_indices self.emb_shape = tf.shape(flattened_text_emb) candidate_starts, candidate_ends = coref_ops.spans( sentence_indices=flattened_sentence_indices, max_width=self.max_mention_width) candidate_starts.set_shape([None]) candidate_ends.set_shape([None]) candidate_mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, candidate_starts, candidate_ends) # [num_candidates, emb] candidate_mention_scores = self.get_mention_scores(candidate_mention_emb) # [num_mentions, 1] candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [num_mentions] k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"])) # predicted_mention_indices, context_starts, context_ends, context_length = coref_ops.extract_mentions(candidate_mention_scores, # candidate_starts, # candidate_ends, # k, # self.max_context_width) # ([k], [k]) predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores, candidate_starts, candidate_ends, k) # ([k], [k]) predicted_mention_indices.set_shape([None]) # context_starts.set_shape([None]) # context_ends.set_shape([None]) # context_length.set_shape([None]) mention_starts = tf.gather(candidate_starts, predicted_mention_indices) # [num_mentions] mention_ends = tf.gather(candidate_ends, predicted_mention_indices) # [num_mentions] mention_emb = 
tf.gather(candidate_mention_emb, predicted_mention_indices) # [num_mentions, emb] mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices) # [num_mentions] mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb] mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb] mention_speaker_ids = tf.gather(speaker_ids, mention_starts) # [num_mentions] context_starts = tf.maximum(mention_starts - 5, 0) context_ends = tf.minimum(mention_ends + 5, util.shape(text_outputs, 0) - 1) context_start_emb = tf.gather(text_outputs, context_starts) # [num_mentions, emb] context_end_emb = tf.gather(text_outputs, context_ends) # [num_mentions, emb] max_antecedents = self.config["max_antecedents"] antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids, max_antecedents) # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions]) antecedents.set_shape([None, None]) antecedent_labels.set_shape([None, None]) antecedents_len.set_shape([None]) antecedent_scores = self.get_context_antecedent_scores(mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb, context_starts, context_ends, text_outputs, flattened_text_emb) # [num_mentions, max_ant + 1] loss = self.softmax_loss(antecedent_scores, antecedent_labels) # [num_mentions] loss = tf.reduce_sum(loss) # [] return [ candidate_starts, candidate_ends, candidate_mention_scores, mention_starts, mention_ends, antecedents, antecedent_scores ], loss
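# The context features above come from a fixed +/-5-token window around each
# mention, clipped to the document boundaries rather than padded. A small
# numpy sketch of that clamping:
import numpy as np

def context_window(mention_starts, mention_ends, num_words, width=5):
    context_starts = np.maximum(mention_starts - width, 0)
    context_ends = np.minimum(mention_ends + width, num_words - 1)
    return context_starts, context_ends

print(context_window(np.array([2, 40]), np.array([3, 42]), 44))
# (array([ 0, 35]), array([ 8, 43]))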
def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb, lm_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids): self.dropout = self.get_dropout(self.config["dropout_rate"], is_training) self.lexical_dropout = self.get_dropout( self.config["lexical_dropout_rate"], is_training) self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training) num_sentences = tf.shape(context_word_emb)[0] max_sentence_length = tf.shape(context_word_emb)[1] context_emb_list = [context_word_emb] head_emb_list = [head_word_emb] if self.config["char_embedding_size"] > 0: char_emb = tf.gather( tf.get_variable( "char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index ) # [num_sentences, max_sentence_length, max_word_length, emb] flattened_char_emb = tf.reshape(char_emb, [ num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3) ]) # [num_sentences * max_sentence_length, max_word_length, emb] flattened_aggregated_char_emb = util.cnn( flattened_char_emb, self.config["filter_widths"], self.config["filter_size"] ) # [num_sentences * max_sentence_length, emb] aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [ num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1) ]) # [num_sentences, max_sentence_length, emb] context_emb_list.append(aggregated_char_emb) head_emb_list.append(aggregated_char_emb) if not self.lm_file: elmo_module = hub.Module("https://tfhub.dev/google/elmo/2") lm_embeddings = elmo_module(inputs={ "tokens": tokens, "sequence_len": text_len }, signature="tokens", as_dict=True) word_emb = lm_embeddings[ "word_emb"] # [num_sentences, max_sentence_length, 512] lm_emb = tf.stack([ tf.concat([word_emb, word_emb], -1), lm_embeddings["lstm_outputs1"], lm_embeddings["lstm_outputs2"] ], -1) # [num_sentences, max_sentence_length, 1024, 3] lm_emb_size = util.shape(lm_emb, 2) lm_num_layers = util.shape(lm_emb, 3) with tf.variable_scope("lm_aggregation"): self.lm_weights = tf.nn.softmax( tf.get_variable("lm_scores", [lm_num_layers], initializer=tf.constant_initializer(0.0))) self.lm_scaling = tf.get_variable( "lm_scaling", [], initializer=tf.constant_initializer(1.0)) flattened_lm_emb = tf.reshape( lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers]) flattened_aggregated_lm_emb = tf.matmul( flattened_lm_emb, tf.expand_dims( self.lm_weights, 1)) # [num_sentences * max_sentence_length * emb, 1] aggregated_lm_emb = tf.reshape( flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size]) aggregated_lm_emb *= self.lm_scaling context_emb_list.append(aggregated_lm_emb) context_emb = tf.concat(context_emb_list, 2) # [num_sentences, max_sentence_length, emb] head_emb = tf.concat(head_emb_list, 2) # [num_sentences, max_sentence_length, emb] context_emb = tf.nn.dropout( context_emb, self.lexical_dropout) # [num_sentences, max_sentence_length, emb] head_emb = tf.nn.dropout( head_emb, self.lexical_dropout) # [num_sentences, max_sentence_length, emb] # self.a , self.b = text_len , max_sentence_length text_len_mask = tf.sequence_mask( text_len, maxlen=max_sentence_length) # [num_sentence, max_sentence_length] context_outputs = self.lstm_contextualize( context_emb, text_len, text_len_mask) # [num_words, emb] num_words = util.shape(context_outputs, 0) genre_emb = tf.gather( tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb] sentence_indices = tf.tile( 
tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length] flattened_sentence_indices = self.flatten_emb_by_sentence( sentence_indices, text_len_mask) # [num_words] flattened_head_emb = self.flatten_emb_by_sentence( head_emb, text_len_mask) # [num_words] candidate_starts = tf.tile( tf.expand_dims(tf.range(num_words), 1), [1, self.max_span_width]) # [num_words, max_span_width] candidate_ends = candidate_starts + tf.expand_dims( tf.range(self.max_span_width), 0) # [num_words, max_span_width] candidate_start_sentence_indices = tf.gather( flattened_sentence_indices, candidate_starts) # [num_words, max_span_width] candidate_end_sentence_indices = tf.gather( flattened_sentence_indices, tf.minimum(candidate_ends, num_words - 1)) # [num_words, max_span_width] candidate_mask = tf.logical_and( candidate_ends < num_words, tf.equal( candidate_start_sentence_indices, candidate_end_sentence_indices)) # [num_words, max_span_width] flattened_candidate_mask = tf.reshape( candidate_mask, [-1]) # [num_words * max_span_width] candidate_starts = tf.boolean_mask( tf.reshape(candidate_starts, [-1]), flattened_candidate_mask) # [num_candidates] candidate_ends = tf.boolean_mask( tf.reshape(candidate_ends, [-1]), flattened_candidate_mask) # [num_candidates] candidate_sentence_indices = tf.boolean_mask( tf.reshape(candidate_start_sentence_indices, [-1]), flattened_candidate_mask) # [num_candidates] candidate_cluster_ids = self.get_candidate_labels( candidate_starts, candidate_ends, gold_starts, gold_ends, cluster_ids) # [num_candidates] candidate_span_emb = self.get_span_emb( flattened_head_emb, context_outputs, candidate_starts, candidate_ends) # [num_candidates, emb] candidate_mention_scores = self.get_mention_scores( candidate_span_emb) # [k, 1] candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [k] k = tf.to_int32( tf.floor( tf.to_float(tf.shape(context_outputs)[0]) * self.config["top_span_ratio"])) k = tf.minimum(500, k) top_span_indices = coref_ops.extract_spans( tf.expand_dims(candidate_mention_scores, 0), tf.expand_dims(candidate_starts, 0), tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0), util.shape(context_outputs, 0), True) # [1, k] top_span_indices.set_shape([1, None]) top_span_indices = tf.squeeze(top_span_indices, 0) # [k] top_span_starts = tf.gather(candidate_starts, top_span_indices) # [k] top_span_ends = tf.gather(candidate_ends, top_span_indices) # [k] top_span_emb = tf.gather(candidate_span_emb, top_span_indices) # [k, emb] top_span_cluster_ids = tf.gather(candidate_cluster_ids, top_span_indices) # [k] top_span_mention_scores = tf.gather(candidate_mention_scores, top_span_indices) # [k] top_span_sentence_indices = tf.gather(candidate_sentence_indices, top_span_indices) # [k] top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts) # [k] # c = tf.minimum(self.config["max_top_antecedents"], k) # self.top = top_span_emb orig_dim = 1270 with tf.name_scope("transformer"): with tf.name_scope("embedding_transformer"): W = tf.Variable(tf.random_normal((orig_dim, self.new_dim))) b = tf.Variable(tf.random_normal((self.new_dim, ))) temp_input = tf.nn.relu(tf.matmul(top_span_emb, W) + b) padding_mask_partial = tf.cast(tf.sequence_mask( tf.shape(temp_input)[0], maxlen=self.seq_length), dtype=tf.float32) multiples = [self.seq_length] padding_mask_partial2 = tf.tile(padding_mask_partial, multiples) enc_padding_mask = tf.reshape(padding_mask_partial2, [multiples[0], -1]) # enc_padding_mask = tf.matrix_set_diag(enc_padding_mask, 
# tf.zeros(enc_padding_mask.shape[0:-1]), name=None) dec_padding_mask = tf.reshape(padding_mask_partial2, [multiples[0], -1]) dec_padding_mask = tf.matrix_set_diag( dec_padding_mask, tf.zeros(dec_padding_mask.shape[0:-1]), name=None) look_ahead_mask = create_look_ahead_mask( tf.shape(padding_mask_partial)[0]) combined_mask = tf.minimum(enc_padding_mask, look_ahead_mask) s = tf.shape(temp_input) paddings = [[0, self.seq_length - s[0]], [0, 0]] padded_embd = tf.pad(temp_input, paddings, "CONSTANT") predictions, _ = self.sample_transformer(padded_embd, padded_embd, True, enc_padding_mask, combined_mask, dec_padding_mask) # self.chikka = predictions # self.chikka2 = predictions[:k] top_span_emb = tf.concat([predictions[:k], top_span_emb], 1) # hidd = self.new_dim // 3 # with tf.name_scope("Scorer"): # h1_1 = tf.layers.dense(predictions, hidd) # h1_2 = tf.layers.dense(predictions, hidd) # h1 = tf.concat([h1_1 , h1_2] , 1 ) # W2 = tf.Variable(tf.random_normal((hidd*2, 1))) # b2 = tf.Variable(tf.random_normal((1,))) # score = tf.nn.relu(tf.matmul(h1, W) + b) c = tf.minimum(self.config["max_top_antecedents"], k) if self.config["coarse_to_fine"]: top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning( top_span_emb, top_span_mention_scores, c) else: top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning( top_span_emb, top_span_mention_scores, c) dummy_scores = tf.zeros([k, 1]) # [k, 1] # with tf.variable_scope("coref_layer"): # top_antecedent_emb = tf.gather(top_span_emb, top_antecedents) # [k, c, emb] # top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(top_span_emb, top_antecedents, top_antecedent_emb, top_antecedent_offsets, top_span_speaker_ids, genre_emb) # [k, c] for i in range(self.config["coref_depth"]): with tf.variable_scope("coref_layer", reuse=(i > 0)): top_antecedent_emb = tf.gather(top_span_emb, top_antecedents) # [k, c, emb] top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores( top_span_emb, top_antecedents, top_antecedent_emb, top_antecedent_offsets, top_span_speaker_ids, genre_emb) # [k, c] top_antecedent_weights = tf.nn.softmax( tf.concat([dummy_scores, top_antecedent_scores], 1)) # [k, c + 1] top_antecedent_emb = tf.concat( [tf.expand_dims(top_span_emb, 1), top_antecedent_emb], 1) # [k, c + 1, emb] attended_span_emb = tf.reduce_sum( tf.expand_dims(top_antecedent_weights, 2) * top_antecedent_emb, 1) # [k, emb] with tf.variable_scope("f"): f = tf.sigmoid( util.projection( tf.concat([top_span_emb, attended_span_emb], 1), util.shape(top_span_emb, -1))) # [k, emb] top_span_emb = f * attended_span_emb + ( 1 - f) * top_span_emb # [k, emb] top_antecedent_scores = tf.concat( [dummy_scores, top_antecedent_scores], 1) # [k, c + 1] top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids, top_antecedents) # [k, c] top_antecedent_cluster_ids += tf.to_int32( tf.log(tf.to_float(top_antecedents_mask))) # [k, c] same_cluster_indicator = tf.equal(top_antecedent_cluster_ids, tf.expand_dims( top_span_cluster_ids, 1)) # [k, c] non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0, 1) # [k, 1] pairwise_labels = tf.logical_and(same_cluster_indicator, non_dummy_indicator) # [k, c] dummy_labels = tf.logical_not( tf.reduce_any(pairwise_labels, 1, keepdims=True)) # [k, 1] top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels], 1) # [k, c + 1] loss = 
self.softmax_loss(top_antecedent_scores, top_antecedent_labels) # [k] loss = tf.reduce_sum(loss) # [] return [ candidate_starts, candidate_ends, candidate_mention_scores, top_span_starts, top_span_ends, top_antecedents, top_antecedent_scores ], loss
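# create_look_ahead_mask is not defined in this file. One definition
# consistent with the tf.minimum(enc_padding_mask, look_ahead_mask)
# combination above -- where 1.0 marks positions that may be attended to --
# is a lower-triangular matrix (this is an assumption about the helper):
import tensorflow as tf

def create_look_ahead_mask(size):
    # band_part(ones, -1, 0) keeps the lower triangle, so position i can
    # attend only to positions j <= i.
    return tf.linalg.band_part(tf.ones((size, size)), -1, 0)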
def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb, lm_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids, inject_starts, inject_ends): self.dropout = self.get_dropout(self.config["dropout_rate"], is_training) self.lexical_dropout = self.get_dropout( self.config["lexical_dropout_rate"], is_training) self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training) num_sentences = tf.shape(context_word_emb)[0] max_sentence_length = tf.shape(context_word_emb)[1] context_emb_list = [context_word_emb] head_emb_list = [head_word_emb] if self.config["char_embedding_size"] > 0: char_emb = tf.gather( tf.get_variable( "char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index ) # [num_sentences, max_sentence_length, max_word_length, emb] flattened_char_emb = tf.reshape(char_emb, [ num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3) ]) # [num_sentences * max_sentence_length, max_word_length, emb] flattened_aggregated_char_emb = util.cnn( flattened_char_emb, self.config["filter_widths"], self.config["filter_size"] ) # [num_sentences * max_sentence_length, emb] aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [ num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1) ]) # [num_sentences, max_sentence_length, emb] context_emb_list.append(aggregated_char_emb) head_emb_list.append(aggregated_char_emb) if not self.lm_file: elmo_module = hub.Module("https://tfhub.dev/google/elmo/2") lm_embeddings = elmo_module(inputs={ "tokens": tokens, "sequence_len": text_len }, signature="tokens", as_dict=True) word_emb = lm_embeddings[ "word_emb"] # [num_sentences, max_sentence_length, 512] lm_emb = tf.stack([ tf.concat([word_emb, word_emb], -1), lm_embeddings["lstm_outputs1"], lm_embeddings["lstm_outputs2"] ], -1) # [num_sentences, max_sentence_length, 1024, 3] lm_emb_size = util.shape(lm_emb, 2) lm_num_layers = util.shape(lm_emb, 3) with tf.variable_scope("lm_aggregation"): self.lm_weights = tf.nn.softmax( tf.get_variable("lm_scores", [lm_num_layers], initializer=tf.constant_initializer(0.0))) self.lm_scaling = tf.get_variable( "lm_scaling", [], initializer=tf.constant_initializer(1.0)) flattened_lm_emb = tf.reshape( lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers]) flattened_aggregated_lm_emb = tf.matmul( flattened_lm_emb, tf.expand_dims( self.lm_weights, 1)) # [num_sentences * max_sentence_length * emb, 1] aggregated_lm_emb = tf.reshape( flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size]) aggregated_lm_emb *= self.lm_scaling context_emb_list.append(aggregated_lm_emb) context_emb = tf.concat(context_emb_list, 2) # [num_sentences, max_sentence_length, emb] head_emb = tf.concat(head_emb_list, 2) # [num_sentences, max_sentence_length, emb] context_emb = tf.nn.dropout( context_emb, self.lexical_dropout) # [num_sentences, max_sentence_length, emb] head_emb = tf.nn.dropout( head_emb, self.lexical_dropout) # [num_sentences, max_sentence_length, emb] text_len_mask = tf.sequence_mask( text_len, maxlen=max_sentence_length) # [num_sentence, max_sentence_length] context_outputs = self.lstm_contextualize( context_emb, text_len, text_len_mask) # [num_words, emb] num_words = util.shape(context_outputs, 0) genre_emb = tf.gather( tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb] sentence_indices = tf.tile( 
tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length] flattened_sentence_indices = self.flatten_emb_by_sentence( sentence_indices, text_len_mask) # [num_words] flattened_head_emb = self.flatten_emb_by_sentence( head_emb, text_len_mask) # [num_words] if self._use_injected_mentions(is_training): candidate_starts = tf.transpose(tf.expand_dims(inject_starts, 1)) candidate_ends = tf.transpose(tf.expand_dims(inject_ends, 1)) else: candidate_starts = tf.tile( tf.expand_dims(tf.range(num_words), 1), [1, self.max_span_width]) # [num_words, max_span_width] candidate_ends = candidate_starts + tf.expand_dims( tf.range(self.max_span_width), 0) # [num_words, max_span_width] candidate_start_sentence_indices = tf.gather( flattened_sentence_indices, candidate_starts) # [num_words, max_span_width] candidate_end_sentence_indices = tf.gather( flattened_sentence_indices, tf.minimum(candidate_ends, num_words - 1)) # [num_words, max_span_width] candidate_mask = tf.logical_and( candidate_ends < num_words, tf.equal( candidate_start_sentence_indices, candidate_end_sentence_indices)) # [num_words, max_span_width] flattened_candidate_mask = tf.reshape( candidate_mask, [-1]) # [num_words * max_span_width] candidate_starts = tf.boolean_mask( tf.reshape(candidate_starts, [-1]), flattened_candidate_mask) # [num_candidates] candidate_ends = tf.boolean_mask( tf.reshape(candidate_ends, [-1]), flattened_candidate_mask) # [num_candidates] candidate_sentence_indices = tf.boolean_mask( tf.reshape(candidate_start_sentence_indices, [-1]), flattened_candidate_mask) # [num_candidates] candidate_cluster_ids = self.get_candidate_labels( candidate_starts, candidate_ends, gold_starts, gold_ends, cluster_ids) # [num_candidates] candidate_span_emb = self.get_span_emb( flattened_head_emb, context_outputs, candidate_starts, candidate_ends) # [num_candidates, emb] candidate_mention_scores = self.get_mention_scores( candidate_span_emb) # [k, 1] candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [k] if self._use_injected_mentions(is_training): k = tf.shape(candidate_starts)[0] top_span_indices = tf.expand_dims(tf.range(k), 0) else: k = tf.to_int32( tf.floor( tf.to_float(tf.shape(context_outputs)[0]) * self.config["top_span_ratio"])) top_span_indices = coref_ops.extract_spans( tf.expand_dims(candidate_mention_scores, 0), tf.expand_dims(candidate_starts, 0), tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0), util.shape(context_outputs, 0), True) # [1, k] top_span_indices.set_shape([1, None]) top_span_indices = tf.squeeze(top_span_indices, 0) # [k] top_span_starts = tf.gather(candidate_starts, top_span_indices) # [k] top_span_ends = tf.gather(candidate_ends, top_span_indices) # [k] top_span_emb = tf.gather(candidate_span_emb, top_span_indices) # [k, emb] top_span_cluster_ids = tf.gather(candidate_cluster_ids, top_span_indices) # [k] top_span_mention_scores = tf.gather(candidate_mention_scores, top_span_indices) # [k] top_span_sentence_indices = tf.gather(candidate_sentence_indices, top_span_indices) # [k] top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts) # [k] c = tf.minimum(self.config["max_top_antecedents"], k) if self.config["coarse_to_fine"]: top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning( top_span_emb, top_span_mention_scores, c) else: top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning( top_span_emb, 
top_span_mention_scores, c) dummy_scores = tf.zeros([k, 1]) # [k, 1] for i in range(self.config["coref_depth"]): with tf.variable_scope("coref_layer", reuse=(i > 0)): top_antecedent_emb = tf.gather(top_span_emb, top_antecedents) # [k, c, emb] top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores( top_span_emb, top_antecedents, top_antecedent_emb, top_antecedent_offsets, top_span_speaker_ids, genre_emb) # [k, c] top_antecedent_weights = tf.nn.softmax( tf.concat([dummy_scores, top_antecedent_scores], 1)) # [k, c + 1] top_antecedent_emb = tf.concat( [tf.expand_dims(top_span_emb, 1), top_antecedent_emb], 1) # [k, c + 1, emb] attended_span_emb = tf.reduce_sum( tf.expand_dims(top_antecedent_weights, 2) * top_antecedent_emb, 1) # [k, emb] with tf.variable_scope("f"): f = tf.sigmoid( util.projection( tf.concat([top_span_emb, attended_span_emb], 1), util.shape(top_span_emb, -1))) # [k, emb] top_span_emb = f * attended_span_emb + ( 1 - f) * top_span_emb # [k, emb] top_antecedent_scores = tf.concat( [dummy_scores, top_antecedent_scores], 1) # [k, c + 1] top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids, top_antecedents) # [k, c] top_antecedent_cluster_ids += tf.to_int32( tf.log(tf.to_float(top_antecedents_mask))) # [k, c] same_cluster_indicator = tf.equal(top_antecedent_cluster_ids, tf.expand_dims( top_span_cluster_ids, 1)) # [k, c] non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0, 1) # [k, 1] pairwise_labels = tf.logical_and(same_cluster_indicator, non_dummy_indicator) # [k, c] dummy_labels = tf.logical_not( tf.reduce_any(pairwise_labels, 1, keepdims=True)) # [k, 1] top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels], 1) # [k, c + 1] loss = self.softmax_loss(top_antecedent_scores, top_antecedent_labels) # [k] loss = tf.reduce_sum(loss) # [] return [ candidate_starts, candidate_ends, candidate_mention_scores, top_span_starts, top_span_ends, top_antecedents, top_antecedent_scores ], loss
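# numpy sketch of the gated refinement inside the coref_layer loop above:
# each iteration blends the current span embedding with its attention-
# weighted antecedent embedding through a learned sigmoid gate f.
import numpy as np

def refine_span_emb(span_emb, attended_emb, f):
    # f in (0, 1) comes from a sigmoid projection of [span_emb; attended_emb].
    return f * attended_emb + (1.0 - f) * span_emb

print(refine_span_emb(np.array([1.0, 0.0]), np.array([0.0, 1.0]), 0.25))
# [0.75 0.25]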
def get_predictions_and_loss(self, context_word_emb, head_word_emb, lm_emb, char_index, text_len, is_training, gold_starts, gold_ends, antecedents, antecedents_len, anaphors, gold_labels): self.dropout = self.get_dropout(self.config["dropout_rate"], is_training) self.lexical_dropout = self.get_dropout( self.config["lexical_dropout_rate"], is_training) self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training) num_sentences = tf.shape(context_word_emb)[0] max_sentence_length = tf.shape(context_word_emb)[1] context_emb_list = [context_word_emb] head_emb_list = [head_word_emb] if self.config["char_embedding_size"] > 0: char_emb = tf.gather( tf.get_variable( "char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index ) # [num_sentences, max_sentence_length, max_word_length, emb] flattened_char_emb = tf.reshape(char_emb, [ num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3) ]) # [num_sentences * max_sentence_length, max_word_length, emb] flattened_aggregated_char_emb = util.cnn( flattened_char_emb, self.config["filter_widths"], self.config["filter_size"] ) # [num_sentences * max_sentence_length, emb] aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [ num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1) ]) # [num_sentences, max_sentence_length, emb] context_emb_list.append(aggregated_char_emb) head_emb_list.append(aggregated_char_emb) if self.lm_file: lm_emb_size = util.shape(lm_emb, 2) lm_num_layers = util.shape(lm_emb, 3) with tf.variable_scope("lm_aggregation"): self.lm_weights = tf.nn.softmax( tf.get_variable("lm_scores", [lm_num_layers], initializer=tf.constant_initializer(0.0))) self.lm_scaling = tf.get_variable( "lm_scaling", [], initializer=tf.constant_initializer(1.0)) flattened_lm_emb = tf.reshape(lm_emb, [ num_sentences * max_sentence_length * lm_emb_size, lm_num_layers ]) flattened_aggregated_lm_emb = tf.matmul( flattened_lm_emb, tf.expand_dims( self.lm_weights, 1)) # [num_sentences * max_sentence_length * emb, 1] aggregated_lm_emb = tf.reshape( flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size]) aggregated_lm_emb *= self.lm_scaling context_emb_list.append(aggregated_lm_emb) context_emb = tf.concat(context_emb_list, 2) # [num_sentences, max_sentence_length, emb] head_emb = tf.concat(head_emb_list, 2) # [num_sentences, max_sentence_length, emb] context_emb = tf.nn.dropout( context_emb, self.lexical_dropout) # [num_sentences, max_sentence_length, emb] head_emb = tf.nn.dropout( head_emb, self.lexical_dropout) # [num_sentences, max_sentence_length, emb] text_len_mask = tf.sequence_mask( text_len, maxlen=max_sentence_length) # [num_sentence, max_sentence_length] context_outputs = self.lstm_contextualize( context_emb, text_len, text_len_mask) # [num_words, emb] flattened_head_emb = self.flatten_emb_by_sentence( head_emb, text_len_mask) # [num_words] mention_emb = self.get_span_emb(flattened_head_emb, context_outputs, gold_starts, gold_ends) k = util.shape(antecedents, 0) c = util.shape(antecedents, 1) anaphor_emb = tf.gather(mention_emb, anaphors) #[k,emb] antecedent_emb = tf.gather(mention_emb, antecedents) # [k, c, emb] pair_emb = self.get_pair_embeddings(anaphor_emb, antecedents, antecedent_emb) # [k, c,emb] with tf.variable_scope("plural_scores"): plural_scores = util.ffnn(pair_emb, self.config["ffnn_depth"], self.config["ffnn_size"], 1, self.dropout) # [k, c, 1] plural_scores = tf.squeeze(plural_scores, 2) # [k, c] 
plural_scores = plural_scores + tf.log( tf.sequence_mask(antecedents_len, c, tf.float32)) dummy_scores = tf.zeros([k, 1]) dummy_labels = tf.logical_not( tf.reduce_any(gold_labels, 1, keepdims=True)) # [k, 1] plural_scores_with_dummy = tf.concat([dummy_scores, plural_scores], 1) gold_labels_with_dummy = tf.concat([dummy_labels, gold_labels], 1) loss = self.softmax_loss(plural_scores_with_dummy, gold_labels_with_dummy) loss = tf.reduce_sum(loss) return [plural_scores, antecedents_len, anaphors], loss
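# The plural_scores + tf.log(tf.sequence_mask(...)) idiom above pushes the
# scores of padded antecedent slots to -inf, so their softmax probability is
# exactly zero. numpy sketch:
import numpy as np

scores = np.array([[1.0, 2.0, 3.0]])
valid = np.array([[1.0, 1.0, 0.0]])              # antecedents_len == 2
with np.errstate(divide="ignore"):
    masked = scores + np.log(valid)              # third slot -> -inf
e = np.exp(masked - np.max(masked[np.isfinite(masked)]))
print(e / e.sum())                               # ~[[0.269 0.731 0.   ]]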