# Assumed module-level imports for the methods below; every variant references
# the TF 1.x API plus the repo's own util and coref_ops modules.
import tensorflow as tf

import coref_ops
import util


# Variant 1: antecedent scoring with a fixed +/-5-token context window around
# each mention (get_context_antecedent_scores).
def get_predictions_and_loss(self, word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids):
  self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"])
  self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"])

  num_sentences = tf.shape(word_emb)[0]
  max_sentence_length = tf.shape(word_emb)[1]

  text_emb_list = [word_emb]

  if self.config["char_embedding_size"] > 0:
    char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
    flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
    flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
    aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
    text_emb_list.append(aggregated_char_emb)

  text_emb = tf.concat(text_emb_list, 2)
  text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

  text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
  text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])
  self.tm_shape = tf.shape(text_len_mask)

  text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
  text_outputs = tf.nn.dropout(text_outputs, self.dropout)
  self.txt_shape = tf.shape(text_outputs)

  genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]

  sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
  flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
  flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask) # [num_words]
  self.flattened_sentence_indices = flattened_sentence_indices
  self.emb_shape = tf.shape(flattened_text_emb)

  candidate_starts, candidate_ends = coref_ops.spans(
    sentence_indices=flattened_sentence_indices,
    max_width=self.max_mention_width)
  candidate_starts.set_shape([None])
  candidate_ends.set_shape([None])

  candidate_mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, candidate_starts, candidate_ends) # [num_candidates, emb]
  candidate_mention_scores = self.get_mention_scores(candidate_mention_emb) # [num_candidates, 1]
  candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [num_candidates]

  k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"]))
  # predicted_mention_indices, context_starts, context_ends, context_length = coref_ops.extract_mentions(
  #     candidate_mention_scores, candidate_starts, candidate_ends, k, self.max_context_width)
  predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores, candidate_starts, candidate_ends, k) # [k]
  predicted_mention_indices.set_shape([None])
  # context_starts.set_shape([None])
  # context_ends.set_shape([None])
  # context_length.set_shape([None])

  mention_starts = tf.gather(candidate_starts, predicted_mention_indices) # [num_mentions]
  mention_ends = tf.gather(candidate_ends, predicted_mention_indices) # [num_mentions]
  mention_emb = tf.gather(candidate_mention_emb, predicted_mention_indices) # [num_mentions, emb]
  mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices) # [num_mentions]
  mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
  mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
  mention_speaker_ids = tf.gather(speaker_ids, mention_starts) # [num_mentions]

  # Clamp a five-token context window around each mention to the document boundaries.
  context_starts = tf.maximum(mention_starts - 5, 0)
  context_ends = tf.minimum(mention_ends + 5, util.shape(text_outputs, 0) - 1)
  context_start_emb = tf.gather(text_outputs, context_starts) # [num_mentions, emb]
  context_end_emb = tf.gather(text_outputs, context_ends) # [num_mentions, emb]

  max_antecedents = self.config["max_antecedents"]
  antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids, max_antecedents) # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions])
  antecedents.set_shape([None, None])
  antecedent_labels.set_shape([None, None])
  antecedents_len.set_shape([None])

  antecedent_scores = self.get_context_antecedent_scores(mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb, context_starts, context_ends, text_outputs, flattened_text_emb) # [num_mentions, max_ant + 1]

  loss = self.softmax_loss(antecedent_scores, antecedent_labels) # [num_mentions]
  loss = tf.reduce_sum(loss) # []

  return [candidate_starts, candidate_ends, candidate_mention_scores, mention_starts, mention_ends, antecedents, antecedent_scores], loss
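# Every variant in this file reduces self.softmax_loss over mentions. A minimal
# sketch of that loss, assuming it matches the reference e2e-coref formulation
# (marginal log-likelihood over all gold antecedents; antecedent_labels are
# booleans over the max_ant + 1 columns, including the dummy antecedent):
def softmax_loss_sketch(antecedent_scores, antecedent_labels):
  # log(0) = -inf masks out the non-gold columns before marginalizing.
  gold_scores = antecedent_scores + tf.log(tf.to_float(antecedent_labels)) # [num_mentions, max_ant + 1]
  marginalized_gold_scores = tf.reduce_logsumexp(gold_scores, [1])         # [num_mentions]
  log_norm = tf.reduce_logsumexp(antecedent_scores, [1])                   # [num_mentions]
  return log_norm - marginalized_gold_scores                               # [num_mentions]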
# Variant 2: joint cluster tagging. Spans are scored with 2-way logits, the
# top spans are tagged into clusters via gnc_tagging, and the loss combines a
# margin-based tagging loss with the antecedent softmax loss.
def get_predictions_and_loss(self, word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids, span_labels, span_seq):
  self.gold_starts = gold_starts
  self.cluster_ids = cluster_ids

  span_seq_bin = tf.where(tf.greater(span_seq, 0), tf.ones_like(span_seq), tf.zeros_like(span_seq))
  span_labels_bin = tf.one_hot(span_seq_bin, 2) # [num_mentions, 2]

  self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"])
  self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"])

  num_sentences = tf.shape(word_emb)[0]
  max_sentence_length = tf.shape(word_emb)[1]

  text_emb_list = [word_emb]

  if self.config["char_embedding_size"] > 0:
    char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
    flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
    flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
    aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
    text_emb_list.append(aggregated_char_emb)

  text_emb = tf.concat(text_emb_list, 2)
  text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

  text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
  text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])
  # self.text_len_mask = text_len_mask[0]

  text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
  text_outputs = tf.nn.dropout(text_outputs, self.dropout)

  genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]

  sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
  flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
  flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask) # [num_words]
  self.flattened_sentence_indices = flattened_sentence_indices

  candidate_starts, candidate_ends = coref_ops.regression(
    sentence_indices=flattened_sentence_indices,
    max_width=self.max_mention_width)
  candidate_starts.set_shape([None])
  candidate_ends.set_shape([None])

  candidate_mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, candidate_starts, candidate_ends) # [num_candidates, emb]
  candidate_mention_logits = self.get_mention_scores(candidate_mention_emb) # [num_candidates, 2]
  # mention_loss = tf.losses.softmax_cross_entropy(span_labels_bin, candidate_mention_logits)
  # mention_loss = tf.nn.softmax_cross_entropy_with_logits(logits=candidate_mention_logits, labels=span_labels_bin)
  # mention_loss = tf.reduce_sum(mention_loss)
  # mention_loss = tf.reduce_sum(mention_loss * tf.cast(span_seq_bin, tf.float32))
  # candidate_mention_scores = tf.squeeze(tf.gather(tf.transpose(candidate_mention_logits), 1)) - \
  #     tf.squeeze(tf.gather(tf.transpose(candidate_mention_logits), 0))
  candidate_mention_scores = tf.squeeze(tf.gather(tf.transpose(candidate_mention_logits), 1)) # [num_candidates]

  k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"]))
  # k = tf.cond(is_training, lambda: k, lambda: tf.shape(text_outputs)[0])
  # k = tf.cond(is_training, lambda: tf.minimum(k, 250), lambda: k)
  k = tf.minimum(k, 250)

  predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores, candidate_starts, candidate_ends, k) # [k]
  predicted_mention_indices.set_shape([None])

  mention_starts = tf.gather(candidate_starts, predicted_mention_indices) # [num_mentions]
  mention_ends = tf.gather(candidate_ends, predicted_mention_indices) # [num_mentions]
  mention_emb = tf.gather(candidate_mention_emb, predicted_mention_indices) # [num_mentions, emb]
  mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices) # [num_mentions]
  mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
  mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
  mention_speaker_ids = tf.gather(speaker_ids, mention_starts) # [num_mentions]

  max_antecedents = self.config["max_antecedents"]
  antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids, max_antecedents * 10) # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions])
  antecedents.set_shape([None, None])
  antecedent_labels.set_shape([None, None])
  antecedents_len.set_shape([None])

  antecedent_features = self.get_antecedent_features(mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb) # [num_mentions, max_ant + 1]
  # antecedent_loss = tf.reduce_sum(self.softmax_loss(antecedent_scores, antecedent_labels))
  # antecedent_scores = tf.expand_dims(antecedent_scores, 0)
  # mention_loss = tf.losses.softmax_cross_entropy(span_labels_bin, mention_logits)
  # mention_scores = tf.squeeze(tf.gather(tf.transpose(mention_logits), 1))

  span_emb = tf.expand_dims(mention_emb, 0)
  # span_labels = tf.gather(span_labels, predicted_mention_indices)
  span_seq_sorted = coref_ops.tagging(tf.gather(span_seq, predicted_mention_indices))
  span_seq_sorted.set_shape([None]) # (original called set_shape on predicted_mention_indices again, which looks like a typo)
  span_labels = tf.expand_dims(tf.one_hot(span_seq_sorted, 100), 0)
  self.span_labels = tf.reduce_sum(span_labels)
  self.span_seq = span_seq_sorted

  # text_conv = tf.expand_dims(text_outputs, 0)
  # text_conv = tf.expand_dims(tf.concat([text_outputs, flattened_text_emb], 1), 0)
  # text_conv = util.cnn_name(text_conv, [5], 100, 'tag_conv')
  # text_conv = tf.nn.dropout(text_conv, self.dropout)
  # self.mention_scores = mention_scores

  outputs = self.gnc_tagging(span_emb, span_labels, span_seq_sorted, mention_scores, antecedent_features, k) # [1, num_mentions, 100 + k + 1] ?
  # x = tf.matmul(outputs, tf.zeros([10, 80]))
  logits, antecedent_scores = tf.split(outputs, [100, k + 1], 2)
  self.logits_shape = tf.shape(logits)
  self.antecedent_scores_shape = tf.shape(antecedent_scores)
  # antecedent_scores = tf.gather(attention[0], tf.range(k))
  predictions = tf.argmax(logits, axis=2) # [1, num_mentions] ?

  # loss = tf.losses.softmax_cross_entropy(span_labels, tf.squeeze(logits))
  # loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=tf.squeeze(logits), labels=span_labels))
  # span_seq_expanded = tf.expand_dims(span_seq_sorted, 0)
  # loss = tf.losses.log_loss(span_labels, tf.squeeze(tf.nn.softmax(logits, dim=2))) + .1 * mention_loss
  # self.tag_seq = tag_seq
  # tagging_loss = tf.reduce_sum(self.sigmoid_margin(logits, tf.to_float(span_labels), tf.to_float(k)))
  tagging_loss = tf.reduce_sum(self.exp_loss_margin(logits, tf.to_float(span_labels), 2))
  antecedent_loss = tf.reduce_sum(self.softmax_loss(antecedent_scores[0], antecedent_labels))
  # antecedent_loss = tf.reduce_sum(self.exp_loss_margin(antecedent_scores[0], antecedent_labels, 1))
  # y = tf.squeeze(tf.nn.softmax(logits, dim=2))
  # loss = -tf.reduce_sum(span_labels * tf.log(y) + (1 - span_labels) * tf.log(1 - y))
  # loss = -tf.reduce_sum((1 - span_labels) * tf.log(1 - y))

  self.tagging_loss = tagging_loss
  # self.mention_loss = mention_loss
  self.antecedent_loss = antecedent_loss
  loss = tagging_loss + antecedent_loss # + mention_loss
  self.p = predictions

  return [candidate_starts, candidate_ends, candidate_mention_scores, mention_starts, mention_ends, predictions, antecedents, antecedent_scores[0]], loss
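# Variant 2 packs per-span tag logits and antecedent scores into one
# gnc_tagging output and separates them with tf.split on the last axis.
# A toy check of that layout; k = 4 and the 7 spans are arbitrary example
# values (in the model k is a dynamic tensor):
k_demo = 4
outputs_demo = tf.zeros([1, 7, 100 + k_demo + 1])
logits_demo, antecedent_scores_demo = tf.split(outputs_demo, [100, k_demo + 1], 2)
# logits_demo:            [1, 7, 100]         -- 100-way tag distribution per span
# antecedent_scores_demo: [1, 7, k_demo + 1]  -- k antecedent scores + dummy per span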
# Variant 3: baseline span-ranking model (mention scores + antecedent softmax
# loss, no context windows or auxiliary objectives).
def get_predictions_and_loss(self, word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids):
  self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"])
  self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"])

  num_sentences = tf.shape(word_emb)[0]
  max_sentence_length = tf.shape(word_emb)[1]

  text_emb_list = [word_emb]

  if self.config["char_embedding_size"] > 0:
    char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
    flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
    flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
    aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
    text_emb_list.append(aggregated_char_emb)

  text_emb = tf.concat(text_emb_list, 2)
  text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

  text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
  text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])

  text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
  text_outputs = tf.nn.dropout(text_outputs, self.dropout)

  genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]

  sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
  flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
  flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask) # [num_words]

  candidate_starts, candidate_ends = coref_ops.spans(
    sentence_indices=flattened_sentence_indices,
    max_width=self.max_mention_width)
  candidate_starts.set_shape([None])
  candidate_ends.set_shape([None])

  candidate_mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, candidate_starts, candidate_ends) # [num_candidates, emb]
  candidate_mention_scores = self.get_mention_scores(candidate_mention_emb) # [num_candidates, 1]
  candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [num_candidates]

  k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"]))
  predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores, candidate_starts, candidate_ends, k) # [k]
  predicted_mention_indices.set_shape([None])

  mention_starts = tf.gather(candidate_starts, predicted_mention_indices) # [num_mentions]
  mention_ends = tf.gather(candidate_ends, predicted_mention_indices) # [num_mentions]
  mention_emb = tf.gather(candidate_mention_emb, predicted_mention_indices) # [num_mentions, emb]
  mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices) # [num_mentions]
  mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
  mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
  mention_speaker_ids = tf.gather(speaker_ids, mention_starts) # [num_mentions]

  max_antecedents = self.config["max_antecedents"]
  antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids, max_antecedents) # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions])
  antecedents.set_shape([None, None])
  antecedent_labels.set_shape([None, None])
  antecedents_len.set_shape([None])

  antecedent_scores = self.get_antecedent_scores(mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb) # [num_mentions, max_ant + 1]

  loss = self.softmax_loss(antecedent_scores, antecedent_labels) # [num_mentions]
  loss = tf.reduce_sum(loss) # []

  return [candidate_starts, candidate_ends, candidate_mention_scores, mention_starts, mention_ends, antecedents, antecedent_scores], loss
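# util.shape appears throughout these methods to mix static and dynamic
# dimensions. A minimal sketch, assuming it follows the usual TF 1.x idiom of
# preferring the statically known size and falling back to tf.shape:
def shape_sketch(x, dim):
  static = x.get_shape()[dim].value  # None when the dimension is dynamic
  return static if static is not None else tf.shape(x)[dim]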
# Variant 4: adds word-level start/end scores from a projection over the
# concatenated LSTM outputs and word embeddings, folded into the mention scores.
def get_predictions_and_loss(self, word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids):
  self.gold_starts = gold_starts
  self.gold_ends = gold_ends
  self.cluster_ids = cluster_ids

  self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"])
  self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"])

  num_sentences = tf.shape(word_emb)[0]
  max_sentence_length = tf.shape(word_emb)[1]

  text_emb_list = [word_emb]

  if self.config["char_embedding_size"] > 0:
    char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
    flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
    flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
    aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
    text_emb_list.append(aggregated_char_emb)

  text_emb = tf.concat(text_emb_list, 2)
  text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

  text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
  text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])
  # self.text_len_mask = text_len_mask[0]

  text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
  text_outputs = tf.nn.dropout(text_outputs, self.dropout)

  genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]

  sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
  flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
  flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask) # [num_words]
  self.flattened_sentence_indices = flattened_sentence_indices

  word_nemb = tf.concat([text_outputs, flattened_text_emb], 1)
  # word_nemb = tf.reshape(word_nemb, [tf.shape(text_outputs)[0], tf.shape(text_outputs)[1] + tf.shape(flattened_text_emb)[1]])

  with tf.variable_scope("conv_score"):
    # nemb_size = util.shape(word_nemb, 1)
    # w = tf.get_variable("w", [3, nemb_size, 150])
    # b = tf.get_variable("b", [150])
    # conv = tf.nn.conv1d(tf.expand_dims(word_nemb, 0), w, stride=1, padding="SAME")
    # h = tf.nn.relu(tf.nn.bias_add(conv, b))
    candidate_word_scores = util.projection(word_nemb, 2)
    start_scores, end_scores = tf.split(candidate_word_scores, [1, 1], 1)
    start_scores = tf.reshape(start_scores, [-1])
    end_scores = tf.reshape(end_scores, [-1])

  candidate_starts, candidate_ends = coref_ops.spans(
    sentence_indices=flattened_sentence_indices,
    max_width=self.max_mention_width)
  candidate_starts.set_shape([None])
  candidate_ends.set_shape([None])

  candidate_mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, candidate_starts, candidate_ends) # [num_candidates, emb]
  candidate_mention_scores = self.get_mention_scores(candidate_mention_emb) # [num_candidates, 1]
  candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [num_candidates]
  candidate_mention_scores += tf.gather(start_scores, candidate_starts) + tf.gather(end_scores, candidate_ends)

  k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"]))
  k = tf.minimum(k, self.config["max_antecedents"])
  predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores, candidate_starts, candidate_ends, k) # [k]
  predicted_mention_indices.set_shape([None])

  mention_starts = tf.gather(candidate_starts, predicted_mention_indices) # [num_mentions]
  mention_ends = tf.gather(candidate_ends, predicted_mention_indices) # [num_mentions]
  mention_emb = tf.gather(candidate_mention_emb, predicted_mention_indices) # [num_mentions, emb]
  # mention_emb = tf.reshape(mention_emb, [k, -1])
  mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices) # [num_mentions]
  word_scores = tf.gather(candidate_word_scores, predicted_mention_indices)
  mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
  mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
  mention_speaker_ids = tf.gather(speaker_ids, mention_starts) # [num_mentions]

  max_antecedents = self.config["max_antecedents"]
  antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids, k) # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions])
  antecedents.set_shape([None, None])
  antecedent_labels.set_shape([None, None])
  antecedents_len.set_shape([None])

  antecedent_scores = self.get_antecedent_scores(mention_emb, word_nemb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, speaker_ids, genre_emb, tf.expand_dims(start_scores, 1), tf.expand_dims(end_scores, 1), k) # [num_mentions, max_ant + 1]

  loss = self.softmax_loss(antecedent_scores, antecedent_labels) # [num_mentions]
  # loss = self.exp_loss_margin(antecedent_scores, antecedent_labels) # [num_mentions]
  loss = tf.reduce_sum(loss) # []

  return [candidate_starts, candidate_ends, candidate_mention_scores, mention_starts, mention_ends, antecedents, antecedent_scores], loss
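# util.projection(word_nemb, 2) above maps each word representation to a start
# score and an end score. A minimal sketch of such a projection, assuming a
# single affine layer (the real util.projection may differ in initialization
# or variable naming):
def projection_sketch(inputs, output_size):
  input_size = inputs.get_shape()[-1].value
  w = tf.get_variable("proj_w", [input_size, output_size])
  b = tf.get_variable("proj_b", [output_size])
  return tf.matmul(inputs, w) + b  # [num_words, output_size]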
# Variant 5: adds POS/NER/category features to the word representations and
# trains a DANN-style domain-adversarial genre classifier alongside the
# coreference loss.
def get_predictions_and_loss(self, word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids, pos_tags, ner_tags, categories, ner_ids, cat_glove, domain_labels, l):
  self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"])
  self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"])

  num_sentences = tf.shape(word_emb)[0]
  max_sentence_length = tf.shape(word_emb)[1]

  text_emb_list = [word_emb]

  if self.config["char_embedding_size"] > 0:
    char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
    flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
    flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
    aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
    text_emb_list.append(aggregated_char_emb)

  if self.config["use_pos_tag"]:
    text_emb_list.append(pos_tags)
  if self.config["use_ner_g"]:
    text_emb_list.append(ner_tags)
  if self.config["use_categories"]:
    text_emb_list.append(categories)
  if self.config["use_categories_glove"]:
    text_emb_list.append(cat_glove)

  text_emb = tf.concat(text_emb_list, 2)
  text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

  text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
  text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])

  text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
  text_outputs = tf.nn.dropout(text_outputs, self.dropout)

  genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]
  # print("------------------------------")
  # print("GENRE EMB")
  # print(genre_emb.shape, genre)

  sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
  flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
  flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask) # [num_words]

  candidate_starts, candidate_ends = coref_ops.spans(
    sentence_indices=flattened_sentence_indices,
    max_width=self.max_mention_width)
  candidate_starts.set_shape([None])
  candidate_ends.set_shape([None])

  # get_mention_scores calls util.ffnn
  candidate_mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, candidate_starts, candidate_ends) # [num_candidates, emb]
  candidate_mention_scores = self.get_mention_scores(candidate_mention_emb) # [num_candidates, 1]
  candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [num_candidates]

  k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"]))
  predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores, candidate_starts, candidate_ends, k) # [k]
  predicted_mention_indices.set_shape([None])

  mention_starts = tf.gather(candidate_starts, predicted_mention_indices) # [num_mentions]
  mention_ends = tf.gather(candidate_ends, predicted_mention_indices) # [num_mentions]
  mention_emb = tf.gather(candidate_mention_emb, predicted_mention_indices) # [num_mentions, emb]
  mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices) # [num_mentions]
  mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
  mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
  mention_speaker_ids = tf.gather(speaker_ids, mention_starts) # [num_mentions]
  mention_ner_ids = tf.gather(ner_ids, mention_starts)

  max_antecedents = self.config["max_antecedents"]
  antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids, max_antecedents) # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions])
  antecedents.set_shape([None, None])
  antecedent_labels.set_shape([None, None])
  antecedents_len.set_shape([None])

  # get_antecedent_scores calls util.ffnn
  antecedent_scores, pair_emb = self.get_antecedent_scores(mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb, mention_ner_ids) # [num_mentions, max_ant + 1]

  # antecedent_scores are floats; antecedent_labels are booleans.
  # softmax_loss casts the booleans to floats internally.
  loss = self.softmax_loss(antecedent_scores, antecedent_labels) # [num_mentions]
  loss = tf.reduce_sum(loss) # []

  print("---------------------------------------")
  print("PAIR EMB")
  print(pair_emb.shape)
  print("---------------------------------------")
  print("CANDIDATE")
  print(candidate_mention_emb.shape)
  print("---------------------------------------")

  # if self.config["use_dann"]:
  d_logits = util.dann(candidate_mention_emb, self.config["ffnn_size"], len(self.genres), self.dropout, l=l, name="1")
  # elif self.config["use_dann_pairwise"]:
  d_logits2 = util.dann(pair_emb, self.config["ffnn_size"], len(self.genres), self.dropout, l=l, name="2")

  print("---------------------------------------")
  print("D_LOGITS")
  print(d_logits2.shape)
  print("---------------------------------------")

  d_probs2 = tf.nn.softmax(d_logits2)
  d_probs = tf.nn.softmax(d_logits) # P(genre | mentions)
  neg_ll = -tf.log(tf.clip_by_value(d_probs, 1e-10, 1.0)) # N x 7
  N = tf.shape(neg_ll)[0]
  # argmax of d_probs gives the predicted domains
  pred_domains = tf.argmax(d_probs, 1)
  # tile the 1 x 7 domain labels to N x 7
  tiled_domain_labels = tf.tile(tf.expand_dims(domain_labels, 0), [N, 1])
  # convert the tiles so they are comparable to pred_domains
  gold_domains = tf.argmax(tiled_domain_labels, 1)
  # multiply neg_ll with tiled_domain_labels
  pairwise_loss = tf.multiply(neg_ll, tiled_domain_labels) # N x 7
  pairwise_loss_reduced = tf.reduce_sum(pairwise_loss, 0)
  domain_loss = tf.reduce_sum(tf.divide(pairwise_loss_reduced, tf.cast(N, tf.float32)))
  domain_loss_reduce_mean = tf.reduce_sum(tf.reduce_mean(pairwise_loss, 0))
  correct_domain_predictions = tf.equal(pred_domains, gold_domains)
  domain_accuracy = tf.reduce_mean(tf.cast(correct_domain_predictions, tf.float32))
  values = [domain_accuracy, domain_loss_reduce_mean, pairwise_loss_reduced, N, neg_ll, d_logits2, d_logits]

  return [candidate_starts, candidate_ends, candidate_mention_scores, mention_starts, mention_ends, antecedents, antecedent_scores, antecedent_labels, genre], loss, domain_loss, pred_domains, values
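# util.dann takes a gradient-reversal weight l, which is the standard DANN
# setup (Ganin & Lempitsky): the domain classifier trains normally while the
# features underneath receive negated gradients. A minimal sketch of the
# gradient-reversal trick such a layer typically builds on; whether util.dann
# uses exactly this construction is an assumption:
def flip_gradient_sketch(x, l=1.0):
  # Forward pass: (1 + l) * x - l * x == x (identity).
  # Backward pass: the stop_gradient term contributes zero, so dy/dx == -l.
  return tf.stop_gradient((1.0 + l) * x) - l * x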
# Variant 6: stacks three GCN-style layers over the mention embeddings, with a
# normalized adjacency (get_DAD) derived from each layer's antecedent scores.
def get_predictions_and_loss(self, word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids):
  self.gold_starts = gold_starts
  self.gold_ends = gold_ends
  self.cluster_ids = cluster_ids

  self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"])
  self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"])

  num_sentences = tf.shape(word_emb)[0]
  max_sentence_length = tf.shape(word_emb)[1]

  text_emb_list = [word_emb]

  if self.config["char_embedding_size"] > 0:
    char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
    flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
    flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
    aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
    text_emb_list.append(aggregated_char_emb)

  text_emb = tf.concat(text_emb_list, 2)
  text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

  text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
  text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])
  # self.text_len_mask = text_len_mask[0]

  text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
  text_outputs = tf.nn.dropout(text_outputs, self.dropout)

  genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]

  sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
  flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
  flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask) # [num_words]
  self.flattened_sentence_indices = flattened_sentence_indices

  candidate_starts, candidate_ends = coref_ops.spans(
    sentence_indices=flattened_sentence_indices,
    max_width=self.max_mention_width)
  candidate_starts.set_shape([None])
  candidate_ends.set_shape([None])

  candidate_mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, candidate_starts, candidate_ends) # [num_candidates, emb]
  candidate_mention_scores = self.get_mention_scores(candidate_mention_emb) # [num_candidates, 1]
  candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [num_candidates]

  k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"]))
  k = tf.minimum(k, self.config["max_antecedents"])
  predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores, candidate_starts, candidate_ends, k) # [k]
  predicted_mention_indices.set_shape([None])

  mention_starts = tf.gather(candidate_starts, predicted_mention_indices) # [num_mentions]
  mention_ends = tf.gather(candidate_ends, predicted_mention_indices) # [num_mentions]
  mention_emb = tf.gather(candidate_mention_emb, predicted_mention_indices) # [num_mentions, emb]
  mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices) # [num_mentions]
  mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
  mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
  mention_speaker_ids = tf.gather(speaker_ids, mention_starts) # [num_mentions]

  max_antecedents = self.config["max_antecedents"]
  antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids, max_antecedents) # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions])
  antecedents.set_shape([None, None])
  antecedent_labels.set_shape([None, None])
  antecedents_len.set_shape([None])

  A = tf.eye(k)
  with tf.variable_scope("layer_1"):
    antecedent_scores_1 = self.get_antecedent_scores(mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb, k) # [num_mentions, max_ant + 1]
    DAD_1 = self.get_DAD(A, antecedent_scores_1, k)
    mention_emb_1 = tf.nn.relu(util.projection(tf.matmul(DAD_1, mention_emb), 200))
  with tf.variable_scope("layer_2"):
    antecedent_scores_2 = self.get_antecedent_scores(mention_emb_1, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb, k) # [num_mentions, max_ant + 1]
    DAD_2 = self.get_DAD(A, antecedent_scores_2, k)
    mention_emb_2 = tf.nn.relu(util.projection(tf.matmul(DAD_2, mention_emb), 200))
  with tf.variable_scope("layer_3"):
    antecedent_scores = self.get_antecedent_scores(mention_emb_2, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb, k) # [num_mentions, max_ant + 1]
    antecedent_scores += antecedent_scores_1

  loss = self.softmax_loss(antecedent_scores, antecedent_labels) # [num_mentions]
  # loss = self.exp_loss_margin(antecedent_scores, antecedent_labels) # [num_mentions]
  loss = tf.reduce_sum(loss) # []

  return [candidate_starts, candidate_ends, candidate_mention_scores, mention_starts, mention_ends, antecedents, antecedent_scores], loss
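# get_DAD is not shown in this excerpt. Its name and use pattern suggest the
# symmetrically normalized adjacency D^{-1/2} A D^{-1/2} of GCNs (Kipf &
# Welling); how A is built from the antecedent scores is not reconstructed
# here. A hedged sketch of the normalization step alone, given some
# non-negative [k, k] adjacency A:
def normalize_adjacency_sketch(A):
  degree = tf.reduce_sum(A, axis=1)                # [k] row degrees
  d_inv_sqrt = tf.rsqrt(tf.maximum(degree, 1e-6))  # guard against zero-degree rows
  D_inv_sqrt = tf.diag(d_inv_sqrt)                 # [k, k]
  return tf.matmul(tf.matmul(D_inv_sqrt, A), D_inv_sqrt)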