def get_predictions_and_loss(self, word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids):
    # Note: despite the names, these are keep probabilities for tf.nn.dropout
    # (they evaluate to 1 when is_training is false).
    self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"])
    self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"])

    num_sentences = tf.shape(word_emb)[0]
    max_sentence_length = tf.shape(word_emb)[1]

    text_emb_list = [word_emb]

    if self.config["char_embedding_size"] > 0:
      char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
      flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
      flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
      aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
      text_emb_list.append(aggregated_char_emb)

    text_emb = tf.concat(text_emb_list, 2)
    text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
    text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])
    self.tm_shape = tf.shape(text_len_mask)

    text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
    text_outputs = tf.nn.dropout(text_outputs, self.dropout)
    self.txt_shape = tf.shape(text_outputs)

    genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]

    sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
    flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
    flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask) # [num_words]
    self.flattened_sentence_indices = flattened_sentence_indices
    self.emb_shape = tf.shape(flattened_text_emb)

    candidate_starts, candidate_ends = coref_ops.spans(
      sentence_indices=flattened_sentence_indices,
      max_width=self.max_mention_width)
    candidate_starts.set_shape([None])
    candidate_ends.set_shape([None])

    candidate_mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, candidate_starts, candidate_ends) # [num_candidates, emb]
    candidate_mention_scores = self.get_mention_scores(candidate_mention_emb) # [num_mentions, 1]
    candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [num_mentions]

    k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"]))
    # predicted_mention_indices, context_starts, context_ends, context_length = coref_ops.extract_mentions(candidate_mention_scores,
    #                           candidate_starts,
    #                           candidate_ends,
    #                           k,
    #                           self.max_context_width) # ([k], [k])
    predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores,
                              candidate_starts,
                              candidate_ends,
                              k) # [k]
    predicted_mention_indices.set_shape([None])
    # context_starts.set_shape([None])
    # context_ends.set_shape([None])
    # context_length.set_shape([None])

    mention_starts = tf.gather(candidate_starts, predicted_mention_indices) # [num_mentions]
    mention_ends = tf.gather(candidate_ends, predicted_mention_indices) # [num_mentions]
    mention_emb = tf.gather(candidate_mention_emb, predicted_mention_indices) # [num_mentions, emb]
    mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices) # [num_mentions]

    mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
    mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
    mention_speaker_ids = tf.gather(speaker_ids, mention_starts) # [num_mentions]

    context_starts = tf.maximum(mention_starts - 5, 0)
    context_ends = tf.minimum(mention_ends + 5, util.shape(text_outputs, 0) - 1)

    context_start_emb = tf.gather(text_outputs, context_starts) # [num_mentions, emb]
    context_end_emb = tf.gather(text_outputs, context_ends) # [num_mentions, emb]

    max_antecedents = self.config["max_antecedents"]
    antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts,
                                                                            mention_ends,
                                                                            gold_starts,
                                                                            gold_ends,
                                                                            cluster_ids,
                                                                            max_antecedents) # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions])
    antecedents.set_shape([None, None])
    antecedent_labels.set_shape([None, None])
    antecedents_len.set_shape([None])

    antecedent_scores = self.get_context_antecedent_scores(mention_emb,
                                                    mention_scores,
                                                    antecedents,
                                                    antecedents_len,
                                                    mention_starts,
                                                    mention_ends,
                                                    mention_speaker_ids,
                                                    genre_emb,
                                                    context_starts,
                                                    context_ends,
                                                    text_outputs,
                                                    flattened_text_emb) # [num_mentions, max_ant + 1]

    loss = self.softmax_loss(antecedent_scores, antecedent_labels) # [num_mentions]
    loss = tf.reduce_sum(loss) # []

    return [
            candidate_starts,
            candidate_ends,
            candidate_mention_scores,
            mention_starts,
            mention_ends,
            antecedents,
            antecedent_scores
          ], loss
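
softmax_loss is not shown in this listing. A minimal sketch consistent with how it is called above, assuming the marginal log-likelihood over gold antecedents of Lee et al. (2017):

  # Sketch of a softmax_loss helper matching the call sites above (an assumption,
  # not code from this listing). antecedent_scores: [num_mentions, max_ant + 1];
  # antecedent_labels: boolean tensor of the same shape marking gold antecedents.
  def softmax_loss(self, antecedent_scores, antecedent_labels):
    gold_scores = antecedent_scores + tf.log(tf.to_float(antecedent_labels)) # -inf where not gold
    marginalized_gold_scores = tf.reduce_logsumexp(gold_scores, [1]) # [num_mentions]
    log_norm = tf.reduce_logsumexp(antecedent_scores, [1]) # [num_mentions]
    return log_norm - marginalized_gold_scores # [num_mentions]
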
Example #2
  def get_predictions_and_loss(self,
                              word_emb,
                              char_index,
                              text_len,
                              speaker_ids,
                              genre,
                              is_training,
                              gold_starts,
                              gold_ends,
                              cluster_ids,
                              span_labels,
                              span_seq):

    self.gold_starts = gold_starts
    self.cluster_ids = cluster_ids

    span_seq_bin = tf.where(tf.greater(span_seq, 0), tf.ones_like(span_seq), tf.zeros_like(span_seq))
    span_labels_bin = tf.one_hot(span_seq_bin, 2) # [num_mentions, 2]

    self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"])
    self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"])

    num_sentences = tf.shape(word_emb)[0]
    max_sentence_length = tf.shape(word_emb)[1]

    text_emb_list = [word_emb]

    if self.config["char_embedding_size"] > 0:
      char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
      flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
      flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
      aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
      text_emb_list.append(aggregated_char_emb)

    text_emb = tf.concat(text_emb_list, 2)
    text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
    text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])
    # self.text_len_mask = text_len_mask[0]

    text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
    text_outputs = tf.nn.dropout(text_outputs, self.dropout)

    genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]

    sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
    flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
    flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask) # [num_words]
    self.flattened_sentence_indices = flattened_sentence_indices

    candidate_starts, candidate_ends = coref_ops.regression(
      sentence_indices=flattened_sentence_indices,
      max_width=self.max_mention_width)
    candidate_starts.set_shape([None])
    candidate_ends.set_shape([None])

    candidate_mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, candidate_starts, candidate_ends) # [num_candidates, emb]
    candidate_mention_logits = self.get_mention_scores(candidate_mention_emb) # [num_mentions, 2]

    # mention_loss = tf.losses.softmax_cross_entropy(span_labels_bin, candidate_mention_logits)
    # mention_loss = tf.nn.softmax_cross_entropy_with_logits(logits=candidate_mention_logits,
    #                                                       labels=span_labels_bin)
    
    # mention_loss = tf.reduce_sum(mention_loss)
    # mention_loss = tf.reduce_sum(mention_loss * tf.cast(span_seq_bin, tf.float32))

    # candidate_mention_scores = tf.squeeze(tf.gather(tf.transpose(candidate_mention_logits), 1)) -\
    #                             tf.squeeze(tf.gather(tf.transpose(candidate_mention_logits), 0))
    
    candidate_mention_scores = tf.squeeze(tf.gather(tf.transpose(candidate_mention_logits), 1)) # [num_mentions]

    k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"]))
    # k = tf.cond(is_training, lambda: k, lambda: tf.shape(text_outputs)[0])
    # k = tf.cond(is_training, lambda: tf.minimum(k, 250), lambda: k)
    k = tf.minimum(k, 250)
    predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores, candidate_starts, candidate_ends, k) # [k]
    predicted_mention_indices.set_shape([None])

    mention_starts = tf.gather(candidate_starts, predicted_mention_indices) # [num_mentions]
    mention_ends = tf.gather(candidate_ends, predicted_mention_indices) # [num_mentions]
    mention_emb = tf.gather(candidate_mention_emb, predicted_mention_indices) # [num_mentions, emb]
    mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices) # [num_mentions]

    mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
    mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
    mention_speaker_ids = tf.gather(speaker_ids, mention_starts) # [num_mentions]

    max_antecedents = self.config["max_antecedents"]
    antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts,
            mention_ends,
            gold_starts,
            gold_ends,
            cluster_ids,
            max_antecedents * 10) # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions])
    antecedents.set_shape([None, None])
    antecedent_labels.set_shape([None, None])
    antecedents_len.set_shape([None])

    antecedent_features = self.get_antecedent_features(mention_emb,
            mention_scores,
            antecedents,
            antecedents_len,
            mention_starts,
            mention_ends,
            mention_speaker_ids,
            genre_emb) # [num_mentions, max_ant + 1]
    
    # antecedent_loss = tf.reduce_sum(self.softmax_loss(antecedent_scores, antecedent_labels))
    
    # antecedent_scores = tf.expand_dims(antecedent_scores, 0)

    # mention_loss = tf.losses.softmax_cross_entropy(span_labels_bin, mention_logits)
    # mention_scores = tf.squeeze(tf.gather(tf.transpose(mention_logits), 1))

    span_emb = tf.expand_dims(mention_emb, 0)
    # span_labels = tf.gather(span_labels, predicted_mention_indices)
    span_seq_sorted = coref_ops.tagging(tf.gather(span_seq, predicted_mention_indices))
    span_seq_sorted.set_shape([None]) # output of the custom tagging op has unknown static shape

    span_labels = tf.expand_dims(tf.one_hot(span_seq_sorted, 100), 0)

    self.span_labels = tf.reduce_sum(span_labels)
    self.span_seq = span_seq_sorted

    # text_conv = tf.expand_dims(text_outputs, 0)
    # text_conv = tf.expand_dims(tf.concat([text_outputs, flattened_text_emb], 1), 0)
    # text_conv = util.cnn_name(text_conv, [5], 100, 'tag_conv')
    # text_conv = tf.nn.dropout(text_conv, self.dropout)
    # self.mention_scores = mention_scores

    outputs = self.gnc_tagging(span_emb, span_labels, span_seq_sorted, mention_scores, antecedent_features, k) # [1, num_words, 100] ?
    # x = tf.matmul(outputs, tf.zeros([10, 80]))

    logits, antecedent_scores = tf.split(outputs, [100, k + 1], 2)
    self.logits_shape = tf.shape(logits)
    self.antecedent_scores_shape = tf.shape(antecedent_scores)

    # antecedent_scores = tf.gather(attention[0], tf.range(k))

    predictions = tf.argmax(logits, axis=2) # [1, num_words] ?

    # loss = tf.losses.softmax_cross_entropy(span_labels, tf.squeeze(logits))
    # loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=tf.squeeze(logits), labels=span_labels))
    # span_seq_expanded = tf.expand_dims(span_seq_sorted, 0)
    # loss = tf.losses.log_loss(span_labels, tf.squeeze(tf.nn.softmax(logits, dim=2))) + .1 * mention_loss
    # self.tag_seq = tag_seq

    # tagging_loss = tf.reduce_sum(self.sigmoid_margin(logits, tf.to_float(span_labels), tf.to_float(k)))

    tagging_loss = tf.reduce_sum(self.exp_loss_margin(logits, tf.to_float(span_labels), 2))

    antecedent_loss = tf.reduce_sum(self.softmax_loss(antecedent_scores[0], antecedent_labels))

    # antecedent_loss = tf.reduce_sum(self.exp_loss_margin(antecedent_scores[0], antecedent_labels, 1))

    # y = tf.squeeze(tf.nn.softmax(logits, dim=2))

    # loss = -tf.reduce_sum(span_labels * tf.log(y) + (1 - span_labels) * tf.log(1 - y))
    # loss = -tf.reduce_sum((1 - span_labels) * tf.log(1 - y))

    self.tagging_loss = tagging_loss
    # self.mention_loss = mention_loss
    self.antecedent_loss = antecedent_loss

    loss = tagging_loss + antecedent_loss # + mention_loss
    
    self.p = predictions

    return [
            candidate_starts,
            candidate_ends,
            candidate_mention_scores,
            mention_starts,
            mention_ends,
            predictions,
            antecedents,
            antecedent_scores[0]
          ], loss
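
The span_seq preprocessing at the top of this example is easy to sanity-check in isolation. A small standalone snippet (assuming TensorFlow 1.x, as used throughout this listing):

# Standalone sanity check of the span_seq binarization above: spans with a
# positive sequence id become class 1, all others class 0.
import tensorflow as tf

span_seq = tf.constant([0, 3, 0, 1])
span_seq_bin = tf.where(tf.greater(span_seq, 0),
                        tf.ones_like(span_seq), tf.zeros_like(span_seq)) # [0, 1, 0, 1]
span_labels_bin = tf.one_hot(span_seq_bin, 2) # [[1,0], [0,1], [1,0], [0,1]]

with tf.Session() as sess:
  print(sess.run(span_labels_bin))
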
Example #3
  def get_predictions_and_loss(self, word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids):
    self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"])
    self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"])

    num_sentences = tf.shape(word_emb)[0]
    max_sentence_length = tf.shape(word_emb)[1]

    text_emb_list = [word_emb]

    if self.config["char_embedding_size"] > 0:
      char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
      flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
      flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
      aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
      text_emb_list.append(aggregated_char_emb)

    text_emb = tf.concat(text_emb_list, 2)
    text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
    text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])

    text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
    text_outputs = tf.nn.dropout(text_outputs, self.dropout)

    genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]

    sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
    flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
    flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask) # [num_words]

    candidate_starts, candidate_ends = coref_ops.spans(
      sentence_indices=flattened_sentence_indices,
      max_width=self.max_mention_width)
    candidate_starts.set_shape([None])
    candidate_ends.set_shape([None])

    candidate_mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, candidate_starts, candidate_ends) # [num_candidates, emb]
    candidate_mention_scores = self.get_mention_scores(candidate_mention_emb) # [num_mentions, 1]
    candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [num_mentions]

    k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"]))
    predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores, candidate_starts, candidate_ends, k) # [k]
    predicted_mention_indices.set_shape([None])

    mention_starts = tf.gather(candidate_starts, predicted_mention_indices) # [num_mentions]
    mention_ends = tf.gather(candidate_ends, predicted_mention_indices) # [num_mentions]
    mention_emb = tf.gather(candidate_mention_emb, predicted_mention_indices) # [num_mentions, emb]
    mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices) # [num_mentions]

    mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
    mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
    mention_speaker_ids = tf.gather(speaker_ids, mention_starts) # [num_mentions]

    max_antecedents = self.config["max_antecedents"]
    antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids, max_antecedents) # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions])
    antecedents.set_shape([None, None])
    antecedent_labels.set_shape([None, None])
    antecedents_len.set_shape([None])

    antecedent_scores = self.get_antecedent_scores(mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb) # [num_mentions, max_ant + 1]

    loss = self.softmax_loss(antecedent_scores, antecedent_labels) # [num_mentions]
    loss = tf.reduce_sum(loss) # []

    return [candidate_starts, candidate_ends, candidate_mention_scores, mention_starts, mention_ends, antecedents, antecedent_scores], loss
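
Every variant leans on util.shape to mix static and dynamic dimensions inside tf.reshape calls. Its definition is not shown; a minimal sketch of such a helper, inferred from how it is used (prefer the statically known size, fall back to a dynamic lookup):

# Sketch of a util.shape-style helper (inferred from the call sites, not taken
# from this listing): return the static dimension when it is known at
# graph-construction time, otherwise a dynamic tf.shape slice.
def shape(x, dim):
  static = x.get_shape()[dim].value # None if unknown statically
  return static if static is not None else tf.shape(x)[dim]
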
Example #4
    def get_predictions_and_loss(self, word_emb, char_index, text_len,
                                 speaker_ids, genre, is_training, gold_starts,
                                 gold_ends, cluster_ids):

        self.gold_starts = gold_starts
        self.gold_ends = gold_ends
        self.cluster_ids = cluster_ids

        self.dropout = 1 - (tf.to_float(is_training) *
                            self.config["dropout_rate"])
        self.lexical_dropout = 1 - (tf.to_float(is_training) *
                                    self.config["lexical_dropout_rate"])

        num_sentences = tf.shape(word_emb)[0]
        max_sentence_length = tf.shape(word_emb)[1]

        text_emb_list = [word_emb]

        if self.config["char_embedding_size"] > 0:
            char_emb = tf.gather(
                tf.get_variable(
                    "char_embeddings",
                    [len(self.char_dict), self.config["char_embedding_size"]]),
                char_index
            )  # [num_sentences, max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [
                num_sentences * max_sentence_length,
                util.shape(char_emb, 2),
                util.shape(char_emb, 3)
            ])  # [num_sentences * max_sentence_length, max_word_length, emb]
            flattened_aggregated_char_emb = util.cnn(
                flattened_char_emb, self.config["filter_widths"],
                self.config["filter_size"]
            )  # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
                num_sentences, max_sentence_length,
                util.shape(flattened_aggregated_char_emb, 1)
            ])  # [num_sentences, max_sentence_length, emb]
            text_emb_list.append(aggregated_char_emb)

        text_emb = tf.concat(text_emb_list, 2)
        text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

        text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
        text_len_mask = tf.reshape(text_len_mask,
                                   [num_sentences * max_sentence_length])
        # self.text_len_mask = text_len_mask[0]

        text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
        text_outputs = tf.nn.dropout(text_outputs, self.dropout)

        genre_emb = tf.gather(
            tf.get_variable("genre_embeddings",
                            [len(self.genres), self.config["feature_size"]]),
            genre)  # [emb]

        sentence_indices = tf.tile(
            tf.expand_dims(tf.range(num_sentences), 1),
            [1, max_sentence_length])  # [num_sentences, max_sentence_length]
        flattened_sentence_indices = self.flatten_emb_by_sentence(
            sentence_indices, text_len_mask)  # [num_words]
        flattened_text_emb = self.flatten_emb_by_sentence(
            text_emb, text_len_mask)  # [num_words]
        self.flattened_sentence_indices = flattened_sentence_indices

        word_nemb = tf.concat([text_outputs, flattened_text_emb], 1)
        # word_nemb = tf.reshape(word_nemb, [tf.shape(text_outputs)[0], tf.shape(text_outputs)[1] + tf.shape(flattened_text_emb)[1]])
        with tf.variable_scope("conv_score"):
            # nemb_size = util.shape(word_nemb, 1)
            # w = tf.get_variable("w", [3, nemb_size, 150])
            # b = tf.get_variable("b", [150])
            # conv = tf.nn.conv1d(tf.expand_dims(word_nemb, 0), w, stride=1, padding="SAME")
            # h = tf.nn.relu(tf.nn.bias_add(conv, b))
            candidate_word_scores = util.projection(word_nemb, 2)
            start_scores, end_scores = tf.split(candidate_word_scores, [1, 1],
                                                1)
            start_scores = tf.reshape(start_scores, [-1])
            end_scores = tf.reshape(end_scores, [-1])

        candidate_starts, candidate_ends = coref_ops.spans(
            sentence_indices=flattened_sentence_indices,
            max_width=self.max_mention_width)
        candidate_starts.set_shape([None])
        candidate_ends.set_shape([None])

        candidate_mention_emb = self.get_mention_emb(
            flattened_text_emb, text_outputs, candidate_starts,
            candidate_ends)  # [num_candidates, emb]
        candidate_mention_scores = self.get_mention_scores(
            candidate_mention_emb)  # [num_mentions, 1]
        candidate_mention_scores = tf.squeeze(candidate_mention_scores,
                                              1)  # [num_mentions]
        candidate_mention_scores += tf.gather(start_scores,
                                              candidate_starts) + tf.gather(
                                                  end_scores, candidate_ends)

        k = tf.to_int32(
            tf.floor(
                tf.to_float(tf.shape(text_outputs)[0]) *
                self.config["mention_ratio"]))
        k = tf.minimum(k, self.config["max_antecedents"])
        predicted_mention_indices = coref_ops.extract_mentions(
            candidate_mention_scores, candidate_starts, candidate_ends,
            k)  # [k]
        predicted_mention_indices.set_shape([None])

        mention_starts = tf.gather(candidate_starts,
                                   predicted_mention_indices)  # [num_mentions]
        mention_ends = tf.gather(candidate_ends,
                                 predicted_mention_indices)  # [num_mentions]
        mention_emb = tf.gather(
            candidate_mention_emb,
            predicted_mention_indices)  # [num_mentions, emb]
        # mention_emb = tf.reshape(mention_emb, [k, -1])
        mention_scores = tf.gather(candidate_mention_scores,
                                   predicted_mention_indices)  # [num_mentions]
        word_scores = tf.gather(candidate_word_scores,
                                predicted_mention_indices)

        mention_start_emb = tf.gather(text_outputs,
                                      mention_starts)  # [num_mentions, emb]
        mention_end_emb = tf.gather(text_outputs,
                                    mention_ends)  # [num_mentions, emb]
        mention_speaker_ids = tf.gather(speaker_ids,
                                        mention_starts)  # [num_mentions]

        max_antecedents = self.config["max_antecedents"]
        antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(
            mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids,
            k
        )  # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions])
        antecedents.set_shape([None, None])
        antecedent_labels.set_shape([None, None])
        antecedents_len.set_shape([None])

        antecedent_scores = self.get_antecedent_scores(
            mention_emb, word_nemb, mention_scores, antecedents,
            antecedents_len, mention_starts, mention_ends, mention_speaker_ids,
            speaker_ids, genre_emb, tf.expand_dims(start_scores, 1),
            tf.expand_dims(end_scores, 1), k)  # [num_mentions, max_ant + 1]

        loss = self.softmax_loss(antecedent_scores,
                                 antecedent_labels)  # [num_mentions]
        # loss = self.exp_loss_margin(antecedent_scores, antecedent_labels) # [num_mentions]
        loss = tf.reduce_sum(loss)  # []

        return [
            candidate_starts, candidate_ends, candidate_mention_scores,
            mention_starts, mention_ends, antecedents, antecedent_scores
        ], loss
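
flatten_emb_by_sentence, used by every variant to drop padding positions, is also elided. A sketch consistent with its call sites (flatten the sentence dimension, then keep only the positions selected by the boolean mask):

    # Sketch of flatten_emb_by_sentence, inferred from its call sites (an
    # assumption, not code from this listing). emb is either
    # [num_sentences, max_sentence_length] (the index tensors) or
    # [num_sentences, max_sentence_length, emb]; text_len_mask is a flat
    # boolean mask of length num_sentences * max_sentence_length.
    def flatten_emb_by_sentence(self, emb, text_len_mask):
        num_sentences = tf.shape(emb)[0]
        max_sentence_length = tf.shape(emb)[1]
        if len(emb.get_shape()) == 2:
            flattened_emb = tf.reshape(emb, [num_sentences * max_sentence_length])
        else:
            flattened_emb = tf.reshape(
                emb, [num_sentences * max_sentence_length, util.shape(emb, 2)])
        return tf.boolean_mask(flattened_emb, text_len_mask)  # [num_words, ...]
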
Example #5
  def get_predictions_and_loss(self, word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids, pos_tags, ner_tags, categories, ner_ids, cat_glove, domain_labels, l):
    self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"])
    self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"])

    num_sentences = tf.shape(word_emb)[0]
    max_sentence_length = tf.shape(word_emb)[1]

    text_emb_list = [word_emb]

    if self.config["char_embedding_size"] > 0:
      char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
      flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
      flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
      aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
      text_emb_list.append(aggregated_char_emb)

    if self.config["use_pos_tag"]:
      text_emb_list.append(pos_tags)

    if self.config["use_ner_g"]:
      text_emb_list.append(ner_tags)

    if self.config["use_categories"]:
      text_emb_list.append(categories)

    if self.config["use_categories_glove"]:
      text_emb_list.append(cat_glove)

    text_emb = tf.concat(text_emb_list, 2)
    text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
    text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])

    text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
    text_outputs = tf.nn.dropout(text_outputs, self.dropout)

    genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]
    # print "------------------------------"
    # print "GENRE EMB"
    # print genre_emb.shape, genre

    sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
    flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
    flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask) # [num_words]

    candidate_starts, candidate_ends = coref_ops.spans(
      sentence_indices=flattened_sentence_indices,
      max_width=self.max_mention_width)
    candidate_starts.set_shape([None])
    candidate_ends.set_shape([None])

    # get_mention_scores call util.ffnn
    candidate_mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, candidate_starts, candidate_ends) # [num_candidates, emb]
    candidate_mention_scores = self.get_mention_scores(candidate_mention_emb) # [num_mentions, 1]
    candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [num_mentions]

    k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"]))
    predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores, candidate_starts, candidate_ends, k) # [k]
    predicted_mention_indices.set_shape([None])

    mention_starts = tf.gather(candidate_starts, predicted_mention_indices) # [num_mentions]
    mention_ends = tf.gather(candidate_ends, predicted_mention_indices) # [num_mentions]
    mention_emb = tf.gather(candidate_mention_emb, predicted_mention_indices) # [num_mentions, emb]
    mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices) # [num_mentions]

    mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
    mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
    mention_speaker_ids = tf.gather(speaker_ids, mention_starts) # [num_mentions]

    mention_ner_ids = tf.gather(ner_ids, mention_starts)

    max_antecedents = self.config["max_antecedents"]
    antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids, max_antecedents) # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions])
    antecedents.set_shape([None, None])
    antecedent_labels.set_shape([None, None])
    antecedents_len.set_shape([None])

    # get_antecedent_scores calls util.ffnn
    antecedent_scores, pair_emb = self.get_antecedent_scores(mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb, mention_ner_ids) # [num_mentions, max_ant + 1]

    # antecedent_scores are floats and antecedent_labels are booleans;
    # softmax_loss casts the boolean labels to floats when computing the loss
    loss = self.softmax_loss(antecedent_scores, antecedent_labels) # [num_mentions]
    loss = tf.reduce_sum(loss) # []

    print "---------------------------------------"
    print "PAIR EMB"
    print pair_emb.shape
    print "---------------------------------------"
    print "CANDIDATE"
    print candidate_mention_emb.shape
    print "---------------------------------------"


    # if self.config["use_dann"]:
    d_logits = util.dann(candidate_mention_emb, self.config["ffnn_size"], 
                                len(self.genres), self.dropout, l=l, name="1")
    # elif self.config["use_dann_pairwise"]:
    d_logits2 = util.dann(pair_emb, self.config["ffnn_size"], 
                                len(self.genres), self.dropout, l=l, name="2")
    print "---------------------------------------"
    print "D_LOGITS"
    print d_logits2.shape
    print "---------------------------------------"
    d_probs2 = tf.nn.softmax(d_logits2)

    d_probs = tf.nn.softmax(d_logits) # P(genre | mentions)
    neg_ll = -tf.log(tf.clip_by_value(d_probs, 1e-10, 1.0)) # N x 7
    N = tf.shape(neg_ll)[0]

    # argmax of d_probs are predicted domains
    pred_domains = tf.argmax(d_probs, 1)
    
    # changing 1 x 7 domain labels to N x 7
    tiled_domain_labels = tf.tile(tf.expand_dims(domain_labels, 0), [N, 1])

    # converting tiles to be comparable to pred_domains
    gold_domains = tf.argmax(tiled_domain_labels, 1)

    # multiply neg_ll with tiled_domain_labels
    pairwise_loss = tf.multiply(neg_ll, tiled_domain_labels) # N x 7
    pairwise_loss_reduced = tf.reduce_sum(pairwise_loss, 0)

    domain_loss = tf.reduce_sum(tf.divide(pairwise_loss_reduced, tf.cast(N, tf.float32)))
    domain_loss_reduce_mean = tf.reduce_sum(tf.reduce_mean(pairwise_loss, 0))

    correct_domain_predictions = tf.equal(pred_domains, gold_domains)
    domain_accuracy = tf.reduce_mean(tf.cast(correct_domain_predictions, tf.float32))

    values = [domain_accuracy, domain_loss_reduce_mean, pairwise_loss_reduced, N, neg_ll, d_logits2, d_logits]

    return [candidate_starts, candidate_ends, candidate_mention_scores, mention_starts, mention_ends, antecedents, antecedent_scores, antecedent_labels, genre], loss, domain_loss, pred_domains, values
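
util.dann takes a scaling factor l, which in domain-adversarial training typically drives a gradient-reversal layer. Its internals are not shown here; one common construction of such a flip-gradient layer (an assumption about util.dann, not its actual code):

# A common gradient-reversal trick for domain-adversarial training (an
# assumption about util.dann's internals, which this listing does not show):
# the forward pass is the identity, the backward pass scales gradients by -l.
def flip_gradient(x, l=1.0):
  positive_path = tf.stop_gradient((1.0 + l) * x) # contributes no gradient
  return positive_path - l * x # forward: x; backward: d/dx = -l
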
Example #6
    def get_predictions_and_loss(self, word_emb, char_index, text_len,
                                 speaker_ids, genre, is_training, gold_starts,
                                 gold_ends, cluster_ids):

        self.gold_starts = gold_starts
        self.gold_ends = gold_ends
        self.cluster_ids = cluster_ids

        self.dropout = 1 - (tf.to_float(is_training) *
                            self.config["dropout_rate"])
        self.lexical_dropout = 1 - (tf.to_float(is_training) *
                                    self.config["lexical_dropout_rate"])

        num_sentences = tf.shape(word_emb)[0]
        max_sentence_length = tf.shape(word_emb)[1]

        text_emb_list = [word_emb]

        if self.config["char_embedding_size"] > 0:
            char_emb = tf.gather(
                tf.get_variable(
                    "char_embeddings",
                    [len(self.char_dict), self.config["char_embedding_size"]]),
                char_index
            )  # [num_sentences, max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [
                num_sentences * max_sentence_length,
                util.shape(char_emb, 2),
                util.shape(char_emb, 3)
            ])  # [num_sentences * max_sentence_length, max_word_length, emb]
            flattened_aggregated_char_emb = util.cnn(
                flattened_char_emb, self.config["filter_widths"],
                self.config["filter_size"]
            )  # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
                num_sentences, max_sentence_length,
                util.shape(flattened_aggregated_char_emb, 1)
            ])  # [num_sentences, max_sentence_length, emb]
            text_emb_list.append(aggregated_char_emb)

        text_emb = tf.concat(text_emb_list, 2)
        text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

        text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
        text_len_mask = tf.reshape(text_len_mask,
                                   [num_sentences * max_sentence_length])
        # self.text_len_mask = text_len_mask[0]

        text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
        text_outputs = tf.nn.dropout(text_outputs, self.dropout)

        genre_emb = tf.gather(
            tf.get_variable("genre_embeddings",
                            [len(self.genres), self.config["feature_size"]]),
            genre)  # [emb]

        sentence_indices = tf.tile(
            tf.expand_dims(tf.range(num_sentences), 1),
            [1, max_sentence_length])  # [num_sentences, max_sentence_length]
        flattened_sentence_indices = self.flatten_emb_by_sentence(
            sentence_indices, text_len_mask)  # [num_words]
        flattened_text_emb = self.flatten_emb_by_sentence(
            text_emb, text_len_mask)  # [num_words]
        self.flattened_sentence_indices = flattened_sentence_indices

        candidate_starts, candidate_ends = coref_ops.spans(
            sentence_indices=flattened_sentence_indices,
            max_width=self.max_mention_width)
        candidate_starts.set_shape([None])
        candidate_ends.set_shape([None])

        candidate_mention_emb = self.get_mention_emb(
            flattened_text_emb, text_outputs, candidate_starts,
            candidate_ends)  # [num_candidates, emb]
        candidate_mention_scores = self.get_mention_scores(
            candidate_mention_emb)  # [num_mentions, 1]
        candidate_mention_scores = tf.squeeze(candidate_mention_scores,
                                              1)  # [num_mentions]

        k = tf.to_int32(
            tf.floor(
                tf.to_float(tf.shape(text_outputs)[0]) *
                self.config["mention_ratio"]))
        k = tf.minimum(k, self.config["max_antecedents"])
        predicted_mention_indices = coref_ops.extract_mentions(
            candidate_mention_scores, candidate_starts, candidate_ends,
            k)  # [k]
        predicted_mention_indices.set_shape([None])

        mention_starts = tf.gather(candidate_starts,
                                   predicted_mention_indices)  # [num_mentions]
        mention_ends = tf.gather(candidate_ends,
                                 predicted_mention_indices)  # [num_mentions]
        mention_emb = tf.gather(
            candidate_mention_emb,
            predicted_mention_indices)  # [num_mentions, emb]
        mention_scores = tf.gather(candidate_mention_scores,
                                   predicted_mention_indices)  # [num_mentions]

        mention_start_emb = tf.gather(text_outputs,
                                      mention_starts)  # [num_mentions, emb]
        mention_end_emb = tf.gather(text_outputs,
                                    mention_ends)  # [num_mentions, emb]
        mention_speaker_ids = tf.gather(speaker_ids,
                                        mention_starts)  # [num_mentions]

        max_antecedents = self.config["max_antecedents"]
        antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(
            mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids,
            max_antecedents
        )  # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions])
        antecedents.set_shape([None, None])
        antecedent_labels.set_shape([None, None])
        antecedents_len.set_shape([None])

        A = tf.eye(k)

        with tf.variable_scope("layer_1"):
            antecedent_scores_1 = self.get_antecedent_scores(
                mention_emb, mention_scores, antecedents, antecedents_len,
                mention_starts, mention_ends, mention_speaker_ids, genre_emb,
                k)  # [num_mentions, max_ant + 1]
            DAD_1 = self.get_DAD(A, antecedent_scores_1, k)
            mention_emb_1 = tf.nn.relu(
                util.projection(tf.matmul(DAD_1, mention_emb), 200))

        with tf.variable_scope("layer_2"):
            antecedent_scores_2 = self.get_antecedent_scores(
                mention_emb_1, mention_scores, antecedents, antecedents_len,
                mention_starts, mention_ends, mention_speaker_ids, genre_emb,
                k)  # [num_mentions, max_ant + 1]
            DAD_2 = self.get_DAD(A, antecedent_scores_2, k)
            mention_emb_2 = tf.nn.relu(
                util.projection(tf.matmul(DAD_2, mention_emb), 200))

        with tf.variable_scope("layer_3"):
            antecedent_scores = self.get_antecedent_scores(
                mention_emb_2, mention_scores, antecedents, antecedents_len,
                mention_starts, mention_ends, mention_speaker_ids, genre_emb,
                k)  # [num_mentions, max_ant + 1]

        antecedent_scores += antecedent_scores_1
        loss = self.softmax_loss(antecedent_scores,
                                 antecedent_labels)  # [num_mentions]
        # loss = self.exp_loss_margin(antecedent_scores, antecedent_labels) # [num_mentions]
        loss = tf.reduce_sum(loss)  # []

        return [
            candidate_starts, candidate_ends, candidate_mention_scores,
            mention_starts, mention_ends, antecedents, antecedent_scores
        ], loss
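
get_DAD is not shown either; the name and the tf.eye(k) seed suggest the symmetric normalization D^-1/2 A D^-1/2 used in graph convolutional networks, with the adjacency presumably derived from the antecedent scores. A sketch of that normalization under this assumption:

# Sketch of a symmetric GCN-style adjacency normalization, assuming that is
# what get_DAD computes (hypothetical; its actual code is not in this listing).
def normalize_adjacency(A):
    degrees = tf.reduce_sum(A, axis=1)  # [k]
    d_inv_sqrt = tf.diag(tf.rsqrt(tf.maximum(degrees, 1e-6)))  # [k, k]
    return tf.matmul(tf.matmul(d_inv_sqrt, A), d_inv_sqrt)  # D^-1/2 A D^-1/2, [k, k]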