Example #1
  def get_predictions_and_loss(self, word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids):
    self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"])
    self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"])

    num_sentences = tf.shape(word_emb)[0]
    max_sentence_length = tf.shape(word_emb)[1]

    text_emb_list = [word_emb]

    if self.config["char_embedding_size"] > 0:
      char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
      flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
      flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
      aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
      text_emb_list.append(aggregated_char_emb)

    text_emb = tf.concat(text_emb_list, 2)
    text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
    text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])
    self.tm_shape = tf.shape(text_len_mask)

    text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
    text_outputs = tf.nn.dropout(text_outputs, self.dropout)
    self.txt_shape = tf.shape(text_outputs)

    genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]

    sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
    flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
    flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask) # [num_words, emb]
    self.flattened_sentence_indices = flattened_sentence_indices
    self.emb_shape = tf.shape(flattened_text_emb)

    candidate_starts, candidate_ends = coref_ops.spans(
      sentence_indices=flattened_sentence_indices,
      max_width=self.max_mention_width)
    candidate_starts.set_shape([None])
    candidate_ends.set_shape([None])

    candidate_mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, candidate_starts, candidate_ends) # [num_candidates, emb]
    candidate_mention_scores = self.get_mention_scores(candidate_mention_emb) # [num_candidates, 1]
    candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [num_candidates]

    k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"]))
    predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores,
                              candidate_starts,
                              candidate_ends,
                              k) # [k]
    predicted_mention_indices.set_shape([None])

    mention_starts = tf.gather(candidate_starts, predicted_mention_indices) # [num_mentions]
    mention_ends = tf.gather(candidate_ends, predicted_mention_indices) # [num_mentions]
    mention_emb = tf.gather(candidate_mention_emb, predicted_mention_indices) # [num_mentions, emb]
    mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices) # [num_mentions]

    mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
    mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
    mention_speaker_ids = tf.gather(speaker_ids, mention_starts) # [num_mentions]

    context_starts = tf.maximum(mention_starts - 5, 0)
    context_ends = tf.minimum(mention_ends + 5, util.shape(text_outputs, 0) - 1)

    context_start_emb = tf.gather(text_outputs, context_starts) # [num_mentions, emb]
    context_end_emb = tf.gather(text_outputs, context_ends) # [num_mentions, emb]

    max_antecedents = self.config["max_antecedents"]
    antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts,
                                                                            mention_ends,
                                                                            gold_starts,
                                                                            gold_ends,
                                                                            cluster_ids,
                                                                            max_antecedents) # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions])
    antecedents.set_shape([None, None])
    antecedent_labels.set_shape([None, None])
    antecedents_len.set_shape([None])

    antecedent_scores = self.get_context_antecedent_scores(mention_emb,
                                                    mention_scores,
                                                    antecedents,
                                                    antecedents_len,
                                                    mention_starts,
                                                    mention_ends,
                                                    mention_speaker_ids,
                                                    genre_emb,
                                                    context_starts,
                                                    context_ends,
                                                    text_outputs,
                                                    flattened_text_emb) # [num_mentions, max_ant + 1]

    loss = self.softmax_loss(antecedent_scores, antecedent_labels) # [num_mentions]
    loss = tf.reduce_sum(loss) # []

    return [
            candidate_starts,
            candidate_ends,
            candidate_mention_scores,
            mention_starts,
            mention_ends,
            antecedents,
            antecedent_scores
          ], loss
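
A note on softmax_loss, which this example calls but does not define: antecedent_labels is boolean and, per the shape comments, the loss marginalizes over all gold antecedents of each mention. A minimal sketch of that computation in the same TF 1.x style (the method name and shapes come from the call site; the body is an assumption, not this repository's verbatim code):

  def softmax_loss(self, antecedent_scores, antecedent_labels):
    # log(0) = -inf for False labels, so the logsumexp marginalizes over gold antecedents only.
    gold_scores = antecedent_scores + tf.log(tf.to_float(antecedent_labels)) # [num_mentions, max_ant + 1]
    marginalized_gold_scores = tf.reduce_logsumexp(gold_scores, [1]) # [num_mentions]
    log_norm = tf.reduce_logsumexp(antecedent_scores, [1]) # [num_mentions]
    return log_norm - marginalized_gold_scores # [num_mentions]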
Example #2
  def get_predictions_and_loss(self, word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids):
    self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"])
    self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"])

    num_sentences = tf.shape(word_emb)[0]
    max_sentence_length = tf.shape(word_emb)[1]

    text_emb_list = [word_emb]

    if self.config["char_embedding_size"] > 0:
      char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
      flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
      flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
      aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
      text_emb_list.append(aggregated_char_emb)

    text_emb = tf.concat(text_emb_list, 2)
    text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
    text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])

    text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
    text_outputs = tf.nn.dropout(text_outputs, self.dropout)

    genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]

    sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
    flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
    flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask) # [num_words, emb]

    candidate_starts, candidate_ends = coref_ops.spans(
      sentence_indices=flattened_sentence_indices,
      max_width=self.max_mention_width)
    candidate_starts.set_shape([None])
    candidate_ends.set_shape([None])

    candidate_mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, candidate_starts, candidate_ends) # [num_candidates, emb]
    candidate_mention_scores = self.get_mention_scores(candidate_mention_emb) # [num_candidates, 1]
    candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [num_candidates]

    k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"]))
    predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores, candidate_starts, candidate_ends, k) # [k]
    predicted_mention_indices.set_shape([None])

    mention_starts = tf.gather(candidate_starts, predicted_mention_indices) # [num_mentions]
    mention_ends = tf.gather(candidate_ends, predicted_mention_indices) # [num_mentions]
    mention_emb = tf.gather(candidate_mention_emb, predicted_mention_indices) # [num_mentions, emb]
    mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices) # [num_mentions]

    mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
    mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
    mention_speaker_ids = tf.gather(speaker_ids, mention_starts) # [num_mentions]

    max_antecedents = self.config["max_antecedents"]
    antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids, max_antecedents) # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions])
    antecedents.set_shape([None, None])
    antecedent_labels.set_shape([None, None])
    antecedents_len.set_shape([None])

    antecedent_scores = self.get_antecedent_scores(mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb) # [num_mentions, max_ant + 1]

    loss = self.softmax_loss(antecedent_scores, antecedent_labels) # [num_mentions]
    loss = tf.reduce_sum(loss) # []

    return [candidate_starts, candidate_ends, candidate_mention_scores, mention_starts, mention_ends, antecedents, antecedent_scores], loss
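
Every example reshapes tensors with util.shape(x, dim), mixing static and dynamic dimensions. A minimal sketch of such a helper, assuming the usual static-if-known fallback (an assumption about util, not its verbatim code):

def shape(x, dim):
  # Prefer the statically known dimension; fall back to the dynamic shape op.
  static = x.get_shape()[dim].value
  return static if static is not None else tf.shape(x)[dim]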
Example #3
  def get_predictions_and_loss(self, word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids, pos_tags, ner_tags, categories, ner_ids, cat_glove, domain_labels, l):
    self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"])
    self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"])

    num_sentences = tf.shape(word_emb)[0]
    max_sentence_length = tf.shape(word_emb)[1]

    text_emb_list = [word_emb]

    if self.config["char_embedding_size"] > 0:
      char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
      flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
      flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
      aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
      text_emb_list.append(aggregated_char_emb)

    if self.config["use_pos_tag"]:
      text_emb_list.append(pos_tags)

    if self.config["use_ner_g"]:
      text_emb_list.append(ner_tags)

    if self.config["use_categories"]:
      text_emb_list.append(categories)

    if self.config["use_categories_glove"]:
      text_emb_list.append(cat_glove)

    text_emb = tf.concat(text_emb_list, 2)
    text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
    text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])

    text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
    text_outputs = tf.nn.dropout(text_outputs, self.dropout)

    genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]
    # print "------------------------------"
    # print "GENRE EMB"
    # print genre_emb.shape, genre

    sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
    flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
    flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask) # [num_words, emb]

    candidate_starts, candidate_ends = coref_ops.spans(
      sentence_indices=flattened_sentence_indices,
      max_width=self.max_mention_width)
    candidate_starts.set_shape([None])
    candidate_ends.set_shape([None])

    # get_mention_scores calls util.ffnn
    candidate_mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, candidate_starts, candidate_ends) # [num_candidates, emb]
    candidate_mention_scores = self.get_mention_scores(candidate_mention_emb) # [num_candidates, 1]
    candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [num_candidates]

    k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"]))
    predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores, candidate_starts, candidate_ends, k) # [k]
    predicted_mention_indices.set_shape([None])

    mention_starts = tf.gather(candidate_starts, predicted_mention_indices) # [num_mentions]
    mention_ends = tf.gather(candidate_ends, predicted_mention_indices) # [num_mentions]
    mention_emb = tf.gather(candidate_mention_emb, predicted_mention_indices) # [num_mentions, emb]
    mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices) # [num_mentions]

    mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
    mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
    mention_speaker_ids = tf.gather(speaker_ids, mention_starts) # [num_mentions]

    mention_ner_ids = tf.gather(ner_ids, mention_starts)

    max_antecedents = self.config["max_antecedents"]
    antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids, max_antecedents) # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions])
    antecedents.set_shape([None, None])
    antecedent_labels.set_shape([None, None])
    antecedents_len.set_shape([None])

    # get_antecedent_scores calls util.ffnn
    antecedent_scores, pair_emb = self.get_antecedent_scores(mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb, mention_ner_ids) # [num_mentions, max_ant + 1]

    # antecedent scores are floating points
    # antecedent labels are True/False
    # softmax_loss converts True/False into floating points
    loss = self.softmax_loss(antecedent_scores, antecedent_labels) # [num_mentions]
    loss = tf.reduce_sum(loss) # []

    print "---------------------------------------"
    print "PAIR EMB"
    print pair_emb.shape
    print "---------------------------------------"
    print "CANDIDATE"
    print candidate_mention_emb.shape
    print "---------------------------------------"


    # if self.config["use_dann"]:
    d_logits = util.dann(candidate_mention_emb, self.config["ffnn_size"], 
                                len(self.genres), self.dropout, l=l, name="1")
    # elif self.config["use_dann_pairwise"]:
    d_logits2 = util.dann(pair_emb, self.config["ffnn_size"], 
                                len(self.genres), self.dropout, l=l, name="2")
    print "---------------------------------------"
    print "D_LOGITS"
    print d_logits2.shape
    print "---------------------------------------"
    d_probs2 = tf.nn.softmax(d_logits2)

    d_probs = tf.nn.softmax(d_logits) # P(genre | mentions)
    neg_ll = -tf.log(tf.clip_by_value(d_probs,1e-10,1.0)) # N x 7
    N = tf.shape(neg_ll)[0]

    # argmax of d_probs are predicted domains
    pred_domains = tf.argmax(d_probs, 1)
    
    # changing 1 x 7 domain labels to N x 7
    tiled_domain_labels = tf.tile(tf.expand_dims(domain_labels, 0), [N, 1])

    # converting tiles to be comparable to pred_domains
    gold_domains = tf.argmax(tiled_domain_labels, 1)

    # multiply neg_ll with tiled_domain_labels
    pairwise_loss = tf.multiply(neg_ll, tiled_domain_labels) # N x 7
    pairwise_loss_reduced = tf.reduce_sum(pairwise_loss, 0)

    domain_loss = tf.reduce_sum(tf.divide(pairwise_loss_reduced, tf.cast(N, tf.float32)))
    domain_loss_reduce_mean = tf.reduce_sum(tf.reduce_mean(pairwise_loss, 0))

    correct_domain_predictions = tf.equal(pred_domains, gold_domains)
    domain_accuracy = tf.reduce_mean(tf.cast(correct_domain_predictions, tf.float32))

    values = [domain_accuracy, domain_loss_reduce_mean, pairwise_loss_reduced, N, neg_ll, d_logits2, d_logits]

    return [candidate_starts, candidate_ends, candidate_mention_scores, mention_starts, mention_ends, antecedents, antecedent_scores, antecedent_labels, genre], loss, domain_loss, pred_domains, values
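
util.dann is not defined in this excerpt. Its call sites (features in, len(self.genres) logits out, a scale l, distinct names "1" and "2") match a domain-adversarial classifier with a gradient-reversal layer. A hedged sketch under those assumptions (the signature is copied from the call sites; the body, including the use of util.projection as a linear layer, is an assumption):

def dann(inputs, hidden_size, num_domains, dropout, l=1.0, name="0"):
  with tf.variable_scope("dann_" + name):
    # Gradient reversal: identity on the forward pass, gradient scaled by -l on the backward pass.
    reversed_inputs = tf.stop_gradient((1.0 + l) * inputs) - l * inputs
    with tf.variable_scope("hidden"):
      hidden = tf.nn.dropout(tf.nn.relu(util.projection(reversed_inputs, hidden_size)), dropout)
    with tf.variable_scope("output"):
      return util.projection(hidden, num_domains) # [N, num_domains] logits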
Example #4
    def get_predictions_and_loss(self, word_emb, char_index, text_len,
                                 speaker_ids, genre, is_training, gold_starts,
                                 gold_ends, cluster_ids):

        self.gold_starts = gold_starts
        self.gold_ends = gold_ends
        self.cluster_ids = cluster_ids

        self.dropout = 1 - (tf.to_float(is_training) *
                            self.config["dropout_rate"])
        self.lexical_dropout = 1 - (tf.to_float(is_training) *
                                    self.config["lexical_dropout_rate"])

        num_sentences = tf.shape(word_emb)[0]
        max_sentence_length = tf.shape(word_emb)[1]

        text_emb_list = [word_emb]

        if self.config["char_embedding_size"] > 0:
            char_emb = tf.gather(
                tf.get_variable(
                    "char_embeddings",
                    [len(self.char_dict), self.config["char_embedding_size"]]),
                char_index
            )  # [num_sentences, max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [
                num_sentences * max_sentence_length,
                util.shape(char_emb, 2),
                util.shape(char_emb, 3)
            ])  # [num_sentences * max_sentence_length, max_word_length, emb]
            flattened_aggregated_char_emb = util.cnn(
                flattened_char_emb, self.config["filter_widths"],
                self.config["filter_size"]
            )  # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
                num_sentences, max_sentence_length,
                util.shape(flattened_aggregated_char_emb, 1)
            ])  # [num_sentences, max_sentence_length, emb]
            text_emb_list.append(aggregated_char_emb)

        text_emb = tf.concat(text_emb_list, 2)
        text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

        text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
        text_len_mask = tf.reshape(text_len_mask,
                                   [num_sentences * max_sentence_length])

        text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
        text_outputs = tf.nn.dropout(text_outputs, self.dropout)

        genre_emb = tf.gather(
            tf.get_variable("genre_embeddings",
                            [len(self.genres), self.config["feature_size"]]),
            genre)  # [emb]

        sentence_indices = tf.tile(
            tf.expand_dims(tf.range(num_sentences), 1),
            [1, max_sentence_length])  # [num_sentences, max_sentence_length]
        flattened_sentence_indices = self.flatten_emb_by_sentence(
            sentence_indices, text_len_mask)  # [num_words]
        flattened_text_emb = self.flatten_emb_by_sentence(
            text_emb, text_len_mask)  # [num_words, emb]
        self.flattened_sentence_indices = flattened_sentence_indices

        word_nemb = tf.concat([text_outputs, flattened_text_emb], 1)
        with tf.variable_scope("conv_score"):
            # Project each word's representation to a start score and an end score.
            candidate_word_scores = util.projection(word_nemb, 2)
            start_scores, end_scores = tf.split(candidate_word_scores, [1, 1],
                                                1)
            start_scores = tf.reshape(start_scores, [-1])
            end_scores = tf.reshape(end_scores, [-1])

        candidate_starts, candidate_ends = coref_ops.spans(
            sentence_indices=flattened_sentence_indices,
            max_width=self.max_mention_width)
        candidate_starts.set_shape([None])
        candidate_ends.set_shape([None])

        candidate_mention_emb = self.get_mention_emb(
            flattened_text_emb, text_outputs, candidate_starts,
            candidate_ends)  # [num_candidates, emb]
        candidate_mention_scores = self.get_mention_scores(
            candidate_mention_emb)  # [num_candidates, 1]
        candidate_mention_scores = tf.squeeze(candidate_mention_scores,
                                              1)  # [num_candidates]
        candidate_mention_scores += tf.gather(start_scores,
                                              candidate_starts) + tf.gather(
                                                  end_scores, candidate_ends)

        k = tf.to_int32(
            tf.floor(
                tf.to_float(tf.shape(text_outputs)[0]) *
                self.config["mention_ratio"]))
        k = tf.minimum(k, self.config["max_antecedents"])
        predicted_mention_indices = coref_ops.extract_mentions(
            candidate_mention_scores, candidate_starts, candidate_ends,
            k)  # [k]
        predicted_mention_indices.set_shape([None])

        mention_starts = tf.gather(candidate_starts,
                                   predicted_mention_indices)  # [num_mentions]
        mention_ends = tf.gather(candidate_ends,
                                 predicted_mention_indices)  # [num_mentions]
        mention_emb = tf.gather(
            candidate_mention_emb,
            predicted_mention_indices)  # [num_mentions, emb]
        mention_scores = tf.gather(candidate_mention_scores,
                                   predicted_mention_indices)  # [num_mentions]
        word_scores = tf.gather(candidate_word_scores,
                                predicted_mention_indices)

        mention_start_emb = tf.gather(text_outputs,
                                      mention_starts)  # [num_mentions, emb]
        mention_end_emb = tf.gather(text_outputs,
                                    mention_ends)  # [num_mentions, emb]
        mention_speaker_ids = tf.gather(speaker_ids,
                                        mention_starts)  # [num_mentions]

        max_antecedents = self.config["max_antecedents"]
        antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(
            mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids,
            k
        )  # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions])
        antecedents.set_shape([None, None])
        antecedent_labels.set_shape([None, None])
        antecedents_len.set_shape([None])

        antecedent_scores = self.get_antecedent_scores(
            mention_emb, word_nemb, mention_scores, antecedents,
            antecedents_len, mention_starts, mention_ends, mention_speaker_ids,
            speaker_ids, genre_emb, tf.expand_dims(start_scores, 1),
            tf.expand_dims(end_scores, 1), k)  # [num_mentions, max_ant + 1]

        loss = self.softmax_loss(antecedent_scores,
                                 antecedent_labels)  # [num_mentions]
        # loss = self.exp_loss_margin(antecedent_scores, antecedent_labels) # [num_mentions]
        loss = tf.reduce_sum(loss)  # []

        return [
            candidate_starts, candidate_ends, candidate_mention_scores,
            mention_starts, mention_ends, antecedents, antecedent_scores
        ], loss
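
Example #4's conv_score block depends on util.projection to map each word's representation to two scalars. A minimal sketch of a linear projection helper consistent with that usage, reusing the shape helper sketched after Example #2 (assumed, not the repository's verbatim code):

def projection(inputs, output_size):
  # Single affine layer: [N, input_size] -> [N, output_size].
  input_size = shape(inputs, 1)
  w = tf.get_variable("w", [input_size, output_size])
  b = tf.get_variable("b", [output_size])
  return tf.nn.xw_plus_b(inputs, w, b)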
Example #5
    def get_predictions_and_loss(self, word_emb, char_index, text_len,
                                 speaker_ids, genre, is_training, gold_starts,
                                 gold_ends, cluster_ids):

        self.gold_starts = gold_starts
        self.gold_ends = gold_ends
        self.cluster_ids = cluster_ids

        self.dropout = 1 - (tf.to_float(is_training) *
                            self.config["dropout_rate"])
        self.lexical_dropout = 1 - (tf.to_float(is_training) *
                                    self.config["lexical_dropout_rate"])

        num_sentences = tf.shape(word_emb)[0]
        max_sentence_length = tf.shape(word_emb)[1]

        text_emb_list = [word_emb]

        if self.config["char_embedding_size"] > 0:
            char_emb = tf.gather(
                tf.get_variable(
                    "char_embeddings",
                    [len(self.char_dict), self.config["char_embedding_size"]]),
                char_index
            )  # [num_sentences, max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [
                num_sentences * max_sentence_length,
                util.shape(char_emb, 2),
                util.shape(char_emb, 3)
            ])  # [num_sentences * max_sentence_length, max_word_length, emb]
            flattened_aggregated_char_emb = util.cnn(
                flattened_char_emb, self.config["filter_widths"],
                self.config["filter_size"]
            )  # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
                num_sentences, max_sentence_length,
                util.shape(flattened_aggregated_char_emb, 1)
            ])  # [num_sentences, max_sentence_length, emb]
            text_emb_list.append(aggregated_char_emb)

        text_emb = tf.concat(text_emb_list, 2)
        text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

        text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
        text_len_mask = tf.reshape(text_len_mask,
                                   [num_sentences * max_sentence_length])

        text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
        text_outputs = tf.nn.dropout(text_outputs, self.dropout)

        genre_emb = tf.gather(
            tf.get_variable("genre_embeddings",
                            [len(self.genres), self.config["feature_size"]]),
            genre)  # [emb]

        sentence_indices = tf.tile(
            tf.expand_dims(tf.range(num_sentences), 1),
            [1, max_sentence_length])  # [num_sentences, max_sentence_length]
        flattened_sentence_indices = self.flatten_emb_by_sentence(
            sentence_indices, text_len_mask)  # [num_words]
        flattened_text_emb = self.flatten_emb_by_sentence(
            text_emb, text_len_mask)  # [num_words, emb]
        self.flattened_sentence_indices = flattened_sentence_indices

        candidate_starts, candidate_ends = coref_ops.spans(
            sentence_indices=flattened_sentence_indices,
            max_width=self.max_mention_width)
        candidate_starts.set_shape([None])
        candidate_ends.set_shape([None])

        candidate_mention_emb = self.get_mention_emb(
            flattened_text_emb, text_outputs, candidate_starts,
            candidate_ends)  # [num_candidates, emb]
        candidate_mention_scores = self.get_mention_scores(
            candidate_mention_emb)  # [num_candidates, 1]
        candidate_mention_scores = tf.squeeze(candidate_mention_scores,
                                              1)  # [num_candidates]

        k = tf.to_int32(
            tf.floor(
                tf.to_float(tf.shape(text_outputs)[0]) *
                self.config["mention_ratio"]))
        k = tf.minimum(k, self.config["max_antecedents"])
        predicted_mention_indices = coref_ops.extract_mentions(
            candidate_mention_scores, candidate_starts, candidate_ends,
            k)  # [k]
        predicted_mention_indices.set_shape([None])

        mention_starts = tf.gather(candidate_starts,
                                   predicted_mention_indices)  # [num_mentions]
        mention_ends = tf.gather(candidate_ends,
                                 predicted_mention_indices)  # [num_mentions]
        mention_emb = tf.gather(
            candidate_mention_emb,
            predicted_mention_indices)  # [num_mentions, emb]
        mention_scores = tf.gather(candidate_mention_scores,
                                   predicted_mention_indices)  # [num_mentions]

        mention_start_emb = tf.gather(text_outputs,
                                      mention_starts)  # [num_mentions, emb]
        mention_end_emb = tf.gather(text_outputs,
                                    mention_ends)  # [num_mentions, emb]
        mention_speaker_ids = tf.gather(speaker_ids,
                                        mention_starts)  # [num_mentions]

        max_antecedents = self.config["max_antecedents"]
        antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(
            mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids,
            max_antecedents
        )  # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions])
        antecedents.set_shape([None, None])
        antecedent_labels.set_shape([None, None])
        antecedents_len.set_shape([None])

        A = tf.eye(k)

        with tf.variable_scope("layer_1"):
            antecedent_scores_1 = self.get_antecedent_scores(
                mention_emb, mention_scores, antecedents, antecedents_len,
                mention_starts, mention_ends, mention_speaker_ids, genre_emb,
                k)  # [num_mentions, max_ant + 1]
            DAD_1 = self.get_DAD(A, antecedent_scores_1, k)
            mention_emb_1 = tf.nn.relu(
                util.projection(tf.matmul(DAD_1, mention_emb), 200))

        with tf.variable_scope("layer_2"):
            antecedent_scores_2 = self.get_antecedent_scores(
                mention_emb_1, mention_scores, antecedents, antecedents_len,
                mention_starts, mention_ends, mention_speaker_ids, genre_emb,
                k)  # [num_mentions, max_ant + 1]
            DAD_2 = self.get_DAD(A, antecedent_scores_2, k)
            mention_emb_2 = tf.nn.relu(
                util.projection(tf.matmul(DAD_2, mention_emb), 200))

        with tf.variable_scope("layer_3"):
            antecedent_scores = self.get_antecedent_scores(
                mention_emb_2, mention_scores, antecedents, antecedents_len,
                mention_starts, mention_ends, mention_speaker_ids, genre_emb,
                k)  # [num_mentions, max_ant + 1]

        antecedent_scores += antecedent_scores_1
        loss = self.softmax_loss(antecedent_scores,
                                 antecedent_labels)  # [num_mentions]
        # loss = self.exp_loss_margin(antecedent_scores, antecedent_labels) # [num_mentions]
        loss = tf.reduce_sum(loss)  # []

        return [
            candidate_starts, candidate_ends, candidate_mention_scores,
            mention_starts, mention_ends, antecedents, antecedent_scores
        ], loss
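
Example #5's stacked layers follow a GCN-style propagation rule, mention_emb_i = relu(projection(DAD_i @ mention_emb)), where get_DAD (not shown) presumably builds a symmetrically normalized adjacency D^-1/2 * A_hat * D^-1/2 starting from the identity A = tf.eye(k) plus weights derived from the antecedent scores. The normalization step alone can be sketched for a dense [k, k] adjacency (how A_hat is assembled from antecedent_scores is not recoverable from this excerpt):

def normalized_adjacency(A_hat):
  # Symmetric GCN normalization: D^-1/2 * A_hat * D^-1/2 for a dense [k, k] matrix.
  degrees = tf.reduce_sum(A_hat, axis=1) # [k]
  d_inv_sqrt = tf.rsqrt(tf.maximum(degrees, 1e-10)) # [k]
  return A_hat * tf.expand_dims(d_inv_sqrt, 1) * tf.expand_dims(d_inv_sqrt, 0) # [k, k]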