Example no. 1
  def get_predictions_and_loss(self, input_ids, input_mask, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids, sentence_map):
    model = modeling.BertModel(
      config=self.bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      use_one_hot_embeddings=False,
      scope='bert')
    all_encoder_layers = model.get_all_encoder_layers()
    mention_doc = model.get_sequence_output() # [batch_size, seq_length, hidden_size]

    self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)

    num_sentences = tf.shape(mention_doc)[0]
    max_sentence_length = tf.shape(mention_doc)[1]
    mention_doc = self.flatten_emb_by_sentence(mention_doc, input_mask) # [num_words, hidden_size]
    num_words = util.shape(mention_doc, 0)
    antecedent_doc = mention_doc


    flattened_sentence_indices = sentence_map
    candidate_starts = tf.tile(tf.expand_dims(tf.range(num_words), 1), [1, self.max_span_width]) # [num_words, max_span_width]
    candidate_ends = candidate_starts + tf.expand_dims(tf.range(self.max_span_width), 0) # [num_words, max_span_width]
    candidate_start_sentence_indices = tf.gather(flattened_sentence_indices, candidate_starts) # [num_words, max_span_width]
    candidate_end_sentence_indices = tf.gather(flattened_sentence_indices, tf.minimum(candidate_ends, num_words - 1)) # [num_words, max_span_width]
    candidate_mask = tf.logical_and(candidate_ends < num_words, tf.equal(candidate_start_sentence_indices, candidate_end_sentence_indices)) # [num_words, max_span_width]
    flattened_candidate_mask = tf.reshape(candidate_mask, [-1]) # [num_words * max_span_width]
    candidate_starts = tf.boolean_mask(tf.reshape(candidate_starts, [-1]), flattened_candidate_mask) # [num_candidates]
    candidate_ends = tf.boolean_mask(tf.reshape(candidate_ends, [-1]), flattened_candidate_mask) # [num_candidates]
    candidate_sentence_indices = tf.boolean_mask(tf.reshape(candidate_start_sentence_indices, [-1]), flattened_candidate_mask) # [num_candidates]

    candidate_cluster_ids = self.get_candidate_labels(candidate_starts, candidate_ends, gold_starts, gold_ends, cluster_ids) # [num_candidates]

    candidate_span_emb = self.get_span_emb(mention_doc, mention_doc, candidate_starts, candidate_ends) # [num_candidates, emb]
    candidate_mention_scores = self.get_mention_scores(candidate_span_emb, candidate_starts, candidate_ends) # [num_candidates, 1]
    candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [num_candidates]

    # beam size
    k = tf.minimum(3900, tf.to_int32(tf.floor(tf.to_float(num_words) * self.config["top_span_ratio"])))
    c = tf.minimum(self.config["max_top_antecedents"], k)
    # pull from beam
    top_span_indices = coref_ops.extract_spans(tf.expand_dims(candidate_mention_scores, 0),
                                               tf.expand_dims(candidate_starts, 0),
                                               tf.expand_dims(candidate_ends, 0),
                                               tf.expand_dims(k, 0),
                                               num_words,
                                               True) # [1, k]
    top_span_indices.set_shape([1, None])
    top_span_indices = tf.squeeze(top_span_indices, 0) # [k]

    top_span_starts = tf.gather(candidate_starts, top_span_indices) # [k]
    top_span_ends = tf.gather(candidate_ends, top_span_indices) # [k]
    top_span_emb = tf.gather(candidate_span_emb, top_span_indices) # [k, emb]
    top_span_cluster_ids = tf.gather(candidate_cluster_ids, top_span_indices) # [k]
    top_span_mention_scores = tf.gather(candidate_mention_scores, top_span_indices) # [k]
    genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]], initializer=tf.truncated_normal_initializer(stddev=0.02)),
                          genre) # [emb]
    if self.config['use_metadata']:
      speaker_ids = self.flatten_emb_by_sentence(speaker_ids, input_mask)
      top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts) # [k]
    else:
      top_span_speaker_ids = None


    dummy_scores = tf.zeros([k, 1]) # [k, 1]
    top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(top_span_emb, top_span_mention_scores, c)
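    # Map every flattened word back to the BERT segment it came from; the
    # clipped mention/antecedent segment distance then feeds the slow
    # antecedent scorer below as a feature.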
    num_segs, seg_len = util.shape(input_ids, 0), util.shape(input_ids, 1)
    word_segments = tf.tile(tf.expand_dims(tf.range(0, num_segs), 1), [1, seg_len])
    flat_word_segments = tf.boolean_mask(tf.reshape(word_segments, [-1]), tf.reshape(input_mask, [-1]))
    mention_segments = tf.expand_dims(tf.gather(flat_word_segments, top_span_starts), 1) # [k, 1]
    antecedent_segments = tf.gather(flat_word_segments, tf.gather(top_span_starts, top_antecedents)) #[k, c]
    segment_distance = tf.clip_by_value(mention_segments - antecedent_segments, 0, self.config['max_training_sentences'] - 1) if self.config['use_segment_distance'] else None #[k, c]
    if self.config['fine_grained']:
      for i in range(self.config["coref_depth"]):
        with tf.variable_scope("coref_layer", reuse=(i > 0)):
          top_antecedent_emb = tf.gather(top_span_emb, top_antecedents) # [k, c, emb]
          top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(top_span_emb, top_antecedents, top_antecedent_emb, top_antecedent_offsets, top_span_speaker_ids, genre_emb, segment_distance) # [k, c]
          top_antecedent_weights = tf.nn.softmax(tf.concat([dummy_scores, top_antecedent_scores], 1)) # [k, c + 1]
          top_antecedent_emb = tf.concat([tf.expand_dims(top_span_emb, 1), top_antecedent_emb], 1) # [k, c + 1, emb]
          attended_span_emb = tf.reduce_sum(tf.expand_dims(top_antecedent_weights, 2) * top_antecedent_emb, 1) # [k, emb]
          with tf.variable_scope("f"):
            f = tf.sigmoid(util.projection(tf.concat([top_span_emb, attended_span_emb], 1), util.shape(top_span_emb, -1))) # [k, emb]
            top_span_emb = f * attended_span_emb + (1 - f) * top_span_emb # [k, emb]
    else:
      top_antecedent_scores = top_fast_antecedent_scores

    top_antecedent_scores = tf.concat([dummy_scores, top_antecedent_scores], 1) # [k, c + 1]

    top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids, top_antecedents) # [k, c]
    top_antecedent_cluster_ids += tf.to_int32(tf.log(tf.to_float(top_antecedents_mask))) # [k, c]
    same_cluster_indicator = tf.equal(top_antecedent_cluster_ids, tf.expand_dims(top_span_cluster_ids, 1)) # [k, c]
    non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0, 1) # [k, 1]
    pairwise_labels = tf.logical_and(same_cluster_indicator, non_dummy_indicator) # [k, c]
    dummy_labels = tf.logical_not(tf.reduce_any(pairwise_labels, 1, keepdims=True)) # [k, 1]
    top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels], 1) # [k, c + 1]
    loss = self.softmax_loss(top_antecedent_scores, top_antecedent_labels) # [k]
    loss = tf.reduce_sum(loss) # []

    return [candidate_starts, candidate_ends, candidate_mention_scores, top_span_starts, top_span_ends, top_antecedents, top_antecedent_scores], loss
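
Each of these variants enumerates candidate spans the same way: tile every word index into max_span_width start positions, add the width offsets to get end positions, and keep only spans that stay inside the document and inside a single sentence. A minimal NumPy sketch of that logic, with toy values standing in for the real tensors:

import numpy as np

# Toy document: 5 words in 2 sentences, spans up to 3 words wide.
sentence_map = np.array([0, 0, 0, 1, 1])
num_words, max_span_width = 5, 3

starts = np.tile(np.arange(num_words)[:, None], (1, max_span_width))  # [num_words, max_span_width]
ends = starts + np.arange(max_span_width)[None, :]                    # [num_words, max_span_width]
start_sent = sentence_map[starts]
end_sent = sentence_map[np.minimum(ends, num_words - 1)]
mask = (ends < num_words) & (start_sent == end_sent)  # in-document, single-sentence spans only

candidate_starts = starts.reshape(-1)[mask.reshape(-1)]  # [num_candidates]
candidate_ends = ends.reshape(-1)[mask.reshape(-1)]      # [num_candidates]
print([(int(s), int(e)) for s, e in zip(candidate_starts, candidate_ends)])
# [(0, 0), (0, 1), (0, 2), (1, 1), (1, 2), (2, 2), (3, 3), (3, 4), (4, 4)]
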
Example no. 2
    def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb,
                                 lm_emb, char_index, text_len, speaker_ids,
                                 genre, is_training, gold_starts, gold_ends,
                                 cluster_ids, inject_starts, inject_ends):

        self.dropout = self.get_dropout(self.config["dropout_rate"],
                                        is_training)
        self.lexical_dropout = self.get_dropout(
            self.config["lexical_dropout_rate"], is_training)
        self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"],
                                             is_training)

        num_sentences = tf.shape(context_word_emb)[0]
        max_sentence_length = tf.shape(context_word_emb)[1]

        context_emb_list = [context_word_emb]
        head_emb_list = [head_word_emb]

        if self.config["char_embedding_size"] > 0:
            char_emb = tf.gather(
                tf.get_variable(
                    "char_embeddings",
                    [len(self.char_dict), self.config["char_embedding_size"]]),
                char_index
            )  # [num_sentences, max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [
                num_sentences * max_sentence_length,
                util.shape(char_emb, 2),
                util.shape(char_emb, 3)
            ])  # [num_sentences * max_sentence_length, max_word_length, emb]
            flattened_aggregated_char_emb = util.cnn(
                flattened_char_emb, self.config["filter_widths"],
                self.config["filter_size"]
            )  # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
                num_sentences, max_sentence_length,
                util.shape(flattened_aggregated_char_emb, 1)
            ])  # [num_sentences, max_sentence_length, emb]
            context_emb_list.append(aggregated_char_emb)
            head_emb_list.append(aggregated_char_emb)

        if not self.lm_file:
            elmo_module = hub.Module("https://tfhub.dev/google/elmo/2")
            lm_embeddings = elmo_module(inputs={
                "tokens": tokens,
                "sequence_len": text_len
            },
                                        signature="tokens",
                                        as_dict=True)
            word_emb = lm_embeddings[
                "word_emb"]  # [num_sentences, max_sentence_length, 512]
            lm_emb = tf.stack([
                tf.concat([word_emb, word_emb], -1),
                lm_embeddings["lstm_outputs1"], lm_embeddings["lstm_outputs2"]
            ], -1)  # [num_sentences, max_sentence_length, 1024, 3]
        lm_emb_size = util.shape(lm_emb, 2)
        lm_num_layers = util.shape(lm_emb, 3)
        with tf.variable_scope("lm_aggregation"):
            self.lm_weights = tf.nn.softmax(
                tf.get_variable("lm_scores", [lm_num_layers],
                                initializer=tf.constant_initializer(0.0)))
            self.lm_scaling = tf.get_variable(
                "lm_scaling", [], initializer=tf.constant_initializer(1.0))
        flattened_lm_emb = tf.reshape(
            lm_emb,
            [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
        flattened_aggregated_lm_emb = tf.matmul(
            flattened_lm_emb, tf.expand_dims(
                self.lm_weights,
                1))  # [num_sentences * max_sentence_length * emb, 1]
        aggregated_lm_emb = tf.reshape(
            flattened_aggregated_lm_emb,
            [num_sentences, max_sentence_length, lm_emb_size])
        aggregated_lm_emb *= self.lm_scaling
        context_emb_list.append(aggregated_lm_emb)

        context_emb = tf.concat(context_emb_list,
                                2)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.concat(head_emb_list,
                             2)  # [num_sentences, max_sentence_length, emb]
        context_emb = tf.nn.dropout(
            context_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.nn.dropout(
            head_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

        text_len_mask = tf.sequence_mask(
            text_len,
            maxlen=max_sentence_length)  # [num_sentences, max_sentence_length]

        context_outputs = self.lstm_contextualize(
            context_emb, text_len, text_len_mask)  # [num_words, emb]
        num_words = util.shape(context_outputs, 0)

        genre_emb = tf.gather(
            tf.get_variable("genre_embeddings",
                            [len(self.genres), self.config["feature_size"]]),
            genre)  # [emb]

        sentence_indices = tf.tile(
            tf.expand_dims(tf.range(num_sentences), 1),
            [1, max_sentence_length])  # [num_sentences, max_sentence_length]
        flattened_sentence_indices = self.flatten_emb_by_sentence(
            sentence_indices, text_len_mask)  # [num_words]
        flattened_head_emb = self.flatten_emb_by_sentence(
            head_emb, text_len_mask)  # [num_words]

        if self._use_injected_mentions(is_training):
            candidate_starts = tf.transpose(tf.expand_dims(inject_starts, 1))
            candidate_ends = tf.transpose(tf.expand_dims(inject_ends, 1))
        else:
            candidate_starts = tf.tile(
                tf.expand_dims(tf.range(num_words), 1),
                [1, self.max_span_width])  # [num_words, max_span_width]
            candidate_ends = candidate_starts + tf.expand_dims(
                tf.range(self.max_span_width),
                0)  # [num_words, max_span_width]

        candidate_start_sentence_indices = tf.gather(
            flattened_sentence_indices,
            candidate_starts)  # [num_words, max_span_width]
        candidate_end_sentence_indices = tf.gather(
            flattened_sentence_indices,
            tf.minimum(candidate_ends,
                       num_words - 1))  # [num_words, max_span_width]
        candidate_mask = tf.logical_and(
            candidate_ends < num_words,
            tf.equal(
                candidate_start_sentence_indices,
                candidate_end_sentence_indices))  # [num_words, max_span_width]
        flattened_candidate_mask = tf.reshape(
            candidate_mask, [-1])  # [num_words * max_span_width]
        candidate_starts = tf.boolean_mask(
            tf.reshape(candidate_starts,
                       [-1]), flattened_candidate_mask)  # [num_candidates]
        candidate_ends = tf.boolean_mask(
            tf.reshape(candidate_ends,
                       [-1]), flattened_candidate_mask)  # [num_candidates]
        candidate_sentence_indices = tf.boolean_mask(
            tf.reshape(candidate_start_sentence_indices, [-1]),
            flattened_candidate_mask)  # [num_candidates]

        candidate_cluster_ids = self.get_candidate_labels(
            candidate_starts, candidate_ends, gold_starts, gold_ends,
            cluster_ids)  # [num_candidates]

        candidate_span_emb = self.get_span_emb(
            flattened_head_emb, context_outputs, candidate_starts,
            candidate_ends)  # [num_candidates, emb]
        candidate_mention_scores = self.get_mention_scores(
            candidate_span_emb)  # [num_candidates, 1]
        candidate_mention_scores = tf.squeeze(candidate_mention_scores,
                                              1)  # [num_candidates]

        if self._use_injected_mentions(is_training):
            k = tf.shape(candidate_starts)[0]
            top_span_indices = tf.expand_dims(tf.range(k), 0)
        else:
            k = tf.to_int32(
                tf.floor(
                    tf.to_float(tf.shape(context_outputs)[0]) *
                    self.config["top_span_ratio"]))
            top_span_indices = coref_ops.extract_spans(
                tf.expand_dims(candidate_mention_scores, 0),
                tf.expand_dims(candidate_starts, 0),
                tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0),
                util.shape(context_outputs, 0), True)  # [1, k]

        top_span_indices.set_shape([1, None])
        top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]

        top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
        top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
        top_span_emb = tf.gather(candidate_span_emb,
                                 top_span_indices)  # [k, emb]
        top_span_cluster_ids = tf.gather(candidate_cluster_ids,
                                         top_span_indices)  # [k]
        top_span_mention_scores = tf.gather(candidate_mention_scores,
                                            top_span_indices)  # [k]
        top_span_sentence_indices = tf.gather(candidate_sentence_indices,
                                              top_span_indices)  # [k]
        top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts)  # [k]

        c = tf.minimum(self.config["max_top_antecedents"], k)

        if self.config["coarse_to_fine"]:
            top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(
                top_span_emb, top_span_mention_scores, c)
        else:
            top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(
                top_span_emb, top_span_mention_scores, c)

        dummy_scores = tf.zeros([k, 1])  # [k, 1]
        for i in range(self.config["coref_depth"]):
            with tf.variable_scope("coref_layer", reuse=(i > 0)):
                top_antecedent_emb = tf.gather(top_span_emb,
                                               top_antecedents)  # [k, c, emb]
                top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(
                    top_span_emb, top_antecedents, top_antecedent_emb,
                    top_antecedent_offsets, top_span_speaker_ids,
                    genre_emb)  # [k, c]
                top_antecedent_weights = tf.nn.softmax(
                    tf.concat([dummy_scores, top_antecedent_scores],
                              1))  # [k, c + 1]
                top_antecedent_emb = tf.concat(
                    [tf.expand_dims(top_span_emb, 1), top_antecedent_emb],
                    1)  # [k, c + 1, emb]
                attended_span_emb = tf.reduce_sum(
                    tf.expand_dims(top_antecedent_weights, 2) *
                    top_antecedent_emb, 1)  # [k, emb]
                with tf.variable_scope("f"):
                    f = tf.sigmoid(
                        util.projection(
                            tf.concat([top_span_emb, attended_span_emb], 1),
                            util.shape(top_span_emb, -1)))  # [k, emb]
                    top_span_emb = f * attended_span_emb + (
                        1 - f) * top_span_emb  # [k, emb]

        top_antecedent_scores = tf.concat(
            [dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

        top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids,
                                               top_antecedents)  # [k, c]
        top_antecedent_cluster_ids += tf.to_int32(
            tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
        same_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                          tf.expand_dims(
                                              top_span_cluster_ids,
                                              1))  # [k, c]
        non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0,
                                             1)  # [k, 1]
        pairwise_labels = tf.logical_and(same_cluster_indicator,
                                         non_dummy_indicator)  # [k, c]
        dummy_labels = tf.logical_not(
            tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
        top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels],
                                          1)  # [k, c + 1]
        loss = self.softmax_loss(top_antecedent_scores,
                                 top_antecedent_labels)  # [k]
        loss = tf.reduce_sum(loss)  # []

        return [
            candidate_starts, candidate_ends, candidate_mention_scores,
            top_span_starts, top_span_ends, top_antecedents,
            top_antecedent_scores
        ], loss
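
The lm_aggregation scope learns one scalar score per ELMo layer (mixed through a softmax) plus a single global scale, i.e. the standard ELMo scalar-mixing scheme. A small NumPy sketch of the mixing arithmetic, with random stand-in values:

import numpy as np

# Stand-in shapes: 2 sentences, 4 tokens, lm_size 8, 3 LM layers.
num_sentences, max_sentence_length, lm_emb_size, lm_num_layers = 2, 4, 8, 3
lm_emb = np.random.randn(num_sentences, max_sentence_length, lm_emb_size, lm_num_layers)

lm_scores = np.zeros(lm_num_layers)                       # learned "lm_scores" variable
lm_weights = np.exp(lm_scores) / np.exp(lm_scores).sum()  # softmax over layers
lm_scaling = 1.0                                          # learned "lm_scaling" scalar

flat = lm_emb.reshape(-1, lm_num_layers)                  # [n * s * e, layers]
aggregated = (flat @ lm_weights[:, None]).reshape(num_sentences, max_sentence_length, lm_emb_size)
aggregated *= lm_scaling                                  # [num_sentences, max_sentence_length, lm_emb_size]
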
Example no. 3
    def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb, lm_emb, char_index, \
        text_len, is_training, entity_starts, entity_ends, entity_labels):
        
        self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
        self.lexical_dropout = self.get_dropout(self.config["lexical_dropout_rate"], is_training)
        self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training)

        num_sentences = tf.shape(input=context_word_emb)[0]
        max_sentence_length = tf.shape(input=context_word_emb)[1]

        # embeddings
        # glove embedding + char embedding + elmo embedding
        context_emb_list = [context_word_emb]
        head_emb_list = [head_word_emb]

        # character embedding
        if self.config["char_embedding_size"] > 0:
            char_emb = tf.gather(tf.compat.v1.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), \
                char_index) # [num_sentences, max_sentence_length, max_word_length, char_embedding_size]
            flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util_tf2.shape(char_emb, 2), \
                util_tf2.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, char_embedding_size]
            flattened_aggregated_char_emb = self.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) 
            # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, \
                util_tf2.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
            
            context_emb_list.append(aggregated_char_emb)
            head_emb_list.append(aggregated_char_emb)
        
        # ELMo embedding
        # lm_emb: [num_sentence, max_sentence_length, lm_size, lm_layers]
        lm_emb_size = util_tf2.shape(lm_emb, 2)
        lm_num_layers = util_tf2.shape(lm_emb, 3)
        with tf.compat.v1.variable_scope("lm_aggregation"):
            self.lm_weights = tf.nn.softmax(tf.compat.v1.get_variable("lm_scores", [lm_num_layers], initializer=tf.compat.v1.constant_initializer(0.0)))
            self.lm_scaling = tf.compat.v1.get_variable("lm_scaling", [], initializer=tf.compat.v1.constant_initializer(1.0))
        flattened_lm_emb = tf.reshape(lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
        # [num_sentences * max_sentence_length * lm_emb_size, lm_emb_layers]
        flattened_aggregated_lm_emb = tf.matmul(flattened_lm_emb, tf.expand_dims(self.lm_weights, 1)) 
        # [num_sentences * max_sentence_length * emb, 1]
        aggregated_lm_emb = tf.reshape(flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size])
        aggregated_lm_emb *= self.lm_scaling
        context_emb_list.append(aggregated_lm_emb)

        # concatenate embeddings
        context_emb = tf.concat(context_emb_list, 2) # [num_sentences, max_sentence_length, emb]
        head_emb = tf.concat(head_emb_list, 2) # [num_sentences, max_sentence_length, emb]
        # dropout
        context_emb = tf.nn.dropout(context_emb, 1 - (self.lexical_dropout)) # [num_sentences, max_sentence_length, emb]
        head_emb = tf.nn.dropout(head_emb, 1 - (self.lexical_dropout)) # [num_sentences, max_sentence_length, emb]

        # embedding part done

        # sequence_mask:
        # given lengths t[d_1, d_2, ..., d_n],
        # mask[i_1, i_2, ..., i_n, j] = (j < t[i_1, i_2, ..., i_n])
        text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length) # [num_sentences, max_sentence_length]

        # bi-directional lstm
        # every word gets an embedding
        context_outputs = self.lstm_contextualize(context_emb, text_len, text_len_mask) # [num_words, emb]
        num_words = util_tf2.shape(context_outputs, 0)

        # handle spans
        sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
        flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
        flattened_head_emb = self.flatten_emb_by_sentence(head_emb, text_len_mask) # [num_words]

        candidate_starts = tf.tile(tf.expand_dims(tf.range(num_words), 1), [1, self.max_span_width]) 
        # [num_words, max_span_width]
        candidate_ends = candidate_starts + tf.expand_dims(tf.range(self.max_span_width), 0) 
        # [num_words, max_span_width]
        candidate_start_sentence_indices = tf.gather(flattened_sentence_indices, candidate_starts) 
        # [num_words, max_span_width]
        candidate_end_sentence_indices = tf.gather(flattened_sentence_indices, tf.minimum(candidate_ends, num_words - 1)) 
        # [num_words, max_span_width]
        
        # candidate spans must come from the same sentence
        candidate_mask = tf.logical_and(candidate_ends < num_words, tf.equal(candidate_start_sentence_indices, candidate_end_sentence_indices)) 
        # [num_words, max_span_width]
        flattened_candidate_mask = tf.reshape(candidate_mask, [-1]) # [num_words * max_span_width]
        candidate_starts = tf.boolean_mask(tensor=tf.reshape(candidate_starts, [-1]), mask=flattened_candidate_mask) # [num_candidates]
        candidate_ends = tf.boolean_mask(tensor=tf.reshape(candidate_ends, [-1]), mask=flattened_candidate_mask) # [num_candidates]
        candidate_sentence_indices = tf.boolean_mask(tensor=tf.reshape(candidate_start_sentence_indices, [-1]), mask=flattened_candidate_mask) 
        # [num_candidates]

        # get labels
        candidate_entity_labels = self.get_entity_labels(candidate_starts, candidate_ends, entity_starts, entity_ends, entity_labels)
        # [num_candidates]

        candidate_span_emb = self.get_span_emb(flattened_head_emb, context_outputs, candidate_starts, candidate_ends) # [num_candidates, emb]
        candidate_mention_scores = self.get_mention_scores(candidate_span_emb) # [num_candidates, 1]
        candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [num_candidates]

        # filter out part of spans
        k = tf.cast(tf.floor(tf.cast(tf.shape(input=context_outputs)[0], dtype=tf.float32) * self.config["top_span_ratio"]), dtype=tf.int32)
        top_span_indices = coref_ops.extract_spans(
            tf.expand_dims(candidate_mention_scores, 0),
            tf.expand_dims(candidate_starts, 0),
            tf.expand_dims(candidate_ends, 0),
            tf.expand_dims(k, 0),
            util_tf2.shape(context_outputs, 0),
            True) # [1, k]
        
        top_span_indices.set_shape([1, None])
        top_span_indices = tf.squeeze(top_span_indices, 0) # [k]

        top_span_starts = tf.gather(candidate_starts, top_span_indices) # [k]
        top_span_ends = tf.gather(candidate_ends, top_span_indices) # [k]
        top_span_emb = tf.gather(candidate_span_emb, top_span_indices) # [k, emb]
        top_span_mention_scores = tf.gather(candidate_mention_scores, top_span_indices) # [k]
        top_span_sentence_indices = tf.gather(candidate_sentence_indices, top_span_indices) # [k]
        top_span_entity_labels = tf.gather(candidate_entity_labels, top_span_indices) # [k]

        # entity scores
        self.entity_scores = self.get_entity_scores(top_span_emb)
        self.entity_labels_mask = self.get_entity_label_mask(top_span_entity_labels)
        # entity loss function
        entity_loss = self.get_entity_loss(self.entity_scores, self.entity_labels_mask) # []

        return [self.entity_scores, self.entity_labels_mask], entity_loss
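
The comment block in Example no. 3 describes what tf.sequence_mask produces; the same mask also drives flatten_emb_by_sentence in every variant. A NumPy equivalent of the mask-then-flatten pattern under toy shapes:

import numpy as np

text_len, max_sentence_length = np.array([3, 1]), 4
# mask[i, j] = (j < text_len[i])
text_len_mask = np.arange(max_sentence_length)[None, :] < text_len[:, None]
# [[ True,  True,  True, False],
#  [ True, False, False, False]]

emb = np.arange(2 * 4 * 2).reshape(2, 4, 2)  # [num_sentences, max_sentence_length, emb]
flattened = emb[text_len_mask]               # [num_words, emb], num_words = 3 + 1
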
Example no. 4
    def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb, lm_emb, text_len,\
         is_training, gold_starts, gold_ends, cluster_ids,swag_context_emb, swag_text_len, swag_label):
        """
        This is the major part of the architecutre, and is the placehlder. 
        We have two branches - one for SWAG, and another for the main Lee code.
        """
        self.same(is_training)
        num_sentences = tf.shape(context_word_emb)[0]
        max_sentence_length = tf.shape(context_word_emb)[1]
        print("normal", swag_context_emb)
        context_emb_list = [context_word_emb]
        head_emb_list = [head_word_emb]
        lm_emb_size = util.shape(lm_emb, 2)
        lm_num_layers = util.shape(lm_emb, 3)
        with tf.variable_scope("lm_aggregation"):
            self.lm_weights = tf.nn.softmax(
                tf.get_variable("lm_scores", [lm_num_layers],
                                initializer=tf.constant_initializer(0.0)))
            self.lm_scaling = tf.get_variable(
                "lm_scaling", [], initializer=tf.constant_initializer(1.0))
        flattened_lm_emb = tf.reshape(
            lm_emb,
            [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
        flattened_aggregated_lm_emb = tf.matmul(
            flattened_lm_emb, tf.expand_dims(
                self.lm_weights,
                1))  # [num_sentences * max_sentence_length * emb, 1]
        aggregated_lm_emb = tf.reshape(
            flattened_aggregated_lm_emb,
            [num_sentences, max_sentence_length, lm_emb_size])
        aggregated_lm_emb *= self.lm_scaling
        context_emb_list.append(aggregated_lm_emb)

        context_emb = tf.concat(context_emb_list,
                                2)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.concat(head_emb_list,
                             2)  # [num_sentences, max_sentence_length, emb]
        context_emb = tf.nn.dropout(
            context_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.nn.dropout(
            head_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

        text_len_mask = tf.sequence_mask(
            text_len,
            maxlen=max_sentence_length)  # [num_sentences, max_sentence_length]

        context_outputs = self.lstm_contextualize(
            context_emb, text_len, text_len_mask)  # [num_words, emb]
        num_words = util.shape(context_outputs, 0)

        # genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]
        genre_emb = None
        sentence_indices = tf.tile(
            tf.expand_dims(tf.range(num_sentences), 1),
            [1, max_sentence_length])  # [num_sentences, max_sentence_length]
        flattened_sentence_indices = self.flatten_emb_by_sentence(
            sentence_indices, text_len_mask)  # [num_words]
        flattened_head_emb = self.flatten_emb_by_sentence(
            head_emb, text_len_mask)  # [num_words]

        candidate_starts = tf.tile(
            tf.expand_dims(tf.range(num_words), 1),
            [1, self.max_span_width])  # [num_words, max_span_width]
        candidate_ends = candidate_starts + tf.expand_dims(
            tf.range(self.max_span_width), 0)  # [num_words, max_span_width]
        candidate_start_sentence_indices = tf.gather(
            flattened_sentence_indices,
            candidate_starts)  # [num_words, max_span_width]
        candidate_end_sentence_indices = tf.gather(
            flattened_sentence_indices,
            tf.minimum(candidate_ends,
                       num_words - 1))  # [num_words, max_span_width]
        candidate_mask = tf.logical_and(
            candidate_ends < num_words,
            tf.equal(
                candidate_start_sentence_indices,
                candidate_end_sentence_indices))  # [num_words, max_span_width]
        flattened_candidate_mask = tf.reshape(
            candidate_mask, [-1])  # [num_words * max_span_width]
        candidate_starts = tf.boolean_mask(
            tf.reshape(candidate_starts,
                       [-1]), flattened_candidate_mask)  # [num_candidates]
        candidate_ends = tf.boolean_mask(
            tf.reshape(candidate_ends,
                       [-1]), flattened_candidate_mask)  # [num_candidates]
        candidate_sentence_indices = tf.boolean_mask(
            tf.reshape(candidate_start_sentence_indices, [-1]),
            flattened_candidate_mask)  # [num_candidates]

        candidate_cluster_ids = self.get_candidate_labels(
            candidate_starts, candidate_ends, gold_starts, gold_ends,
            cluster_ids)  # [num_candidates]

        candidate_span_emb = self.get_span_emb(
            flattened_head_emb, context_outputs, candidate_starts,
            candidate_ends)  # [num_candidates, emb]
        candidate_mention_scores = self.get_mention_scores(
            candidate_span_emb)  # [num_candidates, 1]
        candidate_mention_scores = tf.squeeze(candidate_mention_scores,
                                              1)  # [num_candidates]

        k = tf.to_int32(
            tf.floor(
                tf.to_float(tf.shape(context_outputs)[0]) *
                self.config["top_span_ratio"]))
        top_span_indices = coref_ops.extract_spans(
            tf.expand_dims(candidate_mention_scores, 0),
            tf.expand_dims(candidate_starts, 0),
            tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0),
            util.shape(context_outputs, 0), True)  # [1, k]
        top_span_indices.set_shape([1, None])
        top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]

        top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
        top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
        top_span_emb = tf.gather(candidate_span_emb,
                                 top_span_indices)  # [k, emb]
        top_span_cluster_ids = tf.gather(candidate_cluster_ids,
                                         top_span_indices)  # [k]
        top_span_mention_scores = tf.gather(candidate_mention_scores,
                                            top_span_indices)  # [k]
        top_span_sentence_indices = tf.gather(candidate_sentence_indices,
                                              top_span_indices)  # [k]
        # top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts) # [k]

        c = tf.minimum(self.config["max_top_antecedents"], k)

        if self.config["coarse_to_fine"]:
            top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(
                top_span_emb, top_span_mention_scores, c)
        else:
            top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(
                top_span_emb, top_span_mention_scores, c)

        dummy_scores = tf.zeros([k, 1])  # [k, 1]
        for i in range(self.config["coref_depth"]):
            with tf.variable_scope("coref_layer", reuse=(i > 0)):
                top_antecedent_emb = tf.gather(top_span_emb,
                                               top_antecedents)  # [k, c, emb]
                top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(
                    top_span_emb, top_antecedents, top_antecedent_emb,
                    top_antecedent_offsets)  # [k, c]
                top_antecedent_weights = tf.nn.softmax(
                    tf.concat([dummy_scores, top_antecedent_scores],
                              1))  # [k, c + 1]
                top_antecedent_emb = tf.concat(
                    [tf.expand_dims(top_span_emb, 1), top_antecedent_emb],
                    1)  # [k, c + 1, emb]
                attended_span_emb = tf.reduce_sum(
                    tf.expand_dims(top_antecedent_weights, 2) *
                    top_antecedent_emb, 1)  # [k, emb]
                with tf.variable_scope("f"):
                    f = tf.sigmoid(
                        util.projection(
                            tf.concat([top_span_emb, attended_span_emb], 1),
                            util.shape(top_span_emb, -1)))  # [k, emb]
                    top_span_emb = f * attended_span_emb + (
                        1 - f) * top_span_emb  # [k, emb]

        top_antecedent_scores = tf.concat(
            [dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

        top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids,
                                               top_antecedents)  # [k, c]
        top_antecedent_cluster_ids += tf.to_int32(
            tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
        same_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                          tf.expand_dims(
                                              top_span_cluster_ids,
                                              1))  # [k, c]
        non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0,
                                             1)  # [k, 1]
        pairwise_labels = tf.logical_and(same_cluster_indicator,
                                         non_dummy_indicator)  # [k, c]
        dummy_labels = tf.logical_not(
            tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
        top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels],
                                          1)  # [k, c + 1]
        loss = self.softmax_loss(top_antecedent_scores,
                                 top_antecedent_labels)  # [k]
        loss = tf.reduce_sum(loss)  # []

        return [
            candidate_starts, candidate_ends, candidate_mention_scores,
            top_span_starts, top_span_ends, top_antecedents,
            top_antecedent_scores
        ], loss
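
Inside coref_layer, the f gate is a highway-style interpolation between the current span embedding and the attention-weighted antecedent summary. A minimal NumPy sketch under toy shapes; the random matrix W stands in for the learned util.projection weights:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

k, emb = 3, 4
top_span_emb = np.random.randn(k, emb)       # current span representations
attended_span_emb = np.random.randn(k, emb)  # softmax-weighted antecedent summary
W = np.random.randn(2 * emb, emb)            # stand-in for util.projection

f = sigmoid(np.concatenate([top_span_emb, attended_span_emb], 1) @ W)  # [k, emb]
top_span_emb = f * attended_span_emb + (1 - f) * top_span_emb          # [k, emb]
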
Example no. 5
    def get_predictions_and_loss(self, input_ids, input_mask, text_len,
                                 speaker_ids, genre, is_training, gold_starts,
                                 gold_ends, cluster_ids, sentence_map):
        model = modeling.BertModel(config=self.bert_config,
                                   is_training=is_training,
                                   input_ids=input_ids,
                                   input_mask=input_mask,
                                   use_one_hot_embeddings=False,
                                   scope='bert')
        self.dropout = self.get_dropout(self.config["dropout_rate"],
                                        is_training)
        mention_doc = model.get_sequence_output(
        )  # (batch_size, seq_len, hidden)
        mention_doc = self.flatten_emb_by_sentence(
            mention_doc, input_mask)  # (b, s, e) -> (b*s, e): keep only the valid tokens' embeddings
        num_words = util.shape(mention_doc, 0)  # b*s

        # candidate_span: every position can be a span start, and each start has max_span_width possible ends, giving (num_words, max_span_width) candidates in total
        candidate_starts = tf.tile(tf.expand_dims(tf.range(num_words), 1),
                                   [1, self.max_span_width])
        candidate_ends = candidate_starts + tf.expand_dims(
            tf.range(self.max_span_width), 0)

        # [num_words, max_span_width]: gather the sentence_id at each index
        candidate_start_sentence_indices = tf.gather(sentence_map,
                                                     candidate_starts)
        candidate_end_sentence_indices = tf.gather(
            sentence_map, tf.minimum(candidate_ends, num_words - 1))
        # [num_words, max_span_width]: a valid span must not run past the document end, and its start/end must lie in the same sentence
        candidate_mask = tf.logical_and(
            candidate_ends < num_words,
            tf.equal(candidate_start_sentence_indices,
                     candidate_end_sentence_indices))
        flattened_candidate_mask = tf.reshape(
            candidate_mask, [-1])  # [num_words * max_span_width]
        # [num_candidates]: flatten the candidate grid, then mask out the invalid spans
        candidate_starts = tf.boolean_mask(tf.reshape(candidate_starts, [-1]),
                                           flattened_candidate_mask)
        candidate_ends = tf.boolean_mask(
            tf.reshape(candidate_ends,
                       [-1]), flattened_candidate_mask)  # [num_candidates]

        candidate_cluster_ids = self.get_candidate_labels(
            candidate_starts, candidate_ends, gold_starts, gold_ends,
            cluster_ids)  # [num_candidates]: the cluster_id of each candidate span
        # [num_candidates, emb] candidate span representations; [num_candidates] candidate span scores
        candidate_span_emb = self.get_span_emb(
            mention_doc, candidate_starts, candidate_ends)
        candidate_mention_scores = self.get_mention_scores(
            candidate_span_emb, candidate_starts, candidate_ends)

        # beam size: keep at most num_words * top_span_ratio spans
        k = tf.minimum(
            3900,
            tf.to_int32(
                tf.floor(
                    tf.to_float(num_words) * self.config["top_span_ratio"])))
        c = tf.minimum(self.config["max_top_antecedents"],
                       k)  # coarse pruning keeps e.g. 0.4 * 500 = 200 candidates; fine pruning then keeps 50
        # pull from beam: keep the top 0.4 * num_words spans by mention_score alone
        top_span_indices = coref_ops.extract_spans(
            tf.expand_dims(candidate_mention_scores, 0),
            tf.expand_dims(candidate_starts, 0),
            tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0), num_words,
            True)  # [1, k]
        top_span_indices = tf.reshape(
            top_span_indices, [-1])  # indices of the k candidates kept by the mention_score pre-filter

        # gather the top-k spans' info, then run the coarse span-pair filter to keep c antecedents per span
        top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
        top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
        top_span_cluster_ids = tf.gather(candidate_cluster_ids,
                                         top_span_indices)  # [k]
        top_span_emb = tf.gather(candidate_span_emb,
                                 top_span_indices)  # [k, emb]

        # def body(idx, tensors):
        #     fake_input = tf.stack([top_span_starts, top_span_ends])
        #     fake_model = modeling.BertModel(
        #         config=self.bert_config,
        #         is_training=is_training,
        #         input_ids=fake_input,
        #         use_one_hot_embeddings=False,
        #         scope='bert')
        #     fake_output = fake_model.get_sequence_output()
        #     return idx + 1, tf.Print(tensors, [tf.shape(fake_output)], 'fake_output')
        #
        # # do the loop:
        # initial_outs = model.get_sequence_output()
        # _, final_outs = tf.while_loop(lambda z, t: z < 100, body, loop_vars=(0, initial_outs))
        # top_span_emb = tf.Print(top_span_emb, [tf.shape(tf.stack(final_outs))], "final_outs")
        top_span_mention_scores = tf.gather(candidate_mention_scores,
                                            top_span_indices)  # [k]
        top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_pruning(
            top_span_emb, top_span_mention_scores, c)

        genre_emb = tf.gather(
            tf.get_variable(
                "genre_embeddings",
                [len(self.genres), self.config["feature_size"]],
                initializer=tf.truncated_normal_initializer(stddev=0.02)),
            genre)  # [emb]
        if self.config['use_metadata']:
            speaker_ids = self.flatten_emb_by_sentence(speaker_ids,
                                                       input_mask)  # flatten, then apply the mask
            top_span_speaker_ids = tf.gather(
                speaker_ids, top_span_starts)  # take the speaker_id at each span's start position
        else:
            top_span_speaker_ids = None

        dummy_scores = tf.zeros([k, 1])  # [k, 1]

        num_segs, seg_len = util.shape(input_ids, 0), util.shape(input_ids, 1)
        word_segments = tf.tile(tf.expand_dims(tf.range(0, num_segs), 1),
                                [1, seg_len])
        flat_word_segments = tf.boolean_mask(tf.reshape(word_segments, [-1]),
                                             tf.reshape(input_mask, [-1]))
        # mention_segments: [k, 1], which segment each top span's start token falls in
        mention_segments = tf.expand_dims(
            tf.gather(flat_word_segments, top_span_starts), 1)  # [k, 1]
        # antecedent_segments: [k, c], which segment each antecedent of each top span falls in
        antecedent_segments = tf.gather(flat_word_segments,
                                        tf.gather(top_span_starts,
                                                  top_antecedents))  # [k, c]
        segment_distance = None
        if self.config[
                'use_segment_distance']:  # [k, c]: how many segments separate each mention from its antecedent
            segment_distance = tf.clip_by_value(
                mention_segments - antecedent_segments, 0,
                self.config['max_training_sentences'] - 1)
        if self.config['fine_grained']:  # the so-called higher-order information refinement
            for i in range(self.config["coref_depth"]):
                with tf.variable_scope("coref_layer", reuse=(i > 0)):
                    top_antecedent_emb = tf.gather(
                        top_span_emb, top_antecedents)  # [k, c, emb]
                    top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(
                        top_span_emb, top_antecedents, top_antecedent_emb,
                        top_antecedent_offsets, top_span_speaker_ids,
                        genre_emb, segment_distance
                    )  # [k, c] final score: s(i, j) = sm(i) + sm(j) + sc(i, j) + sa(i, j)
                    # top_antecedent_weights: [k, c + 1], each mention's attention weights over its antecedents
                    # top_antecedent_emb: [k, c + 1, emb], the embedding of each antecedent of each mention
                    # attended_span_emb: [k, emb], the weighted sum of each mention's antecedent representations
                    top_antecedent_weights = tf.nn.softmax(
                        tf.concat([dummy_scores, top_antecedent_scores], 1))
                    top_antecedent_emb = tf.concat(
                        [tf.expand_dims(top_span_emb, 1), top_antecedent_emb],
                        1)
                    attended_span_emb = tf.reduce_sum(
                        tf.expand_dims(top_antecedent_weights, 2) *
                        top_antecedent_emb, 1)
                    with tf.variable_scope("f"):
                        f = tf.sigmoid(
                            util.projection(
                                tf.concat([top_span_emb, attended_span_emb],
                                          1), util.shape(top_span_emb,
                                                         -1)))  # [k, emb]
                        top_span_emb = f * attended_span_emb + (
                            1 - f) * top_span_emb  # [k, emb]
        else:
            top_antecedent_scores = top_fast_antecedent_scores

        top_antecedent_scores = tf.concat(
            [dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

        # top_antecedent_cluster_ids: [k, c], the cluster_id of each antecedent of each mention
        # same_cluster_indicator: [k, c], whether each mention and each predicted antecedent share a cluster
        # pairwise_labels: [k, c], pairwise labels: 0 for non-mentions and non-antecedents, 1 where the mention and antecedent corefer
        # top_antecedent_labels: [k, c + 1], the final labels; the dummy label is 1 iff a mention has no gold antecedent
        top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids,
                                               top_antecedents)  # [k, c]
        top_antecedent_cluster_ids += tf.to_int32(
            tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
        same_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                          tf.expand_dims(
                                              top_span_cluster_ids,
                                              1))  # [k, c]
        non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0,
                                             1)  # [k, 1]
        pairwise_labels = tf.logical_and(same_cluster_indicator,
                                         non_dummy_indicator)  # [k, c]
        dummy_labels = tf.logical_not(
            tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
        top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels],
                                          1)  # [k, c + 1]
        # top_antecedent_labels = tf.Print(top_antecedent_labels, [tf.shape(top_antecedent_labels)], "ant labels")
        loss = self.softmax_loss(top_antecedent_scores,
                                 top_antecedent_labels)  # [k]

        return [
            candidate_starts, candidate_ends, candidate_mention_scores,
            top_span_starts, top_span_ends, top_antecedents,
            top_antecedent_scores
        ], loss
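
The label block above (and its counterparts in the other examples) marks an antecedent as gold only when both spans carry the same non-dummy cluster id, and sends spans with no gold antecedent to the dummy column; softmax_loss is then the marginal log-likelihood over all gold antecedents. A NumPy sketch with toy cluster ids, where np.where stands in for the tf.log(tf.to_float(mask)) trick:

import numpy as np

top_span_cluster_ids = np.array([1, 1, 0])              # [k]; 0 = not a gold mention
top_antecedents = np.array([[0], [0], [1]])             # [k, c]
top_antecedents_mask = np.array([[False], [True], [True]])

ant_ids = top_span_cluster_ids[top_antecedents]         # [k, c]
ant_ids = np.where(top_antecedents_mask, ant_ids, -1)   # invalidate masked antecedents
same_cluster = ant_ids == top_span_cluster_ids[:, None]
non_dummy = (top_span_cluster_ids > 0)[:, None]
pairwise_labels = same_cluster & non_dummy              # [k, c]
dummy_labels = ~pairwise_labels.any(1, keepdims=True)   # [k, 1]
labels = np.concatenate([dummy_labels, pairwise_labels], 1)  # [k, c + 1]

scores = np.zeros((3, 2))                               # [k, c + 1] toy antecedent scores
gold_scores = np.where(labels, scores, -np.inf)
# marginal log-likelihood: log-sum-exp over gold columns minus log-sum-exp over all
loss = -(np.log(np.exp(gold_scores).sum(1)) - np.log(np.exp(scores).sum(1))).sum()
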
    def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb,
                                 lm_emb, char_index, text_len, speaker_ids,
                                 genre, is_training, gold_starts, gold_ends,
                                 cluster_ids):
        self.dropout = self.get_dropout(self.config["dropout_rate"],
                                        is_training)
        self.lexical_dropout = self.get_dropout(
            self.config["lexical_dropout_rate"], is_training)
        self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"],
                                             is_training)

        num_sentences = tf.shape(context_word_emb)[0]
        max_sentence_length = tf.shape(context_word_emb)[1]

        context_emb_list = [context_word_emb]
        head_emb_list = [head_word_emb]

        if self.config["char_embedding_size"] > 0:
            char_emb = tf.gather(
                tf.get_variable(
                    "char_embeddings",
                    [len(self.char_dict), self.config["char_embedding_size"]]),
                char_index
            )  # [num_sentences, max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [
                num_sentences * max_sentence_length,
                util.shape(char_emb, 2),
                util.shape(char_emb, 3)
            ])  # [num_sentences * max_sentence_length, max_word_length, emb]
            flattened_aggregated_char_emb = util.cnn(
                flattened_char_emb, self.config["filter_widths"],
                self.config["filter_size"]
            )  # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
                num_sentences, max_sentence_length,
                util.shape(flattened_aggregated_char_emb, 1)
            ])  # [num_sentences, max_sentence_length, emb]
            context_emb_list.append(aggregated_char_emb)
            head_emb_list.append(aggregated_char_emb)

        if not self.lm_file:
            elmo_module = hub.Module("https://tfhub.dev/google/elmo/2")
            lm_embeddings = elmo_module(inputs={
                "tokens": tokens,
                "sequence_len": text_len
            },
                                        signature="tokens",
                                        as_dict=True)
            word_emb = lm_embeddings[
                "word_emb"]  # [num_sentences, max_sentence_length, 512]
            lm_emb = tf.stack([
                tf.concat([word_emb, word_emb], -1),
                lm_embeddings["lstm_outputs1"], lm_embeddings["lstm_outputs2"]
            ], -1)  # [num_sentences, max_sentence_length, 1024, 3]
        lm_emb_size = util.shape(lm_emb, 2)
        lm_num_layers = util.shape(lm_emb, 3)
        with tf.variable_scope("lm_aggregation"):
            self.lm_weights = tf.nn.softmax(
                tf.get_variable("lm_scores", [lm_num_layers],
                                initializer=tf.constant_initializer(0.0)))
            self.lm_scaling = tf.get_variable(
                "lm_scaling", [], initializer=tf.constant_initializer(1.0))
        flattened_lm_emb = tf.reshape(
            lm_emb,
            [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
        flattened_aggregated_lm_emb = tf.matmul(
            flattened_lm_emb, tf.expand_dims(
                self.lm_weights,
                1))  # [num_sentences * max_sentence_length * emb, 1]
        aggregated_lm_emb = tf.reshape(
            flattened_aggregated_lm_emb,
            [num_sentences, max_sentence_length, lm_emb_size])
        aggregated_lm_emb *= self.lm_scaling
        context_emb_list.append(aggregated_lm_emb)

        context_emb = tf.concat(context_emb_list,
                                2)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.concat(head_emb_list,
                             2)  # [num_sentences, max_sentence_length, emb]
        context_emb = tf.nn.dropout(
            context_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.nn.dropout(
            head_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

        # self.a , self.b = text_len , max_sentence_length
        text_len_mask = tf.sequence_mask(
            text_len,
            maxlen=max_sentence_length)  # [num_sentences, max_sentence_length]

        context_outputs = self.lstm_contextualize(
            context_emb, text_len, text_len_mask)  # [num_words, emb]
        num_words = util.shape(context_outputs, 0)

        genre_emb = tf.gather(
            tf.get_variable("genre_embeddings",
                            [len(self.genres), self.config["feature_size"]]),
            genre)  # [emb]

        sentence_indices = tf.tile(
            tf.expand_dims(tf.range(num_sentences), 1),
            [1, max_sentence_length])  # [num_sentences, max_sentence_length]
        flattened_sentence_indices = self.flatten_emb_by_sentence(
            sentence_indices, text_len_mask)  # [num_words]
        flattened_head_emb = self.flatten_emb_by_sentence(
            head_emb, text_len_mask)  # [num_words]

        candidate_starts = tf.tile(
            tf.expand_dims(tf.range(num_words), 1),
            [1, self.max_span_width])  # [num_words, max_span_width]
        candidate_ends = candidate_starts + tf.expand_dims(
            tf.range(self.max_span_width), 0)  # [num_words, max_span_width]
        candidate_start_sentence_indices = tf.gather(
            flattened_sentence_indices,
            candidate_starts)  # [num_words, max_span_width]
        candidate_end_sentence_indices = tf.gather(
            flattened_sentence_indices,
            tf.minimum(candidate_ends,
                       num_words - 1))  # [num_words, max_span_width]
        candidate_mask = tf.logical_and(
            candidate_ends < num_words,
            tf.equal(
                candidate_start_sentence_indices,
                candidate_end_sentence_indices))  # [num_words, max_span_width]
        flattened_candidate_mask = tf.reshape(
            candidate_mask, [-1])  # [num_words * max_span_width]
        candidate_starts = tf.boolean_mask(
            tf.reshape(candidate_starts,
                       [-1]), flattened_candidate_mask)  # [num_candidates]
        candidate_ends = tf.boolean_mask(
            tf.reshape(candidate_ends,
                       [-1]), flattened_candidate_mask)  # [num_candidates]
        candidate_sentence_indices = tf.boolean_mask(
            tf.reshape(candidate_start_sentence_indices, [-1]),
            flattened_candidate_mask)  # [num_candidates]

        candidate_cluster_ids = self.get_candidate_labels(
            candidate_starts, candidate_ends, gold_starts, gold_ends,
            cluster_ids)  # [num_candidates]

        candidate_span_emb = self.get_span_emb(
            flattened_head_emb, context_outputs, candidate_starts,
            candidate_ends)  # [num_candidates, emb]
        candidate_mention_scores = self.get_mention_scores(
            candidate_span_emb)  # [num_candidates, 1]
        candidate_mention_scores = tf.squeeze(candidate_mention_scores,
                                              1)  # [num_candidates]

        k = tf.to_int32(
            tf.floor(
                tf.to_float(tf.shape(context_outputs)[0]) *
                self.config["top_span_ratio"]))
        k = tf.minimum(500, k)
        top_span_indices = coref_ops.extract_spans(
            tf.expand_dims(candidate_mention_scores, 0),
            tf.expand_dims(candidate_starts, 0),
            tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0),
            util.shape(context_outputs, 0), True)  # [1, k]
        top_span_indices.set_shape([1, None])
        top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]

        top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
        top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
        top_span_emb = tf.gather(candidate_span_emb,
                                 top_span_indices)  # [k, emb]
        top_span_cluster_ids = tf.gather(candidate_cluster_ids,
                                         top_span_indices)  # [k]
        top_span_mention_scores = tf.gather(candidate_mention_scores,
                                            top_span_indices)  # [k]
        top_span_sentence_indices = tf.gather(candidate_sentence_indices,
                                              top_span_indices)  # [k]
        top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts)  # [k]
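        # Each span's speaker is approximated by the speaker of its start
        # token.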

        # Project the span embeddings (hard-coded width 1270 here) down to
        # `new_dim` before feeding them to the transformer.
        orig_dim = 1270
        with tf.name_scope("transformer"):
            with tf.name_scope("embedding_transformer"):
                W = tf.Variable(tf.random_normal((orig_dim, self.new_dim)))
                b = tf.Variable(tf.random_normal((self.new_dim, )))
                temp_input = tf.nn.relu(tf.matmul(top_span_emb, W) + b)

            # Build a [seq_length] validity mask (1.0 for the k real span
            # positions, 0.0 for the padded tail) and tile it into a
            # [seq_length, seq_length] attention mask over key positions.
            padding_mask_partial = tf.cast(tf.sequence_mask(
                tf.shape(temp_input)[0], maxlen=self.seq_length),
                                           dtype=tf.float32)
            multiples = [self.seq_length]
            padding_mask_partial2 = tf.tile(padding_mask_partial, multiples)
            enc_padding_mask = tf.reshape(padding_mask_partial2,
                                          [multiples[0], -1])

            # The decoder-side mask additionally zeroes the diagonal so a
            # span cannot attend to itself.
            dec_padding_mask = tf.reshape(padding_mask_partial2,
                                          [multiples[0], -1])
            dec_padding_mask = tf.matrix_set_diag(
                dec_padding_mask,
                tf.zeros(dec_padding_mask.shape[0:-1]),
                name=None)

            # Combine the padding mask with a causal (look-ahead) mask;
            # tf.minimum keeps a position only where both masks allow it
            # (1 = attend, 0 = mask under the convention used here).
            look_ahead_mask = create_look_ahead_mask(
                tf.shape(padding_mask_partial)[0])
            combined_mask = tf.minimum(enc_padding_mask, look_ahead_mask)
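            # `create_look_ahead_mask` is not defined in this snippet. A
            # minimal sketch, assuming the 1 = attend convention implied by
            # the tf.minimum above (lower-triangular ones):
            #
            #     def create_look_ahead_mask(size):
            #         # each position may attend to itself and earlier ones
            #         return tf.linalg.band_part(tf.ones((size, size)), -1, 0)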

            # Zero-pad the k span embeddings up to the fixed `seq_length`
            # expected by the transformer.
            s = tf.shape(temp_input)
            paddings = [[0, self.seq_length - s[0]], [0, 0]]
            padded_embd = tf.pad(temp_input, paddings, "CONSTANT")

            # The positional arguments appear to follow the TF tutorial
            # Transformer signature: (inp, tar, training, enc_padding_mask,
            # look_ahead_mask, dec_padding_mask).
            predictions, _ = self.sample_transformer(padded_embd, padded_embd,
                                                     True, enc_padding_mask,
                                                     combined_mask,
                                                     dec_padding_mask)

            # Keep only the first k (non-padding) transformer outputs and
            # prepend them to the original span embeddings:
            # [k, transformer_output_dim + orig_dim].
            top_span_emb = tf.concat([predictions[:k], top_span_emb], 1)

        c = tf.minimum(self.config["max_top_antecedents"], k)

        if self.config["coarse_to_fine"]:
            top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(
                top_span_emb, top_span_mention_scores, c)
        else:
            top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(
                top_span_emb, top_span_mention_scores, c)
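        # Both pruning variants return, for each of the k spans:
        #   top_antecedents            [k, c]  antecedent span indices
        #   top_antecedents_mask       [k, c]  validity mask
        #   top_fast_antecedent_scores [k, c]  cheap pairwise scores
        #   top_antecedent_offsets     [k, c]  antecedent distance offsets
        # (shapes as in the reference e2e-coref implementation; assumed here).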

        dummy_scores = tf.zeros([k, 1])  # [k, 1]

        # Higher-order refinement (coref_depth iterations): re-score
        # antecedents and update each span embedding through a learned gate.
        for i in range(self.config["coref_depth"]):
            with tf.variable_scope("coref_layer", reuse=(i > 0)):
                top_antecedent_emb = tf.gather(top_span_emb,
                                               top_antecedents)  # [k, c, emb]
                top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(
                    top_span_emb, top_antecedents, top_antecedent_emb,
                    top_antecedent_offsets, top_span_speaker_ids,
                    genre_emb)  # [k, c]
                top_antecedent_weights = tf.nn.softmax(
                    tf.concat([dummy_scores, top_antecedent_scores],
                              1))  # [k, c + 1]
                top_antecedent_emb = tf.concat(
                    [tf.expand_dims(top_span_emb, 1), top_antecedent_emb],
                    1)  # [k, c + 1, emb]
                attended_span_emb = tf.reduce_sum(
                    tf.expand_dims(top_antecedent_weights, 2) *
                    top_antecedent_emb, 1)  # [k, emb]
                with tf.variable_scope("f"):
                    f = tf.sigmoid(
                        util.projection(
                            tf.concat([top_span_emb, attended_span_emb], 1),
                            util.shape(top_span_emb, -1)))  # [k, emb]
                    top_span_emb = f * attended_span_emb + (
                        1 - f) * top_span_emb  # [k, emb]

        top_antecedent_scores = tf.concat(
            [dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

        top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids,
                                               top_antecedents)  # [k, c]
        # log(0) = -inf for masked antecedent slots; to_int32 turns it into a
        # large negative (hence never-matching) cluster id.
        top_antecedent_cluster_ids += tf.to_int32(
            tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
        same_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                          tf.expand_dims(
                                              top_span_cluster_ids,
                                              1))  # [k, c]
        non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0,
                                             1)  # [k, 1]
        pairwise_labels = tf.logical_and(same_cluster_indicator,
                                         non_dummy_indicator)  # [k, c]
        dummy_labels = tf.logical_not(
            tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
        top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels],
                                          1)  # [k, c + 1]
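        # Spans whose candidate set contains no gold antecedent get the dummy
        # label, training the model to select the dummy antecedent for them.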
        loss = self.softmax_loss(top_antecedent_scores,
                                 top_antecedent_labels)  # [k]
        loss = tf.reduce_sum(loss)  # []
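        # `softmax_loss` is assumed to be the usual e2e-coref marginal
        # log-likelihood over the gold antecedents, roughly:
        #
        #     def softmax_loss(self, antecedent_scores, antecedent_labels):
        #         gold_scores = antecedent_scores + tf.log(
        #             tf.to_float(antecedent_labels))            # [k, c + 1]
        #         marginalized_gold = tf.reduce_logsumexp(gold_scores, [1])
        #         log_norm = tf.reduce_logsumexp(antecedent_scores, [1])
        #         return log_norm - marginalized_gold            # [k]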

        return [
            candidate_starts, candidate_ends, candidate_mention_scores,
            top_span_starts, top_span_ends, top_antecedents,
            top_antecedent_scores
        ], loss
Example n. 7
0
        def compute_from_emb(candidate_span_emb):
            # AUTO_REUSE lets this closure be called more than once while
            # sharing the same scoring variables.
            with tf.variable_scope("prediction_scope", reuse=tf.AUTO_REUSE):

                candidate_mention_scores = self.get_mention_scores(
                    candidate_span_emb)  # [num_candidates, 1]
                candidate_mention_scores = tf.squeeze(candidate_mention_scores,
                                                      1)  # [num_candidates]

                k = tf.to_int32(
                    tf.floor(
                        tf.to_float(tf.shape(context_outputs)[0]) *
                        self.config["top_span_ratio"]))
                top_span_indices = coref_ops.extract_spans(
                    tf.expand_dims(candidate_mention_scores, 0),
                    tf.expand_dims(candidate_starts, 0),
                    tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0),
                    util.shape(context_outputs, 0), True)  # [1, k]
                top_span_indices.set_shape([1, None])
                top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]

                top_span_starts = tf.gather(candidate_starts,
                                            top_span_indices)  # [k]
                top_span_ends = tf.gather(candidate_ends,
                                          top_span_indices)  # [k]
                top_span_emb = tf.gather(candidate_span_emb,
                                         top_span_indices)  # [k, emb]
                top_span_cluster_ids = tf.gather(candidate_cluster_ids,
                                                 top_span_indices)  # [k]
                top_span_mention_scores = tf.gather(candidate_mention_scores,
                                                    top_span_indices)  # [k]
                top_span_sentence_indices = tf.gather(
                    candidate_sentence_indices, top_span_indices)  # [k]
                top_span_speaker_ids = tf.gather(speaker_ids,
                                                 top_span_starts)  # [k]
                self.head_scores = tf.gather(
                    candidate_head_scores,
                    top_span_indices)  # [k, max_span_width]
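                # `candidate_head_scores` (per-token head-attention weights
                # for each candidate span) is computed earlier in the full
                # model; the gather above keeps only the top spans' scores.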

                c = tf.minimum(self.config["max_top_antecedents"], k)

                if self.config["coarse_to_fine"]:
                    top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(
                        top_span_emb, top_span_mention_scores, c)
                else:
                    top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(
                        top_span_emb, top_span_mention_scores, c)

                dummy_scores = tf.zeros([k, 1])  # [k, 1]
                for i in range(self.config["coref_depth"]):
                    # AUTO_REUSE (rather than reuse=(i > 0)) shares the layer
                    # variables across loop iterations and repeated calls to
                    # compute_from_emb.
                    with tf.variable_scope("coref_layer", reuse=tf.AUTO_REUSE):
                        top_antecedent_emb = tf.gather(
                            top_span_emb, top_antecedents)  # [k, c, emb]
                        top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(
                            top_span_emb, top_antecedents, top_antecedent_emb,
                            top_antecedent_offsets, top_span_speaker_ids,
                            genre_emb)  # [k, c]
                        top_antecedent_weights = tf.nn.softmax(
                            tf.concat([dummy_scores, top_antecedent_scores],
                                      1))  # [k, c + 1]
                        top_antecedent_emb = tf.concat([
                            tf.expand_dims(top_span_emb, 1), top_antecedent_emb
                        ], 1)  # [k, c + 1, emb]
                        attended_span_emb = tf.reduce_sum(
                            tf.expand_dims(top_antecedent_weights, 2) *
                            top_antecedent_emb, 1)  # [k, emb]
                        with tf.variable_scope("f"):
                            f = tf.sigmoid(
                                util.projection(
                                    tf.concat(
                                        [top_span_emb, attended_span_emb], 1),
                                    util.shape(top_span_emb, -1)))  # [k, emb]
                            top_span_emb = f * attended_span_emb + (
                                1 - f) * top_span_emb  # [k, emb]

                top_antecedent_scores = tf.concat(
                    [dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

                top_antecedent_cluster_ids = tf.gather(
                    top_span_cluster_ids, top_antecedents)  # [k, c]
                top_antecedent_cluster_ids += tf.to_int32(
                    tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
                same_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                                  tf.expand_dims(
                                                      top_span_cluster_ids,
                                                      1))  # [k, c]
                non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0,
                                                     1)  # [k, 1]
                pairwise_labels = tf.logical_and(same_cluster_indicator,
                                                 non_dummy_indicator)  # [k, c]
                dummy_labels = tf.logical_not(
                    tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
                top_antecedent_labels = tf.concat(
                    [dummy_labels, pairwise_labels], 1)  # [k, c + 1]
                loss = self.softmax_loss(top_antecedent_scores,
                                         top_antecedent_labels)  # [k]
                loss = tf.reduce_sum(loss)  # []
                return [
                    candidate_starts, candidate_ends, candidate_mention_scores,
                    top_span_starts, top_span_ends, top_antecedents,
                    top_antecedent_scores
                ], loss
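        # A closure like this is typically used so the same scoring graph can
        # be rebuilt for alternative candidate embeddings, e.g.:
        #
        #     predictions, loss = compute_from_emb(candidate_span_emb)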