Example no. 1
    def emb2cnn(self, emb_list):
        pronoun_embedding, name_embedding, status_embedding = emb_list

        flattened_pronoun_emb = util.cnn(pronoun_embedding, self.config["filter_widths"], self.config["emb_filter_size"],
                                         name='p_')
        flattened_name_emb = util.cnn(name_embedding, self.config["filter_widths"], self.config["emb_filter_size"],
                                      name='n_')
        flattened_status_emb = util.cnn(status_embedding, self.config["filter_widths"], self.config["emb_filter_size"],
                                        name='s_')

        return flattened_pronoun_emb, flattened_name_emb, flattened_status_emb
Example no. 2
    def emb2cnn(self, emb_list):
        pronoun_embedding, name_embedding, status_embedding = emb_list

        pronoun_embedding = tf.transpose(pronoun_embedding,
                                         [1, 0, 2])  # 1, k ,emb
        name_embedding = tf.transpose(name_embedding, [1, 0, 2])
        # pronoun_embedding = tf.squeeze(pronoun_embedding, 1)
        # name_embedding = tf.squeeze(name_embedding, 1)

        flattened_pronoun_emb = util.cnn(pronoun_embedding,
                                         self.config["emb_filter_widths"],
                                         self.config["emb_filter_size"],
                                         name='p_')
        flattened_name_emb = util.cnn(name_embedding,
                                      self.config["emb_filter_widths"],
                                      self.config["emb_filter_size"],
                                      name='n_')

        return flattened_pronoun_emb, flattened_name_emb
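Both emb2cnn variants above delegate the aggregation to util.cnn. As a point of reference, a minimal sketch of what such a helper could look like, assuming the usual multi-width 1-D convolution with max-over-time pooling (the optional name prefix mirrors the calls above; the actual implementation in util is not shown here):

import tensorflow as tf

def cnn_sketch(inputs, filter_widths, filter_size, name=''):
    # inputs: [batch, time, emb]; one convolution per filter width, max-pooled over time.
    outputs = []
    input_size = inputs.get_shape().as_list()[-1]
    for i, width in enumerate(filter_widths):
        with tf.variable_scope("{}conv_{}".format(name, i)):
            w = tf.get_variable("w", [width, input_size, filter_size])
            b = tf.get_variable("b", [filter_size])
            conv = tf.nn.relu(tf.nn.conv1d(inputs, w, stride=1, padding="VALID") + b)
            outputs.append(tf.reduce_max(conv, axis=1))  # [batch, filter_size]
    return tf.concat(outputs, axis=1)  # [batch, filter_size * len(filter_widths)]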
Example no. 3
  def get_predictions_and_loss(self, inputs):
    tokens, context_word_emb, lm_emb, char_index, text_len, is_training, gold_labels = inputs
    self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
    self.lexical_dropout = self.get_dropout(self.config["lexical_dropout_rate"], is_training)
    self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training)

    num_sentences = tf.shape(tokens)[0]
    max_sentence_length = tf.shape(tokens)[1]

    context_emb_list = []
    context_emb_list.append(context_word_emb)
    char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
    flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
    flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
    aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
    context_emb_list.append(aggregated_char_emb)


    if self.lm_file is not None:  # Only add these layers if we're using contextualized embeddings
      lm_emb_size = util.shape(lm_emb, 2)
      lm_num_layers = util.shape(lm_emb, 3)
      with tf.variable_scope("lm_aggregation"):
        self.lm_weights = tf.nn.softmax(tf.get_variable("lm_scores", [lm_num_layers], initializer=tf.constant_initializer(0.0)))
        self.lm_scaling = tf.get_variable("lm_scaling", [], initializer=tf.constant_initializer(1.0))

      flattened_lm_emb = tf.reshape(lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
      flattened_aggregated_lm_emb = tf.matmul(flattened_lm_emb, tf.expand_dims(self.lm_weights, 1)) # [num_sentences * max_sentence_length * emb, 1]
      aggregated_lm_emb = tf.reshape(flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size])
      aggregated_lm_emb *= self.lm_scaling
      context_emb_list.append(aggregated_lm_emb)

    context_emb = tf.concat(context_emb_list, 2) # [num_sentences, max_sentence_length, emb]
    context_emb = tf.nn.dropout(context_emb, self.lexical_dropout) # [num_sentences, max_sentence_length, emb]

    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length) # [num_sentence, max_sentence_length]

    candidate_scores_mask = tf.logical_and(tf.expand_dims(text_len_mask,[1]),tf.expand_dims(text_len_mask,[2])) #[num_sentence, max_sentence_length,max_sentence_length]
    sentence_ends_leq_starts = tf.tile(tf.expand_dims(tf.logical_not(tf.sequence_mask(tf.range(max_sentence_length),max_sentence_length)), 0),[num_sentences,1,1]) #[num_sentence, max_sentence_length,max_sentence_length]
    candidate_scores_mask = tf.logical_and(candidate_scores_mask,sentence_ends_leq_starts)

    flattened_candidate_scores_mask = tf.reshape(candidate_scores_mask,[-1]) #[num_sentence * max_sentence_length * max_sentence_length]


    context_outputs = self.lstm_contextualize(context_emb, text_len,self.lstm_dropout) # [num_sentence, max_sentence_length, emb]


    with tf.variable_scope("candidate_starts_ffnn"):
      candidate_starts_emb = util.projection(context_outputs,self.config["ffnn_size"]) #[num_sentences, max_sentence_length, emb]
    with tf.variable_scope("candidate_ends_ffnn"):
      candidate_ends_emb = util.projection(context_outputs,self.config["ffnn_size"]) #[num_sentences, max_sentence_length, emb]


    candidate_ner_scores = util.bilinear_classifier(candidate_starts_emb,candidate_ends_emb,self.dropout,output_size=self.num_types+1)#[num_sentence, max_sentence_length,max_sentence_length,types+1]
    candidate_ner_scores = tf.boolean_mask(tf.reshape(candidate_ner_scores,[-1,self.num_types+1]),flattened_candidate_scores_mask)


    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=gold_labels, logits=candidate_ner_scores)
    loss = tf.reduce_sum(loss)


    return candidate_ner_scores, loss
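util.bilinear_classifier above produces one logit per (start, end) token pair and per label (num_types entity types plus a null class). A rough sketch, assuming a plain biaffine map with one weight matrix per output label (bias and linear terms omitted; the actual parameterisation in util may differ):

import tensorflow as tf

def bilinear_classifier_sketch(starts_emb, ends_emb, dropout, output_size):
    # starts_emb, ends_emb: [num_sentences, max_sentence_length, emb]
    emb = starts_emb.get_shape().as_list()[-1]
    starts_emb = tf.nn.dropout(starts_emb, dropout)
    ends_emb = tf.nn.dropout(ends_emb, dropout)
    weights = tf.get_variable("bilinear_weights", [output_size, emb, emb])
    tmp = tf.einsum("bxi,oij->bxoj", starts_emb, weights)  # contract start embeddings with each label matrix
    scores = tf.einsum("bxoj,byj->bxyo", tmp, ends_emb)    # contract with end embeddings
    return scores  # [num_sentences, max_sentence_length, max_sentence_length, output_size]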
Example no. 4
  def get_predictions_and_loss(self,
                              word_emb,
                              char_index,
                              text_len,
                              speaker_ids,
                              genre,
                              is_training,
                              gold_starts,
                              gold_ends,
                              cluster_ids,
                              tag_labels,
                              tag_seq,
                              tag_loss_label):

    # self.gold_starts = gold_starts
    # self.gold_ends = gold_ends
    # self.cluster_ids = cluster_ids

    self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"])
    self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"])

    num_sentences = tf.shape(word_emb)[0]
    max_sentence_length = tf.shape(word_emb)[1]

    text_emb_list = [word_emb]

    if self.config["char_embedding_size"] > 0:
      char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
      flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
      flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
      aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
      text_emb_list.append(aggregated_char_emb)

    text_emb = tf.concat(text_emb_list, 2)
    text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
    text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])
    # self.text_len_mask = text_len_mask[0]

    text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
    text_outputs = tf.nn.dropout(text_outputs, self.dropout)

    genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]

    sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
    flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
    flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask) # [num_words]
    self.flattened_sentence_indices = flattened_sentence_indices

    # text_conv = tf.expand_dims(text_outputs, 0)
    text_conv = tf.expand_dims(flattened_text_emb, 0)
    text_conv = util.cnn_name(text_conv, [5], 100, 'tag_conv')[0]
    text_conv = tf.nn.dropout(text_conv, self.dropout)

    # text_lstm = self.encode_sentences_unilstm(text_conv)[0]

    # tag_prob = tf.nn.softmax(util.projection_name(text_conv, 3, 'tag_fc'), dim=1)
    tag_prob = util.projection_name(text_conv, 3, 'tag_fc')
    # tag_prob_transpose = tf.transpose(tag_prob, [1, 0])

    tag_outputs = tf.argmax(tag_prob, axis=1, output_type=tf.int32)

    tag_high = tf.reduce_max(tag_prob, axis=1)

    num_words = tf.shape(text_conv)[0]

    # self.lstm_shape = tf.shape(text_outputs)
    # self.conv_shape = tf.shape(text_conv)

    # candidate_starts, candidate_ends = coref_ops.spans(
    #   sentence_indices=flattened_sentence_indices,
    #   max_width=self.max_mention_width)
    # candidate_starts.set_shape([None])
    # candidate_ends.set_shape([None])

    mention_starts, mention_ends, mention_scores = coref_ops.memory(
      tag_seq=tag_outputs,
      tag_high=tag_high,
      num_words=1)
    mention_starts.set_shape([None])
    mention_ends.set_shape([None])
    mention_scores.set_shape([None])

    self.num_mention = tf.shape(mention_starts)[0]
    self.num_gold_mention = tf.shape(gold_starts)[0]
    self.num_words = num_words
    self.mention_starts = mention_starts
    self.gold_starts = gold_starts
    self.mention_ends = mention_ends
    self.tag_outputs = tag_outputs
    self.tag_seq = tag_seq

    mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, mention_starts, mention_ends) # [num_candidates, emb]
    # mention_scores = tf.convert_to_tensor([self.get_mention_prob(tag_prob_transpose, mention_starts[i], mention_ends[i], num_words)
    #                                         for i in range(tf.shape(mention_starts)[0])])

    # mention_scores = tf.squeeze(self.get_mention_scores(mention_emb), 1) # [num_mentions, 1]
    # candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [num_mentions]

    # k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"]))
    # predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores, candidate_starts, candidate_ends, k) # ([k], [k])
    # predicted_mention_indices.set_shape([None])

    # mention_starts = tf.gather(candidate_starts, predicted_mention_indices) # [num_mentions]
    # mention_ends = tf.gather(candidate_ends, predicted_mention_indices) # [num_mentions]
    # mention_emb = tf.gather(candidate_mention_emb, predicted_mention_indices) # [num_mentions, emb]
    # mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices) # [num_mentions]

    candidate_starts = mention_starts
    candidate_ends = mention_ends

    mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
    mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
    mention_speaker_ids = tf.gather(speaker_ids, mention_starts) # [num_mentions]

    max_antecedents = self.config["max_antecedents"]
    antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids, max_antecedents) # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions]
    antecedents.set_shape([None, None])
    antecedent_labels.set_shape([None, None])
    antecedents_len.set_shape([None])

    antecedent_scores = self.get_antecedent_scores(mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb) # [num_mentions, max_ant + 1]

    raw_mention_loss = self.softmax_loss(antecedent_scores, antecedent_labels)# [num_mentions]
    raw_tagging_loss = tf.nn.softmax_cross_entropy_with_logits(logits=tag_prob, labels=tag_labels)
    mention_loss = tf.reduce_sum(raw_mention_loss)
    tagging_loss = tf.reduce_sum(tf.multiply(tf.to_float(tag_loss_label), raw_tagging_loss)) # [] 
    # tagging_loss = tf.reduce_sum(raw_tagging_loss)

    return [
            candidate_starts,
            candidate_ends,
            mention_scores,
            mention_starts,
            mention_ends,
            antecedents,
            antecedent_scores,
            tag_outputs,
            tag_seq
          ], mention_loss, tagging_loss
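The mention loss above goes through self.softmax_loss. A sketch, under the assumption that it is the standard marginal log-likelihood over gold antecedents (as in Lee et al., 2017), consistent with how antecedent_labels is built by coref_ops.antecedents:

import tensorflow as tf

def softmax_loss_sketch(antecedent_scores, antecedent_labels):
    # antecedent_scores: [num_mentions, max_ant + 1]
    # antecedent_labels: boolean [num_mentions, max_ant + 1], True for correct antecedents.
    gold_scores = antecedent_scores + tf.log(tf.to_float(antecedent_labels))  # -inf on non-gold entries
    marginalized_gold_scores = tf.reduce_logsumexp(gold_scores, [1])  # [num_mentions]
    log_norm = tf.reduce_logsumexp(antecedent_scores, [1])            # [num_mentions]
    return log_norm - marginalized_gold_scores                        # [num_mentions]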
Example no. 5
  def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb, lm_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids, scene_emb, genders, fpronouns):
    self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
    self.lexical_dropout = self.get_dropout(self.config["lexical_dropout_rate"], is_training)
    self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training)

    num_sentences = tf.shape(context_word_emb)[0]
    max_sentence_length = tf.shape(context_word_emb)[1]

    context_emb_list = [context_word_emb]
    head_emb_list = [head_word_emb]

    if self.config["char_embedding_size"] > 0:
      char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
      flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
      flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
      aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
      context_emb_list.append(aggregated_char_emb)
      head_emb_list.append(aggregated_char_emb)

    if not self.lm_file:
      elmo_module = hub.Module("https://tfhub.dev/google/elmo/2")
      lm_embeddings = elmo_module(
          inputs={"tokens": tokens, "sequence_len": text_len},
          signature="tokens", as_dict=True)
      word_emb = lm_embeddings["word_emb"]  # [num_sentences, max_sentence_length, 512]
      lm_emb = tf.stack([tf.concat([word_emb, word_emb], -1),
                         lm_embeddings["lstm_outputs1"],
                         lm_embeddings["lstm_outputs2"]], -1)  # [num_sentences, max_sentence_length, 1024, 3]
    lm_emb_size = util.shape(lm_emb, 2)
    lm_num_layers = util.shape(lm_emb, 3)
    with tf.variable_scope("lm_aggregation"):
      self.lm_weights = tf.nn.softmax(tf.get_variable("lm_scores", [lm_num_layers], initializer=tf.constant_initializer(0.0)))
      self.lm_scaling = tf.get_variable("lm_scaling", [], initializer=tf.constant_initializer(1.0))
    flattened_lm_emb = tf.reshape(lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
    flattened_aggregated_lm_emb = tf.matmul(flattened_lm_emb, tf.expand_dims(self.lm_weights, 1)) # [num_sentences * max_sentence_length * emb, 1]
    aggregated_lm_emb = tf.reshape(flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size])
    aggregated_lm_emb *= self.lm_scaling
    context_emb_list.append(aggregated_lm_emb)

    context_emb = tf.concat(context_emb_list, 2) # [num_sentences, max_sentence_length, emb]
    head_emb = tf.concat(head_emb_list, 2) # [num_sentences, max_sentence_length, emb]
    context_emb = tf.nn.dropout(context_emb, self.lexical_dropout) # [num_sentences, max_sentence_length, emb]
    head_emb = tf.nn.dropout(head_emb, self.lexical_dropout) # [num_sentences, max_sentence_length, emb]

    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length) # [num_sentence, max_sentence_length]

    context_outputs = self.lstm_contextualize(context_emb, text_len, text_len_mask) # [num_words, emb]
    num_words = util.shape(context_outputs, 0)

    genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]

    sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
    flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
    flattened_head_emb = self.flatten_emb_by_sentence(head_emb, text_len_mask) # [num_words]

    candidate_starts = tf.tile(tf.expand_dims(tf.range(num_words), 1), [1, self.max_span_width]) # [num_words, max_span_width]
    candidate_ends = candidate_starts + tf.expand_dims(tf.range(self.max_span_width), 0) # [num_words, max_span_width]
    
    #debug
    prev_can_st = candidate_starts
    prev_can_ends = candidate_ends
    #debug

    candidate_start_sentence_indices = tf.gather(flattened_sentence_indices, candidate_starts) # [num_words, max_span_width]
    candidate_end_sentence_indices = tf.gather(flattened_sentence_indices, tf.minimum(candidate_ends, num_words - 1)) # [num_words, max_span_width]
    candidate_mask = tf.logical_and(candidate_ends < num_words, tf.equal(candidate_start_sentence_indices, candidate_end_sentence_indices)) # [num_words, max_span_width]
    flattened_candidate_mask = tf.reshape(candidate_mask, [-1]) # [num_words * max_span_width]
    candidate_starts = tf.boolean_mask(tf.reshape(candidate_starts, [-1]), flattened_candidate_mask) # [num_candidates]
    candidate_ends = tf.boolean_mask(tf.reshape(candidate_ends, [-1]), flattened_candidate_mask) # [num_candidates]

    combined_candidate_st = candidate_starts*10000 + candidate_ends
    combined_gold_st = gold_starts*10000 + gold_ends

    _, non_top_span_list = tf.setdiff1d(combined_candidate_st, combined_gold_st) #[num_candidate - num_gold_mentions]
    whole_candidate_indices_list = tf.range(util.shape(candidate_starts,0)) # [num_candidates]
    gold_span_indices, _ = tf.setdiff1d(whole_candidate_indices_list, non_top_span_list) #[num_gold_mentions]


    candidate_sentence_indices = tf.boolean_mask(tf.reshape(candidate_start_sentence_indices, [-1]), flattened_candidate_mask) # [num_candidates]

    candidate_cluster_ids = self.get_candidate_labels(candidate_starts, candidate_ends, gold_starts, gold_ends, cluster_ids) # [num_candidates]

    candidate_span_emb = self.get_span_emb(flattened_head_emb, context_outputs, candidate_starts, candidate_ends) # [num_candidates, emb]


    #Video Scene Emb
    ffnn_scene_emb = util.ffnn(scene_emb, num_hidden_layers=self.config["ffnn_depth"], hidden_size=400, output_size=128, dropout=self.dropout) # [num_words, 100]
    candidate_scene_emb = self.get_scene_emb(ffnn_scene_emb, candidate_starts) #[num_candidates, 100]

    '''
    #Comment : This part is for calculating mention scores and pruning mentions.
    #It is not used for this task, because mention boundaries are given.

    candidate_mention_scores =  self.get_mention_scores(candidate_span_emb) # [k, 1]
    candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [k]

    k = tf.to_int32(tf.floor(tf.to_float(tf.shape(context_outputs)[0]) * self.config["top_span_ratio"]))
    top_span_indices = coref_ops.extract_spans(tf.expand_dims(candidate_mention_scores, 0),
                                               tf.expand_dims(candidate_starts, 0),
                                               tf.expand_dims(candidate_ends, 0),
                                               tf.expand_dims(k, 0),
                                               util.shape(context_outputs, 0),
                                               True) # [1, k]
    top_span_indices.set_shape([1, None])
    top_span_indices = tf.squeeze(top_span_indices, 0) # [k]
    '''

    ######## Only Using Gold Span Indices #####
    k = tf.to_int32(util.shape(gold_span_indices,0))
    top_span_indices = gold_span_indices
    ############

    top_span_starts = tf.gather(candidate_starts, top_span_indices) # [k]
    top_span_ends = tf.gather(candidate_ends, top_span_indices) # [k]
    top_span_emb = tf.gather(candidate_span_emb, top_span_indices) # [k, emb]
    top_scene_emb = tf.gather(candidate_scene_emb, top_span_indices) # [k, emb-scene]

    top_span_cluster_ids = tf.gather(candidate_cluster_ids, top_span_indices) # [k]
    #top_span_mention_scores = tf.gather(candidate_mention_scores, top_span_indices) # [k]
    top_span_sentence_indices = tf.gather(candidate_sentence_indices, top_span_indices) # [k]
    top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts) # [k]
    top_span_genders = tf.gather(genders, top_span_ends)
    top_span_fpronouns = tf.gather(fpronouns, top_span_ends)

    # k : total number of candidate spans (M in paper)
    # c : how many antecedents we check (K in paper)
    c = tf.minimum(self.config["max_top_antecedents"], k)

    if self.config["coarse_to_fine"]:
      top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(top_span_emb, top_span_mention_scores, c)
    else:
      #top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(top_span_emb, top_span_mention_scores, c)
      top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_prnuing_wo_mention_score(top_span_emb, c)

    dummy_scores = tf.zeros([k, 1]) # [k, 1]
    for i in range(self.config["coref_depth"]):
      with tf.variable_scope("coref_layer", reuse=(i > 0)):
        top_antecedent_emb = tf.gather(top_span_emb, top_antecedents) # [k, c, emb]
        top_antecedent_scene_emb = tf.gather(top_scene_emb, top_antecedents) # [k, c, emb-scene]
        top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(top_span_emb, top_antecedents, top_antecedent_emb, top_antecedent_offsets, top_span_speaker_ids, genre_emb, top_scene_emb, top_antecedent_scene_emb, top_span_genders, top_span_fpronouns) # [k, c]
        top_antecedent_weights = tf.nn.softmax(tf.concat([dummy_scores, top_antecedent_scores], 1)) # [k, c + 1]
        top_antecedent_emb = tf.concat([tf.expand_dims(top_span_emb, 1), top_antecedent_emb], 1) # [k, c + 1, emb]
        attended_span_emb = tf.reduce_sum(tf.expand_dims(top_antecedent_weights, 2) * top_antecedent_emb, 1) # [k, emb]
        with tf.variable_scope("f"):
          f = tf.sigmoid(util.projection(tf.concat([top_span_emb, attended_span_emb], 1), util.shape(top_span_emb, -1))) # [k, emb]
          top_span_emb = f * attended_span_emb + (1 - f) * top_span_emb # [k, emb]

    top_antecedent_scores = tf.concat([dummy_scores, top_antecedent_scores], 1) # [k, c + 1]

    top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids, top_antecedents) # [k, c]
    top_antecedent_cluster_ids += tf.to_int32(tf.log(tf.to_float(top_antecedents_mask))) # [k, c]
    same_cluster_indicator = tf.equal(top_antecedent_cluster_ids, tf.expand_dims(top_span_cluster_ids, 1)) # [k, c]
    non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0, 1) # [k, 1]
    pairwise_labels = tf.logical_and(same_cluster_indicator, non_dummy_indicator) # [k, c]
    dummy_labels = tf.logical_not(tf.reduce_any(pairwise_labels, 1, keepdims=True)) # [k, 1]
    top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels], 1) # [k, c + 1]

    top_antecedent_prob = tf.nn.softmax(top_antecedent_scores, 1) # [k, c + 1]
    if (self.config["use_gender_logic_rule"]):
      top_antecedent_prob_with_logic = self.project_logic_rule(top_antecedent_prob, top_span_genders, top_span_fpronouns, top_span_speaker_ids, top_antecedents, k)
      '''
      marginal_prob = tf.reduce_sum(top_antecedent_prob*tf.to_float(top_antecedent_labels),axis=1)
      gold_loss = -1 * tf.reduce_sum(tf.log(marginal_prob))
      top_antecedent_scores = top_antecedent_prob      
      '''
      origin_loss = self.softmax_loss(top_antecedent_scores, top_antecedent_labels) # [k]
      origin_loss = tf.reduce_sum(origin_loss)

      # cross_entropy : -1 * ground_truth * log(prediction)
      #teacher_loss = tf.reduce_min(tf.nn. (labels=top_antecedent_prob_with_logic, logits=top_antecedent_scores))
      teacher_loss = tf.reduce_sum(-tf.reduce_sum(top_antecedent_prob_with_logic * tf.log(top_antecedent_prob + 1e-10), reduction_indices=[1]))

      pi = tf.minimum(self.config["logic_rule_pi_zero"], 1.0 - tf.pow(self.config["logic_rule_imitation_alpha"], tf.to_float(self.global_step)+1.0)) 

      # For Validation Loss
      marginal_prob = tf.reduce_sum(top_antecedent_prob_with_logic*tf.to_float(top_antecedent_labels),axis=1)
      validation_loss = -1 * tf.reduce_sum(tf.log(marginal_prob))
      
      #loss = teacher_loss + origin_loss
      loss = tf.where(is_training, pi*teacher_loss + (1.0-pi)*origin_loss, validation_loss)

      top_antecedent_scores = top_antecedent_prob_with_logic
    else:
      loss = self.softmax_loss(top_antecedent_scores, top_antecedent_labels) # [k]
      loss = tf.reduce_sum(loss) # []
      teacher_loss = loss
      origin_loss = loss

    return [candidate_starts, candidate_ends, top_span_starts, top_span_ends, top_antecedents, top_antecedent_scores, teacher_loss, origin_loss], loss
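distance_prnuing_wo_mention_score above (and distance_pruning in the other variants) selects, for each of the k spans, the c immediately preceding spans as candidate antecedents. A sketch, assuming standard distance-based pruning without mention scores:

import tensorflow as tf

def distance_pruning_wo_mention_score_sketch(top_span_emb, c):
    # top_span_emb: [k, emb]; antecedents of span i are spans i-1 .. i-c, clipped at 0.
    k = tf.shape(top_span_emb)[0]
    top_antecedent_offsets = tf.tile(tf.expand_dims(tf.range(c) + 1, 0), [k, 1])   # [k, c]
    raw_top_antecedents = tf.expand_dims(tf.range(k), 1) - top_antecedent_offsets  # [k, c]
    top_antecedents_mask = raw_top_antecedents >= 0                                # [k, c]
    top_antecedents = tf.maximum(raw_top_antecedents, 0)                           # [k, c]
    # Without mention scores, the "fast" scores only carry the validity mask (-inf for invalid slots).
    top_fast_antecedent_scores = tf.log(tf.to_float(top_antecedents_mask))         # [k, c]
    return top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets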
Example no. 6
    def get_predictions_and_loss(self, context_word_emb, head_word_emb, lm_emb,
                                 char_index, text_len, speaker_ids, genre,
                                 is_training, gold_starts, gold_ends,
                                 cluster_ids, bridging_ante_cids, is_status,
                                 us_mask):
        self.dropout = self.get_dropout(self.config["dropout_rate"],
                                        is_training)
        self.lexical_dropout = self.get_dropout(
            self.config["lexical_dropout_rate"], is_training)
        self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"],
                                             is_training)

        num_sentences = tf.shape(context_word_emb)[0]
        max_sentence_length = tf.shape(context_word_emb)[1]

        context_emb_list = [context_word_emb]
        head_emb_list = [head_word_emb]

        if self.config["char_embedding_size"] > 0:
            char_emb = tf.gather(
                tf.get_variable(
                    "char_embeddings",
                    [len(self.char_dict), self.config["char_embedding_size"]]),
                char_index
            )  # [num_sentences, max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [
                num_sentences * max_sentence_length,
                util.shape(char_emb, 2),
                util.shape(char_emb, 3)
            ])  # [num_sentences * max_sentence_length, max_word_length, emb]
            flattened_aggregated_char_emb = util.cnn(
                flattened_char_emb, self.config["filter_widths"],
                self.config["filter_size"]
            )  # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
                num_sentences, max_sentence_length,
                util.shape(flattened_aggregated_char_emb, 1)
            ])  # [num_sentences, max_sentence_length, emb]
            context_emb_list.append(aggregated_char_emb)
            head_emb_list.append(aggregated_char_emb)

        if self.lm_file:
            lm_emb_size = util.shape(lm_emb, 2)
            lm_num_layers = util.shape(lm_emb, 3)
            with tf.variable_scope("lm_aggregation"):
                self.lm_weights = tf.nn.softmax(
                    tf.get_variable("lm_scores", [lm_num_layers],
                                    initializer=tf.constant_initializer(0.0)))
                self.lm_scaling = tf.get_variable(
                    "lm_scaling", [], initializer=tf.constant_initializer(1.0))
            flattened_lm_emb = tf.reshape(lm_emb, [
                num_sentences * max_sentence_length * lm_emb_size,
                lm_num_layers
            ])
            flattened_aggregated_lm_emb = tf.matmul(
                flattened_lm_emb, tf.expand_dims(
                    self.lm_weights,
                    1))  # [num_sentences * max_sentence_length * emb, 1]
            aggregated_lm_emb = tf.reshape(
                flattened_aggregated_lm_emb,
                [num_sentences, max_sentence_length, lm_emb_size])
            aggregated_lm_emb *= self.lm_scaling
            context_emb_list.append(aggregated_lm_emb)

        context_emb = tf.concat(context_emb_list,
                                2)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.concat(head_emb_list,
                             2)  # [num_sentences, max_sentence_length, emb]
        context_emb = tf.nn.dropout(
            context_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.nn.dropout(
            head_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

        text_len_mask = tf.sequence_mask(
            text_len,
            maxlen=max_sentence_length)  # [num_sentence, max_sentence_length]

        context_outputs = self.lstm_contextualize(
            context_emb, text_len, text_len_mask)  # [num_words, emb]

        genre_emb = tf.gather(
            tf.get_variable("genre_embeddings",
                            [len(self.genres), self.config["feature_size"]]),
            genre)  # [emb]

        flattened_head_emb = self.flatten_emb_by_sentence(
            head_emb, text_len_mask)  # [num_words]

        top_span_starts = gold_starts
        top_span_ends = gold_ends
        top_span_cluster_ids = cluster_ids

        top_span_emb = self.get_span_emb(flattened_head_emb, context_outputs,
                                         top_span_starts, top_span_ends)
        top_span_mention_scores = tf.zeros_like(gold_starts,
                                                dtype=tf.float32)  # [k]

        top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts)
        top_span_bridging_ante_cids = bridging_ante_cids
        top_us_mask = us_mask
        top_is_status = is_status

        k = util.shape(top_span_starts, 0)

        c = tf.minimum(self.config["max_top_antecedents"], k)

        top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(
            top_span_emb, top_span_mention_scores, c)

        top_antecedent_emb = tf.gather(top_span_emb,
                                       top_antecedents)  # [k, c, emb]

        pair_emb = self.get_pair_embeddings(top_span_emb, top_antecedents,
                                            top_antecedent_emb,
                                            top_antecedent_offsets,
                                            top_span_speaker_ids,
                                            genre_emb)  # [k, c,emb]

        top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids,
                                               top_antecedents)  # [k, c]
        top_antecedent_cluster_ids += tf.to_int32(
            tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]

        shared_depth = 0
        if self.config["shared_depth"] > 0:
            flattened_pair_emb = tf.reshape(
                pair_emb, [k * c, util.shape(pair_emb, 2)])
            shared_depth = min(self.config["shared_depth"],
                               self.config["ffnn_depth"])
            for i in range(shared_depth):
                hidden_weights = tf.get_variable(
                    "shared_hidden_weights_{}".format(i), [
                        util.shape(flattened_pair_emb, 1),
                        self.config["ffnn_size"]
                    ])
                hidden_bias = tf.get_variable(
                    "shared_hidden_bias_{}".format(i),
                    [self.config["ffnn_size"]])
                flattened_pair_emb = tf.nn.relu(
                    tf.nn.xw_plus_b(flattened_pair_emb, hidden_weights,
                                    hidden_bias))
                flattened_pair_emb = tf.nn.dropout(flattened_pair_emb,
                                                   self.dropout)
            pair_emb = tf.reshape(flattened_pair_emb,
                                  [k, c, self.config["ffnn_size"]])

        ante_score_list = []
        pairwise_label_list = []
        dummy_scores = tf.zeros([k, 1])  # [k, 1]
        ante_score_list.append(dummy_scores)

        with tf.variable_scope("slow_bridging_scores"):
            slow_bridging_scores = util.ffnn(
                pair_emb, self.config["ffnn_depth"] - shared_depth,
                self.config["ffnn_size"], 1, self.dropout)  # [k, c, 1]
            slow_bridging_scores = tf.squeeze(slow_bridging_scores,
                                              2)  # [k, c]
            top_bridging_scores = slow_bridging_scores + top_fast_antecedent_scores
            ante_score_list.append(top_bridging_scores)

        bridging_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                              tf.expand_dims(
                                                  top_span_bridging_ante_cids,
                                                  1))  # [k, c]
        non_dummy_bridging_indicator = tf.expand_dims(
            top_span_bridging_ante_cids > 0, 1)  # [k, 1]

        bridging_pairwise_labels = tf.logical_and(
            bridging_cluster_indicator, non_dummy_bridging_indicator)  # [k, c]
        pairwise_label_list.append(bridging_pairwise_labels)

        if self.config["train_with_coref"]:
            with tf.variable_scope("slow_coreference_scores"):
                slow_coref_scores = util.ffnn(
                    pair_emb, self.config["ffnn_depth"] - shared_depth,
                    self.config["ffnn_size"], 1, self.dropout)  # [k, c, 1]
                slow_coref_scores = tf.squeeze(slow_coref_scores, 2)  # [k, c]
                top_coref_scores = slow_coref_scores + top_fast_antecedent_scores
                ante_score_list.append(top_coref_scores)

            coref_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                               tf.expand_dims(
                                                   top_span_cluster_ids,
                                                   1))  # [k,c]

            non_dummy_coref_indicator = tf.expand_dims(
                top_span_cluster_ids > 0, 1)  # [k,1]

            coref_pairwise_labels = tf.logical_and(
                coref_cluster_indicator, non_dummy_coref_indicator)  # [k,c]
            pairwise_label_list.append(coref_pairwise_labels)

        top_antecedent_scores = tf.concat(ante_score_list,
                                          1)  # [k, c + 1] or [k, 2*c+1]
        pairwise_labels = tf.concat(pairwise_label_list, 1)  # [k,c] or [k,2*c]

        top_antecedent_scores = tf.boolean_mask(top_antecedent_scores,
                                                top_us_mask)
        pairwise_labels = tf.boolean_mask(pairwise_labels, top_us_mask)

        dummy_labels = tf.logical_not(
            tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
        pairwise_labels = tf.concat([dummy_labels, pairwise_labels],
                                    1)  # [k, c + 1] or [k,2*c+1]

        loss = self.softmax_loss(top_antecedent_scores, pairwise_labels)
        loss = tf.reduce_sum(loss)

        if self.config["use_gold_bridging_anaphora"]:
            bridging_mask = tf.equal(top_is_status, 2)  # bridging
            top_span_starts = tf.boolean_mask(top_span_starts, bridging_mask)
            top_span_ends = tf.boolean_mask(top_span_ends, bridging_mask)
            top_antecedents = tf.boolean_mask(top_antecedents, bridging_mask)
            top_antecedent_scores_output = tf.boolean_mask(
                top_bridging_scores, bridging_mask)
        elif self.config["remove_coref_anaphora"]:
            bridging_mask = tf.not_equal(top_is_status, 1)  # DO
            top_span_starts = tf.boolean_mask(top_span_starts, bridging_mask)
            top_span_ends = tf.boolean_mask(top_span_ends, bridging_mask)
            top_antecedents = tf.boolean_mask(top_antecedents, bridging_mask)
            top_antecedent_scores_output = tf.boolean_mask(
                tf.concat([dummy_scores, top_bridging_scores], 1),
                bridging_mask)
        else:
            top_antecedent_scores_output = top_antecedent_scores

        return [
            top_span_starts, top_span_ends, top_span_cluster_ids,
            top_antecedents, top_antecedent_scores_output
        ], loss
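The slow bridging and coreference scorers above call util.ffnn with a configurable number of hidden layers. A sketch, assuming it is a plain stack of ReLU layers with dropout followed by a linear projection that restores the leading [k, c] dimensions the caller squeezes afterwards:

import tensorflow as tf

def ffnn_sketch(inputs, num_hidden_layers, hidden_size, output_size, dropout):
    # inputs: [..., emb]. Flatten leading dims, apply ReLU layers with dropout, project, restore shape.
    input_shape = tf.shape(inputs)
    emb = inputs.get_shape().as_list()[-1]
    current = tf.reshape(inputs, [-1, emb])
    current_size = emb
    for i in range(num_hidden_layers):
        w = tf.get_variable("hidden_weights_{}".format(i), [current_size, hidden_size])
        b = tf.get_variable("hidden_bias_{}".format(i), [hidden_size])
        current = tf.nn.dropout(tf.nn.relu(tf.nn.xw_plus_b(current, w, b)), dropout)
        current_size = hidden_size
    w_out = tf.get_variable("output_weights", [current_size, output_size])
    b_out = tf.get_variable("output_bias", [output_size])
    outputs = tf.nn.xw_plus_b(current, w_out, b_out)
    if inputs.get_shape().ndims == 3:
        outputs = tf.reshape(outputs, [input_shape[0], input_shape[1], output_size])
    return outputs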
Example no. 7
  def get_predictions_and_loss(self, word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids):
    self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"])
    self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"])

    num_sentences = tf.shape(word_emb)[0]
    max_sentence_length = tf.shape(word_emb)[1]

    text_emb_list = [word_emb]

    if self.config["char_embedding_size"] > 0:
      char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
      flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
      flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
      aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
      text_emb_list.append(aggregated_char_emb)

    text_emb = tf.concat(text_emb_list, 2)
    text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
    text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])

    text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
    text_outputs = tf.nn.dropout(text_outputs, self.dropout)

    genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]

    sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
    flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
    flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask) # [num_words]

    candidate_starts, candidate_ends = coref_ops.spans(
      sentence_indices=flattened_sentence_indices,
      max_width=self.max_mention_width)
    candidate_starts.set_shape([None])
    candidate_ends.set_shape([None])

    candidate_mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, candidate_starts, candidate_ends) # [num_candidates, emb]
    candidate_mention_scores =  self.get_mention_scores(candidate_mention_emb) # [num_mentions, 1]
    candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [num_mentions]

    k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"]))
    predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores, candidate_starts, candidate_ends, k) # ([k], [k])
    predicted_mention_indices.set_shape([None])

    mention_starts = tf.gather(candidate_starts, predicted_mention_indices) # [num_mentions]
    mention_ends = tf.gather(candidate_ends, predicted_mention_indices) # [num_mentions]
    mention_emb = tf.gather(candidate_mention_emb, predicted_mention_indices) # [num_mentions, emb]
    mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices) # [num_mentions]

    mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
    mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
    mention_speaker_ids = tf.gather(speaker_ids, mention_starts) # [num_mentions]

    max_antecedents = self.config["max_antecedents"]
    antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids, max_antecedents) # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions]
    antecedents.set_shape([None, None])
    antecedent_labels.set_shape([None, None])
    antecedents_len.set_shape([None])

    antecedent_scores = self.get_antecedent_scores(mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb) # [num_mentions, max_ant + 1]

    loss = self.softmax_loss(antecedent_scores, antecedent_labels) # [num_mentions]
    loss = tf.reduce_sum(loss) # []

    return [candidate_starts, candidate_ends, candidate_mention_scores, mention_starts, mention_ends, antecedents, antecedent_scores], loss
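flatten_emb_by_sentence, used in most of these examples, drops the padded positions and turns [num_sentences, max_sentence_length, ...] tensors into per-word tensors. A sketch, assuming the mask passed in is the flattened sequence mask built above:

import tensorflow as tf

def flatten_emb_by_sentence_sketch(emb, text_len_mask):
    # emb: [num_sentences, max_sentence_length] or [num_sentences, max_sentence_length, emb]
    # text_len_mask: boolean [num_sentences * max_sentence_length]
    num_sentences = tf.shape(emb)[0]
    max_sentence_length = tf.shape(emb)[1]
    if emb.get_shape().ndims == 2:
        flattened = tf.reshape(emb, [num_sentences * max_sentence_length])
    else:
        flattened = tf.reshape(emb, [num_sentences * max_sentence_length, emb.get_shape().as_list()[-1]])
    return tf.boolean_mask(flattened, text_len_mask)  # [num_words, ...]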
Example no. 8
    def get_predictions_and_loss(self, word_emb, char_index, text_len,
                                 speaker_ids, genre, is_training, gold_starts,
                                 gold_ends, cluster_ids):
        training_num = 0.0
        if is_training:
            training_num = 1.0

        #set the dropout rate
        self.dropout = 1 - (training_num * self.config["dropout_rate"])  # 0.2
        self.lexical_dropout = 1 - (
            training_num * self.config["lexical_dropout_rate"])  # 0.5

        # Get the number of sentences and the maximum sentence length.
        num_sentences = word_emb.shape[0]  # number of sentences to predict from
        max_sentence_length = word_emb.shape[1]
        # The dataset is padded so that every sentence has the same shape.

        text_emb_list = [
            word_emb
        ]  # list of 3D tensors, starting with the 350-dim word embeddings (GloVe + Turian)

        if self.config["char_embedding_size"] > 0:  #true is 8
            temp_tensor = tf.zeros(
                [len(self.char_dict),
                 self.config["char_embedding_size"]])  # [115,8]
            nn.init.xavier_uniform(temp_tensor)
            char_emb = tf.gather(
                temp_tensor, char_index
            )  # [num_sentences, max_sentence_length, max_word_length, emb]
            # an 8-dim embedding for every character of every word in every sentence
            # (padded to the longest word and the longest sentence)

            flattened_char_emb = char_emb.view([
                num_sentences * max_sentence_length,
                util.shape(char_emb, 2),
                util.shape(char_emb, 3)
            ])
            # [num_sentences * max_sentence_length, max_word_length, emb]

            flattened_aggregated_char_emb = util.cnn(
                flattened_char_emb, self.config["filter_widths"],
                self.config["filter_size"]
            )  # [num_sentences * max_sentence_length, emb] character level CNN
            aggregated_char_emb = flattened_aggregated_char_emb.view([
                num_sentences, max_sentence_length,
                util.shape(flattened_aggregated_char_emb, 1)
            ])  # [num_sentences, max_sentence_length, emb]
            text_emb_list.append(aggregated_char_emb)
        # text_emb_list now holds the 350-dim word embeddings plus 150-dim character features
        # (50 filters for each of the widths 3, 4 and 5, computed over 8-dim character vectors)
        text_emb = tf.cat(text_emb_list,
                          2)  #concatenated on the second dimension
        text_emb = F.dropout(text_emb, self.lexical_dropout)

        text_len_mask = self.sequence_mask(text_len,
                                           max_len=max_sentence_length)
        #tf.sequence_mask(text_len, maxlen=max_sentence_length)
        text_len_mask = text_len_mask.view(
            [num_sentences * max_sentence_length])

        text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
        text_outputs = F.dropout(text_outputs, self.dropout)

        genre_tensor = tf.zeros(
            [len(self.genres), self.config["feature_size"]])
        nn.init.xavier_uniform(genre_tensor)
        genre_emb = tf.gather(genre_tensor, genre)  # [emb]

        sentence_indices = tf.unsqueeze(tf.range(num_sentences), 1).repeat(
            [1, max_sentence_length])  # [num_sentences, max_sentence_length]
        flattened_sentence_indices = self.flatten_emb_by_sentence(
            sentence_indices, text_len_mask)  # [num_words]
        flattened_text_emb = self.flatten_emb_by_sentence(
            text_emb, text_len_mask)  # [num_words]

        candidate_starts, candidate_ends = coref_ops.coref_kernels_spans(
            sentence_indices=flattened_sentence_indices,
            max_width=self.max_mention_width)
        candidate_starts.set_shape([None])
        candidate_ends.set_shape([None])

        candidate_mention_emb = self.get_mention_emb(
            flattened_text_emb, text_outputs, candidate_starts,
            candidate_ends)  # [num_candidates, emb]
        candidate_mention_scores = self.get_mention_scores(
            candidate_mention_emb)  # [num_mentions, 1]
        candidate_mention_scores = tf.squeeze(candidate_mention_scores,
                                              1)  # [num_mentions]

        k = tf.floor((text_outputs.shape[0].float()) *
                     self.config["mention_ratio"]).int()
        predicted_mention_indices = coref_ops.coref_kernels_extract_mentions(
            candidate_mention_scores, candidate_starts, candidate_ends,
            k)  # ([k], [k])
        predicted_mention_indices.set_shape([None])

        mention_starts = tf.gather(candidate_starts,
                                   predicted_mention_indices)  # [num_mentions]
        mention_ends = tf.gather(candidate_ends,
                                 predicted_mention_indices)  # [num_mentions]
        mention_emb = tf.gather(
            candidate_mention_emb,
            predicted_mention_indices)  # [num_mentions, emb]
        mention_scores = tf.gather(candidate_mention_scores,
                                   predicted_mention_indices)  # [num_mentions]

        mention_start_emb = tf.gather(text_outputs,
                                      mention_starts)  # [num_mentions, emb]
        mention_end_emb = tf.gather(text_outputs,
                                    mention_ends)  # [num_mentions, emb]
        mention_speaker_ids = tf.gather(speaker_ids,
                                        mention_starts)  # [num_mentions]

        max_antecedents = self.config["max_antecedents"]
        antecedents, antecedent_labels, antecedents_len = coref_ops.coref_kernels_antecedents(
            mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids,
            max_antecedents
        )  # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions]
        antecedents.set_shape([None, None])
        antecedent_labels.set_shape([None, None])
        antecedents_len.set_shape([None])

        antecedent_scores = self.get_antecedent_scores(
            mention_emb, mention_scores, antecedents, antecedents_len,
            mention_starts, mention_ends, mention_speaker_ids,
            genre_emb)  # [num_mentions, max_ant + 1]

        loss = self.softmax_loss(antecedent_scores,
                                 antecedent_labels)  # [num_mentions]
        loss = tf.sum(loss)  # []

        return [
            candidate_starts, candidate_ends, candidate_mention_scores,
            mention_starts, mention_ends, antecedents, antecedent_scores
        ], loss
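Example no. 8 mixes tf.* calls with PyTorch idioms (.view, F.dropout, nn.init, tf.cat, tf.unsqueeze), which only makes sense if tf is actually an alias for torch in that file; that is an assumption about its imports, not something shown here. For comparison, a purely illustrative PyTorch sketch (hypothetical names) of the character-embedding lookup that feeds the char CNN:

import torch
import torch.nn as nn

class CharEmbedderSketch(nn.Module):
    # Illustrative only: embeds character indices and flattens them for a char-level CNN.
    def __init__(self, num_chars, char_embedding_size):
        super().__init__()
        self.char_emb = nn.Embedding(num_chars, char_embedding_size)
        nn.init.xavier_uniform_(self.char_emb.weight)

    def forward(self, char_index):
        # char_index: [num_sentences, max_sentence_length, max_word_length]
        num_sentences, max_sentence_length, max_word_length = char_index.shape
        char_emb = self.char_emb(char_index)  # [..., max_word_length, emb]
        # Flatten sentences and words so a 1-D CNN can pool over the characters of each word.
        return char_emb.view(num_sentences * max_sentence_length, max_word_length, -1)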
Example no. 9
    def get_predictions_and_loss(self, inputs):
        tokens, context_word_emb, head_word_emb, lm_emb, char_index, text_len, is_training, gold_starts, gold_ends = inputs
        self.dropout = self.get_dropout(self.config["dropout_rate"],
                                        is_training)
        self.lexical_dropout = self.get_dropout(
            self.config["lexical_dropout_rate"], is_training)
        self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"],
                                             is_training)

        num_sentences = tf.shape(context_word_emb)[0]
        max_sentence_length = tf.shape(context_word_emb)[1]

        context_emb_list = [context_word_emb]
        head_emb_list = [head_word_emb]

        if self.config["char_embedding_size"] > 0:
            char_emb = tf.gather(
                tf.get_variable(
                    "char_embeddings",
                    [len(self.char_dict), self.config["char_embedding_size"]]),
                char_index
            )  # [num_sentences, max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [
                num_sentences * max_sentence_length,
                util.shape(char_emb, 2),
                util.shape(char_emb, 3)
            ])  # [num_sentences * max_sentence_length, max_word_length, emb]
            flattened_aggregated_char_emb = util.cnn(
                flattened_char_emb, self.config["filter_widths"],
                self.config["filter_size"]
            )  # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
                num_sentences, max_sentence_length,
                util.shape(flattened_aggregated_char_emb, 1)
            ])  # [num_sentences, max_sentence_length, emb]
            context_emb_list.append(aggregated_char_emb)
            head_emb_list.append(aggregated_char_emb)

        if not self.lm_file:
            elmo_module = hub.Module("https://tfhub.dev/google/elmo/2")
            lm_embeddings = elmo_module(inputs={
                "tokens": tokens,
                "sequence_len": text_len
            },
                                        signature="tokens",
                                        as_dict=True)
            word_emb = lm_embeddings[
                "word_emb"]  # [num_sentences, max_sentence_length, 512]
            lm_emb = tf.stack([
                tf.concat([word_emb, word_emb], -1),
                lm_embeddings["lstm_outputs1"], lm_embeddings["lstm_outputs2"]
            ], -1)  # [num_sentences, max_sentence_length, 1024, 3]
        lm_emb_size = util.shape(lm_emb, 2)
        lm_num_layers = util.shape(lm_emb, 3)
        with tf.variable_scope("lm_aggregation"):
            self.lm_weights = tf.nn.softmax(
                tf.get_variable("lm_scores", [lm_num_layers],
                                initializer=tf.constant_initializer(0.0)))
            self.lm_scaling = tf.get_variable(
                "lm_scaling", [], initializer=tf.constant_initializer(1.0))
        flattened_lm_emb = tf.reshape(
            lm_emb,
            [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
        flattened_aggregated_lm_emb = tf.matmul(
            flattened_lm_emb, tf.expand_dims(
                self.lm_weights,
                1))  # [num_sentences * max_sentence_length * emb, 1]
        aggregated_lm_emb = tf.reshape(
            flattened_aggregated_lm_emb,
            [num_sentences, max_sentence_length, lm_emb_size])
        aggregated_lm_emb *= self.lm_scaling
        context_emb_list.append(aggregated_lm_emb)

        context_emb = tf.concat(context_emb_list,
                                2)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.concat(head_emb_list,
                             2)  # [num_sentences, max_sentence_length, emb]
        context_emb = tf.nn.dropout(
            context_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.nn.dropout(
            head_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

        text_len_mask = tf.sequence_mask(
            text_len,
            maxlen=max_sentence_length)  # [num_sentence, max_sentence_length]

        context_outputs = self.lstm_contextualize(
            context_emb, text_len, text_len_mask,
            self.lstm_dropout)  # [num_words, emb]
        num_words = util.shape(context_outputs, 0)

        sentence_indices = tf.tile(
            tf.expand_dims(tf.range(num_sentences), 1),
            [1, max_sentence_length])  # [num_sentences, max_sentence_length]
        flattened_sentence_indices = self.flatten_emb_by_sentence(
            sentence_indices, text_len_mask)  # [num_words]
        flattened_head_emb = self.flatten_emb_by_sentence(
            head_emb, text_len_mask)  # [num_words]

        candidate_starts = tf.tile(
            tf.expand_dims(tf.range(num_words), 1),
            [1, self.max_span_width])  # [num_words, max_span_width]
        candidate_ends = candidate_starts + tf.expand_dims(
            tf.range(self.max_span_width), 0)  # [num_words, max_span_width]
        candidate_start_sentence_indices = tf.gather(
            flattened_sentence_indices,
            candidate_starts)  # [num_words, max_span_width]
        candidate_end_sentence_indices = tf.gather(
            flattened_sentence_indices,
            tf.minimum(candidate_ends,
                       num_words - 1))  # [num_words, max_span_width]
        candidate_mask = tf.logical_and(
            candidate_ends < num_words,
            tf.equal(
                candidate_start_sentence_indices,
                candidate_end_sentence_indices))  # [num_words, max_span_width]
        flattened_candidate_mask = tf.reshape(
            candidate_mask, [-1])  # [num_words * max_span_width]
        candidate_starts = tf.boolean_mask(
            tf.reshape(candidate_starts,
                       [-1]), flattened_candidate_mask)  # [num_candidates]
        candidate_ends = tf.boolean_mask(
            tf.reshape(candidate_ends,
                       [-1]), flattened_candidate_mask)  # [num_candidates]

        candidate_labels = self.get_candidate_labels(
            candidate_starts, candidate_ends, gold_starts,
            gold_ends)  # [num_candidates]

        candidate_span_emb = self.get_span_emb(
            flattened_head_emb, context_outputs, candidate_starts,
            candidate_ends)  # [num_candidates, emb]

        candidate_mention_scores = self.get_mention_scores(
            candidate_span_emb, self.dropout)  # [k, 1]
        candidate_mention_scores = tf.squeeze(candidate_mention_scores,
                                              1)  # [k]

        loss = self.sigmoid_loss(candidate_mention_scores, candidate_labels)
        top_span_starts, top_span_ends = self.get_top_mentions(
            num_words, candidate_starts, candidate_ends,
            candidate_mention_scores)

        return [top_span_starts, top_span_ends], loss
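self.sigmoid_loss scores each candidate span independently against its gold label. A sketch, assuming a per-candidate binary cross-entropy summed over candidates (whether the original sums or averages is not visible here):

import tensorflow as tf

def sigmoid_loss_sketch(span_scores, span_labels):
    # span_scores: [num_candidates] logits; span_labels: [num_candidates] 0/1 indicators.
    loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.to_float(span_labels), logits=span_scores)  # [num_candidates]
    return tf.reduce_sum(loss)  # []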
Example no. 10
def get_embeddings(data, sentences, text_len, context_word_emb, head_word_emb,
                   char_index, lm_emb, lexical_dropout):
    """Build word-level representations.
  Args:
    data: LSGNData object.
    sentences: string tokens. [batch_size, max_len]
    text_len: Length of each sentence. [batch_size]
    context_word_embeddings:
    head_word_embedding:
    char_index: Characters
    lm_emb: Cached contextualized embeddings.
    lexical_dropout: Tensor scalar
  """
    num_sentences = tf.shape(context_word_emb)[0]
    max_sentence_length = tf.shape(context_word_emb)[1]
    context_emb_list = [context_word_emb]
    head_emb_list = [head_word_emb]

    # Processing char embeddings for batch of sentences
    if data.char_embedding_size > 0:
        char_emb = tf.gather(
            tf.get_variable("char_embeddings",
                            [len(data.char_dict), data.char_embedding_size]),
            char_index
        )  # [num_sentences, max_sentence_length, max_word_length, emb]

        flattened_char_emb = tf.reshape(char_emb, [
            num_sentences * max_sentence_length,
            util.shape(char_emb, 2),
            util.shape(char_emb, 3)
        ])  # [num_sentences * max_sentence_length, max_word_length, emb]

        flattened_aggregated_char_emb = util.cnn(
            flattened_char_emb, data.config["filter_widths"],
            data.config["filter_size"]
        )  # [num_sentences * max_sentence_length,  num-filters * len(filter-sizes)]

        aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
            num_sentences, max_sentence_length,
            util.shape(flattened_aggregated_char_emb, 1)
        ])  # [num_sentences, max_sentence_length, emb]
        context_emb_list.append(aggregated_char_emb)
        head_emb_list.append(aggregated_char_emb)

    # Processing lm embeddings
    if data.lm_file or data.lm_hub:
        # Alternatively, we could initialize module/aggregation/* from here.
        with tf.variable_scope("lm_aggregation"):
            lm_weights = tf.nn.softmax(
                tf.get_variable("lm_scores", [data.lm_layers],
                                initializer=tf.constant_initializer(0.0)))
            lm_scaling = tf.get_variable(
                "lm_scaling", [], initializer=tf.constant_initializer(1.0))

        # Load lm_embeddings from hub.
        # if data.lm_hub:
        #   lm_embeddings = data.lm_hub(
        #       inputs={"tokens": sentences, "sequence_len": text_len},
        #       signature="tokens", as_dict=True)
        #   word_emb = tf.expand_dims(lm_embeddings["word_emb"], 3)  # [B, slen, 512, 1]
        #   lm_emb = tf.concat([
        #       tf.concat([word_emb, word_emb], 2),  # [B, slen, 1024, 1]
        #       tf.expand_dims(lm_embeddings["lstm_outputs1"], 3),
        #       tf.expand_dims(lm_embeddings["lstm_outputs2"], 3)], 3)  # [B, slen, 1024, 3]

        lm_emb_size = util.shape(lm_emb, 2)  # TODO: Might not need this.
        lm_num_layers = util.shape(lm_emb, 3)
        flattened_lm_emb = tf.reshape(
            lm_emb,
            [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers
             ])  # [num_sentences * max_sentence_length * emb, layers]
        flattened_aggregated_lm_emb = tf.matmul(
            flattened_lm_emb, tf.expand_dims(
                lm_weights,
                1))  # [num_sentences * max_sentence_length * emb, 1]
        aggregated_lm_emb = tf.reshape(
            flattened_aggregated_lm_emb,
            [num_sentences, max_sentence_length, lm_emb_size])
        aggregated_lm_emb *= lm_scaling
        context_emb_list.append(aggregated_lm_emb)
    else:
        lm_weights = None
        lm_scaling = None

    # Concatenate and apply dropout.
    context_emb = tf.concat(
        context_emb_list,
        2)  # [num_sentences, max_sentence_length, emb concated]
    head_emb = tf.concat(head_emb_list,
                         2)  # [num_sentences, max_sentence_length, emb]
    context_emb = tf.nn.dropout(context_emb, lexical_dropout)
    head_emb = tf.nn.dropout(head_emb, lexical_dropout)

    return context_emb, head_emb, lm_weights, lm_scaling
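The lm_aggregation block reduces the cached LM layers to a single representation per token: a softmax over per-layer scores gives mixing weights, and a learned scalar rescales the weighted sum. A minimal NumPy sketch of that computation with illustrative shapes (the trainable variables here are plain arrays standing in for "lm_scores" and "lm_scaling"):

import numpy as np

num_sentences, max_len, lm_size, lm_layers = 2, 5, 4, 3
lm_emb = np.random.rand(num_sentences, max_len, lm_size, lm_layers)

lm_scores = np.zeros(lm_layers)                        # stand-in for the trainable "lm_scores"
lm_weights = np.exp(lm_scores) / np.exp(lm_scores).sum()
lm_scaling = 1.0                                       # stand-in for the trainable "lm_scaling"

flat = lm_emb.reshape(-1, lm_layers)                   # [num_sentences * max_len * emb, layers]
aggregated = (flat @ lm_weights[:, None]).reshape(num_sentences, max_len, lm_size)
aggregated = lm_scaling * aggregated                   # [num_sentences, max_len, emb]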
Esempio n. 11
0
    def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb, lm_emb, char_index, text_len,
                                 speaker_ids, is_training, gold_starts, gold_ends, number_features, plurality_features,
                                 candidate_positions, pronoun_positions, status_positions, name_positions, labels,
                                 candidate_mask):
        all_k = util.shape(number_features, 0)
        all_c = util.shape(number_features, 1)

        #  dropout
        self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
        self.lexical_dropout = self.get_dropout(self.config["lexical_dropout_rate"], is_training)
        self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training)

        num_sentences = tf.shape(context_word_emb)[0]  # number of sentences in the current example
        max_sentence_length = tf.shape(context_word_emb)[1]  # length of the longest sentence

        context_emb_list = [context_word_emb]
        head_emb_list = [head_word_emb]

        # character emb
        if self.config["char_embedding_size"] > 0:
            # [num_sentences, max_sentence_length, max_word_length, emb]  [?, ?, ?, 8]
            value = tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]])
            char_emb = tf.gather(value, char_index)

            # [num_sentences * max_sentence_length, max_word_length, emb] [?, ?, 8]
            flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2),
                                                       util.shape(char_emb, 3)])

            # [num_sentences * max_sentence_length, emb] [?, 150]
            flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config[
                "filter_size"])

            # [num_sentences, max_sentence_length, emb] [?, ?, 150]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length,
                                                                             util.shape(flattened_aggregated_char_emb,
                                                                                        1)])
            context_emb_list.append(aggregated_char_emb)
            head_emb_list.append(aggregated_char_emb)

        lm_emb_size = util.shape(lm_emb, 2)  # 1024
        lm_num_layers = util.shape(lm_emb, 3)  # 3
        with tf.variable_scope("lm_aggregation"):
            self.lm_weights = tf.nn.softmax(
                tf.get_variable("lm_scores", [lm_num_layers], initializer=tf.constant_initializer(0.0)))
            self.lm_scaling = tf.get_variable("lm_scaling", [], initializer=tf.constant_initializer(1.0))

        # reshape lm_emb [?, 3]
        flattened_lm_emb = tf.reshape(lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])

        # lm_emb matmul weight matrix [num_sentences * max_sentence_length * emb, 1]
        flattened_aggregated_lm_emb = tf.matmul(flattened_lm_emb, tf.expand_dims(self.lm_weights, 1))  # [?, 1]

        # lm_emb reshape [?, ?, 1024]
        aggregated_lm_emb = tf.reshape(flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size])
        aggregated_lm_emb *= self.lm_scaling

        # add elmo emb to context_emb_list
        if self.config['use_elmo']:
            context_emb_list.append(aggregated_lm_emb)

        # concatenate the context embeddings [num_sentences, max_sentence_length, emb] [?, ?, 1474]
        context_emb = tf.concat(context_emb_list, 2)

        # concatenate the head embeddings [num_sentences, max_sentence_length, emb] [?, ?, 450]
        head_emb = tf.concat(head_emb_list, 2)

        # [num_sentences, max_sentence_length, emb] [?, ?, 1474]
        context_emb = tf.nn.dropout(context_emb, self.lexical_dropout)

        # [num_sentences, max_sentence_length, emb] [?, ?, 450]
        head_emb = tf.nn.dropout(head_emb, self.lexical_dropout)

        # [num_sentence, max_sentence_length]
        text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)

        # context to lstm [num_words, emb] [?, 400]
        context_outputs = self.lstm_contextualize(context_emb, text_len, text_len_mask)
        num_words = util.shape(context_outputs, 0)

        # [num_words] [?, 450]
        flattened_head_emb = self.flatten_emb_by_sentence(head_emb, text_len_mask)

        top_span_starts = gold_starts
        top_span_ends = gold_ends

        # get span emb [?, 1270]
        top_span_emb = self.get_span_emb(flattened_head_emb, context_outputs, top_span_starts, top_span_ends)

        # [k, max_candidate, embedding] [?, ?, 1270]
        candidate_NP_embeddings = tf.gather(top_span_emb, candidate_positions)

        # [k1, embedding]
        pronoun_embedding = tf.gather(top_span_emb, pronoun_positions)

        # [k2, embedding]
        status_embedding = tf.gather(top_span_emb, status_positions)

        # [k3, embedding]
        name_embedding = tf.gather(top_span_emb, name_positions)

        # [k, max_candidate]
        candidate_starts = tf.gather(top_span_starts, candidate_positions)

        # [k, 1] [?, ?]
        pronoun_starts = tf.gather(top_span_starts, pronoun_positions)

        # [k] [?, ?]
        top_span_speaker_ids = tf.gather(speaker_ids, candidate_starts)

        # [k, 1] [?, ?]
        pronoun_speaker_id = tf.gather(speaker_ids, pronoun_starts)

        mention_offsets = tf.range(util.shape(top_span_emb, 0)) + 1
        candidate_NP_offsets = tf.gather(mention_offsets, candidate_positions)
        pronoun_offsets = tf.gather(mention_offsets, pronoun_positions)
        k = util.shape(pronoun_positions, 0)
        dummy_scores = tf.zeros([k, 1])  # [k, 1]
        for i in range(self.config["coref_depth"]):
            with tf.variable_scope("coref_layer", reuse=(i > 0)):
                coreference_scores = self.get_coreference_score(candidate_NP_embeddings, pronoun_embedding,
                                                                top_span_speaker_ids,
                                                                pronoun_speaker_id, candidate_NP_offsets,
                                                                pronoun_offsets, number_features,
                                                                plurality_features)  # [k, c]
        score_after_softmax = tf.nn.softmax(coreference_scores, 1)  # [k, c]
        if self.config['softmax_pruning']:
            threshold = tf.ones([all_k, all_c]) * self.config['softmax_threshold']  # [k, c]
        else:
            threshold = tf.zeros([all_k, all_c]) - tf.ones([all_k, all_c])
        ranking_mask = tf.to_float(tf.greater(score_after_softmax, threshold))  # [k, c]

        if self.config['apply_knowledge']:
            with tf.variable_scope("knowledge_layer"):
                # [k, c]
                knowledge_score, merged_score, attention_score, diagonal_mask, square_mask = self.get_knowledge_score(
                    candidate_NP_embeddings, number_features, plurality_features, candidate_mask * ranking_mask)

            coreference_scores = coreference_scores + knowledge_score  # [k, c]
            if self.config['knowledge_pruning']:
                knowledge_score_after_softmax = tf.nn.softmax(knowledge_score, 1)  # [k, c]
                knowledge_threshold = tf.ones([all_k, all_c]) * self.config['softmax_threshold']  # [k, c]
                knowledge_ranking_mask = tf.to_float(
                    tf.greater(knowledge_score_after_softmax, knowledge_threshold))  # [k, c]
                ranking_mask = ranking_mask * knowledge_ranking_mask

        # dummy_scores is a zero vector and needs no softmax, so the softmax is applied to
        # coreference_scores only and the result is then concatenated with dummy_scores.
        top_antecedent_scores = tf.concat([dummy_scores, coreference_scores], 1)  # [k, c + 1]

        # labels stay True only when the gold label is positive and the score is above the threshold.
        labels = tf.logical_and(labels, tf.greater(score_after_softmax, threshold))

        dummy_mask_1 = tf.ones([k, 1])
        dummy_mask_0 = tf.zeros([k, 1])

        mask_for_prediction = tf.concat([dummy_mask_0, candidate_mask], 1)
        ranking_mask_for_prediction = tf.concat([dummy_mask_0, ranking_mask], 1)

        if self.config['random_sample_training']:
            random_mask = tf.greater(tf.random_uniform([all_k, all_c]), tf.ones([all_k, all_c]) * 0.3)
            labels = tf.logical_and(labels, random_mask)
            ranking_mask = ranking_mask * tf.to_float(random_mask)

        dummy_labels = tf.logical_not(tf.reduce_any(labels, 1, keepdims=True))  # [k, 1]
        top_antecedent_labels = tf.concat([dummy_labels, labels], 1)  # [k, c + 1]
        mask_for_training = tf.concat([dummy_mask_1, candidate_mask], 1)
        ranking_mask_for_training = tf.concat([dummy_mask_1, ranking_mask], 1)
        loss = self.softmax_loss(top_antecedent_scores * mask_for_training * ranking_mask_for_training,
                                 top_antecedent_labels)
        loss = tf.reduce_sum(loss)  # []

        return [top_antecedent_scores * mask_for_prediction * ranking_mask_for_prediction,
                score_after_softmax * candidate_mask], loss
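The softmax_pruning branch above turns the normalized coreference scores into a hard ranking mask: candidates whose softmax score falls below config['softmax_threshold'] are zeroed out of both the training and prediction masks. A small NumPy sketch of that masking, with an assumed threshold value:

import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

coreference_scores = np.array([[2.0, 0.1, -1.0],
                               [0.5, 0.4, 0.3]])      # [k, c]
softmax_threshold = 0.2                               # illustrative value for config['softmax_threshold']

score_after_softmax = softmax(coreference_scores, axis=1)            # [k, c]
ranking_mask = (score_after_softmax > softmax_threshold).astype(np.float32)  # [k, c]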
Esempio n. 12
0
  def get_predictions_and_loss(self, word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids, pos_tags, ner_tags, categories, ner_ids, cat_glove, domain_labels, l):
    self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"])
    self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"])

    num_sentences = tf.shape(word_emb)[0]
    max_sentence_length = tf.shape(word_emb)[1]

    text_emb_list = [word_emb]

    if self.config["char_embedding_size"] > 0:
      char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
      flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
      flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
      aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
      text_emb_list.append(aggregated_char_emb)

    if self.config["use_pos_tag"]:
      text_emb_list.append(pos_tags)

    if self.config["use_ner_g"]:
      text_emb_list.append(ner_tags)

    if self.config["use_categories"]:
      text_emb_list.append(categories)

    if self.config["use_categories_glove"]:
      text_emb_list.append(cat_glove)

    text_emb = tf.concat(text_emb_list, 2)
    text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
    text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])

    text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
    text_outputs = tf.nn.dropout(text_outputs, self.dropout)

    genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]
    # print "------------------------------"
    # print "GENRE EMB"
    # print genre_emb.shape, genre

    sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
    flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
    flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask) # [num_words]

    candidate_starts, candidate_ends = coref_ops.spans(
      sentence_indices=flattened_sentence_indices,
      max_width=self.max_mention_width)
    candidate_starts.set_shape([None])
    candidate_ends.set_shape([None])

    # get_mention_scores calls util.ffnn
    candidate_mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, candidate_starts, candidate_ends) # [num_candidates, emb]
    candidate_mention_scores =  self.get_mention_scores(candidate_mention_emb) # [num_mentions, 1]
    candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [num_mentions]

    k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"]))
    predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores, candidate_starts, candidate_ends, k) # ([k], [k])
    predicted_mention_indices.set_shape([None])

    mention_starts = tf.gather(candidate_starts, predicted_mention_indices) # [num_mentions]
    mention_ends = tf.gather(candidate_ends, predicted_mention_indices) # [num_mentions]
    mention_emb = tf.gather(candidate_mention_emb, predicted_mention_indices) # [num_mentions, emb]
    mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices) # [num_mentions]

    mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
    mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
    mention_speaker_ids = tf.gather(speaker_ids, mention_starts) # [num_mentions]

    mention_ner_ids = tf.gather(ner_ids, mention_starts)

    max_antecedents = self.config["max_antecedents"]
    antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts, mention_ends, gold_starts, gold_ends, cluster_ids, max_antecedents) # ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions]
    antecedents.set_shape([None, None])
    antecedent_labels.set_shape([None, None])
    antecedents_len.set_shape([None])

    # get_antecedent_scores calls util.ffnn
    antecedent_scores, pair_emb = self.get_antecedent_scores(mention_emb, mention_scores, antecedents, antecedents_len, mention_starts, mention_ends, mention_speaker_ids, genre_emb, mention_ner_ids) # [num_mentions, max_ant + 1]

    # antecedent scores are floating points
    # antecedent labels are True/False
    # softmax_loss converts True/False into floating points
    loss = self.softmax_loss(antecedent_scores, antecedent_labels) # [num_mentions]
    loss = tf.reduce_sum(loss) # []

    print "---------------------------------------"
    print "PAIR EMB"
    print pair_emb.shape
    print "---------------------------------------"
    print "CANDIDATE"
    print candidate_mention_emb.shape
    print "---------------------------------------"


    # if self.config["use_dann"]:
    d_logits = util.dann(candidate_mention_emb, self.config["ffnn_size"], 
                                len(self.genres), self.dropout, l=l, name="1")
    # elif self.config["use_dann_pairwise"]:
    d_logits2 = util.dann(pair_emb, self.config["ffnn_size"], 
                                len(self.genres), self.dropout, l=l, name="2")
    print "---------------------------------------"
    print "D_LOGITS"
    print d_logits2.shape
    print "---------------------------------------"
    d_probs2 = tf.nn.softmax(d_logits2)

    d_probs = tf.nn.softmax(d_logits) # P(genre | mentions)
    neg_ll = -tf.log(tf.clip_by_value(d_probs,1e-10,1.0)) # N x 7
    N = tf.shape(neg_ll)[0]

    # argmax of d_probs are predicted domains
    pred_domains = tf.argmax(d_probs, 1)
    
    # changing 1 x 7 domain labels to N x 7
    tiled_domain_labels = tf.tile(tf.expand_dims(domain_labels, 0), [N, 1])

    # converting tiles to be comparable to pred_domains
    gold_domains = tf.argmax(tiled_domain_labels, 1)

    # multiply neg_ll with tiled_domain_labels
    pairwise_loss = tf.multiply(neg_ll, tiled_domain_labels) # N x 7
    pairwise_loss_reduced = tf.reduce_sum(pairwise_loss, 0)

    domain_loss = tf.reduce_sum(tf.divide(pairwise_loss_reduced, tf.cast(N, tf.float32)))
    domain_loss_reduce_mean = tf.reduce_sum(tf.reduce_mean(pairwise_loss, 0))

    correct_domain_predictions = tf.equal(pred_domains, gold_domains)
    domain_accuracy = tf.reduce_mean(tf.cast(correct_domain_predictions, tf.float32))

    values = [domain_accuracy, domain_loss_reduce_mean, pairwise_loss_reduced, N, neg_ll, d_logits2, d_logits]

    return [candidate_starts, candidate_ends, candidate_mention_scores, mention_starts, mention_ends, antecedents, antecedent_scores, antecedent_labels, genre], loss, domain_loss, pred_domains, values
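The domain loss above multiplies the per-mention negative log-likelihoods by the tiled one-hot domain labels and averages over mentions. A NumPy sketch of that bookkeeping, assuming domain_labels is a one-hot vector over the seven genres (util.dann, the gradient-reversal classifier that produces the logits, is not reproduced here):

import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

d_logits = np.random.randn(4, 7)          # [N mentions, 7 genres] (illustrative)
domain_labels = np.eye(7)[2]              # gold domain as a one-hot vector (assumed format)

d_probs = softmax(d_logits, axis=1)
neg_ll = -np.log(np.clip(d_probs, 1e-10, 1.0))                    # N x 7
tiled_domain_labels = np.tile(domain_labels[None, :], [d_logits.shape[0], 1])

pairwise_loss = neg_ll * tiled_domain_labels                      # N x 7
domain_loss = pairwise_loss.mean(axis=0).sum()                    # scalar
domain_accuracy = (d_probs.argmax(1) == tiled_domain_labels.argmax(1)).mean()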
Esempio n. 13
0
    def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb,
                                 lm_emb, char_index, text_len, speaker_ids,
                                 genre, is_training, gold_starts, gold_ends,
                                 cluster_ids):
        if self.config["char_embedding_size"] > 0:
            with tf.variable_scope("char_emb", reuse=tf.AUTO_REUSE):
                char_emb = tf.gather(
                    tf.get_variable("char_embeddings", [
                        len(self.char_dict), self.config["char_embedding_size"]
                    ]), char_index
                )  # [num_sentences, max_sentence_length, max_word_length, emb]
        if not self.lm_file:
            elmo_module = hub.Module(str(self.config["elmo_hub_path"]))
            lm_embeddings = elmo_module(inputs={
                "tokens": tokens,
                "sequence_len": text_len
            },
                                        signature="tokens",
                                        as_dict=True)
            word_emb = lm_embeddings[
                "word_emb"]  # [num_sentences, max_sentence_length, 512]
            lm_emb = tf.stack([
                tf.concat([word_emb, word_emb], -1),
                lm_embeddings["lstm_outputs1"], lm_embeddings["lstm_outputs2"]
            ], -1)  # [num_sentences, max_sentence_length, 1024, 3]
            self.dropout = self.get_dropout(self.config["dropout_rate"],
                                            is_training)
            self.lexical_dropout = self.get_dropout(
                self.config["lexical_dropout_rate"], is_training)
            self.lstm_dropout = self.get_dropout(
                self.config["lstm_dropout_rate"], is_training)

            num_sentences = tf.shape(context_word_emb)[0]
            max_sentence_length = tf.shape(context_word_emb)[1]

            context_emb_list = [context_word_emb]
            head_emb_list = [head_word_emb]
            if self.config["char_embedding_size"] > 0:
                flattened_char_emb = tf.reshape(
                    char_emb, [
                        num_sentences * max_sentence_length,
                        util.shape(char_emb, 2),
                        util.shape(char_emb, 3)
                    ]
                )  # [num_sentences * max_sentence_length, max_word_length, emb]
                with tf.variable_scope("char_convolution",
                                       reuse=tf.AUTO_REUSE):
                    flattened_aggregated_char_emb = util.cnn(
                        flattened_char_emb, self.config["filter_widths"],
                        self.config["filter_size"]
                    )  # [num_sentences * max_sentence_length, emb]
                aggregated_char_emb = tf.reshape(
                    flattened_aggregated_char_emb, [
                        num_sentences, max_sentence_length,
                        util.shape(flattened_aggregated_char_emb, 1)
                    ])  # [num_sentences, max_sentence_length, emb]
                context_emb_list.append(aggregated_char_emb)
                head_emb_list.append(aggregated_char_emb)

            lm_emb_size = util.shape(lm_emb, 2)
            lm_num_layers = util.shape(lm_emb, 3)
            with tf.variable_scope("lm_aggregation", reuse=tf.AUTO_REUSE):
                self.lm_weights = tf.nn.softmax(
                    tf.get_variable("lm_scores", [lm_num_layers],
                                    initializer=tf.constant_initializer(0.0)))
                self.lm_scaling = tf.get_variable(
                    "lm_scaling", [], initializer=tf.constant_initializer(1.0))
            flattened_lm_emb = tf.reshape(lm_emb, [
                num_sentences * max_sentence_length * lm_emb_size,
                lm_num_layers
            ])
            flattened_aggregated_lm_emb = tf.matmul(
                flattened_lm_emb, tf.expand_dims(
                    self.lm_weights,
                    1))  # [num_sentences * max_sentence_length * emb, 1]
            aggregated_lm_emb = tf.reshape(
                flattened_aggregated_lm_emb,
                [num_sentences, max_sentence_length, lm_emb_size])
            aggregated_lm_emb *= self.lm_scaling
            context_emb_list.append(aggregated_lm_emb)

            context_emb = tf.concat(
                context_emb_list,
                2)  # [num_sentences, max_sentence_length, emb]
            head_emb = tf.concat(
                head_emb_list, 2)  # [num_sentences, max_sentence_length, emb]
            context_emb = tf.nn.dropout(
                context_emb, self.lexical_dropout
            )  # [num_sentences, max_sentence_length, emb]
            head_emb = tf.nn.dropout(
                head_emb, self.lexical_dropout
            )  # [num_sentences, max_sentence_length, emb]

            text_len_mask = tf.sequence_mask(
                text_len, maxlen=max_sentence_length
            )  # [num_sentence, max_sentence_length]

            context_outputs = self.lstm_contextualize(
                context_emb, text_len, text_len_mask)  # [num_words, emb]
            num_words = util.shape(context_outputs, 0)

            with tf.variable_scope("genre_emb", reuse=tf.AUTO_REUSE):
                genre_emb = tf.gather(
                    tf.get_variable(
                        "genre_embeddings",
                        [len(self.genres), self.config["feature_size"]]),
                    genre)  # [emb]

            sentence_indices = tf.tile(
                tf.expand_dims(tf.range(num_sentences), 1),
                [1, max_sentence_length
                 ])  # [num_sentences, max_sentence_length]
            flattened_sentence_indices = self.flatten_emb_by_sentence(
                sentence_indices, text_len_mask)  # [num_words]
            flattened_head_emb = self.flatten_emb_by_sentence(
                head_emb, text_len_mask)  # [num_words]

            candidate_starts = tf.tile(
                tf.expand_dims(tf.range(num_words), 1),
                [1, self.max_span_width])  # [num_words, max_span_width]
            candidate_ends = candidate_starts + tf.expand_dims(
                tf.range(self.max_span_width),
                0)  # [num_words, max_span_width]
            candidate_start_sentence_indices = tf.gather(
                flattened_sentence_indices,
                candidate_starts)  # [num_words, max_span_width]
            candidate_end_sentence_indices = tf.gather(
                flattened_sentence_indices,
                tf.minimum(candidate_ends,
                           num_words - 1))  # [num_words, max_span_width]
            candidate_mask = tf.logical_and(
                candidate_ends < num_words,
                tf.equal(candidate_start_sentence_indices,
                         candidate_end_sentence_indices)
            )  # [num_words, max_span_width]
            flattened_candidate_mask = tf.reshape(
                candidate_mask, [-1])  # [num_words * max_span_width]
            candidate_starts = tf.boolean_mask(
                tf.reshape(candidate_starts, [-1]),
                flattened_candidate_mask)  # [num_candidates]
            candidate_ends = tf.boolean_mask(
                tf.reshape(candidate_ends,
                           [-1]), flattened_candidate_mask)  # [num_candidates]
            candidate_sentence_indices = tf.boolean_mask(
                tf.reshape(candidate_start_sentence_indices, [-1]),
                flattened_candidate_mask)  # [num_candidates]

            candidate_cluster_ids = self.get_candidate_labels(
                candidate_starts, candidate_ends, gold_starts, gold_ends,
                cluster_ids)  # [num_candidates]

            candidate_span_emb_orig, candidate_head_scores = self.get_span_emb(
                flattened_head_emb, context_outputs, candidate_starts,
                candidate_ends)  # [num_candidates, emb]
            self.candidate_span_emb = candidate_span_emb_orig

        def compute_from_emb(candidate_span_emb):
            with tf.variable_scope("prediction_scope", reuse=tf.AUTO_REUSE):

                candidate_mention_scores = self.get_mention_scores(
                    candidate_span_emb)  # [k, 1]
                candidate_mention_scores = tf.squeeze(candidate_mention_scores,
                                                      1)  # [k]

                k = tf.to_int32(
                    tf.floor(
                        tf.to_float(tf.shape(context_outputs)[0]) *
                        self.config["top_span_ratio"]))
                top_span_indices = coref_ops.extract_spans(
                    tf.expand_dims(candidate_mention_scores, 0),
                    tf.expand_dims(candidate_starts, 0),
                    tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0),
                    util.shape(context_outputs, 0), True)  # [1, k]
                top_span_indices.set_shape([1, None])
                top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]

                top_span_starts = tf.gather(candidate_starts,
                                            top_span_indices)  # [k]
                top_span_ends = tf.gather(candidate_ends,
                                          top_span_indices)  # [k]
                top_span_emb = tf.gather(candidate_span_emb,
                                         top_span_indices)  # [k, emb]
                top_span_cluster_ids = tf.gather(candidate_cluster_ids,
                                                 top_span_indices)  # [k]
                top_span_mention_scores = tf.gather(candidate_mention_scores,
                                                    top_span_indices)  # [k]
                top_span_sentence_indices = tf.gather(
                    candidate_sentence_indices, top_span_indices)  # [k]
                top_span_speaker_ids = tf.gather(speaker_ids,
                                                 top_span_starts)  # [k]
                self.head_scores = tf.gather(
                    candidate_head_scores,
                    top_span_indices)  # [k, max_span_width]

                c = tf.minimum(self.config["max_top_antecedents"], k)

                if self.config["coarse_to_fine"]:
                    top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(
                        top_span_emb, top_span_mention_scores, c)
                else:
                    top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(
                        top_span_emb, top_span_mention_scores, c)

                dummy_scores = tf.zeros([k, 1])  # [k, 1]
                for i in range(self.config["coref_depth"]):
                    with tf.variable_scope("coref_layer", reuse=tf.AUTO_REUSE):
                        top_antecedent_emb = tf.gather(
                            top_span_emb, top_antecedents)  # [k, c, emb]
                        top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(
                            top_span_emb, top_antecedents, top_antecedent_emb,
                            top_antecedent_offsets, top_span_speaker_ids,
                            genre_emb)  # [k, c]
                        top_antecedent_weights = tf.nn.softmax(
                            tf.concat([dummy_scores, top_antecedent_scores],
                                      1))  # [k, c + 1]
                        top_antecedent_emb = tf.concat([
                            tf.expand_dims(top_span_emb, 1), top_antecedent_emb
                        ], 1)  # [k, c + 1, emb]
                        attended_span_emb = tf.reduce_sum(
                            tf.expand_dims(top_antecedent_weights, 2) *
                            top_antecedent_emb, 1)  # [k, emb]
                        with tf.variable_scope("f"):
                            f = tf.sigmoid(
                                util.projection(
                                    tf.concat(
                                        [top_span_emb, attended_span_emb], 1),
                                    util.shape(top_span_emb, -1)))  # [k, emb]
                            top_span_emb = f * attended_span_emb + (
                                1 - f) * top_span_emb  # [k, emb]

                top_antecedent_scores = tf.concat(
                    [dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

                top_antecedent_cluster_ids = tf.gather(
                    top_span_cluster_ids, top_antecedents)  # [k, c]
                top_antecedent_cluster_ids += tf.to_int32(
                    tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
                same_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                                  tf.expand_dims(
                                                      top_span_cluster_ids,
                                                      1))  # [k, c]
                non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0,
                                                     1)  # [k, 1]
                pairwise_labels = tf.logical_and(same_cluster_indicator,
                                                 non_dummy_indicator)  # [k, c]
                dummy_labels = tf.logical_not(
                    tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
                top_antecedent_labels = tf.concat(
                    [dummy_labels, pairwise_labels], 1)  # [k, c + 1]
                loss = self.softmax_loss(top_antecedent_scores,
                                         top_antecedent_labels)  # [k]
                loss = tf.reduce_sum(loss)  # []
                return [
                    candidate_starts, candidate_ends, candidate_mention_scores,
                    top_span_starts, top_span_ends, top_antecedents,
                    top_antecedent_scores
                ], loss

        predictions, loss = compute_from_emb(candidate_span_emb_orig)
        """alpha = self.config['adv_lr']
    num_iters = self.config['adv_num_iters']
    print(tf.shape(candidate_span_emb_orig))
    delta = tf.random.uniform(shape=tf.shape(candidate_span_emb_orig), minval=-1*self.config['adv_perturb_length'], maxval=self.config['adv_perturb_length'])
    print(tf.shape(delta))
    for t in range(num_iters):
        _, adv_loss_t = compute_from_emb(candidate_span_emb_orig+delta)
        delta_grad = tf.gradients(adv_loss_t,
                                  delta,
                                  aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N)
        delta_grad = tf.stop_gradient(delta_grad)
        delta = tf.clip_by_value(delta+alpha*delta_grad[0,:,:], -1*self.config['adv_perturb_length'], self.config['adv_perturb_length'])
    print(tf.shape(delta))"""
        span_emb_grad, = tf.gradients(
            loss,
            candidate_span_emb_orig,
            aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N)
        span_emb_grad = tf.stop_gradient(span_emb_grad)

        def normalize_vec(x, length):
            return length * tf.math.l2_normalize(x)

        perturb_span_emb = normalize_vec(span_emb_grad,
                                         self.config['adv_perturb_length'])
        _, adv_loss = compute_from_emb(candidate_span_emb_orig +
                                       perturb_span_emb)
        # _, adv_loss = compute_from_emb(candidate_span_emb_orig+delta)
        return predictions, (1 - self.config['adv_alpha']
                             ) * loss + self.config['adv_alpha'] * adv_loss
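The adversarial branch above is a single-step perturbation: the gradient of the clean loss with respect to the span embeddings is treated as a constant, rescaled to a fixed L2 length, added to the embeddings, and the clean and adversarial losses are mixed with adv_alpha. A toy NumPy sketch of that recipe; the quadratic loss and the constants are illustrative assumptions, not compute_from_emb:

import numpy as np

adv_perturb_length = 1.0   # stands in for config['adv_perturb_length']
adv_alpha = 0.5            # stands in for config['adv_alpha']

def loss_fn(emb):
    # toy quadratic stand-in for the loss returned by compute_from_emb
    return 0.5 * (emb ** 2).sum()

span_emb = np.random.randn(10, 8)
grad = span_emb                          # d loss_fn / d span_emb for the toy loss above

perturb = adv_perturb_length * grad / (np.linalg.norm(grad) + 1e-12)   # fixed-length step
total_loss = (1 - adv_alpha) * loss_fn(span_emb) + adv_alpha * loss_fn(span_emb + perturb)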
Esempio n. 14
0
    def get_predictions_and_loss(
            self, tokens, context_word_emb, head_word_emb, lm_emb, char_index,
            text_len, speaker_ids, genre, is_training, gold_starts, gold_ends,
            number_features, gender_features, nsubj_features, dobj_features,
            candidate_positions, pronoun_positions, labels, candidate_mask):
        all_k = util.shape(number_features, 0)
        all_c = util.shape(number_features, 1)
        self.dropout = self.get_dropout(self.config["dropout_rate"],
                                        is_training)
        self.lexical_dropout = self.get_dropout(
            self.config["lexical_dropout_rate"], is_training)
        self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"],
                                             is_training)

        num_sentences = tf.shape(context_word_emb)[0]
        max_sentence_length = tf.shape(context_word_emb)[1]

        context_emb_list = [context_word_emb]
        head_emb_list = [head_word_emb]

        if self.config["char_embedding_size"] > 0:
            char_emb = tf.gather(
                tf.get_variable(
                    "char_embeddings",
                    [len(self.char_dict), self.config["char_embedding_size"]]),
                char_index
            )  # [num_sentences, max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [
                num_sentences * max_sentence_length,
                util.shape(char_emb, 2),
                util.shape(char_emb, 3)
            ])  # [num_sentences * max_sentence_length, max_word_length, emb]
            flattened_aggregated_char_emb = util.cnn(
                flattened_char_emb, self.config["filter_widths"],
                self.config["filter_size"]
            )  # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
                num_sentences, max_sentence_length,
                util.shape(flattened_aggregated_char_emb, 1)
            ])  # [num_sentences, max_sentence_length, emb]
            context_emb_list.append(aggregated_char_emb)
            head_emb_list.append(aggregated_char_emb)

        if not self.lm_file:
            elmo_module = hub.Module("https://tfhub.dev/google/elmo/2")
            lm_embeddings = elmo_module(inputs={
                "tokens": tokens,
                "sequence_len": text_len
            },
                                        signature="tokens",
                                        as_dict=True)
            word_emb = lm_embeddings[
                "word_emb"]  # [num_sentences, max_sentence_length, 512]
            lm_emb = tf.stack([
                tf.concat([word_emb, word_emb], -1),
                lm_embeddings["lstm_outputs1"], lm_embeddings["lstm_outputs2"]
            ], -1)  # [num_sentences, max_sentence_length, 1024, 3]
        lm_emb_size = util.shape(lm_emb, 2)
        lm_num_layers = util.shape(lm_emb, 3)
        with tf.variable_scope("lm_aggregation"):
            self.lm_weights = tf.nn.softmax(
                tf.get_variable("lm_scores", [lm_num_layers],
                                initializer=tf.constant_initializer(0.0)))
            self.lm_scaling = tf.get_variable(
                "lm_scaling", [], initializer=tf.constant_initializer(1.0))
        flattened_lm_emb = tf.reshape(
            lm_emb,
            [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
        flattened_aggregated_lm_emb = tf.matmul(
            flattened_lm_emb, tf.expand_dims(
                self.lm_weights,
                1))  # [num_sentences * max_sentence_length * emb, 1]
        aggregated_lm_emb = tf.reshape(
            flattened_aggregated_lm_emb,
            [num_sentences, max_sentence_length, lm_emb_size])
        aggregated_lm_emb *= self.lm_scaling
        if self.config['use_elmo']:
            context_emb_list.append(aggregated_lm_emb)

        context_emb = tf.concat(context_emb_list,
                                2)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.concat(head_emb_list,
                             2)  # [num_sentences, max_sentence_length, emb]
        context_emb = tf.nn.dropout(
            context_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.nn.dropout(
            head_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

        text_len_mask = tf.sequence_mask(
            text_len,
            maxlen=max_sentence_length)  # [num_sentence, max_sentence_length]

        context_outputs = self.lstm_contextualize(
            context_emb, text_len, text_len_mask)  # [num_words, emb]
        num_words = util.shape(context_outputs, 0)

        genre_emb = tf.gather(
            tf.get_variable("genre_embeddings",
                            [len(self.genres), self.config["feature_size"]]),
            genre)  # [emb]

        flattened_head_emb = self.flatten_emb_by_sentence(
            head_emb, text_len_mask)  # [num_words]

        top_span_starts = gold_starts
        top_span_ends = gold_ends
        top_span_emb = self.get_span_emb(flattened_head_emb, context_outputs,
                                         top_span_starts, top_span_ends)
        candidate_NP_embeddings = tf.gather(
            top_span_emb, candidate_positions)  # [k, max_candidate, embedding]
        candidate_starts = tf.gather(top_span_starts,
                                     candidate_positions)  # [k, max_candidate]
        pronoun_starts = tf.gather(top_span_starts,
                                   pronoun_positions)  # [k, 1]
        top_span_speaker_ids = tf.gather(speaker_ids, candidate_starts)  # [k]

        pronoun_embedding = tf.gather(top_span_emb,
                                      pronoun_positions)  # [k, embedding]
        pronoun_speaker_id = tf.gather(speaker_ids, pronoun_starts)  # [k, 1]

        mention_offsets = tf.range(util.shape(top_span_emb, 0)) + 1
        candidate_NP_offsets = tf.gather(mention_offsets, candidate_positions)
        pronoun_offsets = tf.gather(mention_offsets, pronoun_positions)
        k = util.shape(pronoun_positions, 0)
        dummy_scores = tf.zeros([k, 1])  # [k, 1]
        for i in range(self.config["coref_depth"]):
            with tf.variable_scope("coref_layer", reuse=(i > 0)):
                coreference_scores = self.get_coreference_score(
                    candidate_NP_embeddings, pronoun_embedding,
                    top_span_speaker_ids, pronoun_speaker_id, genre_emb,
                    candidate_NP_offsets, pronoun_offsets, number_features,
                    gender_features, nsubj_features, dobj_features)  # [k, c]
        score_after_softmax = tf.nn.softmax(coreference_scores, 1)  # [k, c]
        if self.config['softmax_pruning']:
            threshold = tf.ones(
                [all_k, all_c]) * self.config['softmax_threshold']  # [k, c]
        else:
            threshold = tf.zeros([all_k, all_c]) - tf.ones([all_k, all_c])
        ranking_mask = tf.to_float(tf.greater(score_after_softmax,
                                              threshold))  # [k, c]

        # number_features = tf.boolean_mask(number_features, ranking_mask)
        # gender_features = tf.boolean_mask(gender_features, ranking_mask)
        # nsubj_features = tf.boolean_mask(nsubj_features, ranking_mask)
        # dobj_features = tf.boolean_mask(dobj_features, ranking_mask)
        # coreference_scores = tf.boolean_mask(coreference_scores, ranking_mask)
        # labels = tf.boolean_mask(labels, ranking_mask)
        if self.config['apply_knowledge']:
            with tf.variable_scope("knowledge_layer"):
                knowledge_score, merged_score, attention_score, diagonal_mask, square_mask = self.get_knowledge_score(
                    candidate_NP_embeddings, number_features, gender_features,
                    nsubj_features, dobj_features,
                    candidate_mask * ranking_mask)  # [k, c]

            coreference_scores = coreference_scores + knowledge_score  # [k, c]
            if self.config['knowledge_pruning']:
                knowledge_score_after_softmax = tf.nn.softmax(
                    knowledge_score, 1)  # [k, c]
                knowledge_threshold = tf.ones([
                    all_k, all_c
                ]) * self.config['softmax_threshold']  # [k, c]
                knowledge_ranking_mask = tf.to_float(
                    tf.greater(knowledge_score_after_softmax,
                               knowledge_threshold))  # [k, c]
                ranking_mask = ranking_mask * knowledge_ranking_mask
        else:
            knowledge_score = tf.zeros([all_k, all_c])
            knowledge_score_after_softmax = tf.nn.softmax(knowledge_score,
                                                          1)  # [k, c]
            merged_score = tf.zeros([all_k, all_c])
            attention_score = tf.zeros([all_k, all_c])
            diagonal_mask = tf.zeros([all_k, all_c])
            square_mask = tf.zeros([all_k, all_c])

        top_antecedent_scores = tf.concat([dummy_scores, coreference_scores],
                                          1)  # [k, c + 1]
        labels = tf.logical_and(labels,
                                tf.greater(score_after_softmax, threshold))

        dummy_mask_1 = tf.ones([k, 1])
        dummy_mask_0 = tf.zeros([k, 1])
        mask_for_prediction = tf.concat([dummy_mask_0, candidate_mask], 1)
        ranking_mask_for_prediction = tf.concat([dummy_mask_0, ranking_mask],
                                                1)
        if self.config['random_sample_training']:
            random_mask = tf.greater(tf.random_uniform([all_k, all_c]),
                                     tf.ones([all_k, all_c]) * 0.3)
            labels = tf.logical_and(labels, random_mask)
            ranking_mask = ranking_mask * tf.to_float(random_mask)
        dummy_labels = tf.logical_not(tf.reduce_any(labels, 1,
                                                    keepdims=True))  # [k, 1]
        top_antecedent_labels = tf.concat([dummy_labels, labels],
                                          1)  # [k, c + 1]
        mask_for_training = tf.concat([dummy_mask_1, candidate_mask], 1)
        ranking_mask_for_training = tf.concat([dummy_mask_1, ranking_mask], 1)
        loss = self.softmax_loss(
            top_antecedent_scores * mask_for_training *
            ranking_mask_for_training, top_antecedent_labels)
        loss = tf.reduce_sum(loss)  # []

        return [
            top_antecedent_scores * mask_for_prediction *
            ranking_mask_for_prediction, score_after_softmax * candidate_mask
        ], loss
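softmax_loss itself is not shown in this excerpt; in this family of models it is commonly the marginal log-likelihood over all gold antecedents, with column 0 acting as the dummy "no antecedent". A NumPy sketch under that assumption:

import numpy as np

def logsumexp(x, axis):
    m = x.max(axis=axis, keepdims=True)
    return np.squeeze(m + np.log(np.exp(x - m).sum(axis=axis, keepdims=True)), axis)

top_antecedent_scores = np.array([[0.0, 1.5, -0.3],      # [k, c + 1]; column 0 is the dummy
                                  [0.0, -2.0, -1.0]])
top_antecedent_labels = np.array([[False, True, True],    # span 0 has two gold antecedents
                                  [True, False, False]])  # span 1 has none, so the dummy is gold

with np.errstate(divide='ignore'):
    gold_scores = top_antecedent_scores + np.log(top_antecedent_labels.astype(float))
loss = logsumexp(top_antecedent_scores, 1) - logsumexp(gold_scores, 1)  # [k]
total_loss = loss.sum()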
Esempio n. 15
0
  def get_predictions_and_loss(self, word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids):
    self.dropout = 1 - (tf.to_float(is_training) * self.config["dropout_rate"])
    self.lexical_dropout = 1 - (tf.to_float(is_training) * self.config["lexical_dropout_rate"])

    num_sentences = tf.shape(word_emb)[0]
    max_sentence_length = tf.shape(word_emb)[1]

    text_emb_list = [word_emb]

    if self.config["char_embedding_size"] > 0:
      char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
      flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
      flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
      aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
      text_emb_list.append(aggregated_char_emb)

    text_emb = tf.concat(text_emb_list, 2)
    text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
    text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])
    self.tm_shape = tf.shape(text_len_mask)

    text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
    text_outputs = tf.nn.dropout(text_outputs, self.dropout)
    self.txt_shape = tf.shape(text_outputs)

    genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]

    sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
    flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
    flattened_text_emb = self.flatten_emb_by_sentence(text_emb, text_len_mask) # [num_words]
    self.flattened_sentence_indices = flattened_sentence_indices
    self.emb_shape = tf.shape(flattened_text_emb)

    candidate_starts, candidate_ends = coref_ops.spans(
      sentence_indices=flattened_sentence_indices,
      max_width=self.max_mention_width)
    candidate_starts.set_shape([None])
    candidate_ends.set_shape([None])

    candidate_mention_emb = self.get_mention_emb(flattened_text_emb, text_outputs, candidate_starts, candidate_ends) # [num_candidates, emb]
    candidate_mention_scores =  self.get_mention_scores(candidate_mention_emb) # [num_mentions, 1]
    candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [num_mentions]

    k = tf.to_int32(tf.floor(tf.to_float(tf.shape(text_outputs)[0]) * self.config["mention_ratio"]))
    # predicted_mention_indices, context_starts, context_ends, context_length = coref_ops.extract_mentions(candidate_mention_scores,
    #                           candidate_starts,
    #                           candidate_ends,
    #                           k,
    #                           self.max_context_width) # ([k], [k])
    predicted_mention_indices = coref_ops.extract_mentions(candidate_mention_scores,
                              candidate_starts,
                              candidate_ends,
                              k) # ([k], [k])
    predicted_mention_indices.set_shape([None])
    # context_starts.set_shape([None])
    # context_ends.set_shape([None])
    # context_length.set_shape([None])

    mention_starts = tf.gather(candidate_starts, predicted_mention_indices) # [num_mentions]
    mention_ends = tf.gather(candidate_ends, predicted_mention_indices) # [num_mentions]
    mention_emb = tf.gather(candidate_mention_emb, predicted_mention_indices) # [num_mentions, emb]
    mention_scores = tf.gather(candidate_mention_scores, predicted_mention_indices) # [num_mentions]

    mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
    mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
    mention_speaker_ids = tf.gather(speaker_ids, mention_starts) # [num_mentions]

    context_starts = tf.maximum(mention_starts - 5, 0)
    context_ends = tf.minimum(mention_ends + 5, util.shape(text_outputs, 0) - 1)

    context_start_emb = tf.gather(text_outputs, context_starts) # [num_mentions, emb]
    context_end_emb = tf.gather(text_outputs, context_ends) # [num_mentions, emb]

    max_antecedents = self.config["max_antecedents"]
    antecedents, antecedent_labels, antecedents_len = coref_ops.antecedents(mention_starts,
                                                                              mention_ends,
                                                                              gold_starts,
                                                                              gold_ends,
                                                                              cluster_ids,
                                                                              max_antecedents)# ([num_mentions, max_ant], [num_mentions, max_ant + 1], [num_mentions]
    antecedents.set_shape([None, None])
    antecedent_labels.set_shape([None, None])
    antecedents_len.set_shape([None])

    antecedent_scores = self.get_context_antecedent_scores(mention_emb,
                                                    mention_scores,
                                                    antecedents,
                                                    antecedents_len,
                                                    mention_starts,
                                                    mention_ends,
                                                    mention_speaker_ids,
                                                    genre_emb,
                                                    context_starts,
                                                    context_ends,
                                                    text_outputs,
                                                    flattened_text_emb) # [num_mentions, max_ant + 1]

    loss = self.softmax_loss(antecedent_scores, antecedent_labels) # [num_mentions]
    loss = tf.reduce_sum(loss) # []

    return [
            candidate_starts,
            candidate_ends,
            candidate_mention_scores,
            mention_starts,
            mention_ends,
            antecedents,
            antecedent_scores
          ], loss
    def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb,
                                 lm_emb, char_index, text_len, speaker_ids,
                                 genre, is_training, gold_starts, gold_ends,
                                 cluster_ids):
        self.dropout = self.get_dropout(self.config["dropout_rate"],
                                        is_training)
        self.lexical_dropout = self.get_dropout(
            self.config["lexical_dropout_rate"], is_training)
        self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"],
                                             is_training)

        num_sentences = tf.shape(context_word_emb)[0]
        max_sentence_length = tf.shape(context_word_emb)[1]

        context_emb_list = [context_word_emb]
        head_emb_list = [head_word_emb]

        if self.config["char_embedding_size"] > 0:
            char_emb = tf.gather(
                tf.get_variable(
                    "char_embeddings",
                    [len(self.char_dict), self.config["char_embedding_size"]]),
                char_index
            )  # [num_sentences, max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [
                num_sentences * max_sentence_length,
                util.shape(char_emb, 2),
                util.shape(char_emb, 3)
            ])  # [num_sentences * max_sentence_length, max_word_length, emb]
            flattened_aggregated_char_emb = util.cnn(
                flattened_char_emb, self.config["filter_widths"],
                self.config["filter_size"]
            )  # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
                num_sentences, max_sentence_length,
                util.shape(flattened_aggregated_char_emb, 1)
            ])  # [num_sentences, max_sentence_length, emb]
            context_emb_list.append(aggregated_char_emb)
            head_emb_list.append(aggregated_char_emb)

        if not self.lm_file:
            elmo_module = hub.Module("https://tfhub.dev/google/elmo/2")
            lm_embeddings = elmo_module(inputs={
                "tokens": tokens,
                "sequence_len": text_len
            },
                                        signature="tokens",
                                        as_dict=True)
            word_emb = lm_embeddings[
                "word_emb"]  # [num_sentences, max_sentence_length, 512]
            lm_emb = tf.stack([
                tf.concat([word_emb, word_emb], -1),
                lm_embeddings["lstm_outputs1"], lm_embeddings["lstm_outputs2"]
            ], -1)  # [num_sentences, max_sentence_length, 1024, 3]
        lm_emb_size = util.shape(lm_emb, 2)
        lm_num_layers = util.shape(lm_emb, 3)
        with tf.variable_scope("lm_aggregation"):
            self.lm_weights = tf.nn.softmax(
                tf.get_variable("lm_scores", [lm_num_layers],
                                initializer=tf.constant_initializer(0.0)))
            self.lm_scaling = tf.get_variable(
                "lm_scaling", [], initializer=tf.constant_initializer(1.0))
        flattened_lm_emb = tf.reshape(
            lm_emb,
            [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
        flattened_aggregated_lm_emb = tf.matmul(
            flattened_lm_emb, tf.expand_dims(
                self.lm_weights,
                1))  # [num_sentences * max_sentence_length * emb, 1]
        aggregated_lm_emb = tf.reshape(
            flattened_aggregated_lm_emb,
            [num_sentences, max_sentence_length, lm_emb_size])
        aggregated_lm_emb *= self.lm_scaling
        context_emb_list.append(aggregated_lm_emb)

        context_emb = tf.concat(context_emb_list,
                                2)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.concat(head_emb_list,
                             2)  # [num_sentences, max_sentence_length, emb]
        context_emb = tf.nn.dropout(
            context_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.nn.dropout(
            head_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

        text_len_mask = tf.sequence_mask(
            text_len,
            maxlen=max_sentence_length)  # [num_sentence, max_sentence_length]

        context_outputs = self.lstm_contextualize(
            context_emb, text_len, text_len_mask)  # [num_words, emb]
        num_words = util.shape(context_outputs, 0)

        genre_emb = tf.gather(
            tf.get_variable("genre_embeddings",
                            [len(self.genres), self.config["feature_size"]]),
            genre)  # [emb]

        sentence_indices = tf.tile(
            tf.expand_dims(tf.range(num_sentences), 1),
            [1, max_sentence_length])  # [num_sentences, max_sentence_length]
        flattened_sentence_indices = self.flatten_emb_by_sentence(
            sentence_indices, text_len_mask)  # [num_words]
        flattened_head_emb = self.flatten_emb_by_sentence(
            head_emb, text_len_mask)  # [num_words]

        candidate_starts = tf.tile(
            tf.expand_dims(tf.range(num_words), 1),
            [1, self.max_span_width])  # [num_words, max_span_width]
        candidate_ends = candidate_starts + tf.expand_dims(
            tf.range(self.max_span_width), 0)  # [num_words, max_span_width]
        candidate_start_sentence_indices = tf.gather(
            flattened_sentence_indices,
            candidate_starts)  # [num_words, max_span_width]
        candidate_end_sentence_indices = tf.gather(
            flattened_sentence_indices,
            tf.minimum(candidate_ends,
                       num_words - 1))  # [num_words, max_span_width]
        candidate_mask = tf.logical_and(
            candidate_ends < num_words,
            tf.equal(
                candidate_start_sentence_indices,
                candidate_end_sentence_indices))  # [num_words, max_span_width]
        flattened_candidate_mask = tf.reshape(
            candidate_mask, [-1])  # [num_words * max_span_width]
        candidate_starts = tf.boolean_mask(
            tf.reshape(candidate_starts,
                       [-1]), flattened_candidate_mask)  # [num_candidates]
        candidate_ends = tf.boolean_mask(
            tf.reshape(candidate_ends,
                       [-1]), flattened_candidate_mask)  # [num_candidates]
        candidate_sentence_indices = tf.boolean_mask(
            tf.reshape(candidate_start_sentence_indices, [-1]),
            flattened_candidate_mask)  # [num_candidates]
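        # At this point candidate_starts/candidate_ends enumerate every span of
        # up to max_span_width tokens that ends inside the document and stays
        # within a single sentence; all other spans were dropped by candidate_mask.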

        candidate_cluster_ids = self.get_candidate_labels(
            candidate_starts, candidate_ends, gold_starts, gold_ends,
            cluster_ids)  # [num_candidates]

        candidate_span_emb = self.get_span_emb(
            flattened_head_emb, context_outputs, candidate_starts,
            candidate_ends)  # [num_candidates, emb]
        candidate_mention_scores = self.get_mention_scores(
            candidate_span_emb)  # [k, 1]
        candidate_mention_scores = tf.squeeze(candidate_mention_scores,
                                              1)  # [k]

        k = tf.to_int32(
            tf.floor(
                tf.to_float(tf.shape(context_outputs)[0]) *
                self.config["top_span_ratio"]))
        k = tf.minimum(500, k)
        top_span_indices = coref_ops.extract_spans(
            tf.expand_dims(candidate_mention_scores, 0),
            tf.expand_dims(candidate_starts, 0),
            tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0),
            util.shape(context_outputs, 0), True)  # [1, k]
        top_span_indices.set_shape([1, None])
        top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]

        top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
        top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
        top_span_emb = tf.gather(candidate_span_emb,
                                 top_span_indices)  # [k, emb]
        top_span_cluster_ids = tf.gather(candidate_cluster_ids,
                                         top_span_indices)  # [k]
        top_span_mention_scores = tf.gather(candidate_mention_scores,
                                            top_span_indices)  # [k]
        top_span_sentence_indices = tf.gather(candidate_sentence_indices,
                                              top_span_indices)  # [k]
        top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts)  # [k]

        orig_dim = 1270
        with tf.name_scope("transformer"):
            with tf.name_scope("embedding_transformer"):
                W = tf.Variable(tf.random_normal((orig_dim, self.new_dim)))
                b = tf.Variable(tf.random_normal((self.new_dim, )))
                temp_input = tf.nn.relu(tf.matmul(top_span_emb, W) + b)

            padding_mask_partial = tf.cast(tf.sequence_mask(
                tf.shape(temp_input)[0], maxlen=self.seq_length),
                                           dtype=tf.float32)
            multiples = [self.seq_length]
            padding_mask_partial2 = tf.tile(padding_mask_partial, multiples)
            enc_padding_mask = tf.reshape(padding_mask_partial2,
                                          [multiples[0], -1])
            # enc_padding_mask  = tf.matrix_set_diag(enc_padding_mask, tf.zeros(enc_padding_mask.shape[0:-1]), name=None)

            dec_padding_mask = tf.reshape(padding_mask_partial2,
                                          [multiples[0], -1])
            dec_padding_mask = tf.matrix_set_diag(
                dec_padding_mask,
                tf.zeros(dec_padding_mask.shape[0:-1]),
                name=None)

            look_ahead_mask = create_look_ahead_mask(
                tf.shape(padding_mask_partial)[0])
            combined_mask = tf.minimum(enc_padding_mask, look_ahead_mask)

            s = tf.shape(temp_input)
            paddings = [[0, self.seq_length - s[0]], [0, 0]]
            padded_embd = tf.pad(temp_input, paddings, "CONSTANT")

            predictions, _ = self.sample_transformer(padded_embd, padded_embd,
                                                     True, enc_padding_mask,
                                                     combined_mask,
                                                     dec_padding_mask)

            top_span_emb = tf.concat([predictions[:k], top_span_emb], 1)
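            # The block above projects the [k, orig_dim] span embeddings down to
            # new_dim, pads them to a fixed seq_length, runs them through
            # self.sample_transformer, and concatenates the first k transformer
            # outputs back onto the original span embeddings.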

        c = tf.minimum(self.config["max_top_antecedents"], k)

        if self.config["coarse_to_fine"]:
            top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(
                top_span_emb, top_span_mention_scores, c)
        else:
            top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(
                top_span_emb, top_span_mention_scores, c)

        dummy_scores = tf.zeros([k, 1])  # [k, 1]
        for i in range(self.config["coref_depth"]):
            with tf.variable_scope("coref_layer", reuse=(i > 0)):
                top_antecedent_emb = tf.gather(top_span_emb,
                                               top_antecedents)  # [k, c, emb]
                top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(
                    top_span_emb, top_antecedents, top_antecedent_emb,
                    top_antecedent_offsets, top_span_speaker_ids,
                    genre_emb)  # [k, c]
                top_antecedent_weights = tf.nn.softmax(
                    tf.concat([dummy_scores, top_antecedent_scores],
                              1))  # [k, c + 1]
                top_antecedent_emb = tf.concat(
                    [tf.expand_dims(top_span_emb, 1), top_antecedent_emb],
                    1)  # [k, c + 1, emb]
                attended_span_emb = tf.reduce_sum(
                    tf.expand_dims(top_antecedent_weights, 2) *
                    top_antecedent_emb, 1)  # [k, emb]
                with tf.variable_scope("f"):
                    f = tf.sigmoid(
                        util.projection(
                            tf.concat([top_span_emb, attended_span_emb], 1),
                            util.shape(top_span_emb, -1)))  # [k, emb]
                    top_span_emb = f * attended_span_emb + (
                        1 - f) * top_span_emb  # [k, emb]
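        # Each pass of the coref_depth loop attends over the candidate
        # antecedents and gates the attended vector into top_span_emb, refining
        # the span representations before the final antecedent scores are kept.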

        top_antecedent_scores = tf.concat(
            [dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

        top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids,
                                               top_antecedents)  # [k, c]
        top_antecedent_cluster_ids += tf.to_int32(
            tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
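        # log of the boolean mask is 0 for valid antecedents and -inf for masked
        # slots, so after the cast their cluster ids become invalid (negative)
        # and can never match a gold cluster id below.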
        same_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                          tf.expand_dims(
                                              top_span_cluster_ids,
                                              1))  # [k, c]
        non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0,
                                             1)  # [k, 1]
        pairwise_labels = tf.logical_and(same_cluster_indicator,
                                         non_dummy_indicator)  # [k, c]
        dummy_labels = tf.logical_not(
            tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
        top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels],
                                          1)  # [k, c + 1]
        loss = self.softmax_loss(top_antecedent_scores,
                                 top_antecedent_labels)  # [k]
        loss = tf.reduce_sum(loss)  # []

        return [
            candidate_starts, candidate_ends, candidate_mention_scores,
            top_span_starts, top_span_ends, top_antecedents,
            top_antecedent_scores
        ], loss
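The "lm_aggregation" block above reduces the stacked ELMo layers to a single vector per token. Below is a minimal numpy sketch (toy shapes and illustrative names, not part of the model) showing that the flatten/matmul/reshape path is equivalent to a softmax-weighted sum over the layer axis, rescaled by the learned scalar:

    import numpy as np

    num_sentences, max_sentence_length, emb_size, num_layers = 2, 5, 4, 3
    lm_emb = np.random.rand(num_sentences, max_sentence_length, emb_size, num_layers)
    lm_scores = np.zeros(num_layers)                          # the "lm_scores" variable
    lm_weights = np.exp(lm_scores) / np.exp(lm_scores).sum()  # softmax over layers
    lm_scaling = 1.0                                          # the "lm_scaling" variable

    # Flatten/matmul/reshape path, mirroring the graph above.
    flat = lm_emb.reshape(-1, num_layers)                     # [S * T * E, L]
    aggregated = (flat @ lm_weights[:, None]).reshape(
        num_sentences, max_sentence_length, emb_size) * lm_scaling

    # Direct weighted sum over the layer axis gives the same tensor.
    direct = (lm_emb * lm_weights).sum(-1) * lm_scaling
    assert np.allclose(aggregated, direct)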
Example n. 17
0
    def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb,
                                 lm_emb, char_index, text_len, speaker_ids,
                                 genre, is_training, gold_starts, gold_ends,
                                 cluster_ids, inject_starts, inject_ends):

        self.dropout = self.get_dropout(self.config["dropout_rate"],
                                        is_training)
        self.lexical_dropout = self.get_dropout(
            self.config["lexical_dropout_rate"], is_training)
        self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"],
                                             is_training)

        num_sentences = tf.shape(context_word_emb)[0]
        max_sentence_length = tf.shape(context_word_emb)[1]

        context_emb_list = [context_word_emb]
        head_emb_list = [head_word_emb]

        if self.config["char_embedding_size"] > 0:
            char_emb = tf.gather(
                tf.get_variable(
                    "char_embeddings",
                    [len(self.char_dict), self.config["char_embedding_size"]]),
                char_index
            )  # [num_sentences, max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [
                num_sentences * max_sentence_length,
                util.shape(char_emb, 2),
                util.shape(char_emb, 3)
            ])  # [num_sentences * max_sentence_length, max_word_length, emb]
            flattened_aggregated_char_emb = util.cnn(
                flattened_char_emb, self.config["filter_widths"],
                self.config["filter_size"]
            )  # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
                num_sentences, max_sentence_length,
                util.shape(flattened_aggregated_char_emb, 1)
            ])  # [num_sentences, max_sentence_length, emb]
            context_emb_list.append(aggregated_char_emb)
            head_emb_list.append(aggregated_char_emb)

        if not self.lm_file:
            elmo_module = hub.Module("https://tfhub.dev/google/elmo/2")
            lm_embeddings = elmo_module(inputs={
                "tokens": tokens,
                "sequence_len": text_len
            },
                                        signature="tokens",
                                        as_dict=True)
            word_emb = lm_embeddings[
                "word_emb"]  # [num_sentences, max_sentence_length, 512]
            lm_emb = tf.stack([
                tf.concat([word_emb, word_emb], -1),
                lm_embeddings["lstm_outputs1"], lm_embeddings["lstm_outputs2"]
            ], -1)  # [num_sentences, max_sentence_length, 1024, 3]
        lm_emb_size = util.shape(lm_emb, 2)
        lm_num_layers = util.shape(lm_emb, 3)
        with tf.variable_scope("lm_aggregation"):
            self.lm_weights = tf.nn.softmax(
                tf.get_variable("lm_scores", [lm_num_layers],
                                initializer=tf.constant_initializer(0.0)))
            self.lm_scaling = tf.get_variable(
                "lm_scaling", [], initializer=tf.constant_initializer(1.0))
        flattened_lm_emb = tf.reshape(
            lm_emb,
            [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
        flattened_aggregated_lm_emb = tf.matmul(
            flattened_lm_emb, tf.expand_dims(
                self.lm_weights,
                1))  # [num_sentences * max_sentence_length * emb, 1]
        aggregated_lm_emb = tf.reshape(
            flattened_aggregated_lm_emb,
            [num_sentences, max_sentence_length, lm_emb_size])
        aggregated_lm_emb *= self.lm_scaling
        context_emb_list.append(aggregated_lm_emb)

        context_emb = tf.concat(context_emb_list,
                                2)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.concat(head_emb_list,
                             2)  # [num_sentences, max_sentence_length, emb]
        context_emb = tf.nn.dropout(
            context_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.nn.dropout(
            head_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

        text_len_mask = tf.sequence_mask(
            text_len,
            maxlen=max_sentence_length)  # [num_sentence, max_sentence_length]

        context_outputs = self.lstm_contextualize(
            context_emb, text_len, text_len_mask)  # [num_words, emb]
        num_words = util.shape(context_outputs, 0)

        genre_emb = tf.gather(
            tf.get_variable("genre_embeddings",
                            [len(self.genres), self.config["feature_size"]]),
            genre)  # [emb]

        sentence_indices = tf.tile(
            tf.expand_dims(tf.range(num_sentences), 1),
            [1, max_sentence_length])  # [num_sentences, max_sentence_length]
        flattened_sentence_indices = self.flatten_emb_by_sentence(
            sentence_indices, text_len_mask)  # [num_words]
        flattened_head_emb = self.flatten_emb_by_sentence(
            head_emb, text_len_mask)  # [num_words]

        if self._use_injected_mentions(is_training):
            candidate_starts = tf.transpose(tf.expand_dims(inject_starts, 1))
            candidate_ends = tf.transpose(tf.expand_dims(inject_ends, 1))
        else:
            candidate_starts = tf.tile(
                tf.expand_dims(tf.range(num_words), 1),
                [1, self.max_span_width])  # [num_words, max_span_width]
            candidate_ends = candidate_starts + tf.expand_dims(
                tf.range(self.max_span_width),
                0)  # [num_words, max_span_width]

        candidate_start_sentence_indices = tf.gather(
            flattened_sentence_indices,
            candidate_starts)  # [num_words, max_span_width]
        candidate_end_sentence_indices = tf.gather(
            flattened_sentence_indices,
            tf.minimum(candidate_ends,
                       num_words - 1))  # [num_words, max_span_width]
        candidate_mask = tf.logical_and(
            candidate_ends < num_words,
            tf.equal(
                candidate_start_sentence_indices,
                candidate_end_sentence_indices))  # [num_words, max_span_width]
        flattened_candidate_mask = tf.reshape(
            candidate_mask, [-1])  # [num_words * max_span_width]
        candidate_starts = tf.boolean_mask(
            tf.reshape(candidate_starts,
                       [-1]), flattened_candidate_mask)  # [num_candidates]
        candidate_ends = tf.boolean_mask(
            tf.reshape(candidate_ends,
                       [-1]), flattened_candidate_mask)  # [num_candidates]
        candidate_sentence_indices = tf.boolean_mask(
            tf.reshape(candidate_start_sentence_indices, [-1]),
            flattened_candidate_mask)  # [num_candidates]

        candidate_cluster_ids = self.get_candidate_labels(
            candidate_starts, candidate_ends, gold_starts, gold_ends,
            cluster_ids)  # [num_candidates]

        candidate_span_emb = self.get_span_emb(
            flattened_head_emb, context_outputs, candidate_starts,
            candidate_ends)  # [num_candidates, emb]
        candidate_mention_scores = self.get_mention_scores(
            candidate_span_emb)  # [k, 1]
        candidate_mention_scores = tf.squeeze(candidate_mention_scores,
                                              1)  # [k]

        if self._use_injected_mentions(is_training):
            k = tf.shape(candidate_starts)[0]
            top_span_indices = tf.expand_dims(tf.range(k), 0)
        else:
            k = tf.to_int32(
                tf.floor(
                    tf.to_float(tf.shape(context_outputs)[0]) *
                    self.config["top_span_ratio"]))
            top_span_indices = coref_ops.extract_spans(
                tf.expand_dims(candidate_mention_scores, 0),
                tf.expand_dims(candidate_starts, 0),
                tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0),
                util.shape(context_outputs, 0), True)  # [1, k]

        top_span_indices.set_shape([1, None])
        top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]

        top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
        top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
        top_span_emb = tf.gather(candidate_span_emb,
                                 top_span_indices)  # [k, emb]
        top_span_cluster_ids = tf.gather(candidate_cluster_ids,
                                         top_span_indices)  # [k]
        top_span_mention_scores = tf.gather(candidate_mention_scores,
                                            top_span_indices)  # [k]
        top_span_sentence_indices = tf.gather(candidate_sentence_indices,
                                              top_span_indices)  # [k]
        top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts)  # [k]

        c = tf.minimum(self.config["max_top_antecedents"], k)

        if self.config["coarse_to_fine"]:
            top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(
                top_span_emb, top_span_mention_scores, c)
        else:
            top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(
                top_span_emb, top_span_mention_scores, c)

        dummy_scores = tf.zeros([k, 1])  # [k, 1]
        for i in range(self.config["coref_depth"]):
            with tf.variable_scope("coref_layer", reuse=(i > 0)):
                top_antecedent_emb = tf.gather(top_span_emb,
                                               top_antecedents)  # [k, c, emb]
                top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(
                    top_span_emb, top_antecedents, top_antecedent_emb,
                    top_antecedent_offsets, top_span_speaker_ids,
                    genre_emb)  # [k, c]
                top_antecedent_weights = tf.nn.softmax(
                    tf.concat([dummy_scores, top_antecedent_scores],
                              1))  # [k, c + 1]
                top_antecedent_emb = tf.concat(
                    [tf.expand_dims(top_span_emb, 1), top_antecedent_emb],
                    1)  # [k, c + 1, emb]
                attended_span_emb = tf.reduce_sum(
                    tf.expand_dims(top_antecedent_weights, 2) *
                    top_antecedent_emb, 1)  # [k, emb]
                with tf.variable_scope("f"):
                    f = tf.sigmoid(
                        util.projection(
                            tf.concat([top_span_emb, attended_span_emb], 1),
                            util.shape(top_span_emb, -1)))  # [k, emb]
                    top_span_emb = f * attended_span_emb + (
                        1 - f) * top_span_emb  # [k, emb]

        top_antecedent_scores = tf.concat(
            [dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

        top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids,
                                               top_antecedents)  # [k, c]
        top_antecedent_cluster_ids += tf.to_int32(
            tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
        same_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                          tf.expand_dims(
                                              top_span_cluster_ids,
                                              1))  # [k, c]
        non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0,
                                             1)  # [k, 1]
        pairwise_labels = tf.logical_and(same_cluster_indicator,
                                         non_dummy_indicator)  # [k, c]
        dummy_labels = tf.logical_not(
            tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
        top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels],
                                          1)  # [k, c + 1]
        loss = self.softmax_loss(top_antecedent_scores,
                                 top_antecedent_labels)  # [k]
        loss = tf.reduce_sum(loss)  # []

        return [
            candidate_starts, candidate_ends, candidate_mention_scores,
            top_span_starts, top_span_ends, top_antecedents,
            top_antecedent_scores
        ], loss
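When injected mentions are not used, the enumeration above pairs every token index with up to max_span_width end offsets and masks out spans that run past the last word or cross a sentence boundary. A small numpy sketch of that logic with toy sizes (illustrative only, not the model's tensors):

    import numpy as np

    num_words, max_span_width = 6, 3
    sentence_indices = np.array([0, 0, 0, 1, 1, 1])  # toy sentence id per word

    starts = np.tile(np.arange(num_words)[:, None], [1, max_span_width])
    ends = starts + np.arange(max_span_width)[None, :]
    start_sent = sentence_indices[starts]
    end_sent = sentence_indices[np.minimum(ends, num_words - 1)]
    mask = (ends < num_words) & (start_sent == end_sent)

    candidate_starts = starts.reshape(-1)[mask.reshape(-1)]
    candidate_ends = ends.reshape(-1)[mask.reshape(-1)]
    # Each surviving (start, end) pair is a candidate mention: (2, 2) is kept,
    # while (2, 3) is dropped because it crosses the sentence boundary.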
Example n. 18
0
    def get_predictions_and_loss(self, context_word_emb, head_word_emb, lm_emb,
                                 char_index, text_len, is_training,
                                 gold_starts, gold_ends, antecedents,
                                 antecedents_len, anaphors, gold_labels):
        self.dropout = self.get_dropout(self.config["dropout_rate"],
                                        is_training)
        self.lexical_dropout = self.get_dropout(
            self.config["lexical_dropout_rate"], is_training)
        self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"],
                                             is_training)

        num_sentences = tf.shape(context_word_emb)[0]
        max_sentence_length = tf.shape(context_word_emb)[1]

        context_emb_list = [context_word_emb]
        head_emb_list = [head_word_emb]

        if self.config["char_embedding_size"] > 0:
            char_emb = tf.gather(
                tf.get_variable(
                    "char_embeddings",
                    [len(self.char_dict), self.config["char_embedding_size"]]),
                char_index
            )  # [num_sentences, max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [
                num_sentences * max_sentence_length,
                util.shape(char_emb, 2),
                util.shape(char_emb, 3)
            ])  # [num_sentences * max_sentence_length, max_word_length, emb]
            flattened_aggregated_char_emb = util.cnn(
                flattened_char_emb, self.config["filter_widths"],
                self.config["filter_size"]
            )  # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
                num_sentences, max_sentence_length,
                util.shape(flattened_aggregated_char_emb, 1)
            ])  # [num_sentences, max_sentence_length, emb]
            context_emb_list.append(aggregated_char_emb)
            head_emb_list.append(aggregated_char_emb)

        if self.lm_file:
            lm_emb_size = util.shape(lm_emb, 2)
            lm_num_layers = util.shape(lm_emb, 3)
            with tf.variable_scope("lm_aggregation"):
                self.lm_weights = tf.nn.softmax(
                    tf.get_variable("lm_scores", [lm_num_layers],
                                    initializer=tf.constant_initializer(0.0)))
                self.lm_scaling = tf.get_variable(
                    "lm_scaling", [], initializer=tf.constant_initializer(1.0))
            flattened_lm_emb = tf.reshape(lm_emb, [
                num_sentences * max_sentence_length * lm_emb_size,
                lm_num_layers
            ])
            flattened_aggregated_lm_emb = tf.matmul(
                flattened_lm_emb, tf.expand_dims(
                    self.lm_weights,
                    1))  # [num_sentences * max_sentence_length * emb, 1]
            aggregated_lm_emb = tf.reshape(
                flattened_aggregated_lm_emb,
                [num_sentences, max_sentence_length, lm_emb_size])
            aggregated_lm_emb *= self.lm_scaling
            context_emb_list.append(aggregated_lm_emb)

        context_emb = tf.concat(context_emb_list,
                                2)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.concat(head_emb_list,
                             2)  # [num_sentences, max_sentence_length, emb]
        context_emb = tf.nn.dropout(
            context_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.nn.dropout(
            head_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

        text_len_mask = tf.sequence_mask(
            text_len,
            maxlen=max_sentence_length)  # [num_sentence, max_sentence_length]

        context_outputs = self.lstm_contextualize(
            context_emb, text_len, text_len_mask)  # [num_words, emb]

        flattened_head_emb = self.flatten_emb_by_sentence(
            head_emb, text_len_mask)  # [num_words]

        mention_emb = self.get_span_emb(flattened_head_emb, context_outputs,
                                        gold_starts, gold_ends)

        k = util.shape(antecedents, 0)
        c = util.shape(antecedents, 1)

        anaphor_emb = tf.gather(mention_emb, anaphors)  #[k,emb]
        antecedent_emb = tf.gather(mention_emb, antecedents)  # [k, c, emb]

        pair_emb = self.get_pair_embeddings(anaphor_emb, antecedents,
                                            antecedent_emb)  # [k, c,emb]

        with tf.variable_scope("plural_scores"):
            plural_scores = util.ffnn(pair_emb, self.config["ffnn_depth"],
                                      self.config["ffnn_size"], 1,
                                      self.dropout)  # [k, c, 1]
            plural_scores = tf.squeeze(plural_scores, 2)  # [k, c]
            plural_scores = plural_scores + tf.log(
                tf.sequence_mask(antecedents_len, c, tf.float32))
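            # Adding log(sequence_mask) leaves real antecedents unchanged and
            # sends padded antecedent slots to -inf, so the softmax ignores them.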

        dummy_scores = tf.zeros([k, 1])
        dummy_labels = tf.logical_not(
            tf.reduce_any(gold_labels, 1, keepdims=True))  # [k, 1]

        plural_scores_with_dummy = tf.concat([dummy_scores, plural_scores], 1)
        gold_labels_with_dummy = tf.concat([dummy_labels, gold_labels], 1)

        loss = self.softmax_loss(plural_scores_with_dummy,
                                 gold_labels_with_dummy)
        loss = tf.reduce_sum(loss)

        return [plural_scores, antecedents_len, anaphors], loss
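softmax_loss itself is not shown in this example. In the e2e-coref models these snippets build on, it is typically the marginal log-likelihood over all gold antecedents, with the dummy column standing in for "no antecedent"; a numpy sketch under that assumption (not a verbatim copy of the method):

    import numpy as np

    def marginal_softmax_loss(scores, labels):
        # scores: [k, c + 1] including the dummy column; labels: boolean [k, c + 1].
        # Assumed formulation: log-normalizer over all scores minus the
        # log-sum-exp restricted to the gold (True-labelled) columns.
        gold_scores = np.where(labels, scores, -np.inf)
        log_norm = np.log(np.exp(scores).sum(-1))
        marginalized_gold = np.log(np.exp(gold_scores).sum(-1))
        return log_norm - marginalized_gold  # [k]

    scores = np.array([[0.0, 2.0, -1.0]])        # dummy + two antecedent scores
    labels = np.array([[False, True, False]])    # the second antecedent is gold
    print(marginal_softmax_loss(scores, labels).sum())  # summed scalar loss, as above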