def add_global_voting_op(self):
        with tf.variable_scope("global_voting"):
            self.final_scores_before_global = - (1 - self.loss_mask) * 50 + self.final_scores
            gmask = tf.to_float(((self.final_scores_before_global - self.args.global_thr) >= 0))  # [b,s,30]

            masked_entity_emb = self.pure_entity_embeddings * tf.expand_dims(gmask, axis=3)  # [b,s,30,300] * [b,s,30,1]
            batch_size = tf.shape(masked_entity_emb)[0]
            all_voters_emb = tf.reduce_sum(tf.reshape(masked_entity_emb, [batch_size, -1, 300]), axis=1,
                                           keep_dims=True)  # [b, 1, 300]
            span_voters_emb = tf.reduce_sum(masked_entity_emb, axis=2)  # [batch, num_of_spans, 300]
            valid_voters_emb = all_voters_emb - span_voters_emb
            # [b, 1, 300] - [batch, spans, 300] = [batch, spans, 300]  (broadcasting)
            # [300] - [batch, spans, 300]  = [batch, spans, 300]  (broadcasting)
            valid_voters_emb = tf.nn.l2_normalize(valid_voters_emb, dim=2)

            self.global_voting_scores = tf.squeeze(tf.matmul(self.pure_entity_embeddings, tf.expand_dims(valid_voters_emb, axis=3)), axis=3)
            # [b,s,30,300] matmul [b,s,300,1] --> [b,s,30,1]-->[b,s,30]

            scalar_predictors = tf.stack([self.final_scores_before_global, self.global_voting_scores], 3)
            #print("scalar_predictors = ", scalar_predictors)   #[b, s, 30, 2]
            with tf.variable_scope("psi_and_global_ffnn"):
                if self.args.global_score_ffnn[0] == 0:
                    self.final_scores = util.projection(scalar_predictors, 1)
                else:
                    hidden_layers, hidden_size = self.args.global_score_ffnn[0], self.args.global_score_ffnn[1]
                    self.final_scores = util.ffnn(scalar_predictors, hidden_layers, hidden_size, 1,
                                                  self.dropout if self.args.ffnn_dropout else None)
                # [batch, num_mentions, 30, 1] squeeze to [batch, num_mentions, 30]
                self.final_scores = tf.squeeze(self.final_scores, axis=3)
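
A minimal NumPy sketch of the voter aggregation used above (toy shapes; the names below are illustrative and not part of the model): sum the masked candidate embeddings over all spans once, then subtract each span's own voters, so every span is scored against the voters of all other spans without an explicit loop.

import numpy as np

b, s, c, d = 2, 4, 30, 300                      # batch, spans, candidates per span, emb dim
entity_emb = np.random.randn(b, s, c, d)        # stands in for pure_entity_embeddings
gmask = (np.random.rand(b, s, c) > 0.5).astype(float)         # which candidates vote

masked = entity_emb * gmask[..., None]                         # [b, s, c, d]
all_voters = masked.reshape(b, -1, d).sum(1, keepdims=True)    # [b, 1, d] sum over every voter
span_voters = masked.sum(2)                                    # [b, s, d] each span's own voters
valid_voters = all_voters - span_voters                        # [b, s, d] (broadcast subtraction)
valid_voters /= np.linalg.norm(valid_voters, axis=2, keepdims=True) + 1e-8
global_voting_scores = np.einsum('bscd,bsd->bsc', entity_emb, valid_voters)   # [b, s, c]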
Example n. 2
    def add_cand_ent_scores_op(self):
        self.log_cand_entities_scores = tf.log(
            tf.minimum(1.0,
                       tf.maximum(self.args.zero, self.cand_entities_scores)))
        stack_values = []
        if self.args.nn_components.find("lstm") != -1:
            stack_values.append(self.similarity_scores)
        if self.args.nn_components.find("pem") != -1:
            stack_values.append(self.log_cand_entities_scores)
        if self.args.nn_components.find("attention") != -1:
            stack_values.append(self.attention_scores)

        scalar_predictors = tf.stack(stack_values, 3)
        #print("scalar_predictors = ", scalar_predictors)   # [batch, num_mentions, 30, 3]

        with tf.variable_scope("similarity_and_prior_ffnn"):
            if self.args.final_score_ffnn[0] == 0:
                self.final_scores = util.projection(
                    scalar_predictors, 1)  # [batch, num_mentions, 30, 1]
            else:
                hidden_layers, hidden_size = self.args.final_score_ffnn[
                    0], self.args.final_score_ffnn[1]
                self.final_scores = util.ffnn(
                    scalar_predictors, hidden_layers, hidden_size, 1,
                    self.dropout if self.args.ffnn_dropout else None)
            self.final_scores = tf.squeeze(
                self.final_scores,
                axis=3)  # squeeze to [batch, num_mentions, 30]
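
A minimal NumPy sketch of what this op computes (toy data and made-up weights; `w`, `bias` and `zero` only stand in for self.args.zero and for the parameters that util.projection / util.ffnn would learn): clamp the p(e|m) prior into (zero, 1] before taking its log, stack the per-candidate scalar scores, and reduce them to one score with a learned linear combination.

import numpy as np

zero = 1e-4                                            # placeholder for self.args.zero
prior = np.random.rand(2, 5, 30)                       # toy cand_entities_scores, i.e. p(e|m)
log_prior = np.log(np.clip(prior, zero, 1.0))          # tf.log(tf.minimum(1., tf.maximum(zero, .)))
lstm_scores = np.random.randn(2, 5, 30)                # toy similarity_scores
attention_scores = np.random.randn(2, 5, 30)           # toy attention_scores
stacked = np.stack([lstm_scores, log_prior, attention_scores], axis=3)   # [2, 5, 30, 3]
w, bias = np.random.randn(3, 1), np.random.randn(1)
final_scores = (stacked @ w + bias).squeeze(3)         # [2, 5, 30], like util.projection(..., 1)
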
    def add_cand_ent_scores_op(self):
        # now stack the cand_entity_scores, possibly together with some extra features, and pass them through a simple ffnn
        stack_values = []
        if self.args.nn_components.find("lstm") != -1:
            stack_values.append(self.similarity_scores)
        if self.args.nn_components.find("pem") != -1:
            # TODO rename to pem_scores
            self.log_cand_entities_scores = self.custom_pem(
                self.args.pem_without_log, self.args.pem_buckets_boundaries)
            stack_values.append(self.log_cand_entities_scores)
        if self.args.nn_components.find("attention") != -1:
            stack_values.append(self.attention_scores)
        if len(stack_values) == 1:
            # since only one scalar omit the final ffnn
            self.final_scores = stack_values[0]
            return
        scalar_predictors = tf.stack(stack_values, 3)
        #print("scalar_predictors = ", scalar_predictors)   #[batch, num_mentions, 30, 2]

        with tf.variable_scope("similarity_and_prior_ffnn"):
            if self.args.final_score_ffnn[0] == 0:
                self.final_scores = util.projection(scalar_predictors,
                                                    1,
                                                    model=self)
            else:
                hidden_layers, hidden_size = self.args.final_score_ffnn[
                    0], self.args.final_score_ffnn[1]
                self.final_scores = util.ffnn(
                    scalar_predictors,
                    hidden_layers,
                    hidden_size,
                    1,
                    self.dropout if self.args.ffnn_dropout else None,
                    model=self)
            self.final_scores = tf.squeeze(self.final_scores, axis=3)
 def add_lstm_score_op(self):
     #print("cand_entities = ", self.cand_entities)
     with tf.variable_scope("span_emb_ffnn"):
         # [batch, num_mentions, 300]
         # the span embedding can have different size depending on the chosen hyperparameters. We project it to 300
         # dims to match the entity embeddings  (formula 4)
         if self.args.span_emb_ffnn[0] == 0:
             span_emb_projected = util.projection(self.span_emb,
                                                  300,
                                                  model=self)
         else:
             hidden_layers, hidden_size = self.args.span_emb_ffnn[
                 0], self.args.span_emb_ffnn[1]
             span_emb_projected = util.ffnn(
                 self.span_emb,
                 hidden_layers,
                 hidden_size,
                 300,
                 self.dropout if self.args.ffnn_dropout else None,
                 model=self)
             #print("span_emb_projected = ", span_emb_projected)
     # formula (6) <x^m, y_j>   computation. this is the lstm score
     scores = tf.matmul(tf.expand_dims(span_emb_projected, 2),
                        self.entity_embeddings,
                        transpose_b=True)
     #print("scores = ", scores)
     self.similarity_scores = tf.squeeze(
         scores, axis=2)  # squeeze [batch, num_mentions, 1, 30] to [batch, num_mentions, 30]
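
A quick NumPy check of formula (6), <x^m, y_j>, as computed above with a batched matmul (toy shapes; the names are illustrative only):

import numpy as np

span = np.random.randn(2, 5, 300)          # projected span embeddings x^m
ents = np.random.randn(2, 5, 30, 300)      # candidate entity embeddings y_j
scores = np.squeeze(np.expand_dims(span, 2) @ np.transpose(ents, (0, 1, 3, 2)), axis=2)
assert scores.shape == (2, 5, 30)
assert np.allclose(scores[0, 0, 0], span[0, 0] @ ents[0, 0, 0])   # entry (b, m, j) is a dot product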
Example n. 5
 def add_lstm_score_op(self):
     #print("cand_entities = ", self.cand_entities)
     with tf.variable_scope("span_emb_ffnn"):
         # [batch, num_mentions, 300]
         if self.args.span_emb_ffnn[0] == 0:
             span_emb_projected = util.projection(self.span_emb,
                                                  300,
                                                  model=self)
         else:
             hidden_layers, hidden_size = self.args.span_emb_ffnn[
                 0], self.args.span_emb_ffnn[1]
             span_emb_projected = util.ffnn(
                 self.span_emb,
                 hidden_layers,
                 hidden_size,
                 300,
                 self.dropout if self.args.ffnn_dropout else None,
                 model=self)
             #print("span_emb_projected = ", span_emb_projected)
     scores = tf.matmul(tf.expand_dims(span_emb_projected, 2),
                        self.entity_embeddings,
                        transpose_b=True)
     #print("scores = ", scores)
     self.similarity_scores = tf.squeeze(
         scores, axis=2)  # squeeze [batch, num_mentions, 1, 30] to [batch, num_mentions, 30]
Example n. 6
    def add_lstm_score_op(self):
        with tf.variable_scope("span_emb_ffnn"):
            # [batch, num_mentions, 300]
            # the span embedding can have different size depending on the chosen hyperparameters. We project it to 300
            # dims to match the entity embeddings  (formula 4)
            if self.args.span_emb_ffnn[0] == 0:
                span_emb_projected = util.projection(self.span_emb, 300)
            else:
                hidden_layers, hidden_size = self.args.span_emb_ffnn[0], self.args.span_emb_ffnn[1]
                span_emb_projected = util.ffnn(self.span_emb, hidden_layers, hidden_size, 300,
                                               self.dropout if self.args.ffnn_dropout else None)
                #print("span_emb_projected = ", span_emb_projected)
        # formula (6) <x^m, y_j>   computation. this is the lstm score
        # variant: soft-attend over the candidate entity embeddings (weights given by their
        # similarity to the span) and score the span against the attended entity vector.
        coeffs = tf.nn.softmax(tf.matmul(tf.expand_dims(span_emb_projected, 2),
                                         self.entity_embeddings, transpose_b=True))  # [batch, num_mentions, 1, 30]
        coeffs = tf.transpose(coeffs, [0, 1, 3, 2])  # [batch, num_mentions, 30, 1]
        ent_emb = tf.reduce_sum(coeffs * self.entity_embeddings, -2, keep_dims=True)  # [batch, num_mentions, 1, 300]

        scores = tf.matmul(tf.expand_dims(span_emb_projected, 2), ent_emb, transpose_b=True)
        #print("scores = ", scores)
        self.similarity_scores = tf.squeeze(scores, axis=2)  # [batch, num_mentions, 1]
Example n. 7
    def add_local_attention_op(self):
        attention_entity_emb = self.pure_entity_embeddings if self.args.attention_ent_vecs_no_regularization else self.entity_embeddings
        with tf.variable_scope("attention"):
            K = self.args.attention_K
            left_mask = self._sequence_mask_v13(
                self.begin_span,
                K)  # number of words on the left (left window)
            right_mask = self._sequence_mask_v13(
                tf.expand_dims(self.words_len, 1) - self.end_span, K)
            # number of words on the right. we never take more than K words, even if more exist.
            ctxt_mask = tf.concat([left_mask, right_mask],
                                  2)  # [batch, num_of_spans, 2*K]
            ctxt_mask = tf.log(
                tf.minimum(1.0, tf.maximum(self.args.zero, ctxt_mask)))
            #  T,   T,  T, F,  F | T,  T,  F,  F,  F
            # -1, -2, -3, -4, -5  +0, +1, +2, +3, +4

            leftctxt_indices = tf.maximum(
                0,
                tf.range(-1, -K - 1, -1) +
                tf.expand_dims(self.begin_span, 2))  # [batch, num_mentions, K]
            rightctxt_indices = tf.minimum(
                tf.shape(self.pure_word_embeddings)[1] - 1,
                tf.range(K) +
                tf.expand_dims(self.end_span, 2))  # [batch, num_mentions, K]
            ctxt_indices = tf.concat([leftctxt_indices, rightctxt_indices],
                                     2)  # [batch, num_mentions, 2*K]

            batch_index = tf.tile(
                tf.expand_dims(
                    tf.expand_dims(tf.range(tf.shape(ctxt_indices)[0]), 1), 2),
                [1, tf.shape(ctxt_indices)[1],
                 tf.shape(ctxt_indices)[2]])
            ctxt_indices = tf.stack([batch_index, ctxt_indices], 3)
            # [batch, num_of_spans, 2*K, 2]   the last dimension is row,col for gather_nd
            # [batch, num_of_spans, 2*K, [row,col]]

            att_x_w = self.pure_word_embeddings  # [batch, max_sent_len, 300]
            if self.args.attention_on_lstm and self.args.nn_components.find(
                    "lstm") != -1:
                # ablation: here the attention is computed on the output of the lstm layer x_k instead of using the
                # pure word2vec vectors. (word2vec used in paper).
                att_x_w = util.projection(
                    self.context_emb, 300
                )  # if tf.shape(self.context_emb)[-1] != 300 else self.context_emb

            ctxt_word_emb = tf.gather_nd(att_x_w, ctxt_indices)
            # [batch, num_of_spans, 2K, emb_size]    emb_size = 300  only pure word emb used  (word2vec)
            #  and not after we add char emb and dropout

            # in this implementation we don't use the diagonal A and B arrays that are mentioned in
            # Ganea and Hoffmann 2017 (only used in the ablations)
            temp = attention_entity_emb
            if self.args.attention_use_AB:
                att_A = tf.get_variable("att_A", [300])
                temp = att_A * attention_entity_emb
            scores = tf.matmul(ctxt_word_emb, temp, transpose_b=True)
            scores = tf.reduce_max(
                scores, reduction_indices=[-1]
            )  # max score of each word for each span acquired from any cand entity
            scores = scores + ctxt_mask  # some words are not valid out of window so we assign to them very low score
            top_values, _ = tf.nn.top_k(scores, self.args.attention_R)
            # [batch, num_of_spans, R]
            R_value = top_values[:, :, -1]  # [batch, num_of_spans]
            R_value = tf.maximum(self.args.zero, R_value)
            # so that we avoid keeping words whose max score over all the entities is <= 0
            # (a score of 0 can also come from padding candidate entities)

            threshold = tf.tile(tf.expand_dims(R_value, 2), [1, 1, 2 * K])
            # [batch, num_of_spans, 2K]
            scores = scores - tf.to_float(
                ((scores - threshold) <
                 0)) * 50  # 50 where score<thr, 0 where score>=thr
            scores = tf.nn.softmax(scores, dim=2)  # [batch, num_of_spans, 2K]
            scores = tf.expand_dims(scores, 3)  # [batch, num_of_spans, 2K, 1]
            #    [batch, num_of_spans, 2K, 1]  *  [batch, num_of_spans, 2K, emb_size]
            # =  [batch, num_of_spans, 2K, emb_size]
            x_c = tf.reduce_sum(scores * ctxt_word_emb,
                                2)  # =  [batch, num_of_spans, emb_size]
            if self.args.attention_use_AB:
                att_B = tf.get_variable("att_B", [300])
                x_c = att_B * x_c
            x_c = tf.expand_dims(x_c, 3)  # [batch, num_of_spans, emb_size, 1]
            # [batch, num_of_spans, 30, emb_size=300]  mul with  [batch, num_of_spans, emb_size, 1]
            x_e__x_c = tf.matmul(attention_entity_emb,
                                 x_c)  # [batch, num_of_spans, 30, 1]
            x_e__x_c = tf.squeeze(x_e__x_c,
                                  axis=3)  # [batch, num_of_spans, 30]
            self.attention_scores = x_e__x_c
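
The hard attention above can be summarised with a small NumPy sketch (toy shapes; the A/B matrices, the context-window masking and the clamping of the R-th value are omitted): score every context word by its best candidate entity, keep only the R best words per span, softmax over them, and build the context vector x_c as their weighted sum.

import numpy as np

b, s, w, c, d, R = 2, 3, 10, 30, 300, 4       # batch, spans, 2K context words, candidates, dim, R
ctxt = np.random.randn(b, s, w, d)            # context word embeddings per span
ents = np.random.randn(b, s, c, d)            # candidate entity embeddings per span

scores = (ctxt @ np.transpose(ents, (0, 1, 3, 2))).max(-1)     # [b, s, w] best entity per word
thr = np.sort(scores, axis=-1)[..., -R][..., None]             # R-th largest score per span
scores = scores - 50.0 * (scores < thr)                        # push non-top-R words far down
scores -= scores.max(-1, keepdims=True)                        # numerically stable softmax
probs = np.exp(scores) / np.exp(scores).sum(-1, keepdims=True) # attention over context words
x_c = (probs[..., None] * ctxt).sum(2)                         # [b, s, d] context vector
attention_scores = np.einsum('bscd,bsd->bsc', ents, x_c)       # [b, s, c] = <x_e, x_c>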
Example n. 8
    def add_span_emb_op(self):
        mention_emb_list = []
        # span embedding based on boundaries (start, end) and the head mechanism, computed either on top of the
        # contextual bilstm output or on top of the original word+char embeddings; this flag determines which.
        # The paper reports results with the contextual lstm emb as it achieves a better score. Used for ablation studies.
        boundaries_input_vecs = self.word_embeddings if self.args.span_boundaries_from_wordemb else self.context_emb

        # the span embedding is modeled by g^m = [x_q; x_r; \hat(x)^m]  (formula (2) of paper)
        # "boundaries" mean use x_q and x_r.   "head" means use also the head mechanism \hat(x)^m (formula (3))
        if self.args.span_emb.find("boundaries") != -1:
            # shape (batch, num_of_cand_spans, emb)
            mention_start_emb = tf.gather_nd(
                boundaries_input_vecs,
                tf.stack([
                    tf.tile(
                        tf.expand_dims(tf.range(tf.shape(self.begin_span)[0]),
                                       1), [1, tf.shape(self.begin_span)[1]]),
                    self.begin_span
                ], 2))  # extracts the x_q embedding for each candidate span
            # the tile command creates a 2d tensor with the batch information. first lines contains only zeros, second
            # line ones etc...  because the begin_span tensor has the information which word inside this sentence is the
            # beginning of the candidate span.
            mention_emb_list.append(mention_start_emb)

            mention_end_emb = tf.gather_nd(
                boundaries_input_vecs,
                tf.stack([
                    tf.tile(
                        tf.expand_dims(tf.range(tf.shape(self.begin_span)[0]),
                                       1), [1, tf.shape(self.begin_span)[1]]),
                    tf.nn.relu(self.end_span - 1)
                ], 2))  # -1 because the end of the span is exclusive:  [start, end)
            # relu so that 0 doesn't become -1; no valid candidate span has end index 0 anyway, since [0,0) is empty
            mention_emb_list.append(mention_end_emb)
            #print("mention_start_emb = ", mention_start_emb)
            #print("mention_end_emb = ", mention_end_emb)

        mention_width = self.end_span - self.begin_span  # [batch, num_mentions]     the width of each candidate span

        if self.args.span_emb.find(
                "head") != -1:  # here the attention is computed
            # here the \hat(x)^m is computed (formula (2) and (3))
            self.max_mention_width = tf.minimum(
                self.args.max_mention_width,
                tf.reduce_max(self.end_span - self.begin_span))
            mention_indices = tf.range(self.max_mention_width) + \
                              tf.expand_dims(self.begin_span, 2)  # [batch, num_mentions, max_mention_width]
            mention_indices = tf.minimum(
                tf.shape(self.word_embeddings)[1] - 1,
                mention_indices)  # [batch, num_mentions, max_mention_width]
            #print("mention_indices = ", mention_indices)
            batch_index = tf.tile(
                tf.expand_dims(
                    tf.expand_dims(tf.range(tf.shape(mention_indices)[0]), 1),
                    2), [
                        1,
                        tf.shape(mention_indices)[1],
                        tf.shape(mention_indices)[2]
                    ])
            mention_indices = tf.stack([batch_index, mention_indices], 3)
            # [batch, num_mentions, max_mention_width, [row,col] ]    4d tensor

            # for the boundaries we had the option to take them either from x_k (output of bilstm) or from v_k
            # the head is derived either from the same option as boundaries or from the v_k.
            head_input_vecs = boundaries_input_vecs if self.args.model_heads_from_bilstm else self.word_embeddings
            mention_text_emb = tf.gather_nd(head_input_vecs, mention_indices)
            # [batch, num_mentions, max_mention_width, 500 ]    4d tensor
            #print("mention_text_emb = ", mention_text_emb)

            with tf.variable_scope("head_scores"):
                # from [batch, max_sent_len, 300] to [batch, max_sent_len, 1]
                self.head_scores = util.projection(boundaries_input_vecs, 1)
            # [batch, num_mentions, max_mention_width, 1]
            mention_head_scores = tf.gather_nd(self.head_scores,
                                               mention_indices)
            # print("mention_head_scores = ", mention_head_scores)

            # depending on the tensorflow version we perform the same masking with different operations: since the
            # candidate spans have different lengths, we mask out the invalid indices created above (mention_indices).
            temp_mask = self._sequence_mask_v13(mention_width,
                                                self.max_mention_width)
            # still code for masking invalid indices for the head computation
            mention_mask = tf.expand_dims(
                temp_mask, 3)  # [batch, num_mentions, max_mention_width, 1]
            mention_mask = tf.minimum(1.0,
                                      tf.maximum(self.args.zero,
                                                 mention_mask))  # 1e-3
            # formula (3) computation
            mention_attention = tf.nn.softmax(
                mention_head_scores + tf.log(mention_mask),
                dim=2)  # [batch, num_mentions, max_mention_width, 1]
            mention_head_emb = tf.reduce_sum(mention_attention *
                                             mention_text_emb,
                                             2)  # [batch, num_mentions, emb]
            #print("mention_head_emb = ", mention_head_emb)
            mention_emb_list.append(mention_head_emb)

        self.span_emb = tf.concat(
            mention_emb_list, 2
        )  # [batch, num_mentions, emb i.e. 1700] formula (2) concatenation
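
The gather_nd index construction above can be illustrated with plain NumPy advanced indexing (toy sizes; the arrays are illustrative only): every word index has to be paired with its batch row so that a (row, col) pair addresses a [batch, max_sent_len, emb] tensor.

import numpy as np

word_emb = np.arange(2 * 6 * 3).reshape(2, 6, 3)        # [batch=2, max_sent_len=6, emb=3]
begin_span = np.array([[0, 2, 4], [1, 3, 5]])           # [batch, num_mentions] start of each span
batch_index = np.tile(np.arange(2)[:, None], (1, 3))    # [[0, 0, 0], [1, 1, 1]]
indices = np.stack([batch_index, begin_span], axis=2)   # [batch, num_mentions, (row, col)]
mention_start_emb = word_emb[indices[..., 0], indices[..., 1]]   # what tf.gather_nd returns
assert mention_start_emb.shape == (2, 3, 3)
assert (mention_start_emb[1, 0] == word_emb[1, 1]).all()         # span starting at word 1 of row 1
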
    def add_global_voting_op(self):
        with tf.variable_scope("global_voting"):
            self.final_scores_before_global = -(
                1 - self.loss_mask) * 50 + self.final_scores
            if self.args.global_topkfromallspans:
                batch_num = tf.shape(self.final_scores)[0]
                spans_num = tf.shape(self.final_scores)[1]  # num of spans
                cand_ent_num = tf.shape(self.final_scores)[2]  # 30
                new_size = spans_num * cand_ent_num
                temp = tf.diag(tf.ones([spans_num]))
                temp = tf.tile(tf.expand_dims(temp, axis=2),
                               [1, 1, cand_ent_num])
                temp = tf.reshape(temp, [spans_num, new_size])
                mask = tf.reshape(
                    tf.tile(tf.expand_dims(temp, axis=1),
                            [1, cand_ent_num, 1]), [new_size, new_size])
                mask = 1 - mask

                all_entities = tf.reshape(self.pure_entity_embeddings,
                                          [batch_num, new_size, 300])
                all_scores = tf.matmul(
                    all_entities, all_entities,
                    transpose_b=True)  # [batch, new_size, new_size]
                filtered_scores = all_scores * mask

                top_values, _ = tf.nn.top_k(filtered_scores,
                                            self.args.global_topkfromallspans)
                # [batch, new_size, K]
                if self.args.global_topkfromallspans_onlypositive:
                    top_values = tf.maximum(top_values, self.args.zero)
                    # so that neighbours with a score below this value do not contribute, even if they are among the top K
                self.global_voting_scores = tf.reduce_mean(
                    top_values, axis=2)  # [batch, new_size]
                self.global_voting_scores = tf.reshape(
                    self.global_voting_scores,
                    [batch_num, spans_num, cand_ent_num])
            else:
                if self.args.global_gmask_unambigious:
                    gmask = self._sequence_mask_v13(
                        tf.equal(self.cand_entities_len, 1),
                        tf.shape(self.final_scores)[2])
                elif not self.args.global_topk:
                    gmask = tf.to_float(
                        ((self.final_scores_before_global -
                          self.args.global_thr) >= 0))  # [b,s,30]
                else:
                    top_values, _ = tf.nn.top_k(
                        self.final_scores_before_global, self.args.global_topk)
                    # [batch, num_of_spans, K]
                    K_value = top_values[:, :, -1]  # [batch, num_of_spans]
                    #if hasattr(self.args, 'global_topkthr'):
                    if self.args.global_topkthr:
                        K_value = tf.maximum(self.args.global_topkthr, K_value)
                        # so to avoid keeping cand ents whose score is below this value even if they
                        # are among the top K for this span.
                    threshold = tf.tile(
                        tf.expand_dims(K_value, 2),
                        [1, 1, tf.shape(self.final_scores)[-1]])
                    # [batch, num_of_spans, 30]
                    gmask = tf.to_float(
                        ((self.final_scores_before_global - threshold) >= 0))
                gmask = gmask * self.loss_mask
                if self.args.global_mask_scale_each_mention_voters_to_one:
                    temp = tf.reduce_sum(
                        gmask, axis=2,
                        keep_dims=True)  # [batch, num_of_spans, 1]
                    temp = tf.where(tf.less(temp, 1e-4), temp,
                                    1. / (temp + 1e-4))
                    gmask = gmask * temp
                elif self.args.global_gmask_based_on_localscore:
                    gmask = gmask * tf.nn.softmax(
                        self.final_scores_before_global)
                self.gmask = gmask

                masked_entity_emb = self.pure_entity_embeddings * tf.expand_dims(
                    gmask, axis=3)  # [b,s,30,300] * [b,s,30,1]
                batch_size = tf.shape(masked_entity_emb)[0]
                all_voters_emb = tf.reduce_sum(tf.reshape(
                    masked_entity_emb, [batch_size, -1, 300]),
                                               axis=1,
                                               keep_dims=True)  # [b, 1, 300]
                span_voters_emb = tf.reduce_sum(
                    masked_entity_emb, axis=2)  # [batch, num_of_spans, 300]
                valid_voters_emb = all_voters_emb - span_voters_emb
                # [b, 1, 300] - [batch, spans, 300] = [batch, spans, 300]  (broadcasting)
                # [300] - [batch, spans, 300]  = [batch, spans, 300]  (broadcasting)
                if self.args.global_norm_or_mean == "norm":
                    valid_voters_emb = tf.nn.l2_normalize(valid_voters_emb,
                                                          dim=2)
                else:
                    all_voters_num = tf.reduce_sum(gmask)  # scalar
                    span_voters_num = tf.reduce_sum(gmask,
                                                    axis=2)  # [batch, spans]
                    valid_voters_emb = valid_voters_emb / tf.expand_dims(
                        all_voters_num - span_voters_num, axis=2)

                self.global_voting_scores = tf.squeeze(tf.matmul(
                    self.pure_entity_embeddings,
                    tf.expand_dims(valid_voters_emb, axis=3)),
                                                       axis=3)
                # [b,s,30,300] matmul [b,s,300,1] --> [b,s,30,1]-->[b,s,30]

            stack_values = []
            if self.args.stage2_nn_components.find("pem") != -1:
                # TODO rename to pem_scores
                self.gpem_scores = self.custom_pem(
                    self.args.gpem_without_log,
                    self.args.gpem_buckets_boundaries)
                stack_values.append(self.gpem_scores)
            if self.args.stage2_nn_components.find("local") != -1:
                stack_values.append(self.final_scores_before_global)
            stack_values.append(self.global_voting_scores)
            scalar_predictors = tf.stack(stack_values, 3)
            #print("scalar_predictors = ", scalar_predictors)   #[b, s, 30, 2]
            with tf.variable_scope("psi_and_global_ffnn"):
                if self.args.global_score_ffnn[0] == 0:
                    self.final_scores = util.projection(scalar_predictors,
                                                        1,
                                                        model=self)
                else:
                    hidden_layers, hidden_size = self.args.global_score_ffnn[
                        0], self.args.global_score_ffnn[1]
                    self.final_scores = util.ffnn(
                        scalar_predictors,
                        hidden_layers,
                        hidden_size,
                        1,
                        self.dropout if self.args.ffnn_dropout else None,
                        model=self)
                # [batch, num_mentions, 30, 1] squeeze to [batch, num_mentions, 30]
                self.final_scores = tf.squeeze(self.final_scores, axis=3)
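
The global_topkfromallspans branch above builds a (spans*cands) x (spans*cands) mask, with cands = 30 in the model, that zeroes out votes between candidates of the same span. A toy NumPy equivalent of that tile/reshape construction (illustrative sizes only):

import numpy as np

spans, cands = 3, 2
block = np.kron(np.eye(spans), np.ones((cands, cands)))   # 1 inside each span's own block
mask = 1.0 - block                                        # [spans*cands, spans*cands]
assert mask[0, 1] == 0.0    # candidates 0 and 1 belong to span 0 -> cannot vote for each other
assert mask[0, 2] == 1.0    # candidate 2 belongs to span 1 -> allowed to vote for candidate 0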
Example n. 10
    def add_local_attention_op(self):
        # shape=(b, num_of_spans, 30, 300)
        attention_entity_emb = self.pure_entity_embeddings if self.args.attention_ent_vecs_no_regularization else self.entity_embeddings
        with tf.variable_scope("attention"):
            K = self.args.attention_K
            left_mask = self._sequence_mask_v13(self.begin_span, K)
            #left_mask = tf.sequence_mask(self.begin_span, K, dtype=tf.float32)
            right_mask = self._sequence_mask_v13(
                tf.expand_dims(self.words_len, 1) - self.end_span, K)
            #right_mask = tf.sequence_mask(tf.expand_dims(self.words_len, 1) - self.end_span,  # number of words on the right
            #                              K, dtype=tf.float32)  # but maximum i get K not more
            ctxt_mask = tf.concat([left_mask, right_mask],
                                  2)  # [batch, num_of_spans, 2*K]
            ctxt_mask = tf.log(
                tf.minimum(1.0, tf.maximum(self.args.zero, ctxt_mask)))
            #  T,   T,  T, F,  F | T,  T,  F,  F,  F
            # -1, -2, -3, -4, -5  +0, +1, +2, +3, +4

            leftctxt_indices = tf.maximum(0, tf.range(-1, -K - 1, -1) + \
                                          tf.expand_dims(self.begin_span, 2))  # [batch, num_mentions, K]
            rightctxt_indices = tf.minimum(tf.shape(self.pure_word_embeddings)[1] - 1, tf.range(K) + \
                                           tf.expand_dims(self.end_span, 2))  # [batch, num_mentions, K]
            ctxt_indices = tf.concat([leftctxt_indices, rightctxt_indices],
                                     2)  # [batch, num_mentions, 2*K]

            batch_index = tf.tile(
                tf.expand_dims(
                    tf.expand_dims(tf.range(tf.shape(ctxt_indices)[0]), 1), 2),
                [1, tf.shape(ctxt_indices)[1],
                 tf.shape(ctxt_indices)[2]])
            ctxt_indices = tf.stack([batch_index, ctxt_indices], 3)
            # [batch, num_of_spans, 2*K, 2]   the last dimension is row,col for gather_nd
            # [batch, num_of_spans, 2*K, [row,col]]

            att_x_w = self.pure_word_embeddings  # [batch, max_sent_len, 300]
            if self.args.attention_on_lstm and self.args.nn_components.find(
                    "lstm") != -1:
                # [batch, max_sent_len, 600]  hidden_size_of_lstm*2 so project it to 300
                # TODO maybe omit the projection if it is already in 300 dimensions? but the projection allows a transformation...
                att_x_w = util.projection(
                    self.context_emb, 300, model=self
                )  # if tf.shape(self.context_emb)[-1] != 300 else self.context_emb

            ctxt_word_emb = tf.gather_nd(att_x_w, ctxt_indices)
            # [batch, num_of_spans, 2K, emb_size]    emb_size = 300  only pure word emb used
            #  and not after we add char emb and dropout

            x_c_voters = attention_entity_emb
            # restrict the number of entities that participate in the forming of the x_c context vector
            if self.args.attention_retricted_num_of_entities:
                x_c_voters = tf.slice(attention_entity_emb, [0, 0, 0, 0], [
                    -1, -1, self.args.attention_retricted_num_of_entities, -1
                ])
            if self.args.attention_use_AB:
                att_A = tf.get_variable("att_A", [300])
                x_c_voters = att_A * x_c_voters
            # [b, num_of_spans, 2*K, 300] mul [b, num_of_spans, 30, 300]      instead of 30 it can be the reduced number of entities
            scores = tf.matmul(ctxt_word_emb, x_c_voters,
                               transpose_b=True)  # [b, spans, 2K, 30]
            scores = tf.reduce_max(
                scores, reduction_indices=[-1]
            )  # max score of each word for each span acquired from any cand entity

            scores = scores + ctxt_mask  # some words are not valid out of window
            # so we assign to them very low score

            top_values, _ = tf.nn.top_k(scores, self.args.attention_R)
            # [batch, num_of_spans, R]
            #R_value = tf.reduce_min(top_values, axis=-1)
            R_value = top_values[:, :, -1]  # [batch, num_of_spans]
            # same as above command but probably faster
            R_value = tf.maximum(self.args.zero, R_value)
            # so that we avoid keeping words whose max score over all the entities is <= 0
            # (a score of 0 can also come from padding candidate entities)

            threshold = tf.tile(tf.expand_dims(R_value, 2), [1, 1, 2 * K])
            # [batch, num_of_spans, 2K]
            scores = scores - tf.to_float(
                ((scores - threshold) <
                 0)) * 50  # 50 where score<thr, 0 where score>=thr
            scores = tf.nn.softmax(scores, dim=2)  # [batch, num_of_spans, 2K]
            scores = tf.expand_dims(scores, 3)  # [batch, num_of_spans, 2K, 1]
            #    [batch, num_of_spans, 2K, 1]  *  [batch, num_of_spans, 2K, emb_size]
            # =  [batch, num_of_spans, 2K, emb_size]
            x_c = tf.reduce_sum(scores * ctxt_word_emb,
                                2)  # =  [batch, num_of_spans, emb_size]
            if self.args.attention_use_AB:
                att_B = tf.get_variable("att_B", [300])
                x_c = att_B * x_c
            x_c = tf.expand_dims(x_c,
                                 3)  #   [batch, num_of_spans, emb_size, 1]
            # [batch, num_of_spans, 30, emb_size=300]  mul with  [batch, num_of_spans, emb_size, 1]
            x_e__x_c = tf.matmul(attention_entity_emb,
                                 x_c)  # [batch, num_of_spans, 30, 1]
            x_e__x_c = tf.squeeze(x_e__x_c,
                                  axis=3)  # [batch, num_of_spans, 30]
            self.attention_scores = x_e__x_c
Example n. 11
    def add_span_emb_op(self):
        mention_emb_list = []
        # span embedding based on boundaries (start, end) and the head mechanism, computed either on top of the
        # contextual bilstm output or on top of the original word+char embeddings; this flag determines which.
        # By default the head is computed on top of the word+char emb instead of the bilstm output.
        boundaries_input_vecs = self.word_embeddings if self.args.span_boundaries_from_wordemb else self.context_emb

        if self.args.span_emb.find("boundaries") != -1:
            mention_start_emb = tf.gather_nd(
                boundaries_input_vecs,
                tf.stack([
                    tf.tile(
                        tf.expand_dims(tf.range(tf.shape(self.begin_span)[0]),
                                       1), [1, tf.shape(self.begin_span)[1]]),
                    self.begin_span
                ], 2))
            #mention_start_emb = tf.gather(text_outputs, mention_starts)  # [num_mentions, emb]
            mention_emb_list.append(mention_start_emb)

            mention_end_emb = tf.gather_nd(
                boundaries_input_vecs,
                tf.stack([
                    tf.tile(
                        tf.expand_dims(tf.range(tf.shape(self.begin_span)[0]),
                                       1), [1, tf.shape(self.begin_span)[1]]),
                    tf.nn.relu(self.end_span - 1)
                ], 2))  # -1 because the end of span in exclusive  [start, end)
            # relu so that 0 doesn't become -1
            #mention_end_emb = tf.gather(text_outputs, mention_ends)  # [num_mentions, emb]
            mention_emb_list.append(mention_end_emb)
            #print("mention_start_emb = ", mention_start_emb)
            #print("mention_end_emb = ", mention_end_emb)

        mention_width = self.end_span - self.begin_span  # [batch, num_mentions]
        # TODO remove the comment code below
        """
        if self.args.use_features:
            mention_width_index = mention_width - 1  # [num_mentions]
            mention_width_emb = tf.gather(tf.get_variable("mention_width_embeddings", [self.args["max_mention_width"],
                                                                                       self.args["feature_size"]]),
                                          mention_width_index)  # [batch, num_mentions, emb]
            mention_width_emb = tf.nn.dropout(mention_width_emb, self.dropout)
            #print("mention_width_emb = ", mention_width_emb)
            mention_emb_list.append(mention_width_emb)
        """

        if self.args.span_emb.find(
                "head") != -1:  # here the attention is computed
            self.max_mention_width = tf.minimum(
                self.args.max_mention_width,
                tf.reduce_max(self.end_span - self.begin_span))
            mention_indices = tf.range(self.max_mention_width) + \
                              tf.expand_dims(self.begin_span, 2)  # [batch, num_mentions, max_mention_width]
            mention_indices = tf.minimum(
                tf.shape(self.word_embeddings)[1] - 1,
                mention_indices)  # [batch, num_mentions, max_mention_width]
            #print("mention_indices = ", mention_indices)
            batch_index = tf.tile(
                tf.expand_dims(
                    tf.expand_dims(tf.range(tf.shape(mention_indices)[0]), 1),
                    2), [
                        1,
                        tf.shape(mention_indices)[1],
                        tf.shape(mention_indices)[2]
                    ])
            mention_indices = tf.stack([batch_index, mention_indices], 3)
            # [batch, num_mentions, max_mention_width, [row,col] ]    4d tensor

            # the head is derived either from the same input as the boundaries (model_heads_from_bilstm) or from the word embeddings
            head_input_vecs = boundaries_input_vecs if self.args.model_heads_from_bilstm else self.word_embeddings
            mention_text_emb = tf.gather_nd(head_input_vecs, mention_indices)
            # [batch, num_mentions, max_mention_width, 500 ]    4d tensor
            #print("mention_text_emb = ", mention_text_emb)

            with tf.variable_scope("head_scores"):
                # from [batch, max_sent_len, 600] to [batch, max_sent_len, 1]
                self.head_scores = util.projection(boundaries_input_vecs,
                                                   1,
                                                   model=self)
            # [batch, num_mentions, max_mention_width, 1]
            mention_head_scores = tf.gather_nd(self.head_scores,
                                               mention_indices)
            #print("mention_head_scores = ", mention_head_scores)
            if not tf.__version__.startswith("1.4"):
                temp_shape = tf.shape(mention_width)
                temp = tf.sequence_mask(tf.reshape(mention_width, [-1]),
                                        self.max_mention_width,
                                        dtype=tf.float32)
                temp_mask = tf.reshape(
                    temp, [temp_shape[0], temp_shape[1],
                           tf.shape(temp)[-1]])
            else:
                temp_mask = tf.sequence_mask(mention_width,
                                             self.max_mention_width,
                                             dtype=tf.float32)
            mention_mask = tf.expand_dims(
                temp_mask, 3)  # [batch, num_mentions, max_mention_width, 1]
            mention_mask = tf.minimum(1.0,
                                      tf.maximum(self.args.zero,
                                                 mention_mask))  # 1e-3
            mention_attention = tf.nn.softmax(
                mention_head_scores + tf.log(mention_mask),
                dim=2)  # [batch, num_mentions, max_mention_width, 1]
            mention_head_emb = tf.reduce_sum(mention_attention *
                                             mention_text_emb,
                                             2)  # [batch, num_mentions, emb]
            #print("mention_head_emb = ", mention_head_emb)
            mention_emb_list.append(mention_head_emb)

        self.span_emb = tf.concat(mention_emb_list,
                                  2)  # [batch, num_mentions, emb i.e. 1700]
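
The head mechanism (formula (3)) used in both versions of add_span_emb_op reduces to a masked softmax over per-word head scores inside each candidate span, followed by a weighted sum of the word vectors. A minimal NumPy sketch with toy shapes (head_scores here are random stand-ins for the output of util.projection):

import numpy as np

b, m, W, d = 2, 3, 4, 5                          # batch, mentions, max_mention_width, emb
head_scores = np.random.randn(b, m, W, 1)
word_vecs = np.random.randn(b, m, W, d)
width = np.array([[1, 2, 4], [3, 1, 2]])         # true width of every candidate span

mask = (np.arange(W)[None, None, :] < width[..., None]).astype(float)[..., None]  # [b, m, W, 1]
logits = head_scores + np.log(np.maximum(mask, 1e-3))   # invalid positions get a very low score
logits -= logits.max(2, keepdims=True)                  # numerically stable softmax
attn = np.exp(logits) / np.exp(logits).sum(2, keepdims=True)     # softmax over the width axis
mention_head_emb = (attn * word_vecs).sum(2)             # [b, m, d]  ->  \hat{x}^m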