Example #1
    def _decode(self):
        """
        Employs Pointer Network to get the the probs of each position
        to be the start or end of the predicted answer.
        Note that we concat the fuse_p_encodes for the passages in the same document.
        And since the encodes of queries in the same document is same, we select the first one.
        """
        with tf.variable_scope('start_pos_predict'):
            self.fuse_p_encodes, _ = bilstm_layer(self.match_p_encodes, self.p_length,
                                         self.hidden_size, layer_num=1)
            start_weight = tf.get_variable("start_weight", self.hidden_size * 2)
            start_logits = tf.tensordot(self.fuse_p_encodes, start_weight, axes=[[2], [0]])

        with tf.variable_scope('end_pos_predict'):
            concat_GM_2 = tf.concat([self.match_p_encodes, self.fuse_p_encodes], -1)
            self.end_p_encodes, _ = bilstm_layer(concat_GM_2, self.p_length,
                                        self.hidden_size, layer_num=1)
            
            end_weight = tf.get_variable("end_weight", self.hidden_size * 2)
            end_logits = tf.tensordot(self.end_p_encodes, end_weight, axes=[[2], [0]])

        with tf.variable_scope('same_question_concat'):
            batch_size = tf.shape(self.start_label)[0]

            concat_start_logits = tf.reshape(start_logits, [batch_size, -1])
            concat_end_logits = tf.reshape(end_logits, [batch_size, -1])

        self.start_probs = tf.nn.softmax(concat_start_logits, axis=1)
        self.end_probs = tf.nn.softmax(concat_end_logits, axis=1)
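
All of these examples call a bilstm_layer helper that is not shown. The following is only a minimal sketch consistent with its call sites (it must return an (outputs, states) tuple whose outputs have last dimension 2 * hidden_size, and accept an optional layer_num), assuming plain TF 1.x LSTM cells; the actual implementation may differ.

import tensorflow as tf

def bilstm_layer(inputs, lengths, hidden_size, layer_num=1):
    # Hypothetical reconstruction: stack `layer_num` Bi-LSTMs over `inputs`,
    # concatenating forward/backward outputs to width 2 * hidden_size.
    outputs, states = inputs, None
    for i in range(layer_num):
        with tf.variable_scope('bilstm_%d' % i):
            cell_fw = tf.nn.rnn_cell.LSTMCell(hidden_size)
            cell_bw = tf.nn.rnn_cell.LSTMCell(hidden_size)
            (out_fw, out_bw), states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, outputs, sequence_length=lengths,
                dtype=tf.float32)
            outputs = tf.concat([out_fw, out_bw], -1)
    return outputs, states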
Example #2
    def _encode(self):
        """
        Employs a single weight-shared Bi-LSTM to encode passage and question
        """
        with tf.variable_scope('encoding'):
            self.sep_p_encodes, _ = bilstm_layer(self.p_emb, self.p_length, self.hidden_size)
            tf.get_variable_scope().reuse_variables()
            self.sep_q_encodes, _ = bilstm_layer(self.q_emb, self.q_length, self.hidden_size)
        if self.use_dropout:
            self.sep_p_encodes = tf.nn.dropout(self.sep_p_encodes, 1 - self.dropout)
            self.sep_q_encodes = tf.nn.dropout(self.sep_q_encodes, 1 - self.dropout)
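
Note the tf.get_variable_scope().reuse_variables() call above: the passage and the question are encoded with the same Bi-LSTM weights. Example #3 below instead gives each encoder its own variable scope (separate weights) and applies dropout to the embeddings rather than to the encodings.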
Example #3
    def _encode(self):
        """
        Employs two Bi-LSTMs to encode passage and question separately
        """
        if self.use_dropout:
            self.p_emb = tf.nn.dropout(self.p_emb, self.dropout_keep_prob)
            self.q_emb = tf.nn.dropout(self.q_emb, self.dropout_keep_prob)

        with tf.variable_scope('passage_encoding'):
            self.sep_p_encodes, _ = bilstm_layer(self.p_emb, self.p_length, self.hidden_size)
        with tf.variable_scope('question_encoding'):
            self.sep_q_encodes, _ = bilstm_layer(self.q_emb, self.q_length, self.hidden_size)
Example #4
    def _fuse(self):
        with tf.variable_scope('self-attention'):
            self.fuse_p_encodes, _ = bilstm_layer(self.match_p_encodes, self.p_length, self.hidden_size)
            JX = tf.shape(self.fuse_p_encodes)[1]
            # Scaled dot-product self-attention over the passage encodings.
            sim_matrix = tf.matmul(self.fuse_p_encodes, self.fuse_p_encodes, transpose_b=True)
            sim_matrix /= self.hidden_size ** 0.5
            mask_c = tf.tile(tf.expand_dims(self.c_mask, axis=1), [1, JX, 1])
            context2context_attn = tf.matmul(tf.nn.softmax(softmax_mask(sim_matrix, mask_c), -1),
                                             self.fuse_p_encodes)
            self.fuse_p_encodes2, _ = bilstm_layer(context2context_attn + self.match_p_encodes,
                                                   self.p_length, self.hidden_size)
            # Highway-style gate: g interpolates between the transformed
            # representation r and the original fuse_p_encodes.
            r = tf.tanh(tf.layers.dense(tf.concat([self.fuse_p_encodes, self.fuse_p_encodes2], -1),
                                        self.hidden_size * 2, activation=None))
            g = tf.sigmoid(tf.layers.dense(tf.concat([self.fuse_p_encodes, self.fuse_p_encodes2], -1),
                                           self.hidden_size * 2, activation=None))
            self.concat_p_encodes = g * r + (1 - g) * self.fuse_p_encodes
            if self.use_dropout:
                self.concat_p_encodes = tf.nn.dropout(self.concat_p_encodes, 1 - self.dropout)
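
Example #4 relies on a softmax_mask helper that is not shown. A common TF 1.x formulation, given here only as a sketch (assuming mask entries are 1 for valid positions and 0 for padding), pushes masked logits toward negative infinity so they receive ~0 probability:

import tensorflow as tf

def softmax_mask(val, mask):
    # Hypothetical reconstruction: add a large negative constant where
    # mask == 0 so those positions vanish after the softmax.
    return val - 1e30 * (1.0 - tf.cast(mask, tf.float32))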
Example #5
    def _decode_yesno(self):
        """
        Employs Pointer Network to get the the probs of each position
        to be the start or end of the predicted answer.
        Note that we concat the fuse_p_encodes for the passages in the same document.
        And since the encodes of queries in the same document is same, we select the first one.
        """
        with tf.variable_scope('class_predict'):
            self.fuse_p_encodes, _ = bilstm_layer(self.match_p_encodes,
                                                  self.p_length,
                                                  self.hidden_size,
                                                  layer_num=1)
            fuse_p_encodes_pool = tf.reduce_max(
                self.fuse_p_encodes, axis=1)  # TODO: use self.p_length as a mask?
            classify_weight = tf.get_variable(
                "classify_weight",
                shape=[self.hidden_size * 2, 3],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=0.1))
            bias = tf.get_variable("bias",
                                   shape=[3],
                                   dtype=tf.float32,
                                   initializer=tf.constant_initializer(0))
            class_logits = tf.nn.bias_add(
                tf.matmul(fuse_p_encodes_pool, classify_weight), bias)

        self.classprobs = tf.nn.softmax(class_logits, axis=1)
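
One way to resolve the TODO in Example #5 is to mask padded time steps before max-pooling, so padding can never win the max. A sketch under the shapes assumed above (the helper name is hypothetical; fuse_p_encodes is [batch, time, 2 * hidden_size] and p_length is [batch]):

import tensorflow as tf

def masked_max_pool(fuse_p_encodes, p_length):
    # Hypothetical helper: push padded positions far below any real value
    # so tf.reduce_max only sees valid time steps.
    seq_mask = tf.sequence_mask(p_length,
                                maxlen=tf.shape(fuse_p_encodes)[1],
                                dtype=tf.float32)              # [batch, time]
    masked = fuse_p_encodes - 1e30 * (1.0 - tf.expand_dims(seq_mask, -1))
    return tf.reduce_max(masked, axis=1)                       # [batch, 2 * hidden]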
Example #6
    def _fuse(self):
        with tf.variable_scope('self-attention'):
            self.fuse_p_encodes, _ = bilstm_layer(self.match_p_encodes,
                                                  self.p_length,
                                                  self.hidden_size)

            Q = tf.layers.dense(self.fuse_p_encodes,
                                2 * self.hidden_size,
                                use_bias=False)  # (N, T_q, d_model)
            K = tf.layers.dense(self.fuse_p_encodes,
                                2 * self.hidden_size,
                                use_bias=False)  # (N, T_k, d_model)
            V = tf.layers.dense(self.fuse_p_encodes,
                                2 * self.hidden_size,
                                use_bias=False)  # (N, T_k, d_model)
            # Split into 4 heads and concat along the batch axis
            Q_ = tf.concat(tf.split(Q, 4, axis=2),
                           axis=0)  # (h*N, T_q, d_model/h)
            K_ = tf.concat(tf.split(K, 4, axis=2),
                           axis=0)  # (h*N, T_k, d_model/h)
            V_ = tf.concat(tf.split(V, 4, axis=2),
                           axis=0)  # (h*N, T_k, d_model/h)
            d_k = Q_.get_shape().as_list()[-1]
            sim_matrix = tf.matmul(Q_, K_, transpose_b=True)
            sim_matrix /= d_k**0.5
            context2context_attn = tf.matmul(tf.nn.softmax(sim_matrix, -1), V_)
            context2context_attn = tf.concat(tf.split(context2context_attn,
                                                      4,
                                                      axis=0),
                                             axis=2)  # (N, T_q, d_model)

            self.residual_match = self.match_p_encodes + tf.nn.dropout(
                tf.layers.dense(tf.concat([
                    self.fuse_p_encodes, context2context_attn,
                    self.fuse_p_encodes * context2context_attn
                ], -1),
                                self.hidden_size * 2,
                                activation=tf.nn.relu), 1 - self.dropout)

        with tf.variable_scope('modeling'):
            self.fuse_p_encodes2, _ = bilstm_layer(self.residual_match,
                                                   self.p_length,
                                                   self.hidden_size)

        if self.use_dropout:
            self.fuse_p_encodes2 = tf.nn.dropout(self.fuse_p_encodes2,
                                                 1 - self.dropout)
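
The split/concat in Example #6 implements 4-head attention without an explicit head axis: splitting along the feature dimension and stacking along the batch dimension lets one matmul score all heads at once (which requires 2 * hidden_size to be divisible by the head count). A standalone shape check, with sizes chosen purely for illustration:

import tensorflow as tf

N, T, d_model, h = 8, 50, 256, 4                      # illustrative sizes only
Q = tf.zeros([N, T, d_model])
Q_ = tf.concat(tf.split(Q, h, axis=2), axis=0)        # (h*N, T, d_model/h) = (32, 50, 64)
scores = tf.matmul(Q_, Q_, transpose_b=True)          # (32, 50, 50), one score map per head
heads = tf.matmul(tf.nn.softmax(scores, -1), Q_)      # (32, 50, 64)
out = tf.concat(tf.split(heads, h, axis=0), axis=2)   # back to (8, 50, 256)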
Example #7
    def _fuse(self):
        """
        Employs Bi-LSTM again to fuse the context information after the match layer
        """
        with tf.variable_scope('fusion'):
            self.fuse_p_encodes, _ = bilstm_layer(self.match_p_encodes,
                                                  self.p_length,
                                                  self.hidden_size)
            if self.use_dropout:
                self.fuse_p_encodes = tf.nn.dropout(self.fuse_p_encodes,
                                                    1 - self.dropout)
Example #8
    def _fuse(self):
        """
        Employs Bi-LSTM again to fuse the context information after the match layer
        """
        with tf.variable_scope('fusion'):
            self.match_p_encodes = tf.layers.dense(self.match_p_encodes, self.hidden_size * 2,
                                                   activation=tf.nn.relu)

            self.residual_p_emb = self.match_p_encodes
            if self.use_dropout:
                self.residual_p_emb = tf.nn.dropout(self.match_p_encodes, self.dropout_keep_prob)

            self.residual_p_encodes, _ = bilstm_layer(self.residual_p_emb, self.p_length,
                                             self.hidden_size, layer_num=1)
            if self.use_dropout:
                self.residual_p_encodes = tf.nn.dropout(self.residual_p_encodes, self.dropout_keep_prob)
            # Note: the Bi-LSTM output cannot be fed directly into a Dense layer here;
            # it raises AttributeError: 'Bidirectional' object has no attribute 'outbound_nodes'
            # Trilinear (BiDAF-style) self-similarity: w1·(p_i ∘ p_j) + w2·p_i + w3·p_j
            sim_weight_1 = tf.get_variable("sim_weight_1", self.hidden_size * 2)
            weight_passage_encodes = self.residual_p_encodes * sim_weight_1
            dot_sim_matrix = tf.matmul(weight_passage_encodes, self.residual_p_encodes, transpose_b=True)
            sim_weight_2 = tf.get_variable("sim_weight_2", self.hidden_size * 2)
            passage_sim = tf.tensordot(self.residual_p_encodes, sim_weight_2, axes=[[2], [0]])
            sim_weight_3 = tf.get_variable("sim_weight_3", self.hidden_size * 2)
            question_sim = tf.tensordot(self.residual_p_encodes, sim_weight_3, axes=[[2], [0]])
            sim_matrix = dot_sim_matrix + tf.expand_dims(passage_sim, 2) + tf.expand_dims(question_sim, 1)
            # sim_matrix = tf.matmul(self.residual_p_encodes, self.residual_p_encodes, transpose_b=True)

            batch_size, num_rows = tf.shape(sim_matrix)[0:1], tf.shape(sim_matrix)[1]
            # Mask out the diagonal so a position cannot attend to itself.
            mask = tf.eye(num_rows, batch_shape=batch_size)
            sim_matrix = sim_matrix + -1e9 * mask

            context2question_attn = tf.matmul(tf.nn.softmax(sim_matrix, -1), self.residual_p_encodes)
            concat_outputs = tf.concat([self.residual_p_encodes, context2question_attn,
                                        self.residual_p_encodes * context2question_attn], -1)
            self.residual_match_p_encodes = tf.layers.dense(concat_outputs, self.hidden_size * 2, activation=tf.nn.relu)

            self.match_p_encodes = tf.add(self.match_p_encodes, self.residual_match_p_encodes)
            if self.use_dropout:
                self.match_p_encodes = tf.nn.dropout(self.match_p_encodes, self.dropout_keep_prob)
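
The tf.eye(..., batch_shape=...) trick in Example #8 builds one identity matrix per batch item, so adding -1e9 on the diagonal drives each position's attention to itself toward zero. A toy demonstration (sizes illustrative):

import tensorflow as tf

sim = tf.random_normal([2, 3, 3])            # toy (batch, T, T) similarity scores
diag = tf.eye(3, batch_shape=[2])            # (2, 3, 3): one identity per batch item
attn = tf.nn.softmax(sim + -1e9 * diag, -1)  # diagonal attention probabilities ~ 0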
Example #9
    def _fuse(self):
        with tf.variable_scope('self-attention'):
            self.context2context, _ = bilstm_layer(self.match_p_encodes,
                                                   self.p_length,
                                                   self.hidden_size)

            if self.use_dropout:
                self.context2context = tf.nn.dropout(self.context2context,
                                                     1 - self.dropout)

            sim_matrix = tf.matmul(self.context2context,
                                   self.context2context,
                                   transpose_b=True)
            sim_matrix /= self.hidden_size**0.5
            context2context_attn = tf.matmul(tf.nn.softmax(sim_matrix, -1),
                                             self.context2context)
            self.fuse_p_encodes = self.match_p_encodes + tf.nn.dropout(
                tf.layers.dense(tf.concat([
                    self.context2context, context2context_attn,
                    self.context2context * context2context_attn
                ], -1),
                                self.hidden_size * 2,
                                activation=tf.nn.relu), 1 - self.dropout)
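
Note that the residual addition in Example #9 (self.match_p_encodes + ...) assumes match_p_encodes already has last dimension 2 * hidden_size, since that is the width of the dense projection being added; a match layer with a different output width would need an extra projection before the sum.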