Example #1
    def _aggregateBlock(self, v_1, v_2, scope):
        """
        :param v_1: compare the aligned phrases, output of feed forward layer (G), tensor with shape (batch_size, seq_length, hidden_size)
        :param v_2: compare the aligned phrases, output of feed forward layer (G), tensor with shape (batch_size, seq_length, hidden_size)
        :param scope: scope name

        v1_sum, v2_sum: sum of the compared phrases (axis = seq_length), tensor with shape (batch_size, hidden_size)
        v: concat of v1_sum, v2_sum, tensor with shape (batch_size, 2 * hidden_size)
        ff_outputs: output of feed forward layer (H), tensor with shape (batch_size, hidden_size)

        :return: y_hat: output of a linear layer, tensor with shape (batch_size, n_classes)
        """
        with tf.variable_scope(scope):
            # v1 = \sum_{i=1}^l_a v_{1,i}
            # v2 = \sum_{j=1}^l_b v_{2,j} (4)
            v1_sum = tf.reduce_sum(v_1, axis=1)
            v2_sum = tf.reduce_sum(v_2, axis=1)
            print_shape('v1_sum', v1_sum)
            print_shape('v2_sum', v2_sum)

            # y_hat = H([v1, v2]) (5)
            v = tf.concat([v1_sum, v2_sum], axis=1)
            print_shape('v', v)

            ff_outputs = self._feedForwardBlock(v, self.hidden_size, 'H')
            print_shape('ff_outputs', ff_outputs)

            y_hat = tf.layers.dense(ff_outputs, self.n_classes)
            print_shape('y_hat', y_hat)
            return y_hat
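The _feedForwardBlock helper used above as H (and elsewhere as F and G) is not shown in these examples, and its signature varies between the source repositories (Example #7 passes an extra output-size argument). A minimal sketch of what such a block could look like, assuming two dense ReLU layers with dropout and a self.dropout_keep_prob placeholder; the actual implementations may differ:

    def _feedForwardBlock(self, inputs, num_units, scope, isReuse=False):
        # Hypothetical sketch: two dense + ReLU layers with dropout,
        # variables shared across calls via variable_scope reuse.
        with tf.variable_scope(scope, reuse=isReuse):
            drop1 = tf.nn.dropout(inputs, keep_prob=self.dropout_keep_prob)
            dense1 = tf.layers.dense(drop1, num_units, activation=tf.nn.relu, name='dense1')
            drop2 = tf.nn.dropout(dense1, keep_prob=self.dropout_keep_prob)
            dense2 = tf.layers.dense(drop2, num_units, activation=tf.nn.relu, name='dense2')
            return dense2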
Example #2
    def _compositionBlock(self, v_p, v_h, scope):
        """
        :param v_p: concat of [m_p, a_p, sub_p, mul_p], tensor with shape (batch_size, self_attention_r, 4 * 2 * rnn_size)
        :param v_h: concat of [m_h, a_h, sub_h, mul_h], tensor with shape (batch_size, self_attention_r, 4 * 2 * rnn_size)
        :param scope: scope name

        v_mean_p, v_mean_h: mean of v_p, v_h over the self-attention dimension (axis = self_attention_r), tensor with shape (batch_size, 4 * 2 * rnn_size)
        v_max_p, v_max_h: max of v_p, v_h over the self-attention dimension (axis = self_attention_r), tensor with shape (batch_size, 4 * 2 * rnn_size)
        v: concat of [v_mean_p, v_mean_h, v_max_p, v_max_h], tensor with shape (batch_size, 4 * 4 * 2 * rnn_size)
        ff_outputs: output of feed forward layer, tensor with shape (batch_size, hidden_size)

        :return: y_hat: output of a linear layer, tensor with shape (batch_size, n_classes)
        """
        with tf.variable_scope(scope):
            v_mean_p = tf.reduce_mean(v_p, axis=1)
            v_mean_h = tf.reduce_mean(v_h, axis=1)
            v_max_p = tf.reduce_max(v_p, axis=1)
            v_max_h = tf.reduce_max(v_h, axis=1)
            print_shape('v_mean_p', v_mean_p)
            print_shape('v_max_p', v_max_p)

            v = tf.concat([v_mean_p, v_mean_h, v_max_p, v_max_h], axis=1)
            print_shape('v', v)

            ff_outputs = self._feedForwardBlock(v, self.hidden_size, 'H')
            print_shape('ff_outputs', ff_outputs)

            y_hat = tf.layers.dense(ff_outputs, self.n_classes)
            print_shape('y_hat', y_hat)
            return y_hat
Example #3
    def _add_variables(self):
        """
        Embedding: Variables to hold word embeddings. Untrainable.
        """
        self.Embedding = tf.Variable(
            tf.truncated_normal([self.n_vocab, self.embedding_size]),
            dtype=tf.float32,
            name='Embedding',
            trainable=False)
        self.init_embedding = self.Embedding.assign(self.embed_matrix)
        self.Embedding = self._projectionBlock(self.Embedding,
                                               self.hidden_size, 'Projection')
        print_shape('projected embeddings', self.Embedding)
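_projectionBlock is referenced here but not shown. A plausible minimal sketch, assuming it simply projects the embedding matrix from embedding_size down to hidden_size with a single linear layer (the layer name and the bias-free choice are assumptions):

    def _projectionBlock(self, embeddings, num_units, scope):
        # Hypothetical sketch: map the (n_vocab, embedding_size) embedding
        # matrix to (n_vocab, num_units) with one linear layer.
        with tf.variable_scope(scope):
            return tf.layers.dense(embeddings, num_units, use_bias=False, name='projection')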
Example #4
    def _loss_op(self, l2_lambda=0.0001):
        """
        :param l2_lambda: L2 regularization coefficient

        AAt_p, AAt_h: product of the self-attention matrix and its transpose (A * A^T), tensor with shape (batch_size, self_attention_r, self_attention_r)
        batch_I: identity matrix tiled over the batch, tensor with shape (batch_size, self_attention_r, self_attention_r)
        penalty_p, penalty_h: squared Frobenius-norm penalties of the premise's and hypothesis's self-attention matrices, tensor with shape (batch_size,)
        lambda_penalty: penalty coefficient
        penalty: self-attention penalty term, a scalar

        :return: loss: training loss
        """
        with tf.name_scope('cost'):
            AAt_p = tf.matmul(self.A_p, tf.transpose(self.A_p, [0, 2, 1]))
            AAt_h = tf.matmul(self.A_h, tf.transpose(self.A_h, [0, 2, 1]))
            print_shape('AAt_p', AAt_p)

            I = tf.eye(self.self_attention_r)
            batch_I = tf.reshape(tf.tile(I, [tf.shape(self.A_p)[0], 1]), [-1, self.self_attention_r, self.self_attention_r])
            print_shape('batch_I', batch_I)

            penalty_p = tf.square(tf.norm(AAt_p - batch_I, axis=[-2, -1], ord='fro'))
            penalty_h = tf.square(tf.norm(AAt_h - batch_I, axis=[-2, -1], ord='fro'))
            print_shape('penalty_p', penalty_p)

            penalty = tf.reduce_mean((penalty_p + penalty_h) * self.lambda_penalty)
            print_shape('penalty', penalty)

            losses = tf.nn.softmax_cross_entropy_with_logits(labels=self.y, logits=self.logits)
            label_loss = tf.reduce_mean(losses, name='loss_val')
            weights = [v for v in tf.trainable_variables() if 'kernel' in v.name]
            l2_loss = tf.add_n([tf.nn.l2_loss(w) for w in weights]) * l2_lambda
            loss = label_loss + l2_loss + penalty
        return loss
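The penalty above is the orthogonality regularizer from the structured self-attentive embedding formulation, P = ||A * A^T - I||_F^2, which pushes the self_attention_r attention rows toward attending to different positions. A small standalone NumPy check (not part of the model code) illustrating its behavior:

    import numpy as np

    r, seq_len = 4, 10

    # Orthonormal rows (disjoint one-hot attention): penalty is 0.
    A_ortho = np.eye(r, seq_len)
    print(np.linalg.norm(A_ortho @ A_ortho.T - np.eye(r), ord='fro') ** 2)   # 0.0

    # Every row attends uniformly to all positions: penalty is large.
    A_flat = np.full((r, seq_len), 1.0 / seq_len)
    print(np.linalg.norm(A_flat @ A_flat.T - np.eye(r), ord='fro') ** 2)     # ~3.36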
Example #5
    def _attentionBlock(self, m_p, m_h, scope):
        """
        :param m_p: output of self-attention layer, tensor with shape (batch_size, self_attention_r, 2 * rnn_size)
        :param m_h: output of self-attention layer, tensor with shape (batch_size, self_attention_r, 2 * rnn_size)
        :param scope: scope name

        a_p, a_h: output of attention layer, tensor with shape (batch_size, self_attention_r, 2 * rnn_size)
        sub_p, sub_h: difference of m_p and a_p, m_h and a_h, tensor with shape (batch_size, self_attention_r, 2 * rnn_size)
        mul_p, mul_h: hadamard product of m_p and a_p, m_h and a_h, tensor with shape (batch_size, self_attention_r, 2 * rnn_size)

        :return: v_p: concat of [m_p, a_p, sub_p, mul_p], tensor with shape (batch_size, self_attention_r, 4 * 2 * rnn_size)
                 v_h: concat of [m_h, a_h, sub_h, mul_h], tensor with shape (batch_size, self_attention_r, 4 * 2 * rnn_size)
        """
        with tf.variable_scope(scope):
            attention_layer = AttentionLayer()
            a_p, a_h = attention_layer(m_p, m_h)
            print_shape('a_p', a_p)

            sub_p = tf.subtract(m_p, a_p)
            sub_h = tf.subtract(m_h, a_h)
            mul_p = tf.multiply(m_p, a_p)
            mul_h = tf.multiply(m_h, a_h)
            print_shape('sub_p', sub_p)
            print_shape('mul_p', mul_p)

            v_p = tf.concat([m_p, a_p, sub_p, mul_p], axis=2)
            v_h = tf.concat([m_h, a_h, sub_h, mul_h], axis=2)
            print_shape('v_p', v_p)

            return v_p, v_h
Example #6
File: Model.py  Project: rspai/FEVER_Task
    def _compareBlock(self, alpha, beta, scope):
        """
        :param alpha: context vectors, tensor with shape (batch_size, seq_length, embedding_size)
        :param beta: context vectors, tensor with shape (batch_size, seq_length, embedding_size)
        :param scope: scope name

        a_beta, b_alpha: concat of [embeded_left, beta] and [embeded_right, alpha], tensor with shape (batch_size, seq_length, 2 * embedding_size)

        :return: v_1: compare the aligned phrases, output of feed forward layer (G), tensor with shape (batch_size, seq_length, hidden_size)
                 v_2: compare the aligned phrases, output of feed forward layer (G), tensor with shape (batch_size, seq_length, hidden_size)
        """
        with tf.variable_scope(scope):
            a_beta = tf.concat([self.embeded_left, beta], axis=2)
            b_alpha = tf.concat([self.embeded_right, alpha], axis=2)
            print_shape('a_beta', a_beta)
            print_shape('b_alpha', b_alpha)

            # v_1,i = G([a_bar_i, beta_i])
            # v_2,j = G([b_bar_j, alpha_j]) (3)
            v_1 = self._feedForwardBlock(a_beta, self.hidden_size, 'G')
            v_2 = self._feedForwardBlock(b_alpha,
                                         self.hidden_size,
                                         'G',
                                         isReuse=True)
            print_shape('v_1', v_1)
            print_shape('v_2', v_2)
            return v_1, v_2
Example #7
    def _compositionBlock(self, m_a, m_b, hiddenSize, scope):
        """
        :param m_a: concat of [a_bar, a_hat, a_diff, a_mul], tensor with shape (batch_size, seq_length, 4 * 2 * hidden_size)
        :param m_b: concat of [b_bar, b_hat, b_diff, b_mul], tensor with shape (batch_size, seq_length, 4 * 2 * hidden_size)
        :param hiddenSize: hidden state size of the biLSTM cells
        :param scope: scope name

        outputV_a, outputV_b: biLSTM outputs, a tuple of (forward outputs, backward outputs)
        v_a, v_b: concat of the forward and backward biLSTM outputs, tensor with shape (batch_size, seq_length, 2 * hidden_size)
        v_a_avg, v_b_avg: timestep (axis = seq_length) average of v_a, v_b, tensor with shape (batch_size, 2 * hidden_size)
        v_a_max, v_b_max: timestep (axis = seq_length) max value of v_a, v_b, tensor with shape (batch_size, 2 * hidden_size)
        v: concat of [v_a_avg, v_b_avg, v_a_max, v_b_max], tensor with shape (batch_size, 4 * 2 * hidden_size)

        :return: y_hat: output of feed forward layer, tensor with shape (batch_size, n_classes)
        """
        with tf.variable_scope(scope):
            outputV_a, finalStateV_a = self._biLSTMBlock(
                m_a, hiddenSize, 'biLSTM')
            outputV_b, finalStateV_b = self._biLSTMBlock(m_b,
                                                         hiddenSize,
                                                         'biLSTM',
                                                         isReuse=True)
            v_a = tf.concat(outputV_a, axis=2)
            v_b = tf.concat(outputV_b, axis=2)

            print_shape('v_a', v_a)
            print_shape('v_b', v_b)

            # v_{a,avg} = \sum_{i=1}^l_a \frac{v_a,i}{l_a}, v_{a,max} = \max_{i=1} ^ l_a v_{a,i} (18)
            # v_{b,avg} = \sum_{j=1}^l_b \frac{v_b,j}{l_b}, v_{b,max} = \max_{j=1} ^ l_b v_{b,j} (19)
            v_a_avg = tf.reduce_mean(v_a, axis=1)
            v_b_avg = tf.reduce_mean(v_b, axis=1)
            v_a_max = tf.reduce_max(v_a, axis=1)
            v_b_max = tf.reduce_max(v_b, axis=1)
            print_shape('v_a_avg', v_a_avg)
            print_shape('v_a_max', v_a_max)

            # v = [v_{a,avg}; v_{a,max}; v_{b,avg}; v_{b_max}] (20)
            v = tf.concat([v_a_avg, v_a_max, v_b_avg, v_b_max], axis=1)
            print_shape('v', v)
            y_hat = self._feedForwardBlock(v, self.hidden_size, self.n_classes,
                                           'feed_forward')
            return y_hat
Example #8
    def _aggregateBlock(self, v_1, v_2, scope, left_mask, right_mask,
                        dense_features):
        """
        :param v_1: compare the aligned phrases, output of feed forward layer (G), tensor with shape (batch_size, seq_length, hidden_size)
        :param v_2: compare the aligned phrases, output of feed forward layer (G), tensor with shape (batch_size, seq_length, hidden_size)
        :param scope: scope name
        :param left_mask, right_mask: float sequence masks for premise and hypothesis, tensor with shape (batch_size, seq_length)
        :param dense_features: extra dense features appended before the feed forward layer (H), tensor with shape (batch_size, number of dense features)

        v1_sum, v2_sum: masked sum of the compared phrases (axis = seq_length), tensor with shape (batch_size, hidden_size)
        v: concat of v1_sum, v2_sum and dense_features, tensor with shape (batch_size, 2 * hidden_size + number of dense features)
        ff_outputs: output of feed forward layer (H), tensor with shape (batch_size, hidden_size)

        :return: y_hat: output of a linear layer, tensor with shape (batch_size, n_classes)
        """
        with tf.variable_scope(scope):
            left_mask = tf.to_float(tf.expand_dims(left_mask, axis=2))
            right_mask = tf.to_float(tf.expand_dims(right_mask, axis=2))
            v_1 = v_1 * left_mask
            v_2 = v_2 * right_mask

            # v1 = \sum_{i=1}^l_a v_{1,i}
            # v2 = \sum_{j=1}^l_b v_{2,j} (4)
            v1_sum = tf.reduce_sum(v_1, axis=1)
            v2_sum = tf.reduce_sum(v_2, axis=1)
            print_shape('v1_sum', v1_sum)
            print_shape('v2_sum', v2_sum)

            # y_hat = H([v1, v2]) (5)
            v = tf.concat([v1_sum, v2_sum, dense_features], axis=1)
            print_shape('v', v)

            ff_outputs = self._feedForwardBlock(v, self.hidden_size, 'H')
            print_shape('ff_outputs', ff_outputs)

            # compute the logits
            y_hat = tf.layers.dense(ff_outputs, self.n_classes,
                                    kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                    bias_initializer=tf.contrib.layers.xavier_initializer())
            print_shape('y_hat', y_hat)
            return y_hat
Example #9
    def __call__(self, H):
        # A = softmax(W_s2 * tanh(W_s1 * H.T)) (7)
        Ws1Ht = tf.map_fn(fn = lambda x: tf.matmul(self.W_s1, tf.transpose(x)), elems=H)
        print_shape('Ws1Ht', Ws1Ht)

        e = tf.map_fn(fn = lambda x: tf.matmul(self.W_s2, tf.tanh(x)), elems=Ws1Ht)
        print_shape('e', e)

        A = tf.nn.softmax(e)
        print_shape('A', A)

        # M = A * H (8)
        M = tf.matmul(A, H)
        print_shape('M', M)
        return M, A
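This __call__ assumes self.W_s1 and self.W_s2 already exist, e.g. created in the layer's __init__. With H of shape (batch_size, seq_length, 2 * rnn_size), equation (7) requires W_s1 of shape (attention_size, 2 * rnn_size) and W_s2 of shape (self_attention_r, attention_size), so that A has shape (batch_size, self_attention_r, seq_length). A hedged sketch of such an __init__, with arguments matching how SelfAttentionLayer is constructed in Example #15 (everything beyond the shapes is an assumption):

    def __init__(self, rnn_size, attention_size, self_attention_r, scope):
        # Hypothetical sketch: W_s1 maps 2*rnn_size -> attention_size,
        # W_s2 maps attention_size -> self_attention_r attention rows.
        with tf.variable_scope('self_attention_' + scope):
            self.W_s1 = tf.get_variable('W_s1', [attention_size, 2 * rnn_size], dtype=tf.float32,
                                        initializer=tf.contrib.layers.xavier_initializer())
            self.W_s2 = tf.get_variable('W_s2', [self_attention_r, attention_size], dtype=tf.float32,
                                        initializer=tf.contrib.layers.xavier_initializer())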
Example #10
    def _attendBlock(self, scope, left_mask, right_mask):
        """
        :param scope: scope name
        :param left_mask, right_mask: float sequence masks for premise and hypothesis, tensor with shape (batch_size, seq_length)

        embeded_left, embeded_right: tensor with shape (batch_size, seq_length, embedding_size)
        F_a_bar, F_b_bar: output of feed forward layer (F), tensor with shape (batch_size, seq_length, hidden_size)
        beta_attend, alpha_attend: attention weights after softmax in the two directions, tensor with shape (batch_size, seq_length, seq_length)
        e: attention matrix with additive mask, tensor with shape (batch_size, seq_length, seq_length)

        :return: alpha: context vectors, tensor with shape (batch_size, seq_length, embedding_size)
                 beta: context vectors, tensor with shape (batch_size, seq_length, embedding_size)
        """
        with tf.variable_scope(scope):
            F_a_bar = self._feedForwardBlock(self.embeded_left,
                                             self.hidden_size, 'F')
            F_b_bar = self._feedForwardBlock(self.embeded_right,
                                             self.hidden_size,
                                             'F',
                                             isReuse=True)
            print_shape('F_a_bar', F_a_bar)
            print_shape('F_b_bar', F_b_bar)

            # e_i,j = F'(a_hat, b_hat) = F(a_hat).T * F(b_hat) (1)
            e_raw = tf.matmul(F_a_bar, tf.transpose(F_b_bar, [0, 2, 1]))

            # mask padding sequence
            #mask = tf.multiply(tf.expand_dims(left_mask, 2), tf.expand_dims(right_mask, 1))
            #e = tf.multiply(e_raw, mask) + (1.0 - mask)*(-1e9)
            #print_shape('e', e)

            right_mask = tf.to_float(tf.expand_dims(right_mask, axis=1))
            e = e_raw + (1.0 - right_mask) * (-1e9)
            beta_attend = tf.nn.softmax(e, dim=-1)
            beta = tf.matmul(beta_attend, self.embeded_right)

            e_raw = tf.transpose(e_raw, [0, 2, 1])
            left_mask = tf.to_float(tf.expand_dims(left_mask, axis=1))
            e = e_raw + (1.0 - left_mask) * (-1e9)
            alpha_attend = tf.nn.softmax(e, dim=-1)
            alpha = tf.matmul(alpha_attend, self.embeded_left)

            # beta = \sum_{j=1}^l_b \frac{\exp(e_{i,j})}{\sum_{k=1}^l_b \exp(e_{i,k})} * b_hat_j
            # alpha = \sum_{i=1}^l_a \frac{\exp(e_{i,j})}{\sum_{k=1}^l_a \exp(e_{k,j})} * a_hat_i (2)
            print_shape('alpha', alpha)
            print_shape('beta', beta)

            return alpha, beta
Example #11
    def __call__(self, p, h):
        w_att = tf.matmul(p, tf.transpose(h, [0, 2, 1]))
        print_shape('w_att', w_att)

        # softmax over the attended sequence dimension in each direction;
        # perm [0, 2, 1] transposes only the two sequence axes, keeping the batch axis in place
        softmax_p = tf.nn.softmax(w_att)
        softmax_h = tf.nn.softmax(tf.transpose(w_att, [0, 2, 1]))
        print_shape('softmax_p', softmax_p)
        print_shape('softmax_h', softmax_h)

        p_hat = tf.matmul(softmax_p, h)
        h_hat = tf.matmul(softmax_h, p)
        return p_hat, h_hat
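A brief shape walk-through of this block, written as a hedged usage sketch (the placeholder names and sizes below are hypothetical): with p and h of shape (batch_size, r, 2 * rnn_size), w_att has shape (batch_size, r, r), each softmax normalizes over the attended sequence dimension, and p_hat, h_hat keep the shapes of p and h.

    # Hypothetical shape check; only AttentionLayer() itself comes from Examples #5 and #11.
    batch_size, r, d = 32, 16, 600            # d = 2 * rnn_size
    p = tf.placeholder(tf.float32, [batch_size, r, d], name='m_p')
    h = tf.placeholder(tf.float32, [batch_size, r, d], name='m_h')

    attention_layer = AttentionLayer()
    p_hat, h_hat = attention_layer(p, h)      # both (batch_size, r, d)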
Example #12
    def _logits_op(self):
        # [batch_size, seq_length, embedding_dim]
        self.embeded_left = tf.nn.embedding_lookup(self.Embedding,
                                                   self.premise)
        self.embeded_right = tf.nn.embedding_lookup(self.Embedding,
                                                    self.hypothesis)
        print_shape('embeded_left', self.embeded_left)
        print_shape('embeded_right', self.embeded_right)

        # [batch_size, seq_length]
        left_mask = tf.sequence_mask(self.premise_mask, self.seq_length,
                                     tf.float32)
        right_mask = tf.sequence_mask(self.hypothesis_mask, self.seq_length,
                                      tf.float32)
        print_shape('left_mask', left_mask)
        print_shape('right_mask', right_mask)

        alpha, beta = self._attendBlock('Attend', left_mask, right_mask)
        v_1, v_2 = self._compareBlock(alpha, beta, 'Compare')
        logits = self._aggregateBlock(v_1, v_2, 'Aggregate', left_mask,
                                      right_mask, self.features)
        return logits
Example #13
    def _inputEncodingBlock(self, scope):
        """
        :param scope: scope name

        embeded_left, embeded_right: tensor with shape (batch_size, seq_length, embedding_size)

        :return: a_bar: tensor with shape (batch_size, seq_length, 2 * hidden_size)
                 b_bar: tensor with shape (batch_size, seq_length, 2 * hidden_size)
        """
        with tf.device('/cpu:0'):
            self.Embedding = tf.get_variable(
                'Embedding', [self.n_vocab, self.embedding_size], tf.float32)
            self.embeded_left = tf.nn.embedding_lookup(self.Embedding,
                                                       self.premise)
            self.embeded_right = tf.nn.embedding_lookup(
                self.Embedding, self.hypothesis)
            print_shape('embeded_left', self.embeded_left)
            print_shape('embeded_right', self.embeded_right)

        with tf.variable_scope(scope):
            # a_bar = BiLSTM(a, i) (1)
            # b_bar = BiLSTM(b, i) (2)
            outputsPremise, finalStatePremise = self._biLSTMBlock(
                self.embeded_left, self.hidden_size, 'biLSTM',
                self.premise_mask)
            outputsHypothesis, finalStateHypothesis = self._biLSTMBlock(
                self.embeded_right,
                self.hidden_size,
                'biLSTM',
                self.hypothesis_mask,
                isReuse=True)

            a_bar = tf.concat(outputsPremise, axis=2)
            b_bar = tf.concat(outputsHypothesis, axis=2)
            print_shape('a_bar', a_bar)
            print_shape('b_bar', b_bar)
            return a_bar, b_bar
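_biLSTMBlock is used throughout but never shown. A minimal sketch, assuming a single bidirectional LSTM built with tf.nn.bidirectional_dynamic_rnn and interpreting the optional mask argument as per-example sequence lengths (this interpretation is an assumption; the repositories differ in how they represent masks). The real implementations may add dropout or differ in other details:

    def _biLSTMBlock(self, inputs, num_units, scope, seq_len=None, isReuse=False):
        # Hypothetical sketch: one bidirectional LSTM layer. Returns
        # (outputs, final_states); outputs is a (forward, backward) tuple,
        # each of shape (batch_size, seq_length, num_units), which callers
        # concatenate along the last axis.
        with tf.variable_scope(scope, reuse=isReuse):
            cell_fw = tf.nn.rnn_cell.LSTMCell(num_units)
            cell_bw = tf.nn.rnn_cell.LSTMCell(num_units)
            return tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs,
                                                   sequence_length=seq_len,
                                                   dtype=tf.float32)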
Example #14
    def _attendBlock(self, scope):
        """
        :param scope: scope name

        embeded_left, embeded_right: tensor with shape (batch_size, seq_length, embedding_size)
        F_a_bar, F_b_bar: output of feed forward layer (F), tensor with shape (batch_size, seq_length, hidden_size)
        attentionSoft_a, attentionSoft_b: attention weights after softmax in the two directions, tensor with shape (batch_size, seq_length, seq_length)
        e: attention matrix with mask, tensor with shape (batch_size, seq_length, seq_length)

        :return: alpha: context vectors, tensor with shape (batch_size, seq_length, embedding_size)
                 beta: context vectors, tensor with shape (batch_size, seq_length, embedding_size)
        """
        with tf.device('/cpu:0'):
            self.Embedding = tf.get_variable('Embedding', [self.n_vocab, self.embedding_size], tf.float32)
            self.embeded_left = tf.nn.embedding_lookup(self.Embedding, self.premise)
            self.embeded_right = tf.nn.embedding_lookup(self.Embedding, self.hypothesis)
            print_shape('embeded_left', self.embeded_left)
            print_shape('embeded_right', self.embeded_right)

        with tf.variable_scope(scope):
            F_a_bar  = self._feedForwardBlock(self.embeded_left, self.hidden_size, 'F')
            F_b_bar = self._feedForwardBlock(self.embeded_right, self.hidden_size, 'F', isReuse = True)
            print_shape('F_a_bar', F_a_bar)
            print_shape('F_b_bar', F_b_bar)

            # e_i,j = F'(a_hat, b_hat) = F(a_hat).T * F(b_hat) (1)
            e_raw = tf.matmul(F_a_bar, tf.transpose(F_b_bar, [0, 2, 1]))
            # mask padding sequence
            mask = tf.multiply(tf.expand_dims(self.premise_mask, 2), tf.expand_dims(self.hypothesis_mask, 1))
            e = tf.multiply(e_raw, mask)
            print_shape('e', e)

            attentionSoft_a = tf.exp(e - tf.reduce_max(e, axis=2, keepdims=True))
            attentionSoft_b = tf.exp(e - tf.reduce_max(e, axis=1, keepdims=True))
            # mask attention weights
            attentionSoft_a = tf.multiply(attentionSoft_a, tf.expand_dims(self.hypothesis_mask, 1))
            attentionSoft_b = tf.multiply(attentionSoft_b, tf.expand_dims(self.premise_mask, 2))
            attentionSoft_a = tf.divide(attentionSoft_a, tf.reduce_sum(attentionSoft_a, axis=2, keepdims=True))
            attentionSoft_b = tf.divide(attentionSoft_b, tf.reduce_sum(attentionSoft_b, axis=1, keepdims=True))
            attentionSoft_a = tf.multiply(attentionSoft_a, mask)
            attentionSoft_b = tf.transpose(tf.multiply(attentionSoft_b, mask), [0, 2, 1])
            print_shape('att_soft_a', attentionSoft_a)
            print_shape('att_soft_b', attentionSoft_b)

            # beta = \sum_{j=1}^l_b \frac{\exp(e_{i,j})}{\sum_{k=1}^l_b \exp(e_{i,k})} * b_hat_j
            # alpha = \sum_{i=1}^l_a \frac{\exp(e_{i,j})}{\sum_{k=1}^l_a \exp(e_{k,j})} * a_hat_i (2)
            beta = tf.matmul(attentionSoft_b, self.embeded_left)
            alpha = tf.matmul(attentionSoft_a, self.embeded_right)
            print_shape('alpha', alpha)
            print_shape('beta', beta)

            return alpha, beta
Example #15
    def _selfAttentiveEncodingBlock(self, scope):
        """
        :param scope: scope name

        embeded_left, embeded_right: tensor with shape (batch_size, seq_length, embedding_size)
        rnn_p, rnn_h: output of biLSTM layer, tensor with shape (batch_size, seq_length, 2 * rnn_size)

        :return: m_premise, m_hypothesis: output of self-attention layer, tensor with shape (batch_size, self_attention_r, 2 * rnn_size)
                 A_premise, A_hypothesis: self-attention weight matrices, tensor with shape (batch_size, self_attention_r, seq_length)
        """
        with tf.device('/cpu:0'):
            self.Embedding = tf.get_variable('Embedding', [self.n_vocab, self.embedding_size], tf.float32)
            self.embeded_left = tf.nn.embedding_lookup(self.Embedding, self.premise)
            self.embeded_right = tf.nn.embedding_lookup(self.Embedding, self.hypothesis)
            print_shape('embeded_left', self.embeded_left)
            print_shape('embeded_right', self.embeded_right)

        with tf.variable_scope(scope):
            rnn_outputs_premise, final_state_premise = self._biLSTMBlock(self.embeded_left, self.rnn_size, 'R', self.premise_mask)
            rnn_outputs_hypothesis, final_state_hypothesis = self._biLSTMBlock(self.embeded_right, self.rnn_size, 'R', self.hypothesis_mask, isReuse = True)

            rnn_p = tf.concat(rnn_outputs_premise, axis=2)
            rnn_h = tf.concat(rnn_outputs_hypothesis, axis=2)
            print_shape('rnn_p', rnn_p)
            print_shape('rnn_h', rnn_h)

            self_attention_layer1 = SelfAttentionLayer(self.rnn_size, self.attention_size, self.self_attention_r, 'premise')
            self_attention_layer2 = SelfAttentionLayer(self.rnn_size, self.attention_size, self.self_attention_r, 'hypothesis')
            m_premise, A_premise = self_attention_layer1(rnn_p)
            m_hypothesis, A_hypothesis = self_attention_layer2(rnn_h)
            print_shape('m_premise', m_premise)
            print_shape('m_hypothesis', m_hypothesis)
            print_shape('A_premise', A_premise)
            print_shape('A_hypothesis', A_hypothesis)

            return m_premise, A_premise, m_hypothesis, A_hypothesis
Example #16
    def _localInferenceBlock(self, a_bar, b_bar, scope):
        """
        :param a_bar: tensor with shape (batch_size, seq_length, 2 * hidden_size)
        :param b_bar: tensor with shape (batch_size, seq_length, 2 * hidden_size)
        :param scope: scope name

        attentionWeights: attention matrix, tensor with shape (batch_size, seq_length, seq_length)
        attentionSoft_a, attentionSoft_b: attention weights after softmax in the two directions, tensor with shape (batch_size, seq_length, seq_length)
        a_hat, b_hat: context vectors, tensor with shape (batch_size, seq_length, 2 * hidden_size)
        a_diff, b_diff: difference of a_bar and a_hat, b_bar and b_hat, tensor with shape (batch_size, seq_length, 2 * hidden_size)
        a_mul, b_mul: hadamard product of a_bar and a_hat, b_bar and b_hat, tensor with shape (batch_size, seq_length, 2 * hidden_size)

        :return: m_a: concat of [a_bar, a_hat, a_diff, a_mul], tensor with shape (batch_size, seq_length, 4 * 2 * hidden_size)
                 m_b: concat of [b_bar, b_hat, b_diff, b_mul], tensor with shape (batch_size, seq_length, 4 * 2 * hidden_size)
        """
        with tf.variable_scope(scope):
            # e = a_bar.T * b_bar (11)
            attentionWeights = tf.matmul(a_bar, tf.transpose(b_bar, [0, 2, 1]))
            print_shape('att_wei', attentionWeights)

            # a_hat = softmax(e) * b_bar (12)
            # b_hat = softmax(e) * a_bar (13)
            # softmax over the attended sequence dimension in each direction;
            # perm [0, 2, 1] transposes only the two sequence axes, keeping the batch axis in place
            attentionSoft_a = tf.nn.softmax(attentionWeights)
            attentionSoft_b = tf.nn.softmax(tf.transpose(attentionWeights, [0, 2, 1]))
            print_shape('att_soft_a', attentionSoft_a)
            print_shape('att_soft_b', attentionSoft_b)

            a_hat = tf.matmul(attentionSoft_a, b_bar)
            b_hat = tf.matmul(attentionSoft_b, a_bar)
            print_shape('a_hat', a_hat)
            print_shape('b_hat', b_hat)

            a_diff = tf.subtract(a_bar, a_hat)
            a_mul = tf.multiply(a_bar, a_hat)
            print_shape('a_diff', a_diff)
            print_shape('a_mul', a_mul)

            b_diff = tf.subtract(b_bar, b_hat)
            b_mul = tf.multiply(b_bar, b_hat)

            # m_a = [a_bar, a_hat, a_bar - a_hat, a_bar 'dot' a_hat] (14)
            # m_b = [b_bar, b_hat, b_bar - b_hat, b_bar 'dot' b_hat] (15)
            m_a = tf.concat([a_bar, a_hat, a_diff, a_mul], axis=2)
            m_b = tf.concat([b_bar, b_hat, b_diff, b_mul], axis=2)
            print_shape('m_a', m_a)
            print_shape('m_b', m_b)
            return m_a, m_b
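Putting Examples #13, #16, and #7 together, the ESIM forward pass could be wired roughly as follows. This is a hedged sketch only; the method name _logits_op and the scope names are assumptions:

    def _logits_op(self):
        # Hypothetical sketch: input encoding -> local inference -> composition.
        a_bar, b_bar = self._inputEncodingBlock('Input_Encoding')
        m_a, m_b = self._localInferenceBlock(a_bar, b_bar, 'Local_Inference')
        y_hat = self._compositionBlock(m_a, m_b, self.hidden_size, 'Composition')
        return y_hat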