Example #1
    def input_encoding_block(self, scope):
        """
           encoding block
        """
        # build self.embedding
        self._embedding = self.add_word_embedding()
        print("embedd shape", self._embedding.shape)
        self.embed1 = tf.nn.embedding_lookup(self._embedding, self.x1)
        #self.embed1 = tf.nn.dropout(self.embed1, self.config.dropout)
        #self.embed1 = tf.expand_dims(self.embed1, -1)
        print("word embedd1 shape", self.embed1.shape)

        self.embed2 = tf.nn.embedding_lookup(self._embedding, self.x2)
        #self.embed2 = tf.nn.dropout(self.embed2, self.config.dropout)
        #self.embed2 = tf.expand_dims(self.embed2, -1)
        print("word embedd2 shape", self.embed2.shape)
        attention_block = TransformerEncoder(self.config, train=True)
        with tf.variable_scope(scope):
            # a_bar = BiLSTM(a, i) (1)
            # b_bar = BiLSTM(b, i) (2)
            outputs_x1 = attention_block(self.x1, self.embed1)
            outputs_x2 = attention_block(self.x2, self.embed2)

            a_bar = tf.concat(outputs_x1, axis=2)
            b_bar = tf.concat(outputs_x2, axis=2)
            print_shape('a_bar', a_bar)
            print_shape('b_bar', b_bar)
            return a_bar, b_bar
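
The print_shape helper used in every example is not shown here; a minimal sketch of what it presumably does, judging from how it is called (the implementation below is an assumption):

import tensorflow as tf  # TF 1.x API, as in the examples

def print_shape(name, tensor):
    """Hypothetical debug helper: print a tensor's name and its static shape."""
    print("{} shape: {}".format(name, tensor.shape))
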
Example #2
    def input_encoding_block(self, scope):
        """
           encoding block
        """
        # build self.embedding
        self._embedding = self.add_word_embedding()
        print("embedd shape", self._embedding.shape)
        self.embed1 = tf.nn.embedding_lookup(self._embedding, self.x1)
        #self.embed1 = tf.nn.dropout(self.embed1, self.config.dropout)
        #self.embed1 = tf.expand_dims(self.embed1, -1)
        print("word embedd1 shape", self.embed1.shape)

        self.embed2 = tf.nn.embedding_lookup(self._embedding, self.x2)
        #self.embed2 = tf.nn.dropout(self.embed2, self.config.dropout)
        #self.embed2 = tf.expand_dims(self.embed2, -1)
        print("word embedd2 shape", self.embed2.shape)

        with tf.variable_scope(scope):
            # a_bar = BiLSTM(a, i) (1)
            # b_bar = BiLSTM(b, i) (2)
            if self.config.using_actual_len:
                outputs_x1, final_states_x1 = _bilstm_block(self.embed1, self.config.hidden_size, 'bilstm', seq_len=self.x1_mask)
                outputs_x2, final_states_x2 = _bilstm_block(self.embed2, self.config.hidden_size, 'bilstm', seq_len=self.x2_mask, reuse=True)
            else:
                outputs_x1, final_states_x1 = _bilstm_block(self.embed1, self.config.hidden_size, 'bilstm', self.config.dropout)
                outputs_x2, final_states_x2 = _bilstm_block(self.embed2, self.config.hidden_size, 'bilstm', self.config.dropout, reuse=True)

            a_bar = tf.concat(outputs_x1, axis=2)
            b_bar = tf.concat(outputs_x2, axis=2)
            print_shape('a_bar', a_bar)
            print_shape('b_bar', b_bar)
            return a_bar, b_bar
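
_bilstm_block is referenced throughout but never defined in these snippets. Below is a minimal sketch consistent with the call sites (dropout passed positionally as a keep probability, seq_len as the actual sequence lengths, a reuse flag for sharing weights between the two sentences); the exact signature and behavior are assumptions:

import tensorflow as tf  # TF 1.x API

def _bilstm_block(inputs, hidden_size, scope, dropout=None, seq_len=None, reuse=False):
    """Hypothetical BiLSTM helper matching the call sites above.

    Returns the (output_fw, output_bw) pair and the final states, as
    tf.nn.bidirectional_dynamic_rnn does, so callers can tf.concat the
    outputs on the last axis into a (batch, seq_len, 2 * hidden_size) tensor.
    """
    with tf.variable_scope(scope, reuse=reuse):
        cell_fw = tf.nn.rnn_cell.LSTMCell(hidden_size)
        cell_bw = tf.nn.rnn_cell.LSTMCell(hidden_size)
        if dropout is not None:
            # dropout is assumed to be a keep probability
            cell_fw = tf.nn.rnn_cell.DropoutWrapper(cell_fw, output_keep_prob=dropout)
            cell_bw = tf.nn.rnn_cell.DropoutWrapper(cell_bw, output_keep_prob=dropout)
        # despite the name, x1_mask / x2_mask appear to hold actual sequence lengths
        outputs, final_states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw, cell_bw, inputs, sequence_length=seq_len, dtype=tf.float32)
    return outputs, final_states
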
Example #3
    def composition_block(self, m_a, m_b, hidden_size, scope):
        """
        :param m_a: concat of [a_bar, a_hat, a_diff, a_mul], tensor with shape (batch_size, seq_length, 4 * 2 * hidden_size)
        :param m_b: concat of [b_bar, b_hat, b_diff, b_mul], tensor with shape (batch_size, seq_length, 4 * 2 * hidden_size)
        :param hidden_size: BiLSTM cell hidden state size
        :param scope: scope name
        outputV_a, outputV_b: BiLSTM outputs, tuple of (forward outputs, backward outputs)
        v_a, v_b: concatenation of the BiLSTM outputs, tensor with shape (batch_size, seq_length, 2 * hidden_size)
        v_a_avg, v_b_avg: average of v_a, v_b over the timestep (seq_length) axis, tensor with shape (batch_size, 2 * hidden_size)
        v_a_max, v_b_max: max of v_a, v_b over the timestep (seq_length) axis, tensor with shape (batch_size, 2 * hidden_size)
        v: concat of [v_a_avg, v_a_max, v_b_avg, v_b_max], tensor with shape (batch_size, 4 * 2 * hidden_size)
        :return: y_hat: output of feed forward layer, tensor with shape (batch_size, n_classes)
        """
        with tf.variable_scope(scope):
            outputV_a, finalStateV_a = _bilstm_block(m_a, hidden_size,
                                                     'biLSTM',
                                                     self.config.dropout)
            outputV_b, finalStateV_b = _bilstm_block(m_b,
                                                     hidden_size,
                                                     'biLSTM',
                                                     self.config.dropout,
                                                     reuse=True)
            v_a = tf.concat(outputV_a, axis=2)
            v_b = tf.concat(outputV_b, axis=2)

            print_shape('v_a', v_a)
            print_shape('v_b', v_b)

            # v_{a,avg} = \sum_{i=1}^{l_a} \frac{v_{a,i}}{l_a}, v_{a,max} = \max_{i=1}^{l_a} v_{a,i} (18)
            # v_{b,avg} = \sum_{j=1}^{l_b} \frac{v_{b,j}}{l_b}, v_{b,max} = \max_{j=1}^{l_b} v_{b,j} (19)
            v_a_avg = tf.reduce_mean(v_a, axis=1)
            v_b_avg = tf.reduce_mean(v_b, axis=1)
            v_a_max = tf.reduce_max(v_a, axis=1)
            v_b_max = tf.reduce_max(v_b, axis=1)
            print_shape('v_a_avg', v_a_avg)
            print_shape('v_a_max', v_a_max)

            # v = [v_{a,avg}; v_{a,max}; v_{b,avg}; v_{b,max}] (20)
            v = tf.concat([v_a_avg, v_a_max, v_b_avg, v_b_max], axis=1)
            print_shape('v', v)
            y_hat = _feedforward_block(v, self.config.dense_size,
                                       self.config.n_classes, 'feed_forward',
                                       self.config.dropout)
            return y_hat
Example #4
    def composition_block(self, m_a, m_b, hidden_size, scope):
        """
        :param m_a: concat of [a_bar, a_hat, a_diff, a_mul], tensor with shape (batch_size, seq_length, 4 * 2 * hidden_size)
        :param m_b: concat of [b_bar, b_hat, b_diff, b_mul], tensor with shape (batch_size, seq_length, 4 * 2 * hidden_size)
        :param hidden_size: BiLSTM cell hidden state size
        :param scope: scope name
        outputV_a, outputV_b: BiLSTM outputs, tuple of (forward outputs, backward outputs)
        v_a, v_b: concatenation of the BiLSTM outputs, tensor with shape (batch_size, seq_length, 2 * hidden_size)
        v_a_avg, v_b_avg: average of v_a, v_b over the timestep (seq_length) axis, tensor with shape (batch_size, 2 * hidden_size)
        v_a_max, v_b_max: max of v_a, v_b over the timestep (seq_length) axis, tensor with shape (batch_size, 2 * hidden_size)
        v: concat of [v_a_avg, v_a_max, v_b_avg, v_b_max], tensor with shape (batch_size, 4 * 2 * hidden_size)
        :return: y_hat: output of feed forward layer, tensor with shape (batch_size, n_classes)
        """
        with tf.variable_scope(scope):
            outputV_a, finalStateV_a = _bilstm_block(m_a, hidden_size,
                                                     'biLSTM',
                                                     self.config.dropout)
            outputV_b, finalStateV_b = _bilstm_block(m_b,
                                                     hidden_size,
                                                     'biLSTM',
                                                     self.config.dropout,
                                                     reuse=True)
            v_a = tf.concat(outputV_a, axis=2)
            v_b = tf.concat(outputV_b, axis=2)

            print_shape('v_a', v_a)
            print_shape('v_b', v_b)

            # v_{a,avg} = \sum_{i=1}^{l_a} \frac{v_{a,i}}{l_a}, v_{a,max} = \max_{i=1}^{l_a} v_{a,i} (18)
            # v_{b,avg} = \sum_{j=1}^{l_b} \frac{v_{b,j}}{l_b}, v_{b,max} = \max_{j=1}^{l_b} v_{b,j} (19)
            v_a_avg = tf.reduce_mean(v_a, axis=1)
            v_b_avg = tf.reduce_mean(v_b, axis=1)
            v_a_max = tf.reduce_max(v_a, axis=1)
            v_b_max = tf.reduce_max(v_b, axis=1)
            print_shape('v_a_avg', v_a_avg)
            print_shape('v_a_max', v_a_max)

            # v = [v_{a,avg}; v_{a,max}; v_{b,avg}; v_{b,max}] (20)
            v = tf.concat([v_a_avg, v_a_max, v_b_avg, v_b_max], axis=1)
            print_shape('v', v)
            y_hat = _feedforward_block(v, self.config.dense_size,
                                       self.config.n_classes, 'feed_forward',
                                       self.config.dropout)
            return y_hat
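
_feedforward_block is not shown either; a plausible two-layer head matching the call _feedforward_block(v, dense_size, n_classes, 'feed_forward', dropout) might look like this (the signature and layer choices are assumptions):

import tensorflow as tf  # TF 1.x API

def _feedforward_block(inputs, hidden_size, output_size, scope, dropout=None, reuse=False):
    """Hypothetical feed-forward head: dropout -> dense(tanh) -> dense(logits)."""
    with tf.variable_scope(scope, reuse=reuse):
        if dropout is not None:
            inputs = tf.nn.dropout(inputs, keep_prob=dropout)  # dropout as keep probability
        hidden = tf.layers.dense(inputs, hidden_size, activation=tf.nn.tanh, name="hidden")
        logits = tf.layers.dense(hidden, output_size, name="logits")
    return logits
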
Example #5
    def essemble_block(self, layers):
        """
            combine the layers with a softmax-weighted sum, scaled by a learned gamma
        """
        #print_shape("multul layer", layers)
        n_layers = len(layers)
        with tf.variable_scope("layer_weight"):
            W = tf.get_variable(
                "W",
                shape=(n_layers, ),
                initializer=tf.zeros_initializer,
                # regularizer=_l2_regularizer,
                trainable=True,
            )
            # scale the weighted sum by gamma
            gamma = tf.get_variable(
                'gamma',
                shape=(1, ),
                initializer=tf.ones_initializer,
                regularizer=None,
                trainable=True,
            )
            # normalize the weights
            normed_weights = tf.split(tf.nn.softmax(W + 1.0 / n_layers),
                                      n_layers)
            print_shape("normal weights", normed_weights[0])
            weighted_layer = []
            for w, l in zip(normed_weights, layers):
                weighted_layer.append(w * l)
            essemble = gamma * tf.add_n(weighted_layer)

            #normed_weights = tf.nn.softmax(W + 1.0 / n_layers)
            #print_shape("normal_weights", normed_weights)
            #layers = tf.concat(layers, axis=1)
            #print_shape("layers concated...", layers)
            #layers = tf.reshape(layers, (-1, 3, 128))
            #print_shape("layers reshaped...", layers)
            #essemble = tf.matmul(normed_weights, layers)
            print_shape("essemble", essemble)
            return essemble
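
essemble_block is an ELMo-style scalar mix: softmax-normalized weights over the layers, scaled by a learned gamma. A tiny self-contained demo of the same arithmetic on dummy tensors (all names and shapes below are illustrative only, not part of the model):

import tensorflow as tf  # TF 1.x graph mode

tf.reset_default_graph()
layers = [tf.fill((2, 4), float(i)) for i in range(3)]  # three (batch=2, dim=4) dummy layers
n_layers = len(layers)
W = tf.get_variable("W", shape=(n_layers,), initializer=tf.zeros_initializer)
gamma = tf.get_variable("gamma", shape=(1,), initializer=tf.ones_initializer)
normed_weights = tf.split(tf.nn.softmax(W + 1.0 / n_layers), n_layers)
mixed = gamma * tf.add_n([w * l for w, l in zip(normed_weights, layers)])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # with W initialized to zeros this reduces to the plain average of the layers: all 1.0
    print(sess.run(mixed))
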
Example #6
    def input_encoding_block(self, scope):
        """
           encoding block
        """
        # build self.embedding
        self._embedding = self.add_word_embedding()
        print("word embedding shape", self._embedding.shape)
        self.embed = tf.nn.embedding_lookup(self._embedding, self.x)
        print("embedd shape", self.embed.shape)

        with tf.variable_scope(scope):
            # a_bar = BiLSTM(a, i) (1)
            # b_bar = BiLSTM(b, i) (2)
            if self.config.using_actual_len:
                outputs, final_states = _bilstm_block(self.embed, self.config.hidden_size, 'bilstm', seq_len=self.x_mask)
            else:
                outputs, final_states = _bilstm_block(self.embed, self.config.hidden_size, 'bilstm', self.config.dropout)

            print(final_states[0][0])
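            # assuming _bilstm_block returns the bidirectional_dynamic_rnn final states,
            # final_states is (fw_state, bw_state), each an LSTMStateTuple (c, h),
            # so [0][0] and [1][0] below select the final cell states of each direction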
            bar = tf.concat((final_states[0][0], final_states[1][0]), axis=1)
            print_shape('bar', bar)
            return bar 
Example #7
    def bilstm_encoding_block(self, embed1, embed2, scope):
        """
           bilstm encoding block
        """

        with tf.variable_scope(scope):
            # a_bar = BiLSTM(a, i) (1)
            # b_bar = BiLSTM(b, i) (2)
            if self.config.using_actual_len:
                outputs_x1, final_states_x1 = _bilstm_block(
                    embed1,
                    self.config.hidden_size,
                    'bilstm',
                    seq_len=self.x1_mask)
                outputs_x2, final_states_x2 = _bilstm_block(
                    embed2,
                    self.config.hidden_size,
                    'bilstm',
                    seq_len=self.x2_mask,
                    reuse=True)
            else:
                outputs_x1, final_states_x1 = _bilstm_block(
                    embed1, self.config.hidden_size, 'bilstm',
                    self.config.dropout)
                outputs_x2, final_states_x2 = _bilstm_block(
                    embed2,
                    self.config.hidden_size,
                    'bilstm',
                    self.config.dropout,
                    reuse=True)

            a_bar = tf.concat(outputs_x1, axis=2)
            b_bar = tf.concat(outputs_x2, axis=2)
            print_shape('a_bar', a_bar)
            print_shape('b_bar', b_bar)
            return a_bar, b_bar
Example #8
    def input_encoding_block(self, scope):
        """
           encoding block
        """
        # build self.embedding
        self._embedding, self._embedding_char = self.add_word_embedding()
        print("word embedding shape", self._embedding.shape)
        print("char embedding shape", self._embedding_char.shape)
        self.embed_word = tf.nn.embedding_lookup(self._embedding, self.x)
        #self.embed_char = tf.nn.embedding_lookup(self._embedding_char, self.x_char)
        #self.embed1 = tf.nn.dropout(self.embed1, self.config.dropout)
        #self.embed1 = tf.expand_dims(self.embed1, -1)
        #print("char embedd shape", self.embed_char.shape)
        self.embed_char_cnn = _build_char_cnn(self.x_pinyin, self.config)
        print("cnn char embedd shape", self.embed_char_cnn.shape)
        self.embed = tf.concat([self.embed_word, self.embed_char_cnn], axis=2)
        print("cnn char embedd shape", self.embed.shape)

        with tf.variable_scope(scope):
            # a_bar = BiLSTM(a, i) (1)
            # b_bar = BiLSTM(b, i) (2)
            if self.config.using_actual_len:
                outputs, final_states = _bilstm_block(self.embed,
                                                      self.config.hidden_size,
                                                      'bilstm',
                                                      seq_len=self.x_mask)
            else:
                outputs, final_states = _bilstm_block(self.embed,
                                                      self.config.hidden_size,
                                                      'bilstm',
                                                      self.config.dropout)

            print(final_states[0][0])
            bar = tf.concat((final_states[0][0], final_states[1][0]), axis=1)
            print_shape('bar', bar)
            return bar
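
_build_char_cnn is not shown. Below is a rough sketch of a per-token character (here pinyin) CNN whose output can be concatenated with the word embeddings along the last axis; the input shape and every config field used are assumptions:

import tensorflow as tf  # TF 1.x API

def _build_char_cnn(char_ids, config):
    """Hypothetical char/pinyin CNN: char_ids has shape (batch, seq_len, max_chars);
    returns a (batch, seq_len, char_filters) tensor."""
    with tf.variable_scope("char_cnn"):
        char_table = tf.get_variable(
            "char_embedding", shape=(config.char_vocab_size, config.char_embed_size))
        char_emb = tf.nn.embedding_lookup(char_table, char_ids)  # (b, s, c, d)
        batch, seq_len = tf.shape(char_emb)[0], tf.shape(char_emb)[1]
        flat = tf.reshape(char_emb, (batch * seq_len, -1, config.char_embed_size))
        conv = tf.layers.conv1d(flat, filters=config.char_filters, kernel_size=3,
                                padding="same", activation=tf.nn.relu, name="conv")
        pooled = tf.reduce_max(conv, axis=1)  # max-pool over the character axis
        return tf.reshape(pooled, (batch, seq_len, config.char_filters))
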
Example #9
    def interact_block(self, v_a, v_b, scope):
        """
	    encode interact     
	"""
        with tf.variable_scope(scope):
            # v_{a,avg} = \sum_{i=1}^{l_a} \frac{v_{a,i}}{l_a}, v_{a,max} = \max_{i=1}^{l_a} v_{a,i} (18)
            # v_{b,avg} = \sum_{j=1}^{l_b} \frac{v_{b,j}}{l_b}, v_{b,max} = \max_{j=1}^{l_b} v_{b,j} (19)
            v_a_avg = tf.reduce_mean(v_a, axis=1)
            v_b_avg = tf.reduce_mean(v_b, axis=1)
            v_a_max = tf.reduce_max(v_a, axis=1)
            v_b_max = tf.reduce_max(v_b, axis=1)
            print_shape('v_a_avg', v_a_avg)
            print_shape('v_a_max', v_a_max)

            # v = [v_{a,avg}; v_{a,max}; v_{b,avg}; v_{b,max}] (20)
            v = tf.concat([v_a_avg, v_a_max, v_b_avg, v_b_max], axis=1)
            print_shape('v', v)
            f_out = _feedforward_block(v, self.config.dense_size,
                                       self.config.essemble_feature_size,
                                       'feed_forward', self.config.dropout)
            return f_out
Example #10
    def local_infer_block(self, x1_bar, x2_bar, scope):
        """
           attention block
           x1_bar: shape (batch, seq_len, 2*hidden_size)
           x2_bar: shape (batch, seq_len, 2*hidden_size)
        """
        attention_matrix = tf.matmul(x1_bar,
                                     x2_bar,
                                     transpose_b=True,
                                     name="attention_matrix")
        print_shape("attention_matrix shape:", attention_matrix)

        attention_soft_x1 = tf.nn.softmax(attention_matrix)
        # transpose only the last two axes; a bare tf.transpose would also move the
        # batch axis and make the softmax normalize over the wrong dimension
        attention_soft_x2 = tf.nn.softmax(tf.transpose(attention_matrix, perm=[0, 2, 1]))
        print_shape("soft attention x1", attention_soft_x1)
        print_shape("soft attention x2", attention_soft_x2)

        x1_atten = tf.matmul(attention_soft_x1, x2_bar)
        x2_atten = tf.matmul(attention_soft_x2, x1_bar)
        print_shape("x1 atten", x1_atten)
        print_shape("x2 atten", x2_atten)

        x1_diff = tf.subtract(x1_bar, x1_atten)
        x1_mul = tf.multiply(x1_bar, x1_atten)
        print_shape("x1 diff", x1_diff)
        print_shape("x1 mul", x1_mul)

        x2_diff = tf.subtract(x2_bar, x2_atten)
        x2_mul = tf.multiply(x2_bar, x2_atten)
        print_shape("x2 diff", x2_diff)
        print_shape("x2 mul", x2_mul)

        # [batch, seq_len, 4*hidden_size]
        # [x1, x1_atten, x1-x1_atten, x1*x1_atten]
        m_a = tf.concat([x1_bar, x1_atten, x1_diff, x1_mul], axis=2)
        m_b = tf.concat([x2_bar, x2_atten, x2_diff, x2_mul], axis=2)
        print_shape("m_a", m_a)
        print_shape("m_b", m_b)

        return m_a, m_b
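
Because the soft-alignment shapes are easy to get wrong (note the transpose over the last two axes above), here is a minimal self-contained check of the alignment arithmetic with random encodings; the shapes (batch=2, lengths 5 and 7, 2*hidden_size=8) are illustrative only and the snippet is standalone demo code, not part of the model:

import tensorflow as tf  # TF 1.x graph mode

tf.reset_default_graph()
x1_bar = tf.random_normal((2, 5, 8))   # (batch, len_1, 2*hidden)
x2_bar = tf.random_normal((2, 7, 8))   # (batch, len_2, 2*hidden)

e = tf.matmul(x1_bar, x2_bar, transpose_b=True)                               # (2, 5, 7)
x1_atten = tf.matmul(tf.nn.softmax(e), x2_bar)                                # (2, 5, 8)
x2_atten = tf.matmul(tf.nn.softmax(tf.transpose(e, perm=[0, 2, 1])), x1_bar)  # (2, 7, 8)

m_a = tf.concat([x1_bar, x1_atten, x1_bar - x1_atten, x1_bar * x1_atten], axis=2)
m_b = tf.concat([x2_bar, x2_atten, x2_bar - x2_atten, x2_bar * x2_atten], axis=2)

with tf.Session() as sess:
    print(sess.run(tf.shape(m_a)))  # [2 5 32]
    print(sess.run(tf.shape(m_b)))  # [2 7 32]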