def input_encoding_block(self, scope):
    """ encoding block """
    # build self.embedding
    self._embedding = self.add_word_embedding()
    print("embedding shape", self._embedding.shape)

    self.embed1 = tf.nn.embedding_lookup(self._embedding, self.x1)
    # self.embed1 = tf.nn.dropout(self.embed1, self.config.dropout)
    # self.embed1 = tf.expand_dims(self.embed1, -1)
    print("word embedding1 shape", self.embed1.shape)

    self.embed2 = tf.nn.embedding_lookup(self._embedding, self.x2)
    # self.embed2 = tf.nn.dropout(self.embed2, self.config.dropout)
    # self.embed2 = tf.expand_dims(self.embed2, -1)
    print("word embedding2 shape", self.embed2.shape)

    attention_block = TransformerEncoder(self.config, train=True)

    with tf.variable_scope(scope):
        # a_bar = BiLSTM(a, i)    (1)
        # b_bar = BiLSTM(b, i)    (2)
        outputs_x1 = attention_block(self.x1, self.embed1)
        outputs_x2 = attention_block(self.x2, self.embed2)

        a_bar = tf.concat(outputs_x1, axis=2)
        b_bar = tf.concat(outputs_x2, axis=2)
        print_shape('a_bar', a_bar)
        print_shape('b_bar', b_bar)

    return a_bar, b_bar
def input_encoding_block(self, scope):
    """ encoding block """
    # build self.embedding
    self._embedding = self.add_word_embedding()
    print("embedding shape", self._embedding.shape)

    self.embed1 = tf.nn.embedding_lookup(self._embedding, self.x1)
    # self.embed1 = tf.nn.dropout(self.embed1, self.config.dropout)
    # self.embed1 = tf.expand_dims(self.embed1, -1)
    print("word embedding1 shape", self.embed1.shape)

    self.embed2 = tf.nn.embedding_lookup(self._embedding, self.x2)
    # self.embed2 = tf.nn.dropout(self.embed2, self.config.dropout)
    # self.embed2 = tf.expand_dims(self.embed2, -1)
    print("word embedding2 shape", self.embed2.shape)

    with tf.variable_scope(scope):
        # a_bar = BiLSTM(a, i)    (1)
        # b_bar = BiLSTM(b, i)    (2)
        if self.config.using_actual_len:
            outputs_x1, final_states_x1 = _bilstm_block(
                self.embed1, self.config.hidden_size, 'bilstm',
                seq_len=self.x1_mask)
            outputs_x2, final_states_x2 = _bilstm_block(
                self.embed2, self.config.hidden_size, 'bilstm',
                seq_len=self.x2_mask, reuse=True)
        else:
            outputs_x1, final_states_x1 = _bilstm_block(
                self.embed1, self.config.hidden_size, 'bilstm',
                self.config.dropout)
            outputs_x2, final_states_x2 = _bilstm_block(
                self.embed2, self.config.hidden_size, 'bilstm',
                self.config.dropout, reuse=True)

        a_bar = tf.concat(outputs_x1, axis=2)
        b_bar = tf.concat(outputs_x2, axis=2)
        print_shape('a_bar', a_bar)
        print_shape('b_bar', b_bar)

    return a_bar, b_bar
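# _bilstm_block is defined elsewhere in the repo. Based on the call sites above
# (positional dropout, keyword seq_len and reuse), a minimal TF1 sketch of such a helper
# could look like the following; the cell type and where dropout is applied are
# assumptions, not the repo's actual implementation.
def _bilstm_block(inputs, hidden_size, scope, dropout=1.0, seq_len=None, reuse=False):
    """ sketch: bidirectional LSTM over inputs of shape (batch, seq_len, emb_dim) """
    with tf.variable_scope(scope, reuse=reuse):
        cell_fw = tf.nn.rnn_cell.LSTMCell(hidden_size)
        cell_bw = tf.nn.rnn_cell.LSTMCell(hidden_size)
        # dropout is assumed to be a keep probability
        cell_fw = tf.nn.rnn_cell.DropoutWrapper(cell_fw, output_keep_prob=dropout)
        cell_bw = tf.nn.rnn_cell.DropoutWrapper(cell_bw, output_keep_prob=dropout)
        # outputs is a (fw, bw) pair, each (batch, seq_len, hidden_size); concatenating
        # along axis=2 (as the callers do) gives the 2 * hidden_size representation.
        # final_states is a (fw, bw) pair of LSTMStateTuple(c, h).
        outputs, final_states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw, cell_bw, inputs, sequence_length=seq_len, dtype=tf.float32)
    return outputs, final_states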
def composition_block(self, m_a, m_b, hidden_size, scope):
    """
    :param m_a: concat of [a_bar, a_hat, a_diff, a_mul], tensor with shape (batch_size, seq_length, 4 * 2 * hidden_size)
    :param m_b: concat of [b_bar, b_hat, b_diff, b_mul], tensor with shape (batch_size, seq_length, 4 * 2 * hidden_size)
    :param hidden_size: biLSTM cell's hidden state size
    :param scope: scope name

    outputV_a, outputV_b: hidden states of biLSTM, tuple (forward LSTM cell, backward LSTM cell)

    :return:
        v_a, v_b: concat of biLSTM hidden states, tensor with shape (batch_size, seq_length, 2 * hidden_size)
    """
    with tf.variable_scope(scope):
        outputV_a, finalStateV_a = _bilstm_block(m_a, hidden_size, 'biLSTM',
                                                 self.config.dropout)
        outputV_b, finalStateV_b = _bilstm_block(m_b, hidden_size, 'biLSTM',
                                                 self.config.dropout, reuse=True)

        v_a = tf.concat(outputV_a, axis=2)
        v_b = tf.concat(outputV_b, axis=2)
        print_shape('v_a', v_a)
        print_shape('v_b', v_b)

    # the pooled interaction features are built downstream (see interact_block)
    return v_a, v_b
def composition_block(self, m_a, m_b, hidden_size, scope):
    """
    :param m_a: concat of [a_bar, a_hat, a_diff, a_mul], tensor with shape (batch_size, seq_length, 4 * 2 * hidden_size)
    :param m_b: concat of [b_bar, b_hat, b_diff, b_mul], tensor with shape (batch_size, seq_length, 4 * 2 * hidden_size)
    :param hidden_size: biLSTM cell's hidden state size
    :param scope: scope name

    outputV_a, outputV_b: hidden states of biLSTM, tuple (forward LSTM cell, backward LSTM cell)
    v_a, v_b: concat of biLSTM hidden states, tensor with shape (batch_size, seq_length, 2 * hidden_size)
    v_a_avg, v_b_avg: timestep (axis = seq_length) average of v_a, v_b, tensor with shape (batch_size, 2 * hidden_size)
    v_a_max, v_b_max: timestep (axis = seq_length) max of v_a, v_b, tensor with shape (batch_size, 2 * hidden_size)
    v: concat of [v_a_avg, v_a_max, v_b_avg, v_b_max], tensor with shape (batch_size, 4 * 2 * hidden_size)

    :return:
        y_hat: output of the feed-forward layer, tensor with shape (batch_size, n_classes)
    """
    with tf.variable_scope(scope):
        outputV_a, finalStateV_a = _bilstm_block(m_a, hidden_size, 'biLSTM',
                                                 self.config.dropout)
        outputV_b, finalStateV_b = _bilstm_block(m_b, hidden_size, 'biLSTM',
                                                 self.config.dropout, reuse=True)

        v_a = tf.concat(outputV_a, axis=2)
        v_b = tf.concat(outputV_b, axis=2)
        print_shape('v_a', v_a)
        print_shape('v_b', v_b)

        # v_{a,avg} = \sum_{i=1}^{l_a} \frac{v_{a,i}}{l_a},  v_{a,max} = \max_{i=1}^{l_a} v_{a,i}   (18)
        # v_{b,avg} = \sum_{j=1}^{l_b} \frac{v_{b,j}}{l_b},  v_{b,max} = \max_{j=1}^{l_b} v_{b,j}   (19)
        v_a_avg = tf.reduce_mean(v_a, axis=1)
        v_b_avg = tf.reduce_mean(v_b, axis=1)
        v_a_max = tf.reduce_max(v_a, axis=1)
        v_b_max = tf.reduce_max(v_b, axis=1)
        print_shape('v_a_avg', v_a_avg)
        print_shape('v_a_max', v_a_max)

        # v = [v_{a,avg}; v_{a,max}; v_{b,avg}; v_{b,max}]   (20)
        v = tf.concat([v_a_avg, v_a_max, v_b_avg, v_b_max], axis=1)
        print_shape('v', v)

        y_hat = _feedforward_block(v, self.config.dense_size, self.config.n_classes,
                                   'feed_forward', self.config.dropout)

    return y_hat
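# _feedforward_block is also defined outside this section. Judging from the call
# _feedforward_block(v, dense_size, n_classes, 'feed_forward', dropout), it is a small
# MLP head; a minimal sketch under that assumption (the layer count and the tanh
# activation are guesses, not the repo's actual implementation):
def _feedforward_block(inputs, dense_size, out_size, scope, dropout, reuse=False):
    """ sketch: dropout -> dense(dense_size) -> dropout -> dense(out_size) """
    with tf.variable_scope(scope, reuse=reuse):
        # dropout is assumed to be a keep probability
        h = tf.nn.dropout(inputs, dropout)
        h = tf.layers.dense(h, dense_size, activation=tf.nn.tanh, name="dense_1")
        h = tf.nn.dropout(h, dropout)
        logits = tf.layers.dense(h, out_size, name="dense_out")
    return logits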
def essemble_block(self, layers):
    """ weight layers by softmax """
    # print_shape("multul layer", layers)
    n_layers = len(layers)

    with tf.variable_scope("layer_weight"):
        W = tf.get_variable(
            "W",
            shape=(n_layers, ),
            initializer=tf.zeros_initializer,
            # regularizer=_l2_regularizer,
            trainable=True,
        )
        # scale the weighted sum by gamma
        gamma = tf.get_variable(
            'gamma',
            shape=(1, ),
            initializer=tf.ones_initializer,
            regularizer=None,
            trainable=True,
        )

    # normalize the weights
    normed_weights = tf.split(tf.nn.softmax(W + 1.0 / n_layers), n_layers)
    print_shape("normed weights", normed_weights[0])

    weighted_layer = []
    for w, l in zip(normed_weights, layers):
        weighted_layer.append(w * l)
    essemble = tf.add_n(weighted_layer)

    # normed_weights = tf.nn.softmax(W + 1.0 / n_layers)
    # print_shape("normal_weights", normed_weights)
    # layers = tf.concat(layers, axis=1)
    # print_shape("layers concated...", layers)
    # layers = tf.reshape(layers, (-1, 3, 128))
    # print_shape("layers reshaped...", layers)
    # essemble = tf.matmul(normed_weights, layers)

    print_shape("essemble", essemble)
    return essemble
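# essemble_block follows the ELMo-style scalar mix: a softmax over a learnable per-layer
# weight vector produces a weighted sum of same-shaped feature tensors, so every layer
# passed in must have the same shape for w * l and tf.add_n to line up. A hypothetical
# usage sketch (the feature names and the 128-dim size are illustrative, not from the repo):
def essemble_usage_sketch(self):
    f_bilstm = tf.placeholder(tf.float32, (None, 128))
    f_char_cnn = tf.placeholder(tf.float32, (None, 128))
    f_transformer = tf.placeholder(tf.float32, (None, 128))
    # weighted sum with the same (batch, 128) shape as each input layer
    return self.essemble_block([f_bilstm, f_char_cnn, f_transformer])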
def input_encoding_block(self, scope):
    """ encoding block """
    # build self.embedding
    self._embedding = self.add_word_embedding()
    print("word embedding shape", self._embedding.shape)

    self.embed = tf.nn.embedding_lookup(self._embedding, self.x)
    print("embedding shape", self.embed.shape)

    with tf.variable_scope(scope):
        # a_bar = BiLSTM(a, i)    (1)
        # b_bar = BiLSTM(b, i)    (2)
        if self.config.using_actual_len:
            outputs, final_states = _bilstm_block(self.embed, self.config.hidden_size,
                                                  'bilstm', seq_len=self.x_mask)
        else:
            outputs, final_states = _bilstm_block(self.embed, self.config.hidden_size,
                                                  'bilstm', self.config.dropout)

        print(final_states[0][0])
        bar = tf.concat((final_states[0][0], final_states[1][0]), axis=1)
        print_shape('bar', bar)

    return bar
def bilstm_encoding_block(self, embed1, embed2, scope):
    """ bilstm encoding block """
    with tf.variable_scope(scope):
        # a_bar = BiLSTM(a, i)    (1)
        # b_bar = BiLSTM(b, i)    (2)
        if self.config.using_actual_len:
            outputs_x1, final_states_x1 = _bilstm_block(
                embed1, self.config.hidden_size, 'bilstm', seq_len=self.x1_mask)
            outputs_x2, final_states_x2 = _bilstm_block(
                embed2, self.config.hidden_size, 'bilstm', seq_len=self.x2_mask,
                reuse=True)
        else:
            outputs_x1, final_states_x1 = _bilstm_block(
                embed1, self.config.hidden_size, 'bilstm', self.config.dropout)
            outputs_x2, final_states_x2 = _bilstm_block(
                embed2, self.config.hidden_size, 'bilstm', self.config.dropout,
                reuse=True)

        a_bar = tf.concat(outputs_x1, axis=2)
        b_bar = tf.concat(outputs_x2, axis=2)
        print_shape('a_bar', a_bar)
        print_shape('b_bar', b_bar)

    return a_bar, b_bar
def input_encoding_block(self, scope):
    """ encoding block """
    # build self.embedding
    self._embedding, self._embedding_char = self.add_word_embedding()
    print("word embedding shape", self._embedding.shape)
    print("char embedding shape", self._embedding_char.shape)

    self.embed_word = tf.nn.embedding_lookup(self._embedding, self.x)
    # self.embed_char = tf.nn.embedding_lookup(self._embedding_char, self.x_char)
    # self.embed1 = tf.nn.dropout(self.embed1, self.config.dropout)
    # self.embed1 = tf.expand_dims(self.embed1, -1)
    # print("char embedd shape", self.embed_char.shape)

    self.embed_char_cnn = _build_char_cnn(self.x_pinyin, self.config)
    print("cnn char embedding shape", self.embed_char_cnn.shape)

    self.embed = tf.concat([self.embed_word, self.embed_char_cnn], axis=2)
    print("concatenated embedding shape", self.embed.shape)

    with tf.variable_scope(scope):
        # a_bar = BiLSTM(a, i)    (1)
        # b_bar = BiLSTM(b, i)    (2)
        if self.config.using_actual_len:
            outputs, final_states = _bilstm_block(self.embed, self.config.hidden_size,
                                                  'bilstm', seq_len=self.x_mask)
        else:
            outputs, final_states = _bilstm_block(self.embed, self.config.hidden_size,
                                                  'bilstm', self.config.dropout)

        print(final_states[0][0])
        bar = tf.concat((final_states[0][0], final_states[1][0]), axis=1)
        print_shape('bar', bar)

    return bar
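# _build_char_cnn is defined elsewhere in the repo; its output is concatenated with the
# word embeddings along the feature axis, so it has to return one vector per token.
# A minimal sketch, assuming x_char holds per-token character ids of shape
# (batch, seq_len, max_chars); the config fields char_vocab_size, char_embed_size,
# char_cnn_filters and char_cnn_kernel are invented here for illustration.
def _build_char_cnn(x_char, config):
    """ sketch of a per-token char CNN (assumed, not the repo's implementation) """
    with tf.variable_scope("char_cnn"):
        char_embedding = tf.get_variable(
            "char_embedding", shape=(config.char_vocab_size, config.char_embed_size))
        embed = tf.nn.embedding_lookup(char_embedding, x_char)    # (b, s, c, e)

        max_chars = x_char.get_shape().as_list()[2]
        dyn_shape = tf.shape(x_char)
        # fold the token dimension into the batch so conv1d runs over the characters
        embed = tf.reshape(embed, (-1, max_chars, config.char_embed_size))

        conv = tf.layers.conv1d(embed, filters=config.char_cnn_filters,
                                kernel_size=config.char_cnn_kernel,
                                padding="same", activation=tf.nn.relu)
        pooled = tf.reduce_max(conv, axis=1)                      # (b*s, filters)
        return tf.reshape(pooled,
                          (dyn_shape[0], dyn_shape[1], config.char_cnn_filters))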
def interact_block(self, v_a, v_b, scope):
    """ encode interact """
    with tf.variable_scope(scope):
        # v_{a,avg} = \sum_{i=1}^{l_a} \frac{v_{a,i}}{l_a},  v_{a,max} = \max_{i=1}^{l_a} v_{a,i}   (18)
        # v_{b,avg} = \sum_{j=1}^{l_b} \frac{v_{b,j}}{l_b},  v_{b,max} = \max_{j=1}^{l_b} v_{b,j}   (19)
        v_a_avg = tf.reduce_mean(v_a, axis=1)
        v_b_avg = tf.reduce_mean(v_b, axis=1)
        v_a_max = tf.reduce_max(v_a, axis=1)
        v_b_max = tf.reduce_max(v_b, axis=1)
        print_shape('v_a_avg', v_a_avg)
        print_shape('v_a_max', v_a_max)

        # v = [v_{a,avg}; v_{a,max}; v_{b,avg}; v_{b,max}]   (20)
        v = tf.concat([v_a_avg, v_a_max, v_b_avg, v_b_max], axis=1)
        print_shape('v', v)

        f_out = _feedforward_block(v, self.config.dense_size,
                                   self.config.essemble_feature_size,
                                   'feed_forward', self.config.dropout)

    return f_out
def local_infer_block(self, x1_bar, x2_bar, scope):
    """ attention block
    x1_bar: shape (batch, seq_len, 2*hidden_size)
    x2_bar: shape (batch, seq_len, 2*hidden_size)
    """
    # e_{ij} = x1_bar_i^T x2_bar_j, shape (batch, len_x1, len_x2)
    attention_matrix = tf.matmul(x1_bar, x2_bar, transpose_b=True,
                                 name="attention_matrix")
    print_shape("attention_matrix shape:", attention_matrix)

    # softmax over x2's positions for each x1 token, and over x1's positions for each
    # x2 token; transpose only the last two axes so the batch dimension stays in place
    # (a bare tf.transpose would reverse all dimensions, including the batch axis)
    attention_soft_x1 = tf.nn.softmax(attention_matrix)
    attention_soft_x2 = tf.nn.softmax(tf.transpose(attention_matrix, perm=[0, 2, 1]))
    print_shape("soft attention x1", attention_soft_x1)
    print_shape("soft attention x2", attention_soft_x2)

    x1_atten = tf.matmul(attention_soft_x1, x2_bar)
    x2_atten = tf.matmul(attention_soft_x2, x1_bar)
    print_shape("x1 atten", x1_atten)
    print_shape("x2 atten", x2_atten)

    x1_diff = tf.subtract(x1_bar, x1_atten)
    x1_mul = tf.multiply(x1_bar, x1_atten)
    print_shape("x1 diff", x1_diff)
    print_shape("x1 mul", x1_mul)

    x2_diff = tf.subtract(x2_bar, x2_atten)
    x2_mul = tf.multiply(x2_bar, x2_atten)
    print_shape("x2 diff", x2_diff)
    print_shape("x2 mul", x2_mul)

    # [batch, seq_len, 4 * 2 * hidden_size]
    # [x1, x1_atten, x1 - x1_atten, x1 * x1_atten]
    m_a = tf.concat([x1_bar, x1_atten, x1_diff, x1_mul], axis=2)
    m_b = tf.concat([x2_bar, x2_atten, x2_diff, x2_mul], axis=2)
    print_shape("m_a", m_a)
    print_shape("m_b", m_b)

    return m_a, m_b
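# The blocks above form the usual ESIM pipeline: encode both sentences, compute
# soft-aligned local inference features, then compose and classify. A hypothetical
# wiring of the methods in this section (the method name build_graph and the scope
# strings are illustrative, not taken from the repo):
def build_graph(self):
    # (batch, seq_len, 2 * hidden_size) encodings of sentences a and b
    a_bar, b_bar = self.input_encoding_block('input_encoding')
    # (batch, seq_len, 4 * 2 * hidden_size) local inference features
    m_a, m_b = self.local_infer_block(a_bar, b_bar, 'local_inference')
    # (batch, n_classes) prediction from composition + pooling + feed-forward head
    self.y_hat = self.composition_block(m_a, m_b, self.config.hidden_size, 'composition')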