import os

import numpy as np
import tensorflow as tf

import myTF
import OPTION


def __init__(self, sequence_length, num_classes, vocab_size, embedding_size,
             filter_sizes, num_filters, Word2vec=True, Trainable=False):
    """
    :param sequence_length: int. Number of tokens in each input sample.
        Remember that all samples are padded to the same length.
    :param num_classes: int. Total number of classes/labels for classification.
    :param vocab_size: int. Total number of tokens/words in the whole dataset.
        This is needed to define the size of the embedding layer, which has
        shape [vocab_size, embedding_size].
    :param embedding_size: int. Dimensionality of the word2vec embeddings.
    :param filter_sizes: list. The numbers of words the convolutional filters
        should cover. There are num_filters filters for each size specified
        here. For example, [3, 4, 5] means filters that slide over 3, 4 and 5
        words respectively, for a total of 3 * num_filters filters.
    :param num_filters: int. The number of filters per filter size.
    """
    self._sequence_length = sequence_length
    self._num_classes = num_classes
    self._vocab_size = vocab_size
    self._embedding_size = embedding_size
    self._filter_sizes = filter_sizes
    self._num_filters = num_filters
    self._paramaters_list = []

    # Embedding layer
    with tf.name_scope("embedding"):
        if Word2vec:
            # Load pretrained word2vec vectors from a text file whose header
            # line holds "<vocab_count> <embedding_size>".
            lineslist = open(
                os.path.join(OPTION.DATA_PATH, OPTION.DATA_VEC_NAME),
                'r').readlines()
            headline = [int(i) for i in lineslist[0].strip().split(' ')]
            self._vocab_size = headline[0] + 1  # index 0 is reserved for padding
            self._embedding_size = headline[1]
            vectors = np.zeros([self._vocab_size, self._embedding_size],
                               dtype=np.float32)
            for index in range(1, self._vocab_size):
                vec = [float(i) for i in lineslist[index].strip().split(' ')[1:]]
                vectors[index] = np.array(vec, dtype=np.float32)
            self.vectors = myTF.variable_with_weight_decay(
                'vectors', vectors, trainable=Trainable, wd=None,
                dtype=tf.float32)
        else:
            self.vectors = myTF.variable_with_weight_decay(
                'vectors',
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                trainable=Trainable, wd=None, dtype=tf.float32)
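# ---------------------------------------------------------------------------
# The loader above expects a plain-text word2vec file: a header line
# "<vocab_count> <dim>", then one "<token> <v1> ... <v_dim>" line per token,
# where the line's position (1-based) becomes the token's embedding index
# (index 0 is reserved for padding). A minimal sketch that writes such a
# file; `write_vec_file` and its arguments are illustrative, not part of
# the original code:
def write_vec_file(path, token_vectors, dim):
    """token_vectors: list of (token, vector) pairs, each vector of length dim."""
    with open(path, 'w') as f:
        f.write('%d %d\n' % (len(token_vectors), dim))
        for token, vec in token_vectors:
            f.write('%s %s\n' % (token, ' '.join('%g' % v for v in vec)))
# ---------------------------------------------------------------------------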
def __init__(self, sequence_length, sent_length, num_classes, vocab_size,
             embedding_size, Word2vec=True, Trainable=False):
    """
    :param sequence_length: int. Number of sentences in each input sample.
        Remember that all samples are padded to the same length.
    :param sent_length: int. Analogous to sequence_length: the number of
        tokens in each (padded) sentence.
    :param num_classes: int. Total number of classes/labels for classification.
    :param vocab_size: int. Total number of tokens/words in the whole dataset.
        This is needed to define the size of the embedding layer, which has
        shape [vocab_size, embedding_size].
    :param embedding_size: int. Dimensionality of the word2vec embeddings.
    """
    self._sequence_length = sequence_length
    self._sent_length = sent_length
    self._num_classes = num_classes
    self._vocab_size = vocab_size
    self._embedding_size = embedding_size

    # Embedding layer
    with tf.name_scope("embedding"):
        if Word2vec:
            lineslist = open(
                os.path.join(OPTION.DATA_PATH, OPTION.DATA_VEC_NAME),
                'r').readlines()
            headline = [int(i) for i in lineslist[0].strip().split(' ')]
            self._vocab_size = headline[0] + 1  # index 0 is reserved for padding
            self._embedding_size = headline[1]
            if embedding_size is not None:
                assert headline[1] == embedding_size, 'error, %d != %d' % (
                    headline[1], embedding_size)
            vectors = np.zeros([self._vocab_size, self._embedding_size],
                               dtype=np.float32)
            for index in range(1, self._vocab_size):
                vec = [float(i) for i in lineslist[index].strip().split(' ')[1:]]
                vectors[index] = np.array(vec, dtype=np.float32)
            self.vectors = myTF.variable_with_weight_decay(
                'vectors', vectors, trainable=Trainable, wd=None,
                dtype=tf.float32)
        else:
            self.vectors = myTF.variable_with_weight_decay(
                'vectors',
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                trainable=Trainable, wd=None, dtype=tf.float32)
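# Instantiation sketch (the class name `HAN` and the sizes are assumptions,
# not from the original code): documents padded to 30 sentences of 50 tokens.
#
#   model = HAN(sequence_length=30, sent_length=50, num_classes=5,
#               vocab_size=None, embedding_size=200,
#               Word2vec=True, Trainable=False)
#
# With Word2vec=True, vocab_size and embedding_size are re-read from the
# vector file's header; passing embedding_size here only triggers the
# consistency assert above.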
def inference(self, input, eval_data=False):
    """
    :param input: 2D tensor of [None, sequence_length]
    :return scores: 2D tensor of [None, num_classes]
    """
    # Embedding layer
    with tf.name_scope("embedding"):
        # shape [None, sequence_length, embedding_size]
        embedded_chars = tf.nn.embedding_lookup(self.vectors, input)
        # shape [None, sequence_length, embedding_size, 1]
        embedded_chars_expanded = tf.expand_dims(embedded_chars, -1)

    # Create a convolution + maxpool layer for each filter size
    pooled_outputs = []
    for i, filter_size in enumerate(self._filter_sizes):
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            # Convolution layer
            filter_shape = [filter_size, self._embedding_size, 1,
                            self._num_filters]
            weights = myTF.variable_with_weight_decay(
                'weights', tf.truncated_normal(filter_shape, stddev=0.1),
                wd=None, dtype=tf.float32)
            biases = myTF.variable_with_weight_decay(
                'biases', tf.constant(0.1, shape=[self._num_filters]),
                wd=None, dtype=tf.float32)
            conv = tf.nn.conv2d(embedded_chars_expanded, weights,
                                strides=[1, 1, 1, 1], padding="VALID",
                                name="conv")
            # Apply nonlinearity
            output = tf.nn.relu(tf.nn.bias_add(conv, biases), name="relu")
            # Max-pooling over the outputs
            pooled = tf.nn.max_pool(
                output,
                ksize=[1, self._sequence_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1], padding='VALID', name="pool")
            pooled_outputs.append(pooled)

    # Combine all the pooled features
    num_filters_total = self._num_filters * len(self._filter_sizes)
    h_pool = tf.concat(pooled_outputs, 3)
    h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])
    features = h_pool_flat

    # Add dropout (training only)
    if not eval_data:
        with tf.name_scope("dropout"):
            h_pool_flat = tf.nn.dropout(h_pool_flat, OPTION.DROPOUT_KEEP_PROB)

    # Final (unnormalized) scores and predictions
    with tf.name_scope("output"):
        weights = myTF.variable_with_weight_decay(
            'weights',
            tf.truncated_normal([num_filters_total, self._num_classes],
                                stddev=0.1),
            wd=None, dtype=tf.float32)
        biases = myTF.variable_with_weight_decay(
            'biases', tf.constant(0.1, shape=[self._num_classes]),
            wd=None, dtype=tf.float32)
        scores = tf.nn.xw_plus_b(h_pool_flat, weights, biases, name="scores")

    return scores, features
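# A hedged sketch of how the returned unnormalized scores could feed a
# cross-entropy loss (the helper name `loss_from_scores` is illustrative;
# the original project may compute its loss elsewhere, e.g. in myTF):
def loss_from_scores(scores, labels):
    # labels: 1D int tensor of class ids, shape [None]
    xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=scores)
    return tf.reduce_mean(xent, name='cross_entropy_loss')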
def inference(self, input, eval_data=False):
    """
    :param input: 3D tensor of [None, sequence_length, sent_length]
    :return scores: 2D tensor of [None, num_classes]
    """
    # Embedding layer
    with tf.name_scope("embedding"):
        # shape [None, sequence_length, sent_length, embedding_size]
        embedded_chars = tf.nn.embedding_lookup(self.vectors, input)

    # Create a biRNN for each sentence
    def biRNNLayer(inputs, hidden_size, name):
        # inputs: [batch_size, n_step, dim]
        def length(sequences):
            # Actual (unpadded) length of each sequence in the batch
            used = tf.sign(
                tf.reduce_max(tf.abs(sequences), reduction_indices=2))
            seq_len = tf.reduce_sum(used, reduction_indices=1)
            return tf.cast(seq_len, tf.int32)

        with tf.variable_scope(name):
            GRU_cell_fw = tf.contrib.rnn.GRUCell(hidden_size)
            GRU_cell_bw = tf.contrib.rnn.GRUCell(hidden_size)
            if not eval_data:
                GRU_cell_fw = tf.contrib.rnn.DropoutWrapper(
                    cell=GRU_cell_fw, input_keep_prob=1.0,
                    output_keep_prob=OPTION.DROPOUT_KEEP_PROB)
                GRU_cell_bw = tf.contrib.rnn.DropoutWrapper(
                    cell=GRU_cell_bw, input_keep_prob=1.0,
                    output_keep_prob=OPTION.DROPOUT_KEEP_PROB)
            inputs_list = tf.unstack(tf.transpose(inputs, [1, 0, 2]))
            outputs, _, _ = tf.nn.static_bidirectional_rnn(
                cell_fw=GRU_cell_fw, cell_bw=GRU_cell_bw, inputs=inputs_list,
                sequence_length=length(inputs), dtype=tf.float32)
            # outputs: [batch_size, n_step, hidden_size * 2]
            outputs = tf.transpose(tf.stack(outputs, 0), [1, 0, 2])
        return outputs

    def AttentionLayer(inputs, n_step, hidden_size, name):
        # inputs: [batch_size, n_step, hidden_size * 2]
        with tf.variable_scope(name):
            u_context = tf.Variable(tf.truncated_normal([hidden_size * 2]),
                                    name='u_context')
            weights = myTF.variable_with_weight_decay(
                name + '_weights',
                tf.truncated_normal([hidden_size * 2], stddev=0.1),
                wd=None, dtype=tf.float32)
            biases = myTF.variable_with_weight_decay(
                name + '_biases',
                tf.constant(0.1, shape=[hidden_size * 2]),
                wd=None, dtype=tf.float32)
            # [batch_size, n_step, hidden_size * 2]
            h = tf.tanh(inputs * weights + biases)
            # [batch_size, n_step, 1]
            alpha = tf.nn.softmax(
                tf.reduce_sum(tf.multiply(h, u_context), axis=2,
                              keep_dims=True), dim=1)
            # [batch_size, hidden_size * 2]
            outputs = tf.reduce_sum(tf.multiply(inputs, alpha), axis=1)
        return outputs

    with tf.name_scope("sent2vec"):
        # [batch_size * sequence_length, sent_length, embedding_size]
        word_embedded = tf.reshape(
            embedded_chars, [-1, self._sent_length, self._embedding_size])
        # [batch_size * sequence_length, sent_length, hidden_size * 2]
        word_encoded = biRNNLayer(word_embedded,
                                  hidden_size=OPTION.WORD_HIDDEN_SIZE,
                                  name='word_encoder')
        # [batch_size * sequence_length, hidden_size * 2]
        sent_vec = AttentionLayer(word_encoded, n_step=self._sent_length,
                                  hidden_size=OPTION.WORD_HIDDEN_SIZE,
                                  name='word_attention')
        sent_vec = tf.reshape(
            sent_vec,
            [-1, self._sequence_length, OPTION.WORD_HIDDEN_SIZE * 2])

    with tf.name_scope("doc2vec"):
        # [batch_size, sequence_length, hidden_size * 2]
        doc_encoded = biRNNLayer(sent_vec, OPTION.SENT_HIDDEN_SIZE,
                                 name='sent_encoder')
        # [batch_size, hidden_size * 2]
        doc_vec = AttentionLayer(doc_encoded, n_step=self._sequence_length,
                                 hidden_size=OPTION.SENT_HIDDEN_SIZE,
                                 name='sent_attention')

    ret_feature = doc_vec

    # Add dropout (training only)
    if not eval_data:
        with tf.name_scope("dropout"):
            doc_vec = tf.nn.dropout(doc_vec, OPTION.DROPOUT_KEEP_PROB)

    # Final (unnormalized) scores and predictions
    with tf.name_scope("output"):
        weights = myTF.variable_with_weight_decay(
            'weights',
            tf.truncated_normal(
                [OPTION.SENT_HIDDEN_SIZE * 2, self._num_classes],
                stddev=0.1),
            wd=None, dtype=tf.float32)
        biases = myTF.variable_with_weight_decay(
            'biases', tf.constant(0.1, shape=[self._num_classes]),
            wd=None, dtype=tf.float32)
        scores = tf.nn.xw_plus_b(doc_vec, weights, biases, name="scores")

    return scores, ret_feature
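# Shape sketch of AttentionLayer in plain NumPy (illustrative only). Note
# that `inputs * weights` above is an elementwise, broadcast scaling by a
# [hidden_size * 2] vector, not the dense projection used in the original
# hierarchical-attention formulation.
def attention_numpy(inputs, weights, biases, u_context):
    # inputs: [batch, n_step, 2h]; weights, biases, u_context: [2h]
    h = np.tanh(inputs * weights + biases)                 # [batch, n_step, 2h]
    logits = np.sum(h * u_context, axis=2, keepdims=True)  # [batch, n_step, 1]
    e = np.exp(logits - logits.max(axis=1, keepdims=True))
    alpha = e / e.sum(axis=1, keepdims=True)               # softmax over n_step
    return np.sum(inputs * alpha, axis=1)                  # [batch, 2h]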
def inference(self, words_seq, left_context, right_context, batch_size,
              keep_prob):
    """
    :param words_seq: 2D tensor of [None, sequence_length]
    :return scores: 2D tensor of [None, num_classes]
    """
    # Embedding layer
    with tf.name_scope("embedding"):
        # each with shape [None, sequence_length, embedding_size]
        words_embedded = tf.nn.embedding_lookup(self.vectors, words_seq)
        cl_embedded = tf.nn.embedding_lookup(self.vectors, left_context)
        cr_embedded = tf.nn.embedding_lookup(self.vectors, right_context)

    # Build the LSTM layers
    # outputs: [batch_size, time_steps/sequence_length, context_size]
    left_reps = self._LSTM(batch_size, keep_prob, cl_embedded, 'forward_LSTM')
    right_reps = self._LSTM(batch_size, keep_prob, cr_embedded,
                            'reverse_LSTM')
    right_reps = tf.reverse(right_reps, axis=[1])
    outputs = tf.concat((left_reps, words_embedded, right_reps), axis=-1)
    # shape [batch_size, sequence_length, context_size*2 + embedding_size, 1]
    outputs = tf.expand_dims(outputs, -1)

    with tf.name_scope("convolution"):
        # Convolution layer
        inputmaps = self._context_size * 2 + self._embedding_size
        outputmaps = self._feature_size
        filter_shape = [1, inputmaps, 1, outputmaps]
        weights = myTF.variable_with_weight_decay(
            'weights', tf.truncated_normal(filter_shape, stddev=0.1),
            wd=None, dtype=tf.float32)
        biases = myTF.variable_with_weight_decay(
            'biases', tf.constant(0.1, shape=[outputmaps]),
            wd=None, dtype=tf.float32)
        self._paramaters_list.append(weights)
        self._paramaters_list.append(biases)
        # conv: [batch_size, sequence_length, 1, feature_size]
        conv = tf.nn.conv2d(outputs, weights, strides=[1, 1, 1, 1],
                            padding="VALID",  # no padding
                            name="conv")
        # Apply nonlinearity
        features_conv = tf.nn.relu(tf.nn.bias_add(conv, biases),
                                   name="features_conv")

    with tf.name_scope("max-pooling"):
        # Max-pooling over the outputs
        features_pooled = tf.nn.max_pool(
            features_conv, ksize=[1, self._sequence_length, 1, 1],
            strides=[1, 1, 1, 1], padding='VALID', name="pool")
        features = tf.reshape(features_pooled,
                              (batch_size, self._feature_size))

    with tf.name_scope("dropout"):
        # Add dropout
        features_dropout = tf.nn.dropout(features, keep_prob)

    # Final (unnormalized) scores and predictions
    with tf.name_scope("output"):
        weights = myTF.variable_with_weight_decay(
            'weights',
            tf.truncated_normal([self._feature_size, self._num_classes],
                                stddev=0.1),
            wd=None, dtype=tf.float32)
        biases = myTF.variable_with_weight_decay(
            'biases', tf.constant(0.1, shape=[self._num_classes]),
            wd=None, dtype=tf.float32)
        self._paramaters_list.append(weights)
        self._paramaters_list.append(biases)
        scores = tf.nn.xw_plus_b(features_dropout, weights, biases,
                                 name="scores")

    return scores, features
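# `self._LSTM` is not defined in this snippet. A minimal sketch consistent
# with how it is called above (returning per-timestep outputs of width
# self._context_size); this is an assumption, not the original helper:
def _LSTM(self, batch_size, keep_prob, inputs, name):
    with tf.variable_scope(name):
        cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=self._context_size,
                                            forget_bias=1.0,
                                            state_is_tuple=True)
        cell = tf.nn.rnn_cell.DropoutWrapper(cell=cell, input_keep_prob=1.0,
                                             output_keep_prob=keep_prob)
        init_state = cell.zero_state(batch_size, dtype=tf.float32)
        # outputs: [batch_size, time_steps, context_size]
        outputs, _ = tf.nn.dynamic_rnn(cell, inputs=inputs,
                                       initial_state=init_state,
                                       time_major=False)
    return outputs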
def inference(self, input, features_before, batch_size, eval_data=False):
    """
    :param input: 2D tensor of [None, sequence_length]
    :param features_before: 3D tensor of
        [batch_size, timestep_size, feature_size]
    :param batch_size: tf.placeholder(tf.int32)
    :return scores: 2D tensor of [None, num_classes]
    """
    # Embedding layer
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        # shape [None, sequence_length, embedding_size]
        embedded_chars = tf.nn.embedding_lookup(self.vectors, input)
        # shape [None, sequence_length, embedding_size, 1]
        embedded_chars_expanded = tf.expand_dims(embedded_chars, -1)

    # Create a convolution + maxpool layer for each filter size
    pooled_outputs = []
    for i, filter_size in enumerate(self._filter_sizes):
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            # Convolution layer
            filter_shape = [filter_size, self._embedding_size, 1,
                            self._num_filters]
            weights = myTF.variable_with_weight_decay(
                'weights', tf.truncated_normal(filter_shape, stddev=0.1),
                wd=None, dtype=tf.float32)
            biases = myTF.variable_with_weight_decay(
                'biases', tf.constant(0.1, shape=[self._num_filters]),
                wd=None, dtype=tf.float32)
            conv = tf.nn.conv2d(embedded_chars_expanded, weights,
                                strides=[1, 1, 1, 1], padding="VALID",
                                name="conv")
            # Apply nonlinearity
            output = tf.nn.relu(tf.nn.bias_add(conv, biases), name="relu")
            # Max-pooling over the outputs
            pooled = tf.nn.max_pool(
                output,
                ksize=[1, self._sequence_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1], padding='VALID', name="pool")
            pooled_outputs.append(pooled)

    # Combine all the pooled features
    num_filters_total = self._num_filters * len(self._filter_sizes)
    if tf.__version__[0] == '0':
        h_pool = tf.concat(3, pooled_outputs)
    else:
        h_pool = tf.concat(pooled_outputs, 3)
    h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])

    # Append the current step's features to the earlier ones:
    # [batch_size, timestep_size, feature_size]
    if tf.__version__[0] == '0':
        features = tf.concat(
            1, [features_before, tf.expand_dims(h_pool_flat, axis=1)])
    else:
        features = tf.concat(
            [features_before, tf.expand_dims(h_pool_flat, axis=1)], 1)

    # Build the LSTM layer
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=num_filters_total,
                                             forget_bias=1.0,
                                             state_is_tuple=True)
    # Add a dropout layer (training only)
    if not eval_data:
        lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
            cell=lstm_cell, input_keep_prob=1.0,
            output_keep_prob=OPTION.DROPOUT_KEEP_PROB)
    # Initial state
    init_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)
    # outputs: [batch_size, timestep_size, feature_size]
    outputs, state = tf.nn.dynamic_rnn(lstm_cell, inputs=features,
                                       initial_state=init_state,
                                       time_major=False)
    # Keep only the last timestep's output
    output_t = outputs[:, -1, :]

    # Final (unnormalized) scores and predictions
    with tf.name_scope("output"):
        weights = myTF.variable_with_weight_decay(
            'weights',
            tf.truncated_normal([num_filters_total, self._num_classes],
                                stddev=0.1),
            wd=None, dtype=tf.float32)
        biases = myTF.variable_with_weight_decay(
            'biases', tf.constant(0.1, shape=[self._num_classes]),
            wd=None, dtype=tf.float32)
        scores = tf.nn.xw_plus_b(output_t, weights, biases, name="scores")

    return scores, output_t
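# Call sketch (names are illustrative, not from the original code): earlier
# steps' pooled features are fed in as a growing
# [batch_size, timestep_size, feature_size] tensor, where feature_size is
# num_filters * len(filter_sizes).
#
#   features_before = tf.placeholder(tf.float32,
#                                    [None, None, num_filters_total])
#   batch_size = tf.placeholder(tf.int32, [])
#   scores, features_t = model.inference(input_x, features_before, batch_size)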
def inference(self, input, features_before, batch_size, eval_data=False):
    """
    :param input: 2D tensor of [None, sequence_length]
    :param features_before: 2D tensor of [batch_size, feature_size], or None
    :param batch_size: tf.placeholder(tf.int32)
    :return scores: 2D tensor of [None, num_classes]
    """
    # Embedding layer
    with tf.name_scope("embedding"):
        # shape [None, sequence_length, embedding_size]
        embedded_chars = tf.nn.embedding_lookup(self.vectors, input)
        # shape [None, sequence_length, embedding_size, 1]
        embedded_chars_expanded = tf.expand_dims(embedded_chars, -1)

    # Create a convolution + maxpool layer for each filter size
    pooled_outputs = []
    for i, filter_size in enumerate(self._filter_sizes):
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            # Convolution layer
            filter_shape = [filter_size, self._embedding_size, 1,
                            self._num_filters]
            weights = myTF.variable_with_weight_decay(
                'weights', tf.truncated_normal(filter_shape, stddev=0.1),
                wd=None, dtype=tf.float32)
            biases = myTF.variable_with_weight_decay(
                'biases', tf.constant(0.1, shape=[self._num_filters]),
                wd=None, dtype=tf.float32)
            conv = tf.nn.conv2d(embedded_chars_expanded, weights,
                                strides=[1, 1, 1, 1], padding="VALID",
                                name="conv")
            # Apply nonlinearity
            output = tf.nn.relu(tf.nn.bias_add(conv, biases), name="relu")
            # Max-pooling over the outputs
            pooled = tf.nn.max_pool(
                output,
                ksize=[1, self._sequence_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1], padding='VALID', name="pool")
            pooled_outputs.append(pooled)

    # Combine all the pooled features
    num_filters_total = self._num_filters * len(self._filter_sizes)
    if tf.__version__[0] == '0':
        h_pool = tf.concat(3, pooled_outputs)
    else:
        h_pool = tf.concat(pooled_outputs, 3)
    h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])

    if features_before is None:
        features = h_pool_flat
    else:
        # Fuse earlier features with the current ones through a dense layer
        if tf.__version__[0] == '0':
            concat_feature = tf.concat(1, [features_before, h_pool_flat])
        else:
            concat_feature = tf.concat([features_before, h_pool_flat], 1)
        weights = myTF.variable_with_weight_decay(
            'weights',
            tf.truncated_normal(
                [concat_feature.get_shape()[1].value, num_filters_total],
                stddev=0.1),
            wd=None, dtype=tf.float32)
        biases = myTF.variable_with_weight_decay(
            'biases', tf.constant(0.1, shape=[num_filters_total]),
            wd=None, dtype=tf.float32)
        features = tf.matmul(concat_feature, weights)
        features = tf.nn.relu(tf.nn.bias_add(features, biases))

    # Add dropout (training only)
    if not eval_data:
        with tf.name_scope("dropout"):
            features_dropout = tf.nn.dropout(features,
                                             OPTION.DROPOUT_KEEP_PROB)
    else:
        features_dropout = features

    # Final (unnormalized) scores and predictions
    with tf.name_scope("output"):
        weights = myTF.variable_with_weight_decay(
            'weights',
            tf.truncated_normal(
                [features_dropout.get_shape()[1].value, self._num_classes],
                stddev=0.1),
            wd=None, dtype=tf.float32)
        biases = myTF.variable_with_weight_decay(
            'biases', tf.constant(0.1, shape=[self._num_classes]),
            wd=None, dtype=tf.float32)
        scores = tf.nn.xw_plus_b(features_dropout, weights, biases,
                                 name="scores")

    return scores, h_pool_flat
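# Call sketch (illustrative): with no history, pass features_before=None;
# the returned second value is the raw pooled feature vector (h_pool_flat),
# which a caller can store and feed back as features_before on a later step,
# subject to the project's variable-scoping conventions.
#
#   scores, pooled = model.inference(input_x, None, batch_size)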