def build_train_proc(self):
    # input layer (batch_size, n_steps, input_dim)
    self.encode_input = tf.placeholder(tf.float32, [None, self.max_n_words, self.input_dim])
    self.encode_sent_len = tf.placeholder(tf.int32, [None])
    self.encode_conv_len = tf.placeholder(tf.int32, [self.batch_size])
    self.decode_input = tf.placeholder(tf.float32, [None, self.max_r_words, self.input_dim])
    self.decode_sent_len = tf.placeholder(tf.int32, [None])
    self.is_training = tf.placeholder(tf.bool)
    self.reward = tf.placeholder(tf.float32, [None])
    self.ans_vec = tf.placeholder(tf.float32, [None, self.max_r_words, self.input_dim])
    self.y = tf.placeholder(tf.int32, [None, self.max_r_words])
    self.y_mask = tf.placeholder(tf.float32, [None, self.max_r_words])

    self.encode_input = tf.contrib.layers.dropout(self.encode_input, self.dropout_prob, is_training=self.is_training)
    self.decode_input = tf.contrib.layers.dropout(self.decode_input, self.dropout_prob, is_training=self.is_training)

    # sentence-level encoder: bidirectional LSTM over the words of each sentence
    sent_outputs, sent_state = layers.dynamic_origin_bilstm_layer(
        self.encode_input, self.word_lstm_dim,
        scope_name='sent_level_bilstm_rnn', input_len=self.encode_sent_len)
    sent_last_state = tf.concat([sent_state[0][1], sent_state[1][1]], axis=1)
    # sent_outputs = tf.reshape(sent_outputs, shape=[self.batch_size, self.max_n_sentences, self.max_n_words, self.lstm_dim])
    # ind = tf.stack([tf.range(self.batch_size), self.encode_conv_len - 1], axis=1)
    # sent_last_outputs = tf.gather_nd(sent_outputs, indices=ind)

    # conversation-level encoder: LSTM over the per-sentence representations
    conv_sents = tf.reshape(sent_last_state, shape=[self.batch_size, self.max_n_sentences, self.lstm_dim])
    # self.sent_last_state_trun = tf.gather_nd(conv_sents, indices=ind)
    conv_outputs, conv_state = layers.dynamic_origin_lstm_layer(
        conv_sents, self.lstm_dim, 'conv_level_rnn', input_len=self.encode_conv_len)
    self.conv_last_state = conv_state[1]

    # latent perturbation: a two-layer MLP predicts the mean and (softplus)
    # covariance of a Gaussian, sampled with the reparameterization trick
    # and added to the conversation state
    with tf.variable_scope('normal'):
        normal_first_W = tf.get_variable('normal_first_W', shape=[self.conv_last_state.shape[1], self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        normal_first_b = tf.get_variable('normal_first_b', shape=[self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        normal_second_W = tf.get_variable('normal_second_W', shape=[self.decode_dim, self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        normal_second_b = tf.get_variable('normal_second_b', shape=[self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        normal_mean_W = tf.get_variable('normal_mean_W', shape=[self.decode_dim, self.lstm_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        normal_mean_b = tf.get_variable('normal_mean_b', shape=[self.lstm_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        normal_cov_W = tf.get_variable('normal_cov_W', shape=[self.decode_dim, self.lstm_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        normal_cov_b = tf.get_variable('normal_cov_b', shape=[self.lstm_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())

        normal_first = tf.matmul(self.conv_last_state, normal_first_W) + normal_first_b
        normal_second = tf.matmul(normal_first, normal_second_W) + normal_second_b
        normal_mean = tf.matmul(normal_second, normal_mean_W) + normal_mean_b
        normal_cov = tf.nn.softplus(tf.matmul(normal_second, normal_cov_W) + normal_cov_b)
        normal_sample = normal_mean + tf.multiply(tf.random_normal(shape=[self.batch_size, self.lstm_dim]), normal_cov)
        self.conv_last_state = self.conv_last_state + normal_sample

    # decoder
    with tf.variable_scope('linear'):
        decoder_input_W = tf.get_variable('sw', shape=[self.conv_last_state.shape[1], self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        decoder_input_b = tf.get_variable('sb', shape=[self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        self.decoder_input = tf.matmul(self.conv_last_state, decoder_input_W) + decoder_input_b

    # answer -> word prediction
    self.embed_word_W = tf.get_variable('embed_word_W', shape=[self.decode_dim, self.n_words], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
    self.embed_word_b = tf.get_variable('embed_word_b', shape=[self.n_words], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
    # # word dim -> decode_dim
    # self.word_to_lstm_w = tf.get_variable('word_to_lstm_W', shape=[self.input_dim, self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
    # self.word_to_lstm_b = tf.get_variable('word_to_lstm_b', shape=[self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())

    # decoder gate weights (GRU-style reset / update / candidate)
    with tf.variable_scope('decoder'):
        self.decoder_r = tf.get_variable('decoder_r', shape=[self.decode_dim * 2, self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        self.decoder_z = tf.get_variable('decoder_z', shape=[self.decode_dim * 2, self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        self.decoder_w = tf.get_variable('decoder_w', shape=[self.decode_dim * 2, self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())

    # embedding layer
    embedding = load_file(self.params['embedding'])
    self.Wemb = tf.constant(embedding, dtype=tf.float32)

    # generate training
    answer_train, train_loss, distribution_train = self.generate_answer_on_training()
    answer_test, test_loss, distribution_test = self.generate_answer_on_testing()

    # final: L2-regularized losses, Adam with exponentially decayed learning rate
    variables = tf.trainable_variables()
    regularization_cost = tf.reduce_sum([tf.nn.l2_loss(v) for v in variables])
    self.answer_word_train = answer_train
    self.train_loss = train_loss + self.regularization_beta * regularization_cost
    self.distribution_word_train = distribution_train
    self.answer_word_test = answer_test
    self.test_loss = test_loss + self.regularization_beta * regularization_cost
    self.distribution_word_test = distribution_test

    self.global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    learning_rates = tf.train.exponential_decay(self.params['learning_rate'], self.global_step,
                                                decay_steps=self.params['lr_decay_n_iters'],
                                                decay_rate=self.params['lr_decay_rate'], staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rates)
    self.train_proc = optimizer.minimize(self.train_loss, global_step=self.global_step)
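# The two `layers.*` helpers used above are not defined in this file. Below is
# a minimal sketch, with signatures inferred from the call sites (outputs plus
# final state, honoring per-example sequence lengths); the real helpers may
# differ in details such as cell type or initialization.
import tensorflow as tf

def dynamic_origin_lstm_layer(inputs, lstm_dim, scope_name, input_len=None):
    # Unidirectional LSTM over a [batch, time, dim] tensor; state is an
    # LSTMStateTuple, so state[1] (used by the caller) is the hidden state h.
    with tf.variable_scope(scope_name):
        cell = tf.contrib.rnn.BasicLSTMCell(lstm_dim, state_is_tuple=True)
        outputs, state = tf.nn.dynamic_rnn(cell, inputs, sequence_length=input_len, dtype=tf.float32)
    return outputs, state

def dynamic_origin_bilstm_layer(inputs, lstm_dim, scope_name, input_len=None):
    # Bidirectional LSTM; the caller reads state[0][1] / state[1][1], i.e. the
    # forward and backward final hidden states.
    with tf.variable_scope(scope_name):
        fw = tf.contrib.rnn.BasicLSTMCell(lstm_dim, state_is_tuple=True)
        bw = tf.contrib.rnn.BasicLSTMCell(lstm_dim, state_is_tuple=True)
        outputs, state = tf.nn.bidirectional_dynamic_rnn(fw, bw, inputs, sequence_length=input_len, dtype=tf.float32)
    # Concatenate forward/backward outputs along the feature axis; the caller's
    # reshape to [..., self.lstm_dim] implies lstm_dim == 2 * word_lstm_dim.
    return tf.concat(outputs, axis=2), state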
def build_train_proc(self):
    # input layer (batch_size, n_steps, input_dim)
    self.encode_input = tf.placeholder(tf.float32, [None, self.max_n_words, self.input_dim])
    self.encode_sent_len = tf.placeholder(tf.int32, [None])
    self.encode_conv_len = tf.placeholder(tf.int32, [self.batch_size])
    self.decode_input = tf.placeholder(tf.float32, [None, self.max_r_words, self.input_dim])
    self.decode_sent_len = tf.placeholder(tf.int32, [None])
    self.is_training = tf.placeholder(tf.bool)
    self.reward = tf.placeholder(tf.float32, [None])
    self.ans_vec = tf.placeholder(tf.float32, [None, self.max_r_words, self.input_dim])
    self.y = tf.placeholder(tf.int32, [None, self.max_r_words])
    self.y_mask = tf.placeholder(tf.float32, [None, self.max_r_words])
    # self.batch_size = tf.placeholder(tf.int32, [])

    self.encode_input = tf.contrib.layers.dropout(self.encode_input, self.dropout_prob, is_training=self.is_training)
    self.decode_input = tf.contrib.layers.dropout(self.decode_input, self.dropout_prob, is_training=self.is_training)

    # sentence-level encoder: bidirectional LSTM over the words of each sentence
    sent_outputs, sent_state = layers.dynamic_origin_bilstm_layer(
        self.encode_input, self.word_lstm_dim,
        scope_name='sent_level_bilstm_rnn', input_len=self.encode_sent_len)
    sent_last_state = tf.concat([sent_state[0][1], sent_state[1][1]], axis=1)
    # sent_last_state = tf.contrib.layers.dropout(sent_last_state, self.dropout_prob, is_training=self.is_training)

    # gather the outputs of the last valid sentence of each conversation
    sent_outputs = tf.reshape(sent_outputs, shape=[self.batch_size, self.max_n_sentences, self.max_n_words, self.lstm_dim])
    ind = tf.stack([tf.range(self.batch_size), self.encode_conv_len - 1], axis=1)
    sent_last_outputs = tf.gather_nd(sent_outputs, indices=ind)

    # conversation-level encoder: LSTM over the per-sentence representations
    conv_sents = tf.reshape(sent_last_state, shape=[self.batch_size, self.max_n_sentences, self.lstm_dim])
    self.sent_last_state_trun = tf.gather_nd(conv_sents, indices=ind)
    conv_outputs, conv_state = layers.dynamic_origin_lstm_layer(
        conv_sents, self.lstm_dim, 'conv_level_rnn', input_len=self.encode_conv_len)
    self.conv_last_state = conv_state[1]

    self.sent_features = sent_last_outputs
    self.conv_features = conv_outputs
    self.sent_features = tf.contrib.layers.dropout(self.sent_features, self.dropout_prob, is_training=self.is_training)
    self.conv_features = tf.contrib.layers.dropout(self.conv_features, self.dropout_prob, is_training=self.is_training)

    # (commented-out experiment: gated sentence selection with a while_loop)
    # with tf.variable_scope("ref_var"):
    #     self.Wsi = tf.get_variable('Wsi', shape=[self.input_dim, self.ref_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
    #     self.Wsh = tf.get_variable('Wsh', shape=[self.lstm_dim, self.ref_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
    #     self.Wsq = tf.get_variable('Wsq', shape=[self.lstm_dim, self.ref_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
    #     self.bias = tf.get_variable('bias', shape=[self.ref_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
    #     self.Vs = tf.get_variable('Vs', shape=[self.ref_dim, 1], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
    #
    # def cond(idx, times, cell, sents, state, outputs, important_outputs):
    #     return idx < times
    #
    # def body(idx, times, cell, sents, state, outputs, important_outputs):
    #     idx = idx + 1
    #     sent = tf.reshape(sents[idx, :], shape=[1, self.lstm_dim])
    #     ref = tf.matmul(state[1], self.Wsh) + tf.matmul(sent, self.Wsi) + self.bias
    #     condition = tf.sigmoid(tf.matmul(ref, self.Vs))
    #     prod = tf.squeeze(condition, 1) > 0.3
    #     (cell_output, state) = cell(sent, state)
    #     outputs.append(cell_output)
    #     return idx, times, cell, sents, state, outputs, important_outputs
    #
    # with tf.variable_scope("encode_conv_level"):
    #     for batch in range(self.batch_size):
    #         outputs = list()
    #         important_outputs = list()
    #         times = tf.cast(self.encode_conv_len[batch], tf.int32)
    #         idx = 0
    #         sents = conv_sents[batch]
    #         _, _, _, _, _, outputs, important_outputs = tf.while_loop(
    #             cond, body, [idx, times, cell_first, sents, state_first, outputs, important_outputs])

    # decoder
    # self.decoder_cell = tf.contrib.rnn.GRUCell(self.decode_dim)
    with tf.variable_scope('linear'):
        sent_and_conv_last = tf.concat([self.sent_last_state_trun, self.conv_last_state], axis=1)
        decoder_input_W = tf.get_variable('sw', shape=[self.sent_last_state_trun.shape[1] + self.conv_last_state.shape[1], self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        decoder_input_b = tf.get_variable('sb', shape=[self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        self.decoder_input = tf.matmul(sent_and_conv_last, decoder_input_W) + decoder_input_b

    # answer -> word prediction
    self.embed_word_W = tf.get_variable('embed_word_W', shape=[self.decode_dim, self.n_words], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
    self.embed_word_b = tf.get_variable('embed_word_b', shape=[self.n_words], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
    # # word dim -> decode_dim
    # self.word_to_lstm_w = tf.get_variable('word_to_lstm_W', shape=[self.input_dim, self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
    # self.word_to_lstm_b = tf.get_variable('word_to_lstm_b', shape=[self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())

    # decoder attention layer (additive attention over encoder features)
    with tf.variable_scope('decoder_attention'):
        self.attention_w_x = tf.get_variable('attention_w_x', shape=[self.lstm_dim, self.attention_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        self.attention_w_h = tf.get_variable('attention_w_h', shape=[self.decode_dim, self.attention_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        self.attention_b = tf.get_variable('attention_b', shape=[self.attention_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        self.attention_a = tf.get_variable('attention_a', shape=[self.attention_dim, 1], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        self.attention_to_decoder = tf.get_variable('attention_to_decoder', shape=[self.lstm_dim, self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())

    # decoder gate weights (GRU-style reset / update / candidate)
    with tf.variable_scope('decoder'):
        self.decoder_r = tf.get_variable('decoder_r', shape=[self.decode_dim * 4, self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        self.decoder_z = tf.get_variable('decoder_z', shape=[self.decode_dim * 4, self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        self.decoder_w = tf.get_variable('decoder_w', shape=[self.decode_dim * 4, self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())

    # embedding layer
    embedding = load_file(self.params['embedding'])
    self.Wemb = tf.constant(embedding, dtype=tf.float32)

    # generate training
    answer_train, train_loss, distribution_train = self.generate_answer_on_training()
    answer_test, test_loss, distribution_test = self.generate_answer_on_testing()

    # final: L2-regularized losses, Adam with exponentially decayed learning rate
    variables = tf.trainable_variables()
    regularization_cost = tf.reduce_sum([tf.nn.l2_loss(v) for v in variables])
    self.answer_word_train = answer_train
    self.train_loss = train_loss + self.regularization_beta * regularization_cost
    self.distribution_word_train = distribution_train
    self.answer_word_test = answer_test
    self.test_loss = test_loss + self.regularization_beta * regularization_cost
    self.distribution_word_test = distribution_test

    self.global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    learning_rates = tf.train.exponential_decay(self.params['learning_rate'], self.global_step,
                                                decay_steps=self.params['lr_decay_n_iters'],
                                                decay_rate=self.params['lr_decay_rate'], staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rates)
    self.train_proc = optimizer.minimize(self.train_loss, global_step=self.global_step)
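# generate_answer_on_training() is not shown, but the [decode_dim * 4, decode_dim]
# shapes of decoder_r / decoder_z / decoder_w above imply a GRU-style step whose
# gates read the previous hidden state concatenated with three decode_dim-sized
# inputs (e.g. word embedding, attention context, and the encoder summary). A
# hedged sketch of one such step, under those assumptions:
import tensorflow as tf

def gru_decoder_step(h_prev, step_inputs, decoder_r, decoder_z, decoder_w):
    """One GRU-style update. h_prev: [batch, decode_dim]; step_inputs: a list
    of [batch, decode_dim] tensors concatenated into the gate input."""
    x = tf.concat(step_inputs, axis=1)
    gate_in = tf.concat([h_prev, x], axis=1)           # [batch, decode_dim * 4]
    r = tf.sigmoid(tf.matmul(gate_in, decoder_r))      # reset gate
    z = tf.sigmoid(tf.matmul(gate_in, decoder_z))      # update gate
    cand_in = tf.concat([r * h_prev, x], axis=1)
    h_tilde = tf.tanh(tf.matmul(cand_in, decoder_w))   # candidate state
    return (1.0 - z) * h_prev + z * h_tilde            # interpolated new state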
def build_train_proc(self):
    # input layer (batch_size, n_steps, input_dim)
    self.encode_input = tf.placeholder(tf.float32, [None, self.max_n_words, self.input_dim])
    self.encode_sent_len = tf.placeholder(tf.int32, [None])
    self.encode_conv_len = tf.placeholder(tf.int32, [self.batch_size])
    self.decode_input = tf.placeholder(tf.float32, [None, self.max_r_words, self.input_dim])
    self.decode_sent_len = tf.placeholder(tf.int32, [None])
    self.is_training = tf.placeholder(tf.bool)
    self.reward = tf.placeholder(tf.float32, [None])
    self.ans_vec = tf.placeholder(tf.float32, [None, self.max_r_words - 1, self.input_dim])
    self.y = tf.placeholder(tf.int32, [None, self.max_r_words])
    self.y_mask = tf.placeholder(tf.float32, [None, self.max_r_words])
    # self.batch_size = tf.placeholder(tf.int32, [])

    # self.encode_input = tf.contrib.layers.dropout(self.encode_input, self.dropout_prob, is_training=self.is_training)
    # self.decode_input = tf.contrib.layers.dropout(self.decode_input, self.dropout_prob, is_training=self.is_training)

    # sentence-level encoder: bidirectional LSTM over the words of each sentence
    sent_outputs, sent_state = layers.dynamic_origin_bilstm_layer(
        self.encode_input, self.word_lstm_dim,
        scope_name='sent_level_bilstm_rnn', input_len=self.encode_sent_len)
    sent_last_state = tf.concat([sent_state[0][1], sent_state[1][1]], axis=1)
    # sent_last_state = tf.contrib.layers.dropout(sent_last_state, self.dropout_prob, is_training=self.is_training)

    # gather the outputs of the last valid sentence of each conversation
    sent_outputs = tf.reshape(sent_outputs, shape=[self.batch_size, self.max_n_sentences, self.max_n_words, self.lstm_dim])
    ind = tf.stack([tf.range(self.batch_size), self.encode_conv_len - 1], axis=1)
    sent_last_outputs = tf.gather_nd(sent_outputs, indices=ind)

    # conversation-level encoder: LSTM over the per-sentence representations
    conv_sents = tf.reshape(sent_last_state, shape=[self.batch_size, self.max_n_sentences, self.lstm_dim])
    self.sent_last_state_trun = tf.gather_nd(conv_sents, indices=ind)
    conv_outputs, conv_state = layers.dynamic_origin_lstm_layer(
        conv_sents, self.lstm_dim, 'conv_level_rnn', input_len=self.encode_conv_len)
    self.conv_last_state = conv_state[1]

    self.sent_outputs = sent_outputs
    self.ind = ind
    self.sent_features = sent_last_outputs
    self.conv_features = conv_outputs

    # decoder
    self.decode_cell = tf.contrib.rnn.BasicLSTMCell(self.decode_dim, state_is_tuple=True)
    with tf.variable_scope('linear'):
        sent_and_conv_last = tf.concat([self.sent_last_state_trun, self.conv_last_state], axis=1)
        decoder_input_W = tf.get_variable('sw', shape=[self.sent_last_state_trun.shape[1] + self.conv_last_state.shape[1], self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        decoder_input_b = tf.get_variable('sb', shape=[self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        self.decoder_input = tf.matmul(sent_and_conv_last, decoder_input_W) + decoder_input_b

    # answer -> word prediction
    self.embed_word_W = tf.get_variable('embed_word_W', shape=[self.decode_dim, self.n_words], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
    self.embed_word_b = tf.get_variable('embed_word_b', shape=[self.n_words], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())

    # word dim -> decode_dim
    self.word_to_lstm_w = tf.get_variable('word_to_lstm_W', shape=[self.input_dim, self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
    self.word_to_lstm_b = tf.get_variable('word_to_lstm_b', shape=[self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())

    # decoder attention layer (additive attention over encoder features)
    with tf.variable_scope('decoder_attention'):
        self.attention_w_x = tf.get_variable('attention_w_x', shape=[self.lstm_dim, self.attention_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        self.attention_w_h = tf.get_variable('attention_w_h', shape=[self.decode_dim, self.attention_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        self.attention_b = tf.get_variable('attention_b', shape=[self.attention_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        self.attention_a = tf.get_variable('attention_a', shape=[self.attention_dim, 1], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        self.attention_to_decoder = tf.get_variable('attention_to_decoder', shape=[self.lstm_dim, self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())

    # decoder gate weights (GRU-style reset / update / candidate)
    with tf.variable_scope('decoder'):
        self.decoder_r = tf.get_variable('decoder_r', shape=[self.decode_dim * 3, self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        self.decoder_z = tf.get_variable('decoder_z', shape=[self.decode_dim * 3, self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        self.decoder_w = tf.get_variable('decoder_w', shape=[self.decode_dim * 3, self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())

    # embedding layer
    embedding = load_file(self.params['embedding'])
    self.Wemb = tf.constant(embedding, dtype=tf.float32)

    # generate training
    answer_train, train_loss = self.generate_answer_on_training()
    answer_test, test_loss = self.generate_answer_on_testing()

    # final: L2-regularized losses, Adam with exponentially decayed learning rate
    variables = tf.trainable_variables()
    regularization_cost = tf.reduce_sum([tf.nn.l2_loss(v) for v in variables])
    self.answer_word_train = answer_train
    self.train_loss = train_loss + self.regularization_beta * regularization_cost
    self.answer_word_test = answer_test
    self.test_loss = test_loss + self.regularization_beta * regularization_cost

    self.global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    learning_rates = tf.train.exponential_decay(self.params['learning_rate'], self.global_step,
                                                decay_steps=self.params['lr_decay_n_iters'],
                                                decay_rate=self.params['lr_decay_rate'], staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rates)
    self.train_proc = optimizer.minimize(self.train_loss, global_step=self.global_step)
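# The decoder_attention weights above follow the additive (Bahdanau-style)
# pattern: score = a^T tanh(W_x x_t + W_h h + b). Their actual use is inside
# the generation methods, which are not shown; this is a hedged sketch of how
# such weights are typically applied to the encoder features.
import tensorflow as tf

def additive_attention(features, h_dec, w_x, w_h, b, a, to_decoder):
    """features: [batch, time, lstm_dim]; h_dec: [batch, decode_dim]."""
    proj_x = tf.tensordot(features, w_x, axes=[[2], [0]])                     # [batch, time, attn]
    proj_h = tf.expand_dims(tf.matmul(h_dec, w_h), 1)                         # [batch, 1, attn]
    scores = tf.tensordot(tf.tanh(proj_x + proj_h + b), a, axes=[[2], [0]])   # [batch, time, 1]
    alpha = tf.nn.softmax(scores, axis=1)                                     # attention weights
    context = tf.reduce_sum(alpha * features, axis=1)                         # [batch, lstm_dim]
    return tf.matmul(context, to_decoder)                                     # [batch, decode_dim]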
def build_train_proc(self):
    # input layer (batch_size, n_steps, input_dim)
    self.encode_input = tf.placeholder(tf.float32, [None, self.max_n_words, self.input_dim])
    self.encode_sent_len = tf.placeholder(tf.int32, [None])
    self.encode_conv_len = tf.placeholder(tf.int32, [self.batch_size])
    self.is_training = tf.placeholder(tf.bool)
    self.reward = tf.placeholder(tf.float32, [None])
    self.ans_vec_forward = tf.placeholder(tf.float32, [None, self.max_r_f_words, self.input_dim])
    self.y_forward = tf.placeholder(tf.int32, [None, self.max_r_f_words])
    self.y_mask_forward = tf.placeholder(tf.float32, [None, self.max_r_f_words])
    self.ans_vec_entire = tf.placeholder(tf.float32, [None, self.max_r_words, self.input_dim])
    self.y_entire = tf.placeholder(tf.int32, [None, self.max_r_words])
    self.y_mask_entire = tf.placeholder(tf.float32, [None, self.max_r_words])
    self.y_forward_generation = tf.placeholder(tf.int32, [None, self.max_r_words])
    # self.batch_size = tf.placeholder(tf.int32, [])

    self.encode_input = tf.contrib.layers.dropout(self.encode_input, self.dropout_prob, is_training=self.is_training)

    # sentence-level encoder: bidirectional LSTM over the words of each sentence
    sent_outputs, sent_state = layers.dynamic_origin_bilstm_layer(
        self.encode_input, self.word_lstm_dim,
        scope_name='sent_level_bilstm_rnn', input_len=self.encode_sent_len)
    sent_last_state = tf.concat([sent_state[0][1], sent_state[1][1]], axis=1)
    # sent_last_state = tf.contrib.layers.dropout(sent_last_state, self.dropout_prob, is_training=self.is_training)

    # gather the outputs of the last valid sentence of each conversation
    sent_outputs = tf.reshape(sent_outputs, shape=[self.batch_size, self.max_n_sentences, self.max_n_words, self.lstm_dim])
    ind = tf.stack([tf.range(self.batch_size), self.encode_conv_len - 1], axis=1)
    sent_last_outputs = tf.gather_nd(sent_outputs, indices=ind)

    # conversation-level encoder: LSTM over the per-sentence representations
    conv_sents = tf.reshape(sent_last_state, shape=[self.batch_size, self.max_n_sentences, self.lstm_dim])
    self.sent_last_state_trun = tf.gather_nd(conv_sents, indices=ind)
    conv_outputs, conv_state = layers.dynamic_origin_lstm_layer(
        conv_sents, self.lstm_dim, 'conv_level_rnn', input_len=self.encode_conv_len)
    self.conv_last_state = conv_state[1]

    self.sent_features = sent_last_outputs
    self.conv_features = conv_outputs
    self.sent_features = tf.contrib.layers.dropout(self.sent_features, self.dropout_prob, is_training=self.is_training)
    self.conv_features = tf.contrib.layers.dropout(self.conv_features, self.dropout_prob, is_training=self.is_training)

    # decoder
    with tf.variable_scope('linear'):
        sent_and_conv_last = tf.concat([self.sent_last_state_trun, self.conv_last_state], axis=1)
        decoder_input_W = tf.get_variable('sw', shape=[self.sent_last_state_trun.shape[1] + self.conv_last_state.shape[1], self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        decoder_input_b = tf.get_variable('sb', shape=[self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        self.decoder_input = tf.matmul(sent_and_conv_last, decoder_input_W) + decoder_input_b

    self.define_var()

    # embedding layer
    embedding = load_file(self.params['embedding'])
    self.Wemb = tf.constant(embedding, dtype=tf.float32)

    # generate training: a forward pass and an entire-answer pass
    forward_answer_train, forward_train_loss, forward_distribution_train = self.generate_forward_answer_on_training()
    entire_answer_train, entire_train_loss, entire_distribution_train = self.generate_entire_answer_on_training()
    forward_answer_test, forward_test_loss, forward_distribution_test = self.generate_forward_answer_on_testing()
    entire_answer_test, entire_test_loss, entire_distribution_test = self.generate_entire_answer_on_testing()

    # final: L2-regularized losses, one Adam optimizer per phase
    variables = tf.trainable_variables()
    regularization_cost = tf.reduce_sum([tf.nn.l2_loss(v) for v in variables])
    self.forward_answer_word_train = forward_answer_train
    self.answer_word_train = entire_answer_train
    self.forward_train_loss = forward_train_loss + self.regularization_beta * regularization_cost
    self.train_loss = entire_train_loss + self.regularization_beta * regularization_cost
    self.distribution_word_train = entire_distribution_train
    self.forward_answer_word_test = forward_answer_test
    self.answer_word_test = entire_answer_test
    self.test_loss = entire_test_loss  # no L2 term on the test loss
    self.distribution_word_test = entire_distribution_test

    self.forward_global_step = tf.get_variable('forward_global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    self.global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    # each phase decays its learning rate against its own step counter
    forward_learning_rates = tf.train.exponential_decay(self.params['learning_rate'], self.forward_global_step,
                                                        decay_steps=self.params['lr_decay_n_iters'],
                                                        decay_rate=self.params['lr_decay_rate'], staircase=True)
    learning_rates = tf.train.exponential_decay(self.params['learning_rate'], self.global_step,
                                                decay_steps=self.params['lr_decay_n_iters'],
                                                decay_rate=self.params['lr_decay_rate'], staircase=True)
    forward_optimizer = tf.train.AdamOptimizer(forward_learning_rates)
    optimizer = tf.train.AdamOptimizer(learning_rates)
    self.forward_train_proc = forward_optimizer.minimize(self.forward_train_loss, global_step=self.forward_global_step)
    self.train_proc = optimizer.minimize(self.train_loss, global_step=self.global_step)
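# A hedged usage sketch for the two-phase variant above: the forward pass and
# the entire-answer pass each have their own loss and optimizer. The `model`
# object, session, and batch dict (with the key names used here) are assumed
# to come from the surrounding training script; placeholders such as
# y_forward_generation and reward would be fed only if the (unshown)
# generation methods require them.

def run_train_step(sess, model, batch):
    feed = {
        model.encode_input: batch['encode_input'],
        model.encode_sent_len: batch['encode_sent_len'],
        model.encode_conv_len: batch['encode_conv_len'],
        model.ans_vec_forward: batch['ans_vec_forward'],
        model.y_forward: batch['y_forward'],
        model.y_mask_forward: batch['y_mask_forward'],
        model.ans_vec_entire: batch['ans_vec_entire'],
        model.y_entire: batch['y_entire'],
        model.y_mask_entire: batch['y_mask_entire'],
        model.is_training: True,
    }
    # run the forward-phase update, then the entire-answer update
    _, forward_loss = sess.run([model.forward_train_proc, model.forward_train_loss], feed_dict=feed)
    _, entire_loss = sess.run([model.train_proc, model.train_loss], feed_dict=feed)
    return forward_loss, entire_loss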