def _init_train(self):
    self.train_graph = tf.Graph()
    with self.train_graph.as_default():
        self.train_in_seq = tf.placeholder(tf.int32, shape=[self.batch_size, None])
        self.train_in_seq_len = tf.placeholder(tf.int32, shape=[self.batch_size])
        self.train_target_seq = tf.placeholder(tf.int32, shape=[self.batch_size, None])
        self.train_target_seq_len = tf.placeholder(tf.int32, shape=[self.batch_size])
        output = seq2seq.seq2seq(self.train_in_seq, self.train_in_seq_len,
                                 self.train_target_seq, self.train_target_seq_len,
                                 len(self.train_reader.vocabs),
                                 self.num_units, self.layers, self.dropout)
        self.train_output = tf.argmax(tf.nn.softmax(output), 2)
        self.loss = seq2seq.seq_loss(output, self.train_target_seq,
                                     self.train_target_seq_len)
        params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, 0.5)
        self.train_op = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).apply_gradients(
                zip(clipped_gradients, params))
        if self.param_histogram:
            for v in tf.trainable_variables():
                tf.summary.histogram('train_' + v.name, v)
        tf.summary.scalar('loss', self.loss)
        self.train_summary = tf.summary.merge_all()
        self.train_init = tf.global_variables_initializer()
        self.train_saver = tf.train.Saver()
    self.train_session = tf.Session(graph=self.train_graph,
                                    config=self.gpu_session_config())
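The loss above comes from seq2seq.seq_loss, which is defined in the project's seq2seq module and not shown in this section. As a rough sketch of what such a masked sequence loss usually computes (an assumption for illustration, not the project's actual implementation), it averages a per-token cross entropy over the valid, unpadded target positions:

import tensorflow as tf

def masked_seq_loss(logits, target_seq, target_seq_len):
    # logits: [batch, time, vocab]; target_seq: [batch, time]; target_seq_len: [batch]
    # NOTE: illustrative only; the project's seq2seq.seq_loss may differ.
    crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=target_seq, logits=logits)                  # [batch, time]
    mask = tf.sequence_mask(target_seq_len,
                            maxlen=tf.shape(target_seq)[1],
                            dtype=tf.float32)              # 1.0 on real tokens, 0.0 on padding
    return tf.reduce_sum(crossent * mask) / tf.reduce_sum(mask)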
def _init_train(self):
    # Build the training graph
    self.train_graph = tf.Graph()
    with self.train_graph.as_default():
        self.train_in_seq = tf.placeholder(tf.int32, shape=[self.batch_size, None])
        self.train_in_seq_len = tf.placeholder(tf.int32, shape=[self.batch_size])
        self.train_target_seq = tf.placeholder(tf.int32, shape=[self.batch_size, None])
        self.train_target_seq_len = tf.placeholder(tf.int32, shape=[self.batch_size])
        # Weighted targets: one weight per vocabulary entry at each target position
        self.train_target_seq_new_weight = tf.placeholder(
            tf.float32, shape=[self.batch_size, None, len(self.train_vocabs)])
        # Call the seq2seq model; the method itself is analysed in detail in the seq2seq module
        output = seq2seq.seq2seq(self.train_in_seq, self.train_in_seq_len,
                                 self.train_target_seq, self.train_target_seq_len,
                                 len(self.train_reader.vocabs),
                                 self.num_units, self.layers, self.dropout)
        # From the encoded/decoded output, pick the token with the highest probability at each step
        self.train_output = tf.argmax(tf.nn.softmax(output), 2)
        # Compute the loss; here train_target_seq is replaced with train_target_seq_new_weight
        self.loss = seq2seq.seq_loss(output, self.train_target_seq_new_weight,
                                     self.train_target_seq_len)
        # Collect the names and values of the trainable variables
        params = tf.trainable_variables()
        # Differentiate the loss with respect to the trainable variables
        gradients = tf.gradients(self.loss, params)
        # tf.clip_by_global_norm(t_list, clip_norm, use_norm=None, name=None)
        # clips the list of gradient tensors t_list by the ratio clip_norm and returns
        # the clipped tensors together with the global norm over all tensors:
        #     t_list[i] <- t_list[i] * clip_norm / max(global_norm, clip_norm)
        #     global_norm = sqrt(sum(l2norm(t) ** 2 for t in t_list))
        # If clip_norm > global_norm, no clipping is applied.
        # Clipping is used to keep gradients from vanishing or exploding.
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, 0.5)
        # Hand the clipped gradients to the optimizer to update the parameters
        self.train_op = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).apply_gradients(
                zip(clipped_gradients, params))
        if self.param_histogram:
            for v in tf.trainable_variables():
                tf.summary.histogram('train_' + v.name, v)
        # Track the loss in TensorBoard
        tf.summary.scalar('loss', self.loss)
        self.train_summary = tf.summary.merge_all()
        # Initializer and Saver
        self.train_init = tf.global_variables_initializer()
        self.train_saver = tf.train.Saver()
    # Attach the graph to the training session
    self.train_session = tf.Session(graph=self.train_graph,
                                    config=self.gpu_session_config())
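To make the clipping rule in the comment above concrete, here is a small self-contained sketch (assuming TensorFlow 1.x imported as tf; the tensor values are made up for illustration). With gradients [3.0] and [4.0] the global norm is 5.0, so with clip_norm = 0.5 each gradient is rescaled by 0.5 / 5.0 = 0.1:

import tensorflow as tf

# Two made-up "gradient" tensors; their global norm is sqrt(3^2 + 4^2) = 5.0
grads = [tf.constant([3.0]), tf.constant([4.0])]
clipped, global_norm = tf.clip_by_global_norm(grads, clip_norm=0.5)

with tf.Session() as sess:
    clipped_vals, norm_val = sess.run([clipped, global_norm])
    print(norm_val)      # 5.0
    print(clipped_vals)  # [0.3] and [0.4], i.e. each gradient scaled by 0.1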
def _init_train(self):
    self.train_graph = tf.Graph()
    with self.train_graph.as_default():
        # Placeholders for the training inputs, targets, and their lengths
        self.train_in_seq = tf.placeholder(tf.int32, shape=[self.batch_size, None])
        self.train_in_seq_len = tf.placeholder(tf.int32, shape=[self.batch_size])
        self.train_target_seq = tf.placeholder(tf.int32, shape=[self.batch_size, None])
        self.train_target_seq_len = tf.placeholder(tf.int32, shape=[self.batch_size])
        # Run the seq2seq model to obtain the decoder output
        output = seq2seq.seq2seq(self.train_in_seq, self.train_in_seq_len,
                                 self.train_target_seq, self.train_target_seq_len,
                                 len(self.train_reader.vocabs),
                                 self.num_units, self.layers, self.dropout)
        self.train_output = tf.argmax(tf.nn.softmax(output), 2)
        # Training loss of the model
        self.loss = seq2seq.seq_loss(output, self.train_target_seq,
                                     self.train_target_seq_len)
        # List of trainable variables
        params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, params)
        # clipped_gradients keeps the parameter updates from growing too fast
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, 0.5)
        # train_op: the optimization step
        self.train_op = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).apply_gradients(
                zip(clipped_gradients, params))
        tf.summary.scalar('loss', self.loss)
        self.train_summary = tf.summary.merge_all()
        self.train_init = tf.global_variables_initializer()
        self.train_saver = tf.train.Saver()
    self.train_session = tf.Session(graph=self.train_graph,
                                    config=self.gpu_session_config())
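For context, a single training step on this graph feeds the four placeholders and runs train_op, loss, and train_summary together. The sketch below is a minimal illustration built only from the names defined above; the method name and argument names are hypothetical, and the project's actual training loop may differ.

def train_one_batch(self, in_seq, in_seq_len, target_seq, target_seq_len):
    # in_seq / target_seq: int32 arrays of shape [batch_size, max_len];
    # in_seq_len / target_seq_len: int32 arrays of shape [batch_size].
    feed = {
        self.train_in_seq: in_seq,
        self.train_in_seq_len: in_seq_len,
        self.train_target_seq: target_seq,
        self.train_target_seq_len: target_seq_len,
    }
    _, loss, summary = self.train_session.run(
        [self.train_op, self.loss, self.train_summary], feed_dict=feed)
    return loss, summary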