def create_model(sess, config, name_scope, initializer=None):
    """
    Create the discriminator model: if a trained checkpoint exists, load it; otherwise initialize fresh parameters.
    :param sess:
    :param config:
    :param name_scope: i.e. config.name_model
    :param initializer:
    :return:
    """
    print(just("Creating disc model"))
    with tf.variable_scope(name_or_scope=name_scope, initializer=initializer):
        model = HierRNNModel(config=config, name_scope=name_scope)
        disc_ckpt_dir = os.path.abspath(
            os.path.join(config.train_dir, "checkpoints"))
        ckpt = tf.train.get_checkpoint_state(disc_ckpt_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print(
                just("Reading Hier Disc model parameters from %s" %
                     ckpt.model_checkpoint_path))
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print(just("Created Hier Disc model with fresh parameters."))
            disc_global_variables = [
                gv for gv in tf.global_variables() if name_scope in gv.name
            ]
            sess.run(tf.variables_initializer(disc_global_variables))
        return model
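# A minimal, generic sketch of the restore-or-initialize pattern used above,
# assuming TF1.x; restore_or_init and its arguments are hypothetical names.
# Only variables whose names contain the scope are initialized, which is what
# lets the generator and discriminator graphs coexist in a single session.
import tensorflow as tf

def restore_or_init(sess, saver, ckpt_dir, scope):
    """Restore the newest checkpoint under ckpt_dir, else init the scope's vars."""
    ckpt = tf.train.get_checkpoint_state(ckpt_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        scoped_vars = [v for v in tf.global_variables() if scope in v.name]
        sess.run(tf.variables_initializer(scoped_vars))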
def get_dataset(gen_config):
    """
    Load the training data.
    :return: vocab, rev_vocab, dev_set, train_set
    """
    train_path = os.path.join(gen_config.train_dir, "chitchat.train")
    voc_file_path = [train_path + ".answer", train_path + ".query"]
    vocab_path = os.path.join(gen_config.train_dir,
                              "vocab%d.all" % gen_config.vocab_size)
    data_utils.create_vocabulary(vocab_path, voc_file_path,
                                 gen_config.vocab_size)
    vocab, rev_vocab = data_utils.initialize_vocabulary(
        vocab_path)  # {dog: 0, cat: 1} [dog, cat]

    print(just("Preparing Chitchat gen_data in %s" % gen_config.train_dir))
    train_query, train_answer, dev_query, dev_answer = data_utils.prepare_chitchat_data(
        gen_config.train_dir, vocab, gen_config.vocab_size)

    # Read disc_data into buckets and compute their sizes.
    print(
        just("Reading development and training gen_data (limit: %d)." %
             gen_config.max_train_data_size))
    dev_set = read_data(gen_config, dev_query, dev_answer)
    train_set = read_data(gen_config, train_query, train_answer,
                          gen_config.max_train_data_size)

    return vocab, rev_vocab, dev_set, train_set
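# A hedged sketch of what bucketed reading likely does here (read_data itself
# is not shown): each (query, answer) pair lands in the first bucket whose
# (source, target) length limits both fit, and pairs too long for every bucket
# are dropped. The names below are illustrative; buckets mirrors
# gen_config.buckets.
def bucketize(pairs, buckets):
    """pairs: list of (query_ids, answer_ids); buckets: list of (src_len, tgt_len)."""
    data_set = [[] for _ in buckets]
    for query_ids, answer_ids in pairs:
        for bucket_id, (src_len, tgt_len) in enumerate(buckets):
            if len(query_ids) < src_len and len(answer_ids) < tgt_len:
                data_set[bucket_id].append([query_ids, answer_ids])
                break
    return data_set

# e.g. bucketize([([1, 2], [3, 4, 5])], [(5, 10), (10, 15)]) fills bucket 0.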
def create_model(session, gen_config, forward_only, name_scope, initializer=None):
    """
    Create the generator model: if a trained checkpoint exists, load it; otherwise initialize fresh parameters.
    """
    print(
        just("Creating Gen model: %d layers of %d units." %
             (gen_config.num_layers, gen_config.emb_dim)))
    with tf.variable_scope(name_or_scope=name_scope, initializer=initializer):
        model = GenModel(gen_config,
                         name_scope=name_scope,
                         forward_only=forward_only)
        gen_ckpt_dir = os.path.abspath(
            os.path.join(gen_config.train_dir, "checkpoints"))
        ckpt = tf.train.get_checkpoint_state(gen_ckpt_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print(
                just("Reading Gen model parameters from %s" %
                     ckpt.model_checkpoint_path))
            model.saver.restore(session, ckpt.model_checkpoint_path)
        else:
            print(just("Created Gen model with fresh parameters."))
            gen_global_variables = [
                gv for gv in tf.global_variables() if name_scope in gv.name
            ]
            session.run(tf.variables_initializer(gen_global_variables))
        return model
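# The optional initializer threads through tf.variable_scope as the default
# initializer for every variable built inside the scope. A hypothetical usage,
# mirroring the uniform init common in TF1 seq2seq examples (the range below
# is an assumption, not a value from this repo):
uniform_init = tf.random_uniform_initializer(-0.08, 0.08)
with tf.Session() as sess:
    gen_model = create_model(sess, gen_config, forward_only=False,
                             name_scope=gen_config.name_model,
                             initializer=uniform_init)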
def _get_dataset(self):
    print(just("Prepare_data"))
    query_set, answer_set, gen_set = get_dataset(self.config_disc)

    train_bucket_sizes = [
        len(query_set[b]) for b in xrange(len(self.config_disc.buckets))
    ]
    train_total_size = float(sum(train_bucket_sizes))
    train_buckets_scale = [
        sum(train_bucket_sizes[:i + 1]) / train_total_size
        for i in xrange(len(train_bucket_sizes))
    ]

    return query_set, answer_set, gen_set, train_buckets_scale
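# A worked example of the cumulative scale computed above, with hypothetical
# bucket sizes; each entry is the fraction of all pairs held by that bucket
# together with every bucket before it:
sizes = [100, 300, 600]
total = float(sum(sizes))
scale = [sum(sizes[:i + 1]) / total for i in xrange(len(sizes))]
# scale == [0.1, 0.4, 1.0]: bucket 0 holds 10% of pairs, buckets 0-1 hold 40%.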
def _get_dataset(self, gen_config):
    print(just("Prepare_data"))
    vocab, rev_vocab, dev_set, train_set = get_dataset(gen_config)

    train_bucket_sizes = [
        len(train_set[b]) for b in xrange(len(gen_config.buckets))
    ]
    train_total_size = float(sum(train_bucket_sizes))
    train_buckets_scale = [
        sum(train_bucket_sizes[:i + 1]) / train_total_size
        for i in xrange(len(train_bucket_sizes))
    ]  # cumulative share of the data held by each bucket and all buckets before it

    return train_set, train_buckets_scale
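# Because train_buckets_scale is a cumulative distribution over buckets,
# sampling a bucket in proportion to its size reduces to finding the first
# entry that exceeds a uniform draw. A sketch of what get_random_bid /
# _get_random_bid plausibly do (their bodies are not shown in this section):
import random

def sample_bucket_id(train_buckets_scale):
    r = random.random()  # uniform draw in [0, 1)
    return min(i for i in xrange(len(train_buckets_scale))
               if train_buckets_scale[i] > r)

# e.g. with scale [0.1, 0.4, 1.0], bucket 2 is drawn about 60% of the time.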
def al_train():
    gen_config = conf.gen_config
    disc_config = conf.disc_config
    adver_config = conf.adver_config
    with tf.Session() as sess:
        # ① Load the dataset
        vocab, rev_vocab, dev_set, train_set = get_dataset(gen_config)
        for set in train_set:
            print("al train len: ", len(set))
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(gen_config.buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # ② Create the models
        disc_model = HierRNNTrain.create_model(sess, disc_config, disc_config.name_model)
        gen_model = GenTrain.create_model(sess, gen_config, forward_only=False,
                                          name_scope=gen_config.name_model)

        # [Ignore]... logging setup
        current_step = 0
        step_time, disc_loss, gen_loss, t_loss, batch_reward = 0.0, 0.0, 0.0, 0.0, 0.0
        gen_loss_summary = tf.Summary()
        disc_loss_summary = tf.Summary()
        gen_writer = tf.summary.FileWriter(gen_config.tensorboard_dir, sess.graph)
        disc_writer = tf.summary.FileWriter(disc_config.tensorboard_dir, sess.graph)

        # ③ Start adversarial training
        while current_step <= adver_config.max_train_step:
            start_time = time.time()  # [Ignore]... logging: start time
            current_step += 1
            bucket_id = get_random_bid(train_buckets_scale)

            # =========================================== ③.① Train the discriminator =========================================== #
            print(just("Update Discriminator: %d" % current_step))

            # 1. Fetch one batch of real data
            encoder_inputs, decoder_inputs, target_weights, source_inputs, source_outputs = get_batch(
                gen_model, train_set, bucket_id, gen_config.batch_size)

            # 2. Use the generator to produce negative examples that extend the dataset:
            #    sample (X, Y) and (X, ^Y) through ^Y ~ G(*|X)
            train_query, train_answer, train_labels = disc_train_data(sess, gen_model, vocab,
                                                                      source_inputs, source_outputs,
                                                                      encoder_inputs, decoder_inputs,
                                                                      target_weights, bucket_id,
                                                                      mc_search=False)

            # [Ignore]... message
            print(just("mc_search: False"))
            if current_step % 200 == 0:
                print("train_query: ", len(train_query))
                print("train_answer: ", len(train_answer))
                print("train_labels: ", len(train_labels))
                for i in xrange(len(train_query)):
                    print("label: ", train_labels[i])
                    print("train_answer_sentence: ", train_answer[i])
                    print(" ".join([tf.compat.as_str(rev_vocab[output]) for output in train_answer[i]]))

            # Transpose to time-major
            train_query = np.transpose(train_query)
            train_answer = np.transpose(train_answer)

            # 3. Train the discriminator on both kinds of examples:
            #    update D using (X, Y) as positive examples and (X, ^Y) as negative examples
            _, disc_step_loss = HierRNNTrain().step(sess, bucket_id, disc_model, train_query,
                                                    train_answer, train_labels, forward_only=False)
            disc_loss += disc_step_loss / disc_config.steps_per_checkpoint

            # =========================================== ③.② Train the generator =========================================== #
            print(just("Update Generator: %d" % current_step))

            # 1. Fetch one batch of real data: sample (X, Y) from real disc_data
            update_gen_data = get_batch(gen_model, train_set, bucket_id, gen_config.batch_size)
            encoder, decoder, weights, source_inputs, source_outputs = update_gen_data

            # 2. Build a training batch that includes self-generated negative examples,
            #    this time sampling (X, ^Y) through ^Y ~ G(*|X) with Monte Carlo search
            train_query, train_answer, train_labels = disc_train_data(sess, gen_model, vocab,
                                                                      source_inputs, source_outputs,
                                                                      encoder, decoder, weights,
                                                                      bucket_id, mc_search=True)

            # [Ignore]... message
            print(just("mc_search: True"))
            if current_step % 200 == 0:
                for i in xrange(len(train_query)):
                    print("label: ", train_labels[i])
                    print(" ".join([tf.compat.as_str(rev_vocab[output]) for output in train_answer[i]]))

            train_query = np.transpose(train_query)
            train_answer = np.transpose(train_answer)

            # 3. Compute the reward r for (X, ^Y) using D, based on the Monte Carlo search:
            #    the final reward is the mean reward over all negative examples in this batch
            reward, _ = HierRNNTrain().step(sess, bucket_id, disc_model, train_query,
                                            train_answer, train_labels, forward_only=True)
            batch_reward += reward / gen_config.steps_per_checkpoint
            print("step_reward: ", reward)

            # 4. Reinforcement learning: update G on (X, ^Y) using reward r
            # TODO(Zhu) How exactly does the reward drive the reinforcement-learning update?
            gan_adjusted_loss, gen_step_loss, _ = GenTrain().step(gen_model, sess, encoder, decoder,
                                                                  weights, bucket_id,
                                                                  forward_only=False,  # forward_only=False: train the model
                                                                  reward=reward,
                                                                  up_reward=True)  # up_reward: apply the reward
            gen_loss += gen_step_loss / gen_config.steps_per_checkpoint

            print("gen_step_loss: ", gen_step_loss)
            print("gen_step_adjusted_loss: ", gan_adjusted_loss)

            # 5. Teacher forcing: update G on (X, Y); this time up_reward stays False
            t_adjusted_loss, t_step_loss, a = GenTrain().step(gen_model, sess, encoder, decoder,
                                                              weights, bucket_id,
                                                              forward_only=False)  # forward_only=False: train the model
            t_loss += t_step_loss / gen_config.steps_per_checkpoint

            print("t_step_loss: ", t_step_loss)
            print("t_adjusted_loss", t_adjusted_loss)

            # ================================ [Ignore]... logging: record summaries, save checkpoints ================================ #
            if current_step % gen_config.steps_per_checkpoint == 0:
                step_time += (time.time() - start_time) / gen_config.steps_per_checkpoint

                print("current_steps: %d, step time: %.4f, disc_loss: %.3f, gen_loss: %.3f, t_loss: %.3f, reward: %.3f"
                      % (current_step, step_time, disc_loss, gen_loss, t_loss, batch_reward))

                disc_loss_value = disc_loss_summary.value.add()
                disc_loss_value.tag = disc_config.name_loss
                disc_loss_value.simple_value = float(disc_loss)
                disc_writer.add_summary(disc_loss_summary, int(sess.run(disc_model.global_step)))

                gen_global_steps = sess.run(gen_model.global_step)
                gen_loss_value = gen_loss_summary.value.add()
                gen_loss_value.tag = gen_config.name_loss
                gen_loss_value.simple_value = float(gen_loss)
                t_loss_value = gen_loss_summary.value.add()
                t_loss_value.tag = gen_config.teacher_loss
                t_loss_value.simple_value = float(t_loss)
                batch_reward_value = gen_loss_summary.value.add()
                batch_reward_value.tag = gen_config.reward_name
                batch_reward_value.simple_value = float(batch_reward)
                gen_writer.add_summary(gen_loss_summary, int(gen_global_steps))

                if current_step % (gen_config.steps_per_checkpoint * 2) == 0:
                    print("current_steps: %d, save disc model" % current_step)
                    disc_ckpt_dir = os.path.abspath(os.path.join(disc_config.train_dir, "checkpoints"))
                    if not os.path.exists(disc_ckpt_dir):
                        os.makedirs(disc_ckpt_dir)
                    disc_model_path = os.path.join(disc_ckpt_dir, "disc.model")
                    disc_model.saver.save(sess, disc_model_path, global_step=disc_model.global_step)

                    print("current_steps: %d, save gen model" % current_step)
                    gen_ckpt_dir = os.path.abspath(os.path.join(gen_config.train_dir, "checkpoints"))
                    if not os.path.exists(gen_ckpt_dir):
                        os.makedirs(gen_ckpt_dir)
                    gen_model_path = os.path.join(gen_ckpt_dir, "gen.model")
                    gen_model.saver.save(sess, gen_model_path, global_step=gen_model.global_step)

                step_time, disc_loss, gen_loss, t_loss, batch_reward = 0.0, 0.0, 0.0, 0.0, 0.0
                sys.stdout.flush()
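# On the TODO inside al_train: in SeqGAN-style training the reward typically
# enters as a REINFORCE scaling of the sequence log-likelihood, so gradients
# raise the probability of sampled replies in proportion to how "real" D
# judged them. A minimal sketch under that assumption; the actual adjustment
# lives inside the generator's step() when up_reward=True, and the names here
# are illustrative:
def reward_adjusted_loss(step_log_probs, reward):
    """Policy-gradient surrogate: -r * sum_t log G(y_t | y_<t, X)."""
    return -reward * sum(step_log_probs)

# A convincing fake (reward near 1) yields a strong push toward regenerating
# it; a fake D easily spots (reward near 0) barely moves G.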
def pre_train(self):
    """
    Pre-train the discriminator.
    :return:
    """
    print(just("Begin training"))
    with tf.Session() as session:
        # ① Create the model
        model = self.create_model(session, self.config_disc,
                                  name_scope=self.config_disc.name_model)

        # ② Load the dataset
        self.query_set, \
        self.answer_set, \
        self.gen_set, \
        self.train_buckets_scale = self._get_dataset()

        # [Ignore]... logging setup
        step_time, loss = 0.0, 0.0
        current_step = 0
        step_loss_summary = tf.Summary()
        disc_writer = tf.summary.FileWriter(self.config_disc.tensorboard_dir,
                                            session.graph)

        while current_step <= self.config_disc.max_pre_train_step:
            start_time = time.time()  # [Ignore]... logging: start time

            # ③ Fetch one batch of training data
            bucket_id = self._get_random_bid()
            train_query, train_answer, train_labels = self._get_batch(bucket_id)

            # ④ Build the processed model inputs
            feed_dict = self._get_feed_dict(model, bucket_id, train_query,
                                            train_answer, train_labels)

            # ⑤ Pick the training ops and run them
            fetches = [
                model.b_train_op[bucket_id], model.b_logits[bucket_id],
                model.b_loss[bucket_id], model.target
            ]
            train_op, logits, step_loss, target = session.run(fetches, feed_dict)

            # ================================ [Ignore]... logging: record summaries, save checkpoints ================================ #
            # logging: elapsed time
            step_time += (time.time() - start_time) / self.config_disc.steps_per_checkpoint
            loss += step_loss / self.config_disc.steps_per_checkpoint
            current_step += 1

            # Record once every config_disc.steps_per_checkpoint steps
            if current_step % self.config_disc.steps_per_checkpoint == 0:
                # logging
                disc_loss_value = step_loss_summary.value.add()
                disc_loss_value.tag = self.config_disc.name_loss
                disc_loss_value.simple_value = float(loss)
                disc_writer.add_summary(step_loss_summary,
                                        int(session.run(model.global_step)))

                # softmax operation
                logits = np.transpose(softmax(np.transpose(logits)))

                reward = 0.0
                for logit, label in zip(logits, train_labels):  # ([1, 0], 1)
                    reward += logit[1]  # probability of the "true" class only
                reward = reward / len(train_labels)
                print("reward: ", reward)

                print("current_step: %d, step_loss: %.4f" %
                      (current_step, step_loss))

                if current_step % (self.config_disc.steps_per_checkpoint * 3) == 0:
                    print("current_step: %d, save_model" % (current_step))
                    disc_ckpt_dir = os.path.abspath(
                        os.path.join(self.config_disc.train_dir, "checkpoints"))
                    if not os.path.exists(disc_ckpt_dir):
                        os.makedirs(disc_ckpt_dir)
                    disc_model_path = os.path.join(disc_ckpt_dir, "disc.model")
                    model.saver.save(session, disc_model_path,
                                     global_step=model.global_step)

                step_time, loss = 0.0, 0.0
                sys.stdout.flush()
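# The softmax helper applied above is not shown in this section; the
# transposes wrapped around it suggest it normalizes along the first axis.
# A standard numerically stable equivalent over the last axis, which would
# need no transposing, is:
import numpy as np

def stable_softmax(x):
    # Subtracting the row max avoids overflow in exp without changing the result.
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)

# The reward then averages stable_softmax(logits)[:, 1], i.e. the
# discriminator's probability that each (query, answer) pair is real.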
def pre_train(self, gen_config):
    """
    Pre-train the generator.
    :param gen_config:
    :return:
    """
    print(just("Begin training"))
    with tf.Session() as sess:
        # ① Create the model
        model = self.create_model(sess, gen_config, forward_only=False,
                                  name_scope=gen_config.name_model)

        # ② Load the dataset
        self.train_set, self.train_buckets_scale = self._get_dataset(gen_config)

        # [Ignore]... logging setup
        step_time, loss = 0.0, 0.0
        current_step = 0
        gen_loss_summary = tf.Summary()
        gen_writer = tf.summary.FileWriter(gen_config.tensorboard_dir, sess.graph)

        while current_step <= gen_config.max_pre_train_step:
            start_time = time.time()  # [Ignore]... logging: start time

            # ③ Fetch one batch of training data
            bucket_id = self._get_random_bid()
            encoder_inputs, decoder_inputs, target_weights, batch_source_encoder, batch_source_decoder = get_batch(
                model, self.train_set, bucket_id, gen_config.batch_size)

            # ④ Build the processed model inputs
            # ⑤ Pick the training op and run it
            _, step_loss, _ = self.step(model, sess, encoder_inputs,
                                        decoder_inputs, target_weights,
                                        bucket_id, forward_only=False)

            # ================================ [Ignore]... logging: record summaries, save checkpoints ================================ #
            # logging: elapsed time
            step_time += (time.time() - start_time) / gen_config.steps_per_checkpoint
            loss += step_loss / gen_config.steps_per_checkpoint
            current_step += 1

            # Record once every gen_config.steps_per_checkpoint steps
            if current_step % gen_config.steps_per_checkpoint == 0:
                # logging
                bucket_value = gen_loss_summary.value.add()
                bucket_value.tag = gen_config.name_loss
                bucket_value.simple_value = float(loss)
                gen_writer.add_summary(gen_loss_summary,
                                       int(model.global_step.eval()))

                # Print statistics for the previous epoch.
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d learning rate %.4f step-time %.2f perplexity "
                      "%.2f" % (model.global_step.eval(),
                                model.learning_rate.eval(), step_time, perplexity))

                # Decrease learning rate if no improvement was seen over last 3 times.
                # if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                #     sess.run(model.learning_rate_decay_op)
                # previous_losses.append(loss)

                # Save checkpoint and zero timer and loss.
                if current_step % (gen_config.steps_per_checkpoint * 3) == 0:
                    print("current_step: %d, save model" % (current_step))
                    gen_ckpt_dir = os.path.abspath(
                        os.path.join(gen_config.train_dir, "checkpoints"))
                    if not os.path.exists(gen_ckpt_dir):
                        os.makedirs(gen_ckpt_dir)
                    checkpoint_path = os.path.join(gen_ckpt_dir, "chitchat.model")
                    model.saver.save(sess, checkpoint_path,
                                     global_step=model.global_step)

                step_time, loss = 0.0, 0.0

                # Run evals on development set and print their perplexity.
                # for bucket_id in xrange(len(gen_config.buckets)):
                #     encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                #         dev_set, bucket_id)
                #     _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                #                                  target_weights, bucket_id, True)
                #     eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
                #     print("  eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))

                sys.stdout.flush()
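# The perplexity printed above is exp of the average per-token cross-entropy,
# clamped to inf once the loss is too large for math.exp. Restated on its own:
import math

def perplexity(avg_loss, cap=300.0):
    """exp(loss), read as the model's effective branching factor per token."""
    return math.exp(avg_loss) if avg_loss < cap else float('inf')

# e.g. perplexity(4.6) ~= 99.5: the model is roughly as uncertain as a uniform
# choice over ~100 vocabulary tokens.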