Example #1
def create_model(sess, config, name_scope, initializer=None):
    """
    Create the discriminator model: if a trained checkpoint exists, load it;
    otherwise, initialize fresh parameters.
    :param sess:
    :param config:
    :param name_scope: i.e., config.name_model
    :param initializer:
    :return:
    """
    print(just("Creating disc model"))
    with tf.variable_scope(name_or_scope=name_scope,
                           initializer=initializer):
        model = HierRNNModel(config=config, name_scope=name_scope)
        disc_ckpt_dir = os.path.abspath(
            os.path.join(config.train_dir, "checkpoints"))
        ckpt = tf.train.get_checkpoint_state(disc_ckpt_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print(
                just("Reading Hier Disc model parameters from %s" %
                     ckpt.model_checkpoint_path))
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print(just("Created Hier Disc model with fresh parameters."))
            disc_global_variables = [
                gv for gv in tf.global_variables() if name_scope in gv.name
            ]
            sess.run(tf.variables_initializer(disc_global_variables))
        return model
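For orientation, here is a minimal, hypothetical usage sketch of this function. The conf module and its disc_config fields are assumed to exist as used in the al_train() example further below (Example #6); none of the names here are defined in this snippet itself.

# Hypothetical usage sketch only; `conf` and its configs come from the
# repo's configuration module, as used in al_train() below.
import tensorflow as tf

import conf  # assumed config module from the repo

with tf.Session() as sess:
    disc_config = conf.disc_config
    # Restores from train_dir/checkpoints when present; otherwise
    # initializes only the variables under disc_config.name_model.
    disc_model = create_model(sess, disc_config,
                              name_scope=disc_config.name_model)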
Example #2
def get_dataset(gen_config):
    """
    获取训练数据
    :return: vocab, rev_vocab, dev_set, train_set
    """
    train_path = os.path.join(gen_config.train_dir, "chitchat.train")
    voc_file_path = [train_path + ".answer", train_path + ".query"]
    vocab_path = os.path.join(gen_config.train_dir,
                              "vocab%d.all" % gen_config.vocab_size)
    data_utils.create_vocabulary(vocab_path, voc_file_path,
                                 gen_config.vocab_size)
    vocab, rev_vocab = data_utils.initialize_vocabulary(
        vocab_path)  # {dog: 0, cat: 1} [dog, cat]

    print(just("Preparing Chitchat gen_data in %s" % gen_config.train_dir))
    train_query, train_answer, dev_query, dev_answer = data_utils.prepare_chitchat_data(
        gen_config.train_dir, vocab, gen_config.vocab_size)

    # Read disc_data into buckets and compute their sizes.
    print(
        just("Reading development and training gen_data (limit: %d)." %
             gen_config.max_train_data_size))
    dev_set = read_data(gen_config, dev_query, dev_answer)
    train_set = read_data(gen_config, train_query, train_answer,
                          gen_config.max_train_data_size)

    return vocab, rev_vocab, dev_set, train_set
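read_data is defined elsewhere in the repo; judging by how dev_set and train_set are indexed by bucket in the examples below, it groups (query, answer) token-id lists into length buckets. A simplified sketch of that bucketing, under the assumption that gen_config.buckets is a list of (source_size, target_size) pairs:

# Simplified, assumed sketch of read_data's bucketing; the real function
# also appends EOS symbols and honors max_train_data_size.
def bucket_pairs(pairs, buckets):
    data_set = [[] for _ in buckets]
    for source_ids, target_ids in pairs:
        for bucket_id, (source_size, target_size) in enumerate(buckets):
            if len(source_ids) < source_size and len(target_ids) < target_size:
                data_set[bucket_id].append([source_ids, target_ids])
                break  # place each pair in the smallest bucket that fits
    return data_set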
Example #3
def create_model(session,
                 gen_config,
                 forward_only,
                 name_scope,
                 initializer=None):
    """
    Create the generator model: if a trained checkpoint exists, load it;
    otherwise, initialize fresh parameters.
    """
    print(
        just("Creating Gen model: %d layers of %d units." %
             (gen_config.num_layers, gen_config.emb_dim)))
    with tf.variable_scope(name_or_scope=name_scope,
                           initializer=initializer):
        model = GenModel(gen_config,
                         name_scope=name_scope,
                         forward_only=forward_only)
        gen_ckpt_dir = os.path.abspath(
            os.path.join(gen_config.train_dir, "checkpoints"))
        ckpt = tf.train.get_checkpoint_state(gen_ckpt_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print(
                just("Reading Gen model parameters from %s" %
                     ckpt.model_checkpoint_path))
            model.saver.restore(session, ckpt.model_checkpoint_path)
        else:
            print(just("Created Gen model with fresh parameters."))
            gen_global_variables = [
                gv for gv in tf.global_variables() if name_scope in gv.name
            ]
            session.run(tf.variables_initializer(gen_global_variables))
        return model
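Note the initialization pattern shared by Examples #1 and #3: because the generator and discriminator live in one graph, only the variables whose names contain the model's name_scope are initialized, so a model already restored from a checkpoint is not clobbered. A self-contained illustration of that pattern (scope names here are illustrative):

import tensorflow as tf

# Two models in one graph; initialize only the "gen" variables.
with tf.variable_scope("gen"):
    gen_w = tf.get_variable("w", shape=[2])
with tf.variable_scope("disc"):
    disc_w = tf.get_variable("w", shape=[2])

gen_vars = [gv for gv in tf.global_variables() if "gen" in gv.name]
with tf.Session() as sess:
    sess.run(tf.variables_initializer(gen_vars))  # "disc/w" stays untouched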
Example #4
    def _get_dataset(self):
        print(just("Prepare_data"))

        query_set, answer_set, gen_set = get_dataset(self.config_disc)

        train_bucket_sizes = [
            len(query_set[b]) for b in xrange(len(self.config_disc.buckets))
        ]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        return query_set, answer_set, gen_set, train_buckets_scale
Example #5
    def _get_dataset(self, gen_config):
        print(just("Prepare_data"))

        vocab, rev_vocab, dev_set, train_set = get_dataset(gen_config)

        train_bucket_sizes = [
            len(train_set[b]) for b in xrange(len(gen_config.buckets))
        ]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]  # cumulative share of the data covered by this bucket and all earlier ones

        return train_set, train_buckets_scale
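In Examples #4 and #5, train_buckets_scale is the cumulative distribution over buckets, used to pick a bucket with probability proportional to its size. The repo's get_random_bid / _get_random_bid helpers are not shown here, but they presumably implement the standard inverse-CDF draw:

import numpy as np

# Assumed sketch of bucket sampling from the cumulative scales.
def sample_bucket_id(train_buckets_scale):
    r = np.random.random_sample()  # uniform in [0, 1)
    # Smallest bucket whose cumulative share exceeds r.
    return min(i for i in range(len(train_buckets_scale))
               if train_buckets_scale[i] > r)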
Example #6
def al_train():

    gen_config = conf.gen_config
    disc_config = conf.disc_config
    adver_config = conf.adver_config

    with tf.Session() as sess:
        # ① Load the dataset
        vocab, rev_vocab, dev_set, train_set = get_dataset(gen_config)
        for bucket in train_set:
            print("al train len: ", len(bucket))

        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(gen_config.buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # ② Create the models
        disc_model = HierRNNTrain.create_model(sess, disc_config, disc_config.name_model)
        gen_model = GenTrain.create_model(sess, gen_config, forward_only=False, name_scope=gen_config.name_model)

        # [Ignore]... logging-related
        current_step = 0
        step_time, disc_loss, gen_loss, t_loss, batch_reward = 0.0, 0.0, 0.0, 0.0, 0.0
        gen_loss_summary = tf.Summary()
        disc_loss_summary = tf.Summary()
        gen_writer = tf.summary.FileWriter(gen_config.tensorboard_dir, sess.graph)
        disc_writer = tf.summary.FileWriter(disc_config.tensorboard_dir, sess.graph)

        # ③ Start adversarial training
        while current_step <= adver_config.max_train_step:
            start_time = time.time()  # [Ignore]... logging: step start time
            current_step += 1

            bucket_id = get_random_bid(train_buckets_scale)

            # =========================================== ③.① Train the discriminator =========================================== #
            print(just("Update Discriminator: %d" % current_step))

            # 1. Fetch one batch of real data
            encoder_inputs, decoder_inputs, target_weights, source_inputs, source_outputs = get_batch(
                gen_model, train_set, bucket_id, gen_config.batch_size)

            # 2. Use the generator to create negative examples and augment the data: sample (X, Y) and (X, ^Y) through ^Y ~ G(*|X)
            train_query, train_answer, train_labels = disc_train_data(sess, gen_model, vocab, source_inputs,
                                                                      source_outputs,
                                                                      encoder_inputs, decoder_inputs, target_weights,
                                                                      bucket_id, mc_search=False)

            # [Ignore]... message
            print(just("mc_search: False"))
            if current_step % 200 == 0:
                print("train_query: ", len(train_query))
                print("train_answer: ", len(train_answer))
                print("train_labels: ", len(train_labels))
                for i in xrange(len(train_query)):
                    print("label: ", train_labels[i])
                    print("train_answer_sentence: ", train_answer[i])
                    print(" ".join([tf.compat.as_str(rev_vocab[output]) for output in train_answer[i]]))

            # Transpose to time-major: [batch, time] -> [time, batch]
            train_query = np.transpose(train_query)
            train_answer = np.transpose(train_answer)

            # 3. Train the discriminator on both: update D using (X, Y) as positive examples and (X, ^Y) as negative examples
            _, disc_step_loss = HierRNNTrain().step(sess, bucket_id, disc_model, train_query, train_answer, train_labels,
                                                    forward_only=False)
            disc_loss += disc_step_loss / disc_config.steps_per_checkpoint

            # =========================================== ③.② Train the generator =========================================== #
            print(just("Update Generator: %d" % current_step))

            # 1. Fetch a batch of real data: sample (X, Y) from real disc_data
            update_gen_data = get_batch(gen_model, train_set, bucket_id, gen_config.batch_size)
            encoder, decoder, weights, source_inputs, source_outputs = update_gen_data

            # 2. Generate a batch of training data that includes self-generated negatives, produced by Monte Carlo search
            # Sample (X,Y) and (X, ^Y) through ^Y ~ G(*|X) with Monte Carlo search
            train_query, train_answer, train_labels = disc_train_data(sess, gen_model, vocab, source_inputs,
                                                                      source_outputs,
                                                                      encoder, decoder, weights, bucket_id,
                                                                      mc_search=True)

            # [Ignore]... message
            print(just("mc_search: True"))
            if current_step % 200 == 0:
                for i in xrange(len(train_query)):
                    print("label: ", train_labels[i])
                    print(" ".join([tf.compat.as_str(rev_vocab[output]) for output in train_answer[i]]))

            train_query = np.transpose(train_query)
            train_answer = np.transpose(train_answer)

            # 3. Compute the reward for the generator's (Monte Carlo) samples: the final reward is the average reward over all negatives in this batch
            # Compute Reward r for (X, ^Y) using D, based on Monte Carlo search
            reward, _ = HierRNNTrain().step(sess, bucket_id, disc_model, train_query, train_answer, train_labels,
                                            forward_only=True)
            batch_reward += reward / gen_config.steps_per_checkpoint
            print("step_reward: ", reward)

            # 4. Reinforcement learning: update G on (X, ^Y) using reward r
            # TODO(Zhu) How exactly does the reward implement the RL update? (see the sketch after this example)
            gan_adjusted_loss, gen_step_loss, _ = GenTrain().step(gen_model, sess, encoder, decoder, weights, bucket_id,
                                                                  forward_only=False,  # forward_only=False: training mode
                                                                  reward=reward, up_reward=True)  # up_reward: apply the reward
            gen_loss += gen_step_loss / gen_config.steps_per_checkpoint

            print("gen_step_loss: ", gen_step_loss)
            print("gen_step_adjusted_loss: ", gan_adjusted_loss)

            # 5. Teacher forcing: update G on (X, Y); no need to set up_reward to True here
            t_adjusted_loss, t_step_loss, _ = GenTrain().step(gen_model, sess, encoder, decoder, weights, bucket_id,
                                                              forward_only=False)  # forward_only=False: training mode
            t_loss += t_step_loss / gen_config.steps_per_checkpoint

            print("t_step_loss: ", t_step_loss)
            print("t_adjusted_loss", t_adjusted_loss)

            # ================================ [Ignore]... logging: write summaries, save checkpoints ================================ #

            if current_step % gen_config.steps_per_checkpoint == 0:
                step_time += (time.time() - start_time) / gen_config.steps_per_checkpoint

                print("current_steps: %d, step time: %.4f, disc_loss: %.3f, gen_loss: %.3f, t_loss: %.3f, reward: %.3f"
                      % (current_step, step_time, disc_loss, gen_loss, t_loss, batch_reward))

                disc_loss_value = disc_loss_summary.value.add()
                disc_loss_value.tag = disc_config.name_loss
                disc_loss_value.simple_value = float(disc_loss)
                disc_writer.add_summary(disc_loss_summary, int(sess.run(disc_model.global_step)))

                gen_global_steps = sess.run(gen_model.global_step)
                gen_loss_value = gen_loss_summary.value.add()
                gen_loss_value.tag = gen_config.name_loss
                gen_loss_value.simple_value = float(gen_loss)
                t_loss_value = gen_loss_summary.value.add()
                t_loss_value.tag = gen_config.teacher_loss
                t_loss_value.simple_value = float(t_loss)
                batch_reward_value = gen_loss_summary.value.add()
                batch_reward_value.tag = gen_config.reward_name
                batch_reward_value.simple_value = float(batch_reward)
                gen_writer.add_summary(gen_loss_summary, int(gen_global_steps))

                if current_step % (gen_config.steps_per_checkpoint * 2) == 0:
                    print("current_steps: %d, save disc model" % current_step)
                    disc_ckpt_dir = os.path.abspath(os.path.join(disc_config.train_dir, "checkpoints"))
                    if not os.path.exists(disc_ckpt_dir):
                        os.makedirs(disc_ckpt_dir)
                    disc_model_path = os.path.join(disc_ckpt_dir, "disc.model")
                    disc_model.saver.save(sess, disc_model_path, global_step=disc_model.global_step)

                    print("current_steps: %d, save gen model" % current_step)
                    gen_ckpt_dir = os.path.abspath(os.path.join(gen_config.train_dir, "checkpoints"))
                    if not os.path.exists(gen_ckpt_dir):
                        os.makedirs(gen_ckpt_dir)
                    gen_model_path = os.path.join(gen_ckpt_dir, "gen.model")
                    gen_model.saver.save(sess, gen_model_path, global_step=gen_model.global_step)

                step_time, disc_loss, gen_loss, t_loss, batch_reward = 0.0, 0.0, 0.0, 0.0, 0.0
                sys.stdout.flush()
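On the TODO above: up_reward=True presumably makes GenModel scale its loss (or gradients) by the discriminator's reward before the update, i.e. a REINFORCE-style policy-gradient step on the sampled reply. A minimal sketch of that idea, an assumption about GenModel's internals rather than this repo's actual code:

# Assumed REINFORCE-style adjustment, not the repo's actual implementation:
# the cross-entropy loss on the sampled reply ^Y is weighted by the reward,
# so replies the discriminator scores higher get larger gradient steps.
def adjusted_loss(step_loss, reward, up_reward):
    return step_loss * reward if up_reward else step_loss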
Example #7
    def pre_train(self):
        """
        预训练判别器
        :return:
        """
        print(just("Begin training"))
        with tf.Session() as session:
            # ① Create the model
            model = self.create_model(session,
                                      self.config_disc,
                                      name_scope=self.config_disc.name_model)

            # ② Load the dataset
            (self.query_set,
             self.answer_set,
             self.gen_set,
             self.train_buckets_scale) = self._get_dataset()

            # [Ignore]... logging-related
            step_time, loss = 0.0, 0.0
            current_step = 0
            step_loss_summary = tf.Summary()
            disc_writer = tf.summary.FileWriter(
                self.config_disc.tensorboard_dir, session.graph)

            while current_step <= self.config_disc.max_pre_train_step:
                start_time = time.time()  # [Ignore]... logging: step start time

                # ③ Fetch one batch of training data
                bucket_id = self._get_random_bid()
                train_query, train_answer, train_labels = self._get_batch(
                    bucket_id)

                # ④ Get the processed input data (the feed dict)
                feed_dict = self._get_feed_dict(model, bucket_id, train_query,
                                                train_answer, train_labels)

                # ⑤ Select the training ops for this bucket and run them
                fetches = [
                    model.b_train_op[bucket_id], model.b_logits[bucket_id],
                    model.b_loss[bucket_id], model.target
                ]
                train_op, logits, step_loss, target = session.run(
                    fetches, feed_dict)

                # ================================ [Ignore]... logging: write summaries, save checkpoints ================================ #

                # logging: step time
                step_time += (time.time() - start_time
                              ) / self.config_disc.steps_per_checkpoint
                loss += step_loss / self.config_disc.steps_per_checkpoint
                current_step += 1

                # Log once every config_disc.steps_per_checkpoint steps
                if current_step % self.config_disc.steps_per_checkpoint == 0:
                    # logging
                    disc_loss_value = step_loss_summary.value.add()
                    disc_loss_value.tag = self.config_disc.name_loss
                    disc_loss_value.simple_value = float(loss)
                    disc_writer.add_summary(
                        step_loss_summary, int(session.run(model.global_step)))

                    # softmax operation (helper defined elsewhere in the repo;
                    # see the sketch after this example)
                    logits = np.transpose(softmax(np.transpose(logits)))
                    reward = 0.0
                    for logit, label in zip(logits,
                                            train_labels):  # ([1, 0], 1)
                        reward += logit[1]  # probability of the positive class only
                    reward = reward / len(train_labels)
                    print("reward: ", reward)

                    print("current_step: %d, step_loss: %.4f" %
                          (current_step, step_loss))
                    if current_step % (self.config_disc.steps_per_checkpoint *
                                       3) == 0:
                        print("current_step: %d, save_model" % (current_step))
                        disc_ckpt_dir = os.path.abspath(
                            os.path.join(self.config_disc.train_dir,
                                         "checkpoints"))
                        if not os.path.exists(disc_ckpt_dir):
                            os.makedirs(disc_ckpt_dir)
                        disc_model_path = os.path.join(disc_ckpt_dir,
                                                       "disc.model")
                        model.saver.save(session,
                                         disc_model_path,
                                         global_step=model.global_step)

                    step_time, loss = 0.0, 0.0
                    sys.stdout.flush()
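The softmax helper used in Example #7 is not shown above; a standard stand-in with the same calling convention (normalizing over axis 0 of the transposed logits, so each example's two class scores sum to 1) would be:

import numpy as np

# Plausible stand-in for the repo's softmax helper; normalizes over axis 0,
# matching the np.transpose(softmax(np.transpose(logits))) call above.
def softmax(x):
    e = np.exp(x - np.max(x, axis=0, keepdims=True))  # stabilized
    return e / np.sum(e, axis=0, keepdims=True)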
Example #8
    def pre_train(self, gen_config):
        """
        预训练生成器
        :param gen_config:
        :return:
        """
        print(just("Begin training"))
        with tf.Session() as sess:
            # ① Create the model
            model = self.create_model(sess,
                                      gen_config,
                                      forward_only=False,
                                      name_scope=gen_config.name_model)

            # ② Load the dataset
            self.train_set, self.train_buckets_scale = self._get_dataset(
                gen_config)

            # [Ignore]... logging-related
            step_time, loss = 0.0, 0.0
            current_step = 0
            gen_loss_summary = tf.Summary()
            gen_writer = tf.summary.FileWriter(gen_config.tensorboard_dir,
                                               sess.graph)

            while current_step <= gen_config.max_pre_train_step:
                start_time = time.time()  # [Ignore]... logging: step start time

                # ③ Fetch one batch of training data
                bucket_id = self._get_random_bid()
                encoder_inputs, decoder_inputs, target_weights, batch_source_encoder, batch_source_decoder = get_batch(
                    model, self.train_set, bucket_id, gen_config.batch_size)

                # ④ Get the processed input data and
                # ⑤ run one training step
                _, step_loss, _ = self.step(model,
                                            sess,
                                            encoder_inputs,
                                            decoder_inputs,
                                            target_weights,
                                            bucket_id,
                                            forward_only=False)

                # ================================ [Ignore]... logging: write summaries, save checkpoints ================================ #

                # logging: step time
                step_time += (time.time() -
                              start_time) / gen_config.steps_per_checkpoint
                loss += step_loss / gen_config.steps_per_checkpoint
                current_step += 1

                # Log once every gen_config.steps_per_checkpoint steps
                if current_step % gen_config.steps_per_checkpoint == 0:
                    # logging
                    bucket_value = gen_loss_summary.value.add()
                    bucket_value.tag = gen_config.name_loss
                    bucket_value.simple_value = float(loss)
                    gen_writer.add_summary(gen_loss_summary,
                                           int(model.global_step.eval()))

                    # Print statistics for the previous epoch.
                    perplexity = math.exp(loss) if loss < 300 else float('inf')
                    print(
                        "global step %d learning rate %.4f step-time %.2f perplexity "
                        "%.2f" %
                        (model.global_step.eval(), model.learning_rate.eval(),
                         step_time, perplexity))
                    # Decrease learning rate if no improvement was seen over last 3 times.
                    # if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    #     sess.run(model.learning_rate_decay_op)
                    # previous_losses.append(loss)
                    # Save checkpoint and zero timer and loss.

                    if current_step % (gen_config.steps_per_checkpoint *
                                       3) == 0:
                        print("current_step: %d, save model" % (current_step))
                        gen_ckpt_dir = os.path.abspath(
                            os.path.join(gen_config.train_dir, "checkpoints"))
                        if not os.path.exists(gen_ckpt_dir):
                            os.makedirs(gen_ckpt_dir)
                        checkpoint_path = os.path.join(gen_ckpt_dir,
                                                       "chitchat.model")
                        model.saver.save(sess,
                                         checkpoint_path,
                                         global_step=model.global_step)

                    step_time, loss = 0.0, 0.0
                    # Run evals on development set and print their perplexity.
                    # for bucket_id in xrange(len(gen_config.buckets)):
                    #   encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                    #       dev_set, bucket_id)
                    #   _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                    #                                target_weights, bucket_id, True)
                    #   eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
                    #   print("  eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))
                    sys.stdout.flush()
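A quick note on the perplexity line in Example #8: perplexity is the exponential of the average per-token cross-entropy (in nats), and the loss < 300 guard only keeps math.exp from overflowing:

import math

# e.g. an average loss of 4.6 nats corresponds to perplexity ~100: the model
# is roughly as uncertain as a uniform choice over ~100 tokens.
print(math.exp(4.6))  # ~99.5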