        saver.restore(sess, ckpt.model_checkpoint_path)
        current_step = int(
            os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
        print(current_step)
    else:
        print('Created new model parameters..')
        current_step = 0

    total_loss = 0
    total_bleu = 0
    total_perpl = 0

    summary_writer = tf.summary.FileWriter(config.LOGS, graph=sess.graph)
    for e in range(config.EPOCHS):
        print("----- Epoch {}/{} -----".format(e + 1, config.EPOCHS))
        batches = getBatches(trainingSamples, config.BATCH_SIZE)
        if e != 0:
            checkpoint_path = os.path.join(config.MODEL_DIR, config.MODEL_NAME)
            # Save with current_step so the restore logic above can parse the
            # step back out of the checkpoint filename.
            saver.save(sess, checkpoint_path, global_step=current_step)

        for nextBatch in tqdm(batches, desc="Training"):
            if current_step >= 150000:  # stop once the step budget is reached
                break
            loss, summary, pred, target = model.train(sess, nextBatch)
            bleu = get_bleu(target, pred)
            total_loss += loss
            # 2 ** loss treats the loss as a base-2 cross-entropy; use
            # math.exp(loss) instead if the loss is in nats.
            total_perpl += 2 ** float(loss)
            total_bleu += bleu

            current_step += 1
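
The snippet above relies on a get_bleu(target, pred) helper that is not shown. A minimal sketch of what it could look like using NLTK's corpus-level BLEU; the batch format (lists of token sequences) and the smoothing choice are assumptions:

from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction

def get_bleu(target, pred):
    # target/pred are assumed to be batches of token sequences.
    smoothie = SmoothingFunction().method1  # avoids zero scores on short sentences
    references = [[list(t)] for t in target]  # one reference per hypothesis
    hypotheses = [list(p) for p in pred]
    return corpus_bleu(references, hypotheses, smoothing_function=smoothie)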
Example #2
tf.app.flags.DEFINE_string('model_name', 'chatbot.ckpt', 'File name used for model checkpoints')
FLAGS = tf.app.flags.FLAGS

data_path = '/Users/shengwan/Desktop/seq2seq_chatbot_new-master/data/dataset-cornell-length10-filter1-vocabSize40000.pkl'
word2id, id2word, trainingSamples = loadDataset(data_path)

with tf.Session() as sess:
    model = Seq2SeqModel(FLAGS.rnn_size, FLAGS.num_layers, FLAGS.embedding_size, FLAGS.learning_rate, word2id,
                         mode='train', use_attention=True, beam_search=False, beam_size=5, max_gradient_norm=5.0)
    ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print('Reloading model parameters from', ckpt.model_checkpoint_path)
        # Restore into the existing session instead of rebinding `sess`, which
        # would discard the session created by the `with` block above.
        model.saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        print('Created new model parameters..')
        sess.run(tf.global_variables_initializer())
    current_step = 0
    summary_writer = tf.summary.FileWriter(FLAGS.model_dir, graph=sess.graph)
    for e in range(FLAGS.numEpochs):
        print("----- Epoch {}/{} -----".format(e + 1, FLAGS.numEpochs))
        batches = getBatches(trainingSamples, FLAGS.batch_size)
        for nextBatch in tqdm(batches, desc="Training"):
            loss, summary = model.train(sess, nextBatch)
            current_step += 1
            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(float(loss)) if loss < 300 else float('inf')
                tqdm.write("----- Step %d -- Loss %.2f -- Perplexity %.2f" % (current_step, loss, perplexity))
                summary_writer.add_summary(summary, current_step)
                checkpoint_path = os.path.join(FLAGS.model_dir, FLAGS.model_name)
                model.saver.save(sess, checkpoint_path, global_step=current_step)
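
The snippet above reads FLAGS.rnn_size, num_layers, embedding_size, learning_rate, batch_size, numEpochs, steps_per_checkpoint and model_dir, but only the model_name flag definition survived. A minimal sketch of how the missing definitions might look; every default value here is an assumption:

tf.app.flags.DEFINE_integer('rnn_size', 1024, 'Hidden units per RNN layer')
tf.app.flags.DEFINE_integer('num_layers', 2, 'Number of RNN layers')
tf.app.flags.DEFINE_integer('embedding_size', 1024, 'Embedding dimension')
tf.app.flags.DEFINE_float('learning_rate', 0.0001, 'Initial learning rate')
tf.app.flags.DEFINE_integer('batch_size', 128, 'Batch size')
tf.app.flags.DEFINE_integer('numEpochs', 30, 'Number of training epochs')
tf.app.flags.DEFINE_integer('steps_per_checkpoint', 100, 'Training steps between checkpoints')
tf.app.flags.DEFINE_string('model_dir', 'model/', 'Directory for checkpoints and summaries')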
Example #3
            ckpt = tf.train.get_checkpoint_state(model_dir)
            # If a saved model exists
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
                print('Reloading model parameters..')
                # Restore the variables with saver.restore()
                model.saver.restore(sess, ckpt.model_checkpoint_path)
            else:  # the model could not be found
                raise ValueError('No such file:[{}]'.format(model_dir))  # raise an error
        else:  # train the model from scratch
            sess.run(tf.global_variables_initializer())

        for e in range(epochs):  # for each epoch
            # Print the epoch header
            print("----- Epoch {}/{} -----".format(e + 1, epochs))
            # Split this epoch's data into batches; see data_helpers.py
            # for the details
            batches = getBatches(sources_data, targets_data, batch_size)
            step = 0  # counts how many batches have been trained on
            for nextBatch in batches:  # iterate over the batches
                # Feed this batch to the network for one training step
                loss, summary = model.train(nextBatch)
                if step % display == 0:  # log every `display` batches
                    # math.exp(x) returns e raised to the power x;
                    # inf means positive infinity
                    # Compute the perplexity
                    perplexity = math.exp(
                        float(loss)) if loss < 300 else float('inf')
                    # Print the loss and perplexity
                    print("----- Loss %.2f -- Perplexity %.2f" %
                          (loss, perplexity))
                step += 1  # count how many batches have been trained on
            # Save the current model parameters
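            # The original snippet is cut off after the comment above. Based on
            # the save pattern in the earlier examples, the missing line would
            # look roughly like this (the checkpoint filename is a placeholder):
            model.saver.save(sess, os.path.join(model_dir, 'seq2seq.ckpt'), global_step=e)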