Esempio n. 1
0
def main():
    """Train a PTB language model and report validation/test perplexity.

    Builds a training model and a weight-sharing evaluation model, runs
    NUM_EPOCH training epochs, then evaluates on the test split and dumps
    the graph for TensorBoard.
    """
    train_data, valid_data, test_data, _ = reader.ptb_raw_data(DATA_PATH)

    # Number of truncated-BPTT iterations that make up one epoch, per split.
    # The trailing -1 accounts for the final target token.
    train_data_len = len(train_data)
    train_batch_len = train_data_len // TRAIN_BATCH_SIZE
    train_epoch_size = (train_batch_len - 1) // TRAIN_NUM_STEP

    valid_data_len = len(valid_data)
    valid_batch_len = valid_data_len // EVAL_BATCH_SIZE
    valid_epoch_size = (valid_batch_len - 1) // EVAL_NUM_STEP

    test_data_len = len(test_data)
    test_batch_len = test_data_len // EVAL_BATCH_SIZE
    test_epoch_size = (test_batch_len - 1) // EVAL_NUM_STEP

    initializer = tf.random_uniform_initializer(-0.05, 0.05)
    # The training model creates the variables (reuse=None) ...
    with tf.variable_scope("language_model",
                           reuse=None,
                           initializer=initializer):
        train_model = PTBModel(True, TRAIN_BATCH_SIZE, TRAIN_NUM_STEP)

    # ... and the evaluation model shares them (reuse=True).
    with tf.variable_scope("language_model",
                           reuse=True,
                           initializer=initializer):
        eval_model = PTBModel(False, EVAL_BATCH_SIZE, EVAL_NUM_STEP)

    # Train the model.
    with tf.Session() as session:
        tf.global_variables_initializer().run()

        train_queue = reader.ptb_producer(train_data, train_model.batch_size,
                                          train_model.num_steps)
        eval_queue = reader.ptb_producer(valid_data, eval_model.batch_size,
                                         eval_model.num_steps)
        test_queue = reader.ptb_producer(test_data, eval_model.batch_size,
                                         eval_model.num_steps)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=session, coord=coord)

        for i in range(NUM_EPOCH):
            print("In iteration: %d" % (i + 1))
            run_epoch(session, train_model, train_queue, train_model.train_op,
                      True, train_epoch_size)

            valid_perplexity = run_epoch(session, eval_model, eval_queue,
                                         tf.no_op(), False, valid_epoch_size)
            print("Epoch: %d Validation Perplexity: %.3f" %
                  (i + 1, valid_perplexity))

        test_perplexity = run_epoch(session, eval_model, test_queue,
                                    tf.no_op(), False, test_epoch_size)
        print("Test Perplexity: %.3f" % test_perplexity)
        # BUG FIX: FileWriter takes the graph via `graph=`; `graph_def=` is
        # deprecated in TF 1.x. Close the writer so the event file is flushed.
        writer = tf.summary.FileWriter(
            'E:/python/CNN_test/src/tensorflow/tensorboard/simple.log',
            graph=session.graph)
        writer.close()
        coord.request_stop()
        coord.join(threads)
Esempio n. 2
0
def main():
    """Train a PTB language model, validating after every epoch.

    Identical structure to the classic PTB tutorial: a training model owns
    the variables, an evaluation model reuses them, and both are fed from
    `ptb_producer` input queues.
    """
    train_data, valid_data, test_data, _ = reader.ptb_raw_data(DATA_PATH)
    # Number of truncated-BPTT iterations per epoch for each split.
    train_data_len = len(train_data)
    train_batch_len = train_data_len // Train_Batch_Size
    train_epoch_size = (train_batch_len - 1) // Train_Num_Step

    valid_data_len = len(valid_data)
    valid_batch_len = valid_data_len // Eval_Batch_Size
    valid_epoch_size = (valid_batch_len - 1) // Eval_Num_Step

    test_data_len = len(test_data)
    test_batch_len = test_data_len // Eval_Batch_Size
    test_epoch_size = (test_batch_len - 1) // Eval_Num_Step

    initializer = tf.random_uniform_initializer(-0.05, 0.05)
    # Training model creates the shared variables ...
    with tf.variable_scope("language_model",
                           reuse=None,
                           initializer=initializer):
        train_model = PTBModel(True, Train_Batch_Size, Train_Num_Step)
    # ... evaluation model reuses them.
    with tf.variable_scope("language_model",
                           reuse=True,
                           initializer=initializer):
        eval_model = PTBModel(False, Eval_Batch_Size, Eval_Num_Step)
    # Train the model.

    with tf.Session() as session:
        tf.global_variables_initializer().run()

        train_queue = reader.ptb_producer(train_data, Train_Batch_Size,
                                          Train_Num_Step)
        eval_queue = reader.ptb_producer(valid_data, Eval_Batch_Size,
                                         Eval_Num_Step)
        test_queue = reader.ptb_producer(test_data, Eval_Batch_Size,
                                         Eval_Num_Step)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=session, coord=coord)

        for i in range(Num_Epoch):
            # FIX: removed a leftover debug `print(i)` that duplicated the
            # iteration message below.
            print("In iteration: %d" % (i + 1))
            run_epoch(session, train_model, train_queue, train_model.train_op,
                      True, train_epoch_size)

            valid_perplexity = run_epoch(session, eval_model, eval_queue,
                                         tf.no_op(), False, valid_epoch_size)
            print("Epoch: %d Validation Perplexity: %.3f" %
                  (i + 1, valid_perplexity))

        test_perplexity = run_epoch(session, eval_model, test_queue,
                                    tf.no_op(), False, test_epoch_size)
        print("Test Perplexity: %.3f" % test_perplexity)

        coord.request_stop()
        coord.join(threads)
Esempio n. 3
0
 def __init__(self, config, data, name=None):
     """Build the input pipeline for one PTB data split.

     Args:
         config: object exposing `batch_size` and `num_steps` attributes.
         data: flat list of word ids for this split.
         name: optional name scope passed through to `ptb_producer`.
     """
     batch_size = config.batch_size
     num_steps = config.num_steps
     self.batch_size = batch_size
     self.num_steps = num_steps
     # One epoch visits every position once; the -1 leaves room for the
     # shifted target sequence.
     self.epoch_size = ((len(data) // batch_size) - 1) // num_steps
     self.input_data, self.targets = reader.ptb_producer(
         data, batch_size, num_steps, name=name)
Esempio n. 4
0
def testPtbProducer():
    """Smoke-test reader.ptb_producer: print ten (input, target) batches."""
    # Raw string guards the Windows-style backslashes in the path.
    train_data, valid_data, test_data, _ = reader.ptb_raw_data(r".\data\data")
    raw_data = train_data
    batch_size = 3
    num_steps = 2
    x, y = reader.ptb_producer(raw_data, batch_size, num_steps)
    with tf.Session() as session:
        coord = tf.train.Coordinator()
        # BUG FIX: keep the thread handles so they can actually be joined;
        # `coord.join()` with no arguments waits on nothing.
        threads = tf.train.start_queue_runners(session, coord=coord)
        try:
            for i in range(10):
                xval, yval = session.run([x, y])
                print(xval, yval)
        finally:
            coord.request_stop()
            coord.join(threads)
Esempio n. 5
0
 def testPtbProducer(self):
     """Verify ptb_producer emits the expected input/target batch pairs."""
     raw_data = [4, 3, 2, 1, 0, 5, 6, 1, 1, 1, 1, 0, 3, 4, 1]
     batch_size = 3
     num_steps = 2
     x, y = reader.ptb_producer(raw_data, batch_size, num_steps)
     with self.test_session() as session:
         coord = tf.train.Coordinator()
         # BUG FIX: capture the runner threads so join() actually waits on
         # them; `coord.join()` with no arguments joins nothing.
         threads = tf.train.start_queue_runners(session, coord=coord)
         try:
             xval, yval = session.run([x, y])
             self.assertAllEqual(xval, [[4, 3], [5, 6], [1, 0]])
             self.assertAllEqual(yval, [[3, 2], [6, 1], [0, 3]])
             xval, yval = session.run([x, y])
             self.assertAllEqual(xval, [[2, 1], [1, 1], [3, 4]])
             self.assertAllEqual(yval, [[1, 0], [1, 1], [4, 1]])
         finally:
             coord.request_stop()
             coord.join(threads)
Esempio n. 6
0
 def testPtbProducer(self):
   """Verify ptb_producer emits the expected input/target batch pairs."""
   raw_data = [4, 3, 2, 1, 0, 5, 6, 1, 1, 1, 1, 0, 3, 4, 1]
   batch_size = 3
   num_steps = 2
   x, y = reader.ptb_producer(raw_data, batch_size, num_steps)
   with self.test_session() as session:
     coord = tf.train.Coordinator()
     # BUG FIX: capture the runner threads so join() actually waits on them;
     # `coord.join()` with no arguments joins nothing.
     threads = tf.train.start_queue_runners(session, coord=coord)
     try:
       xval, yval = session.run([x, y])
       self.assertAllEqual(xval, [[4, 3], [5, 6], [1, 0]])
       self.assertAllEqual(yval, [[3, 2], [6, 1], [0, 3]])
       xval, yval = session.run([x, y])
       self.assertAllEqual(xval, [[2, 1], [1, 1], [3, 4]])
       self.assertAllEqual(yval, [[1, 0], [1, 1], [4, 1]])
     finally:
       coord.request_stop()
       coord.join(threads)
Esempio n. 7
0
 def __init__(self, config, data, name=None):
   """Set up the queue-based input pipeline for one PTB data split.

   Args:
     config: object with `batch_size` and `num_steps` attributes.
     data: flat list of word ids.
     name: optional name scope forwarded to `ptb_producer`.
   """
   batch_size = config.batch_size
   num_steps = config.num_steps
   self.batch_size = batch_size
   self.num_steps = num_steps
   # -1 leaves one token of headroom for the shifted targets.
   self.epoch_size = ((len(data) // batch_size) - 1) // num_steps
   self.input_data, self.targets = reader.ptb_producer(data,
                                                       batch_size,
                                                       num_steps,
                                                       name=name)
Esempio n. 8
0
#     for i in range(nb_epochs):
#         input_gen = reader.ptb_iterator(id_corpus, corpus_length // 4, num_steps)
#         for x_batch, y_true_batch in input_gen:
#             to_compute = [train_op, loss_op, global_step_tensor]
#             feed_dict = {
#                 x: x_batch,
#                 y_true: y_true_batch
#             }
#             _, loss, global_step = sess.run(to_compute, feed_dict=feed_dict)

#             if global_step % 100 == 0:
#                 print('Iteration %d/%d - loss:%f' % (global_step, nb_epochs, loss))


input_gen = reader.ptb_producer(id_corpus, batch_size, num_steps)
def feed_function():
    """Return a feed_dict of the next (input, target) batch.

    learn.train creates a default session in whose scope this runs, so the
    queue tensors can be evaluated directly.
    """
    inputs_tensor, targets_tensor = input_gen
    return {
        x: inputs_tensor.eval(),
        y_true: targets_tensor.eval(),
    }

final_loss = tf.contrib.learn.train(
    graph,
    dir + "/results",
    train_op,
    loss_op,
    global_step_tensor=global_step_tensor,
Esempio n. 9
0
def main(_):
    """Train a PTB language model and report validation/test perplexity."""
    # Load the raw data splits.
    train_data, valid_data, test_data, _ = reader.ptb_raw_data(DATA_PATH)

    # Number of truncated-BPTT iterations per epoch for each split.
    train_data_len = len(train_data)
    train_batch_len = train_data_len // TRAIN_BATCH_SIZE
    train_epoch_size = (train_batch_len - 1) // TRAIN_NUM_STEP

    valid_data_len = len(valid_data)
    valid_batch_len = valid_data_len // EVAL_BATCH_SIZE
    valid_epoch_size = (valid_batch_len - 1) // EVAL_NUM_STEP

    test_data_len = len(test_data)
    test_batch_len = test_data_len // EVAL_BATCH_SIZE
    test_epoch_size = (test_batch_len - 1) // EVAL_NUM_STEP

    # Initializer shared by both model instances.
    initializer = tf.random_uniform_initializer(-0.05, 0.05)
    # Training model: creates the variables.
    with tf.variable_scope("language_model",
                           reuse=None,
                           initializer=initializer):
        train_model = PTBModel(True, TRAIN_BATCH_SIZE, TRAIN_NUM_STEP)

    # Evaluation model: reuses the training model's variables.
    with tf.variable_scope("language_model",
                           reuse=True,
                           initializer=initializer):
        eval_model = PTBModel(False, EVAL_BATCH_SIZE, EVAL_NUM_STEP)

    with tf.Session() as session:
        tf.global_variables_initializer().run()

        train_queue = reader.ptb_producer(train_data, train_model.batch_size,
                                          train_model.num_steps)
        eval_queue = reader.ptb_producer(valid_data, eval_model.batch_size,
                                         eval_model.num_steps)
        test_queue = reader.ptb_producer(test_data, eval_model.batch_size,
                                         eval_model.num_steps)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=session, coord=coord)

        # Train on the training split.
        # BUG FIX: the original used Python 2 `print` statements, which are
        # syntax errors in Python 3; single-argument print() is equivalent
        # on both versions.
        for i in range(NUM_EPOCH):
            print("In iteration: {}".format(i + 1))

            run_epoch(session, train_model, train_queue, train_model.train_op,
                      True, train_epoch_size)

            valid_perplexity = run_epoch(session, eval_model, eval_queue,
                                         tf.no_op(), False, valid_epoch_size)

            print("Epoch: {} Validation Perplexity: {:.3f}".format(
                i + 1, valid_perplexity))

        # Finally evaluate on the test split.
        test_perplexity = run_epoch(session, eval_model, test_queue,
                                    tf.no_op(), False, test_epoch_size)
        print("Test Perplexity: {:.3f}".format(test_perplexity))

        coord.request_stop()
        coord.join(threads)
Esempio n. 10
0
def main(argv=None):
    """Build, train, and evaluate a PTB language model."""
    # Fetch the raw data splits.
    train_data, valid_data, test_data, _ = reader.ptb_raw_data(DATA_PATH)
    # Initializer shared by both model instances.
    initializer = tf.random_uniform_initializer(-0.05, 0.05)

    def _epoch_size(data, batch_size, num_steps):
        # Truncated-BPTT iterations per epoch; -1 leaves room for the
        # shifted target sequence.
        return ((len(data) // batch_size) - 1) // num_steps

    train_epoch_size = _epoch_size(train_data, TRAINING_BATCH_SIZE,
                                   TRAINING_NUM_STEP)
    valid_epoch_size = _epoch_size(valid_data, EVAL_BATCH_SIZE, EVAL_NUM_STEP)
    test_epoch_size = _epoch_size(test_data, EVAL_BATCH_SIZE, EVAL_NUM_STEP)

    # Training model creates the shared variables ...
    with tf.variable_scope("language_model",
                           reuse=None,
                           initializer=initializer):
        train_model = PTBModel(True, TRAINING_BATCH_SIZE, TRAINING_NUM_STEP)

    # ... and the evaluation model reuses them.
    with tf.variable_scope("language_model",
                           reuse=True,
                           initializer=initializer):
        eval_model = PTBModel(False, EVAL_BATCH_SIZE, EVAL_NUM_STEP)

    with tf.Session() as session:
        tf.global_variables_initializer().run()

        train_d = reader.ptb_producer(train_data, TRAINING_BATCH_SIZE,
                                      TRAINING_NUM_STEP)
        valid_d = reader.ptb_producer(valid_data, EVAL_BATCH_SIZE,
                                      EVAL_NUM_STEP)
        test_d = reader.ptb_producer(test_data, EVAL_BATCH_SIZE, EVAL_NUM_STEP)

        # Launch the input-queue threads.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=session, coord=coord)

        # Train, validating after each epoch.
        for epoch in range(NUM_EPOCH):
            print("in iteration: %d" % (epoch + 1))
            run_epoch(session, train_model, train_d, train_model.train_op,
                      train_epoch_size, True)

            valid_perplexity = run_epoch(session, eval_model, valid_d,
                                         tf.no_op(), valid_epoch_size, False)
            print("Epoch: %d Validation Perplexity: %.3f" %
                  (epoch + 1, valid_perplexity))

        # Final evaluation on the test split.
        test_perplexity = run_epoch(session, eval_model, test_d, tf.no_op(),
                                    test_epoch_size, False)
        print("Test Perplexity: %.3f" % test_perplexity)

        # Shut the queue threads down cleanly.
        coord.request_stop()
        coord.join(threads)