def inputs(eval_data):
    """Build the CIFAR-10 input pipeline from the binary batch files.

    Reads the module-level ``data_dir`` and ``batch_size`` settings and
    forwards them to ``CIFAR10_input.inputs``.

    Args:
        eval_data: Passed through unchanged as the ``eval_data`` argument of
            ``CIFAR10_input.inputs``.

    Returns:
        Whatever ``CIFAR10_input.inputs`` returns.

    Raises:
        ValueError: If the module-level ``data_dir`` is empty or ``None``.
    """
    if not data_dir:
        raise ValueError('Please supply a data_dir')
    # Keep the joined path under its own name: assigning to ``data_dir`` here
    # would make that name local to the function, and the check above would
    # then fail with UnboundLocalError instead of reading the module setting.
    batches_dir = os.path.join(data_dir, 'cifar-10-batches-bin')
    return CIFAR10_input.inputs(eval_data=eval_data,
                                data_dir=batches_dir,
                                batch_size=batch_size)
def evaluate():
    """Periodically evaluate the latest CIFAR-10 checkpoint.

    Builds a fresh graph, then loops: restores the newest checkpoint found in
    ``CIFAR10_train.MODEL_SAVE_PATH``, measures top-1 accuracy over
    ``ceil(FLAGS.num_examples / BATCH_SIZE)`` batches, prints it, writes it as
    the ``accuracy_test`` summary to ``../log_test`` and sleeps for
    ``FLAGS.EVAL_INTERVAL_SECS`` seconds. The function returns as soon as no
    checkpoint can be found.

    Raises:
        ValueError: If ``CIFAR10_train.data_dir`` is empty.
    """
    # Build a new graph and make it the default for evaluation.
    with tf.Graph().as_default():
        batch_size = CIFAR10_train.FLAGS.BATCH_SIZE

        with tf.name_scope('input'):
            # Read the data used for testing.
            if not CIFAR10_train.data_dir:
                raise ValueError('Please supply a data_dir')
            images, labels = CIFAR10_input.inputs(
                eval_data=True,
                data_dir=CIFAR10_train.data_dir,
                batch_size=batch_size)

        # Compute the forward pass directly. The regularization loss is not
        # needed at test time, so the regularizer is set to None.
        logits = CIFAR10_inference.inference(input_tensor=images,
                                             train=False,
                                             regularizer=None)

        with tf.name_scope('accuracy'):
            top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Load the model through variable renaming so the forward pass does
        # not have to call the moving-average function to get averaged values.
        variable_averages = tf.train.ExponentialMovingAverage(
            CIFAR10_train.FLAGS.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # merge_all() returns None when the graph defines no summaries.
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter('../log_test', tf.get_default_graph())

        # The evaluation loop only exits through `return` or an exception, so
        # the writer and the queue-runner threads are released in `finally`
        # blocks; cleanup code placed after the loop would never run.
        try:
            with tf.Session() as sess:
                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)
                try:
                    while True:
                        # Finds the newest model file in the directory via
                        # the checkpoint index file.
                        ckpt = tf.train.get_checkpoint_state(
                            CIFAR10_train.MODEL_SAVE_PATH)
                        if not (ckpt and ckpt.model_checkpoint_path):
                            print('No checkpoint file found')
                            return

                        # Load the model.
                        saver.restore(sess, ckpt.model_checkpoint_path)
                        # Recover the training step count from the file name
                        # (the text after the last '-').
                        global_step = ckpt.model_checkpoint_path.split(
                            '/')[-1].split('-')[-1]

                        num_iter = int(
                            math.ceil(FLAGS.num_examples / batch_size))
                        true_count = 0
                        total_sample_count = num_iter * batch_size
                        i = 0
                        while i < num_iter and not coord.should_stop():
                            predictions = sess.run([top_k_op])
                            true_count += np.sum(predictions)
                            i += 1

                        prediction = true_count / total_sample_count
                        print('After %s steps training, validation accuracy=%g'
                              % (global_step, prediction))

                        summary = tf.Summary()
                        if merged is not None:
                            summary.ParseFromString(sess.run(merged))
                        summary.value.add(tag='accuracy_test',
                                          simple_value=prediction)
                        writer.add_summary(summary, global_step)

                        time.sleep(FLAGS.EVAL_INTERVAL_SECS)
                finally:
                    coord.request_stop()
                    coord.join(threads)
        finally:
            writer.close()
def CIFAR10_train():
    """Build the CIFAR-10 training graph and run the training loop.

    Runs ``FLAGS.TRAINING_STEPS`` steps of gradient descent with an
    exponentially decayed learning rate while maintaining moving averages of
    the trainable variables. Every 1000 steps it records a full run trace and
    the merged summaries to ``../log_train``, prints the loss together with
    the accuracy on the current training batch and on one fixed test batch,
    and saves a checkpoint under ``MODEL_SAVE_PATH``.
    """
    # Keep all input-processing ops under the 'input' name scope.
    with tf.name_scope('input'):
        # Read the data.
        images_train, lables_train = CIFAR10_input.distorted_inputs(
            data_dir=data_dir, batch_size=FLAGS.BATCH_SIZE)
        images_test, lables_test = CIFAR10_input.inputs(
            eval_data=True, data_dir=data_dir, batch_size=FLAGS.BATCH_SIZE)
        # Input and output placeholders.
        x = tf.placeholder(tf.float32, [
            None, CIFAR10_inference.IMAGE_SIZE, CIFAR10_inference.IMAGE_SIZE,
            CIFAR10_inference.NUM_CHANNELS
        ], name='x-input')
        y_ = tf.placeholder(tf.float32,
                            [None, CIFAR10_inference.OUTPUT_NODE],
                            name='y-input')

    # Forward pass as defined by CIFAR10_inference.
    y = CIFAR10_inference.inference(x, True, 'L2')
    global_step = tf.Variable(0, trainable=False)

    # Keep the moving-average ops under one name scope.
    with tf.name_scope('moving_average'):
        variable_average = tf.train.ExponentialMovingAverage(
            FLAGS.MOVING_AVERAGE_DECAY, global_step)
        variables_average_op = variable_average.apply(
            tf.trainable_variables())

    # Keep the loss computation under one name scope.
    with tf.name_scope('loss_function'):
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=y, labels=tf.argmax(y_, 1))
        cross_entropy_mean = tf.reduce_mean(cross_entropy)
        tf.add_to_collection('losses', cross_entropy_mean)
        loss = tf.add_n(tf.get_collection('losses'))
        tf.summary.scalar('loss_function', loss)

    # Learning rate, optimizer and the per-step training op share a scope.
    with tf.name_scope('train_step'):
        learning_rate = tf.train.exponential_decay(
            FLAGS.LEARNING_RATE_BASE,
            global_step,
            50000 / FLAGS.BATCH_SIZE,
            FLAGS.LEARNING_RATE_DECAY,
            staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)
        train_step = tf.train.GradientDescentOptimizer(
            learning_rate).minimize(loss, global_step)
        # Group the parameter update and the moving-average update into a
        # single op.
        with tf.control_dependencies([train_step, variables_average_op]):
            train_op = tf.no_op(name='train')

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy_train = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
        tf.summary.scalar('accuracy_train', accuracy_train)
        # Same computation as accuracy_train; it is not exported as a summary
        # and is only evaluated explicitly on the test batch below.
        accuracy_test = tf.reduce_mean(tf.cast(correct_prediction, 'float'))

    # Create the TensorFlow saver used for checkpointing.
    saver = tf.train.Saver()

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # Merge all summaries for logging.
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter('../log_train', tf.get_default_graph())

        # Release the queue-runner threads and the writer even when training
        # is interrupted by an exception.
        try:
            # Row k of this table is the one-hot encoding of label k.
            one_hot_table = np.eye(10, dtype=float)

            # A single test batch is drawn once and reused for every report.
            xs_test, ys_test = sess.run([images_test, lables_test])
            test_feed = {x: xs_test, y_: one_hot_table[ys_test]}

            for i in range(FLAGS.TRAINING_STEPS):
                xs, ys = sess.run([images_train, lables_train])
                train_feed = {x: xs, y_: one_hot_table[ys]}

                if i % 1000 != 0:
                    sess.run(train_op, feed_dict=train_feed)
                    continue

                # Every 1000 steps: trace the run, report and checkpoint.
                # Configure what is recorded at run time.
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                # Proto that receives the run-time information.
                run_metadata = tf.RunMetadata()
                # Pass both to the run call so the time and memory cost of
                # every node is recorded.
                _, loss_value, _, result = sess.run(
                    [train_op, loss, global_step, merged],
                    feed_dict=train_feed,
                    options=run_options,
                    run_metadata=run_metadata)
                # Write the per-node run-time information to the log file.
                writer.add_run_metadata(run_metadata, 'step%03d' % i)
                writer.add_summary(result, i)

                # Report the loss on the current batch plus the accuracy on
                # the training batch and on the fixed test batch.
                train_accuracy = accuracy_train.eval(feed_dict=train_feed)
                test_accuracy = accuracy_test.eval(feed_dict=test_feed)
                print(
                    '%s:After %d training steps, loss = %g, accuracy = %g, validation accuracy=%g'
                    % (datetime.now(), i, loss_value, train_accuracy,
                       test_accuracy))

                # Save the current model. Passing global_step appends the
                # step count to the file name, e.g. 'model.ckpt-1000' for the
                # model obtained after 1000 training steps.
                saver.save(sess,
                           os.path.join(MODEL_SAVE_PATH, MODEL_NAME),
                           global_step=global_step)
        finally:
            try:
                coord.request_stop()
                coord.join(threads)
            finally:
                writer.close()