def evaluate():
    """Eval CIFAR-10 for a number of steps."""
    with tf.Graph().as_default() as g:
        # Get images and labels for CIFAR-10.
        eval_data = FLAGS.eval_data == 'test'
        #images, labels = cifar10.inputs(eval_data=eval_data)
        images, labels = cifar10_input.inputs(eval_data=eval_data,
                                              data_dir=FLAGS.data_dir,
                                              batch_size=FLAGS.batch_size)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cifar10.inference(images)

        # Calculate predictions.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving average version of the learned variables for eval.
        #variable_averages = tf.train.ExponentialMovingAverage(
        #    cifar10.MOVING_AVERAGE_DECAY)
        #variables_to_restore = variable_averages.variables_to_restore()
        #saver = tf.train.Saver(variables_to_restore)
        saver = tf.train.Saver()

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, g)

        while True:
            eval_once(saver, summary_writer, top_k_op, summary_op)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
def inputs(eval_data):
    """Construct input for CIFAR evaluation using the Reader ops.

    Args:
      eval_data: bool, indicating if one should use the train or eval data set.

    Returns:
      images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
      labels: Labels. 1D tensor of [batch_size] size.

    Raises:
      ValueError: If no data_dir.
    """
    if not FLAGS.data_dir:
        raise ValueError('Please supply a data_dir')
    data_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
    return cifar10_input.inputs(eval_data=eval_data,
                                data_dir=data_dir,
                                batch_size=FLAGS.batch_size)
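# A minimal consumption sketch (not part of the original snippet): the tensors
# returned by inputs() are queue-backed, so they only yield data after the queue
# runners have been started. The session handling and the ten-batch loop below
# are illustrative only, assuming FLAGS.data_dir and FLAGS.batch_size are set
# as in the wrapper above.
import tensorflow as tf

images, labels = inputs(eval_data=True)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for _ in range(10):  # illustrative: pull ten batches
        image_batch, label_batch = sess.run([images, labels])
        print(image_batch.shape, label_batch.shape)
    coord.request_stop()
    coord.join(threads)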
def test():
    cifar10.maybe_download_and_extract()
    images_test, labels_test = cifar10_input.inputs(eval_data=True,
                                                    data_dir=DATA_DIR,
                                                    batch_size=BATCH_SIZE)
    x = tf.placeholder(tf.float32, shape=[None, 24, 24, 3])
    y_ = tf.placeholder(tf.float32, shape=[None])
    keep_prob = tf.placeholder(tf.float32)
    y = cnn_forward.forward(x, keep_prob, cnn_backward.REGULARIZER)

    ema = tf.train.ExponentialMovingAverage(cnn_backward.MOVING_AVERAGE_DECAY)
    ema_restore = ema.variables_to_restore()
    saver = tf.train.Saver(ema_restore)

    correct = tf.equal(tf.argmax(y, 1), tf.cast(y_, tf.int64))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    while True:
        sess = tf.InteractiveSession()
        coord = tf.train.Coordinator()
        queue_runner = tf.train.start_queue_runners(sess, coord=coord)
        image_batch, label_batch = sess.run([images_test, labels_test])
        ckpt = tf.train.get_checkpoint_state(cnn_backward.MODEL_SAVE_PATH)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            accuracy_score = sess.run(accuracy,
                                      feed_dict={x: image_batch,
                                                 y_: label_batch,
                                                 keep_prob: 0.98})
            print("after %s steps, test accuracy is %g" % (global_step, accuracy_score))
        else:
            print("no checkpoint")
            return
        coord.request_stop()
        coord.join(queue_runner)
        sess.close()
        time.sleep(TEST_INTERVAL)
def train():
    global parameters
    # Use NHWC if that data format was requested or if running on CPU.
    if FLAGS.data_format == 'NHWC' or FLAGS.device_id == -1:
        global data_format, data_format_c
        data_format = 'NHWC'
        data_format_c = 'channels_last'
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=FLAGS.log_device_placement)
    config.intra_op_parallelism_threads = 1
    config.inter_op_parallelism_threads = 0
    device_str = get_device_str(FLAGS.device_id)
    if device_str.find('cpu') >= 0:  # cpu version
        num_threads = os.getenv('OMP_NUM_THREADS', 1)
        print 'num_threads: ', num_threads
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=int(num_threads))
    with tf.Graph().as_default(), tf.device(device_str), tf.Session(config=config) as sess:
        initalizer = None
        images = None
        labels = None
        with tf.device('/cpu:0'):
            if FLAGS.use_dataset:
                iterator, initalizer = cifar10_input.dataSet(FLAGS.data_dir,
                                                             FLAGS.batch_size,
                                                             data_format=data_format)
                images, labels = iterator.get_next()
            else:
                images, labels = cifar10_input.inputs(False, FLAGS.data_dir,
                                                      FLAGS.batch_size,
                                                      data_format=data_format)
        labels = tf.contrib.layers.one_hot_encoding(labels, 10)
        logits = inference(images)
        # Add a simple objective so we can calculate the backward pass.
        loss_value = loss(logits, labels)
        # Compute the gradient with respect to all the parameters.
        lr = 0.001
        grad = tf.train.MomentumOptimizer(lr, 0.9).minimize(loss_value)
        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        # Build an initialization operation.
        init = tf.global_variables_initializer()
        # Start running operations on the Graph.
        sess.run(init)
        coord = None
        threads = None
        if FLAGS.use_dataset:
            sess.run(initalizer)
        else:
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        real_batch_size = FLAGS.batch_size
        num_batches_per_epoch = int((EPOCH_SIZE + real_batch_size - 1) / real_batch_size)
        iterations = FLAGS.epochs * num_batches_per_epoch
        average_batch_time = 0.0
        epochs_info = []
        average_loss = 0.0
        for step in xrange(iterations):
            start_time = time.time()
            _, loss_v = sess.run([grad, loss_value])
            duration = time.time() - start_time
            average_loss += loss_v
            average_batch_time += float(duration)
            assert not np.isnan(loss_v), 'Model diverged with loss = NaN'
            if step % FLAGS.log_step == 0:
                examples_per_sec = FLAGS.batch_size / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)')
                print (format_str % (datetime.now(), step, loss_v, examples_per_sec, sec_per_batch))
            if step > 0 and step % (FLAGS.eval_step * num_batches_per_epoch) == 0:
                average_loss /= num_batches_per_epoch * FLAGS.eval_step
                print ('epoch: %d, loss: %.2f' % (step / num_batches_per_epoch, average_loss))
                epochs_info.append('%d:_:%s' % (step / (FLAGS.eval_step * num_batches_per_epoch), average_loss))
                average_loss = 0.0
            if step == iterations - 1:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
        if not FLAGS.use_dataset:
            coord.request_stop()
            coord.join(threads)
        average_batch_time /= iterations
        print 'average_batch_time: ', average_batch_time
        print ('epoch_info: %s' % ','.join(epochs_info))
def run_training():
    # loading data
    # src_images,classes,src_labels = cnn_data_loading.load_training_data()
    # src_test_images,_,src_test_labels = cnn_data_loading.load_test_data()
    cifar10_gtf.maybe_download_and_extract()

    train_images, train_labels = cifar10_input.distorted_inputs(
        data_dir=data_dir, batch_size=batch_size)
    test_images, test_labels = cifar10_input.inputs(eval_data=True,
                                                    data_dir=data_dir,
                                                    batch_size=batch_size)

    # set environment
    log_dir = os.getcwd() + '/log'
    print('log_dir is ' + log_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    # define the placeholders
    x = tf.placeholder(tf.float32,
                       shape=[None, img_height, img_width, img_channel],
                       name='x')
    y_ = tf.placeholder(tf.float32, shape=[None, 10], name='y_')
    y_cls = tf.argmax(y_, dimension=1)

    # build the graph
    y_cnn, keep_prob_local3, keep_prob_local4 = inference(x)

    # define the variables used in training
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_cnn)
    loss = tf.reduce_mean(cross_entropy)
    tf.summary.scalar('loss', loss)

    correct_prediction = tf.equal(tf.arg_max(y_cnn, 1), tf.arg_max(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    train_step = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(loss)

    # define the saver; must come after at least one variable has been defined
    saver = tf.train.Saver()
    start_step = 0

    # start the session
    with tf.Session() as sess:
        try:
            print("Trying to restore last checkpoint ...")
            # Use TensorFlow to find the latest checkpoint - if any.
            last_chk_path = tf.train.latest_checkpoint(checkpoint_dir=log_dir)
            # Try and load the data in the checkpoint.
            saver.restore(sess, save_path=last_chk_path)
            start_step = int(last_chk_path.split('/')[-1].split('-')[-1])
            # If we get to this point, the checkpoint was successfully loaded.
            print('Restored checkpoint from:%s, step:%d' % (last_chk_path, start_step))
        except Exception as e:
            # If the above failed for some reason, simply
            # initialize all the variables for the TensorFlow graph.
            print("Restore fails : {0}, initialize...".format(e))
            init = tf.global_variables_initializer()
            sess.run(init)

        # record the process
        summary = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)

        num_iterations = int(training_size / batch_size)
        # loss_value = tf.Variable(tf.float32,0)
        for i in xrange(epoch):
            for j in xrange(num_iterations):
                step = start_step + i * num_iterations + j + 1
                # begin to train now; first load the data
                image_batch, label_batch = sess.run([train_images, train_labels])
                _, loss_value, _summary_str = sess.run(
                    [train_step, loss, summary],
                    feed_dict={
                        x: image_batch,
                        y_: label_batch,
                        keep_prob_local3: 0.5,
                        keep_prob_local4: 0.5
                    })
                summary_writer.add_summary(_summary_str, step)

                # Limited memory: accuracy is calculated over batches of 5000
                # and the mean value is reported.
                count_times = int(training_size / batch_size)
                train_accuracy = 0
                if (j == num_iterations - 1):
                    for k in xrange(count_times):
                        training_feed_dict = {
                            x: image_batch,
                            y_: label_batch,
                            keep_prob_local3: 1.0,
                            keep_prob_local4: 1.0
                        }
                        train_accuracy = train_accuracy + accuracy.eval(
                            feed_dict=training_feed_dict)
                    # get mean value
                    train_accuracy = train_accuracy / count_times
                    print('step %d, training accuracy %g' % (step, train_accuracy))

                    # test_count_times = int(test_size / accuracy_batch)
                    test_accuracy = 0
                    for test_k in xrange(count_times):
                        test_images_batch, test_labels_batch = sess.run(
                            [test_images, test_labels])
                        test_feed_dict = {
                            x: test_images_batch,
                            y_: test_labels_batch,
                            keep_prob_local3: 1.0,
                            keep_prob_local4: 1.0
                        }
                        test_accuracy = test_accuracy + accuracy.eval(
                            feed_dict=test_feed_dict)
                    # calculate the total test accuracy
                    test_accuracy = test_accuracy / count_times
                    print('step %d, test accuracy %g' % (step, test_accuracy))

                    # Save all variables of the TensorFlow graph to a
                    # checkpoint. Append the global_step counter
                    # to the filename so we save the last several checkpoints.
                    checkpoint_file = os.path.join(log_dir, 'model.ckpt')
                    saver.save(sess, save_path=checkpoint_file, global_step=step)
                    print("Saved checkpoint " + str(step) + " steps.")
display_step = 100
record_step = 5000
train_size = 50000
test_size = 10000
# dataset directory
data_dir = './CIFAR/'
print('begin')

with tf.name_scope("get_data"):
    # get the training data
    # images_train, labels_train = cifar10_input.inputs(
    #     eval_data=False, data_dir=data_dir, batch_size=batch_size)
    images_train, labels_train = cifar10_input.distorted_inputs(
        data_dir=data_dir, batch_size=batch_size)
    # Oddly, moving this data-reading code further down the program makes it hang indefinitely.
    image_test, labesl_test = cifar10_input.inputs(eval_data=True,
                                                   data_dir=data_dir,
                                                   batch_size=test_size)
    image_test_mid, labesl_test_mid = cifar10_input.inputs(eval_data=True,
                                                           data_dir=data_dir,
                                                           batch_size=1000)
print('begin data')


def weight_variable(name, shape, stddev=5e-2, wd=None):
    initial = tf.truncated_normal(shape=shape, mean=0.0, stddev=stddev)
    if wd is not None:
        weight_decay = tf.multiply(tf.nn.l2_loss(initial), wd, name='weight_loss')
        tf.add_to_collection('losses', weight_decay)
    return tf.Variable(initial, name=name)
def eval():
    '''Evaluation entry point.'''
    eval_data = FLAGS.eval_data == 'test'  # boolean flag: whether to use the test set
    # Prepare the data; it is fed one batch at a time.
    images_test, labels_test = cifar10_input.inputs(
        eval_data=eval_data, data_dir=FLAGS.data_dir, batch_size=FLAGS.batch_size)

    # Placeholders are still needed for evaluation; only the data flowing through changes.
    images_holder = tf.placeholder(tf.float32, [FLAGS.batch_size, 24, 24, 3])
    labels_holder = tf.placeholder(tf.int32, [FLAGS.batch_size])

    # Network predictions.
    logits = my_cifarmodel.inference(images_holder, training=False)
    saver = tf.train.Saver()
    loss = my_cifarmodel.loss(logits, labels_holder)
    tf.summary.scalar('loss', loss)

    # Evaluation metrics.
    top_k_op = tf.nn.in_top_k(logits, labels_holder, 1)
    true_rate = tf.reduce_mean(tf.cast(top_k_op, tf.float32))
    tf.summary.scalar('true_rate', true_rate)
    merged = tf.summary.merge_all()

    with tf.Session() as sess:
        test_writer = tf.summary.FileWriter(FLAGS.eval_dir, sess.graph)
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            # Restores from checkpoint
            saver.restore(sess, ckpt.model_checkpoint_path)
            # Assuming model_checkpoint_path looks something like:
            #   /my-favorite-path/cifar10_train/model.ckpt-0,
            # extract global_step from it.
            # global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        else:
            print('No checkpoint file found')
            return

        num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size))
        total_examples_num = num_iter * FLAGS.batch_size
        true_count = 0
        step = 0
        tf.train.start_queue_runners()
        while step < num_iter:
            # Fetch one batch of data.
            images_batch, labels_batch = sess.run([images_test, labels_test])
            # Accumulate prediction statistics.
            # loss_value = sess.run(loss,
            #                       feed_dict={images_holder: images_batch,
            #                                  labels_holder: labels_batch})
            true_count0 = np.sum(
                sess.run(top_k_op,
                         feed_dict={
                             images_holder: images_batch,
                             labels_holder: labels_batch
                         }))
            loss_value, summary = sess.run([loss, merged],
                                           feed_dict={
                                               images_holder: images_batch,
                                               labels_holder: labels_batch
                                           })
            test_writer.add_summary(summary, step)  # write this step to the log
            true_rate = true_count0 / FLAGS.batch_size
            step += 1
            true_count += true_count0
            print('step %d true rate: %.3f loss: %.3f' % (step, true_rate, loss_value))
        test_writer.close()
        total_true_rate = true_count / total_examples_num
        print('Total true rate: %.3f' % total_true_rate)
correct_prediction = tf.equal(tf.argmax(logits, 1), y_holder)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar('accuracy', accuracy)

saver = tf.train.Saver()
coord = tf.train.Coordinator()
merged = tf.summary.merge_all()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter("/deeplearning_cifar10/logs/", sess.graph)
    data_dir = r'E:\cifar10_data\cifar-10-batches-bin'
    train_image, train_label = cifar10_input.distorted_inputs(
        data_dir=data_dir, batch_size=batch_size)
    test_image, test_label = cifar10_input.inputs(
        eval_data=True, data_dir=data_dir, batch_size=batch_size)
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    loss_list = []
    for i in range(max_step):
        # run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)  # configure what to record at run time
        # run_metadata = tf.RunMetadata()  # proto that records run-time information
        lr = learning_rate_schedule(max_step)
        op_train, op_labels = sess.run([train_image, train_label])
        _, loss, step = sess.run([train_op, cost, global_step],
                                 feed_dict={
                                     x_holder: op_train,
                                     y_holder: op_labels,
                                     keep_prob: 0.5,
                                     learning_rate: lr,
                                     is_training: True})
        precision = sess.run(
            accuracy,
            feed_dict={
                x_holder: op_train,
                y_holder: op_labels,
                keep_prob: 1,
def main():
    def loss(logits, y):
        labels = tf.cast(y, tf.int64)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels, name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)
        return tf.add_n(tf.get_collection('losses'), name='total_loss')

    max_epoch = 3000
    batch_step = 128
    data_dir = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        'cifar-10-batches-bin')
    # cifar10.maybe_download_and_extract()
    train_images, train_labels = cifar10_input.distorted_inputs(
        data_dir=data_dir, batch_size=batch_step)
    test_images, test_labels = cifar10_input.inputs(eval_data=True,
                                                    data_dir=data_dir,
                                                    batch_size=1000)
    x = tf.placeholder(tf.float32, [None, 24, 24, 3])
    y = tf.placeholder(tf.int32, [None])
    model = alexNet(x, 10)
    loss = loss(model.fc3, y)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = tf.train.AdamOptimizer(0.1**3).minimize(loss)
    top_k_op = tf.nn.in_top_k(model.fc3, y, 1)
    accuracy = tf.reduce_mean(tf.cast(top_k_op, tf.float32))
    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()
    tf.train.start_queue_runners()

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    plt.ion()
    plt.show()
    train_list = []
    test_list = []
    test_x, test_y = sess.run([test_images, test_labels])
    for i in range(max_epoch):
        start_time = time.time()
        train_x, train_y = sess.run([train_images, train_labels])
        _, loss_value = sess.run([train_op, loss], feed_dict={x: train_x, y: train_y})
        duration = time.time() - start_time
        if i % 100 == 0:
            examples_per_sec = batch_step / duration
            sec_per_batch = float(duration)
            format_str = ('step %d, loss=%.2f (%.1f examples/sec; %.3f sec/batch)')
            print(format_str % (i, loss_value, examples_per_sec, sec_per_batch))
            train_accuracy = accuracy.eval(feed_dict={x: train_x, y: train_y})
            model.training = False
            test_accuracy = accuracy.eval(feed_dict={x: test_x, y: test_y})
            model.training = True
            print("step %d, training accuracy %g" % (i, train_accuracy))
            print("step %d, test accuracy %g" % (i, test_accuracy))
            train_list.append(train_accuracy)
            test_list.append(test_accuracy)
    saver = tf.train.Saver()
    x_axis = list(np.arange(1, max_epoch / 100 + 1) * 100)
    save_path = saver.save(sess, model_path)
    ax.plot(x_axis, train_list, 'b-', 'o', lw=5)
    ax.plot(x_axis, test_list, 'r-', 'v', lw=5)  # plot the test curve (was plotting train_list twice)
    model.training = False
    precision = accuracy.eval(feed_dict={x: test_x, y: test_y})
    model.training = True
    print('precision @1 = %.3f' % precision)
    correct_prediction = tf.equal(
        tf.cast(tf.argmax(output, 1), dtype=tf.int32), y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar("validation error", (1.0 - accuracy))
    return accuracy


g = tf.Graph()
with g.as_default():
    if data_set == 'cifar10':
        with tf.device('/cpu:0'):
            X_train_processed, Y_train = cifar10_input.distorted_inputs(
                data_dir=data_dir, batch_size=batch_size)
            X_test_processed, Y_test = cifar10_input.inputs(
                eval_data=False, data_dir=data_dir, batch_size=batch_size)
            X_valid_processed, Y_valid = cifar10_input.inputs(
                eval_data=True, data_dir=data_dir, batch_size=batch_size)
    if isNB:
        output, x, y, keep_prob, is_train = inference()
    else:
        output, x, y, keep_prob = inference()
    cost = loss(output, y)
    global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = training(cost, global_step)
    eval_op = evaluate(output, y)
            validation_data=(x_test, y_test),
            initial_epoch=epoch - 1)
        model.save_weights('cifar10vgg.h5')
        return model


if __name__ == '__main__':
    #(x_train, y_train), (x_test, y_test) = cifar10.load_data()
    #x_train = x_train.astype('float32')
    #x_test = x_test.astype('float32')
    cifar10_input.maybe_download_and_extract(DATA_DIR)
    print("cifar10 data path", os.path.join(DATA_DIR, 'cifar-10-batches-bin'))
    x_train, y_train = cifar10_input.inputs(
        False, os.path.join(DATA_DIR, 'cifar-10-batches-bin'),
        cifar10_input.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN)
    x_test, y_test = cifar10_input.inputs(
        True, os.path.join(DATA_DIR, 'cifar-10-batches-bin'),
        cifar10_input.NUM_EXAMPLES_PER_EPOCH_FOR_EVAL)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    #y_train = keras.utils.to_categorical(y_train, 10)
    #y_test = keras.utils.to_categorical(y_test, 10)
    print(y_train[10])
    print(y_test[10])
    model = cifar10vgg()
def train(self, model):
    # training parameters
    batch_size = 128
    maxepoches = 250
    learning_rate = 0.1
    lr_decay = 1e-6

    # The data, shuffled and split between train and test sets:
    #(x_train, y_train), (x_test, y_test) = cifar10.load_data()
    x_train, y_train = cifar10_input.inputs(
        False, os.path.join(DATA_DIR, 'cifar-10-batches-bin'),
        cifar10_input.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN)
    x_test, y_test = cifar10_input.inputs(
        True, os.path.join(DATA_DIR, 'cifar-10-batches-bin'),
        cifar10_input.NUM_EXAMPLES_PER_EPOCH_FOR_EVAL)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    #x_train, x_test = self.normalize(x_train, x_test)
    #y_train = keras.utils.to_categorical(y_train, self.num_classes)
    #y_test = keras.utils.to_categorical(y_test, self.num_classes)

    lrf = learning_rate

    # data augmentation
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images
        vertical_flip=False)  # randomly flip images
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # optimization details
    sgd = optimizers.SGD(lr=lrf, decay=lr_decay, momentum=0.9, nesterov=True)
    #model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])

    # training process in a for loop with learning rate drop every 25 epochs.
    for epoch in range(1, maxepoches):
        if epoch % 25 == 0 and epoch > 0:
            lrf /= 2
            sgd = optimizers.SGD(lr=lrf, decay=lr_decay, momentum=0.9, nesterov=True)
            model.compile(loss='categorical_crossentropy',
                          optimizer=sgd,
                          metrics=['accuracy'])
        #historytemp = model.fit_generator(datagen.flow(x_train, y_train,
        #                                               batch_size=batch_size),
        #                                  steps_per_epoch=x_train.shape[0] // batch_size,
        #                                  epochs=epoch,
        #                                  validation_data=(x_test, y_test), initial_epoch=epoch-1)
        # (a stray positional batch_size argument here would collide with the
        #  steps_per_epoch keyword, so it has been dropped)
        historytemp = model.fit_generator(
            cifar10_input.inputs(False,
                                 os.path.join(DATA_DIR, 'cifar-10-batches-bin'),
                                 generator=True),
            steps_per_epoch=cifar10_input.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN,
            epochs=epoch,
            validation_data=(x_test, y_test),
            initial_epoch=epoch - 1)
    model.save_weights('cifar10vgg.h5')
    return model
def inputs(eval_data=True):
    data_dir = os.path.join('data/cifar10_data', 'cifar-10-batches-bin')
    return cifar10_input.inputs(eval_data=eval_data,
                                data_dir=data_dir,
                                batch_size=batch_size)
def train():
    global parameters
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=FLAGS.log_device_placement)
    if device_str.find('cpu') >= 0:  # cpu version
        num_threads = os.getenv('OMP_NUM_THREADS', 1)
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=int(num_threads))
    with tf.Graph().as_default(), tf.device(get_device_str(
            FLAGS.device_id)), tf.Session(config=config) as sess:
        images, labels = cifar10_input.inputs(False, FLAGS.data_dir,
                                              FLAGS.batch_size)
        print('Images: ', images)

        #logits = inference(images, is_training=True, num_blocks=9)
        logits = inference_small(images, is_training=True, num_blocks=9)
        # Add a simple objective so we can calculate the backward pass.
        loss_value = loss(logits, labels)
        # Compute the gradient with respect to all the parameters.
        lr = 0.01
        #grad = tf.train.GradientDescentOptimizer(lr).minimize(loss_value)
        grad = tf.train.MomentumOptimizer(lr, 0.9).minimize(loss_value)
        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())
        # Build an initialization operation.
        init = tf.initialize_all_variables()
        # Start running operations on the Graph.
        sess.run(init)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        real_batch_size = FLAGS.batch_size
        num_batches_per_epoch = int(
            (EPOCH_SIZE + real_batch_size - 1) / real_batch_size)
        iterations = FLAGS.epochs * num_batches_per_epoch
        average_batch_time = 0.0
        epochs_info = []
        average_loss = 0.0
        for step in xrange(iterations):
            start_time = time.time()
            _, loss_v = sess.run([grad, loss_value])
            duration = time.time() - start_time
            average_batch_time += float(duration)
            average_loss += loss_v
            assert not np.isnan(loss_v), 'Model diverged with loss = NaN'
            if step % FLAGS.log_step == 0:
                examples_per_sec = FLAGS.batch_size / duration
                sec_per_batch = float(duration)
                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)'
                )
                print(format_str % (datetime.now(), step, loss_v,
                                    examples_per_sec, sec_per_batch))
            if step > 0 and step % (FLAGS.eval_step * num_batches_per_epoch) == 0:
                average_loss /= num_batches_per_epoch * FLAGS.eval_step
                epochs_info.append(
                    '%d:_:%s' %
                    (step / (FLAGS.eval_step * num_batches_per_epoch), average_loss))
                average_loss = 0.0
            if step == iterations - 1:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
        coord.request_stop()
        coord.join(threads)
        average_batch_time /= iterations
        print 'average_batch_time: ', average_batch_time
        print('epoch_info: %s' % ','.join(epochs_info))
""" Created on Tue Nov 27 21:01:45 2018 @author: MR """ import tensorflow as tf import cifar10_input import numpy as np #from tensorflow.contrib.layers.python.layers import batch_norm #from matplotlib import pylab batch_size = 128 data_dir = '.\cifar-10-python\cifar-10-batches-py' image_train, labels_train = cifar10_input.inputs(eval_data=False, data_dir=data_dir, batch_size=batch_size) image_test, labels_test = cifar10_input.inputs(eval_data=True, data_dir=data_dir, batch_size=batch_size) #定义batch_norm层,传参里面还包含一个是否训练 #def batch_norm_layer(value,train=None,name='batch_norm'): # if train is not None: # return batch_norm(value,decay=0.9,updates_collections=None,is_training=True) # else: # return batch_norm(value,decay=0.9,updates_collections=None,is_training=False) #输入输出 x = tf.placeholder(tf.float32, [None, 24, 24, 3]) y = tf.placeholder(tf.float32, [None, 10])
# import part
import tensorflow as tf
import numpy as np
import cifar10_input as cifar10

# input data
batch_size = 10000
input_data = cifar10.inputs(
    True, "/home/schka/Documents/deep_learn_sajat/cifar-10-batches-py",
    batch_size)

init = tf.global_variables_initializer()
sess = tf.Session()
tf.train.start_queue_runners(sess)
sess.run(init)

dec_labels = sess.run(input_data[1])
#print(dec_labels.dtype)
#print(dec_labels.shape)
#dec_labels = dec_labels.tolist()

# One-hot encode the labels (range(batch_size) so the last sample is included).
labels = np.zeros((batch_size, 10))
for i in range(batch_size):
    ind = dec_labels[i]
    labels[i, ind] = 1.

images = sess.run(input_data[0])


def get_images():
    return images


def get_labels():
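# A vectorized alternative to the one-hot loop above (illustrative sketch, not
# part of the original snippet): it assumes dec_labels is a 1-D integer array
# of class indices in [0, 10), exactly as produced by the code above.
labels = np.zeros((dec_labels.shape[0], 10), dtype=np.float32)
labels[np.arange(dec_labels.shape[0]), dec_labels] = 1.0
# Or, equivalently, via identity-matrix row selection:
# labels = np.eye(10, dtype=np.float32)[dec_labels]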
def train():
    global parameters
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=FLAGS.log_device_placement)
    config.intra_op_parallelism_threads = 1
    config.inter_op_parallelism_threads = 0
    with tf.Graph().as_default(), tf.device("/" + FLAGS.local_ps_device + ":0"):
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        device_ids = FLAGS.device_ids
        if not device_ids:
            device_ids = [str(i) for i in range(FLAGS.num_gpus)]
        else:
            device_ids = device_ids.split(',')
        print('device_ids: ', device_ids)
        if len(device_ids) > FLAGS.num_gpus:
            print('device_ids should list the same number of GPUs as num_gpus')
            return

        lr = 0.001
        optimizer = tf.train.MomentumOptimizer(lr, 0.9)

        def assign_to_device(device, ps_device=FLAGS.local_ps_device):
            worker_device = device
            ps_sizes = [0]
            # Call lower(); comparing the bound method to 'gpu' was always False.
            if FLAGS.local_ps_device.lower() == 'gpu':
                ps_sizes = [0] * FLAGS.num_gpus

            def _assign(op):
                if op.device:
                    return op.device
                if op.type not in ['Variable', 'VariableV2']:
                    return worker_device
                device_index, _ = min(enumerate(ps_sizes),
                                      key=operator.itemgetter(1))
                device_name = '/' + FLAGS.local_ps_device + ':' + str(device_index)
                var_size = op.outputs[0].get_shape().num_elements()
                ps_sizes[device_index] += var_size
                return device_name

            return _assign

        images = None
        labels = None
        initalizer = None
        if FLAGS.use_dataset:
            with tf.device('/CPU:0'):
                iterator, initalizer = cifar10_input.dataSet(FLAGS.data_dir,
                                                             FLAGS.batch_size)
                images, labels = iterator.get_next()

        tower_grads = []
        average_loss_tensor = []
        reuse_variables = False
        for i in xrange(FLAGS.num_gpus):
            print('what is i: ', i)
            with tf.device('/gpu:%s' % device_ids[i]):
                with tf.name_scope('%s_%s' % ('TOWER', device_ids[i])) as n_scope:
                    _init_global_variables()
                    with tf.device('/cpu:0'):
                        if not FLAGS.use_dataset:
                            images, labels = cifar10_input.inputs(
                                False, FLAGS.data_dir, FLAGS.batch_size)
                    with tf.variable_scope(tf.get_variable_scope(),
                                           reuse=reuse_variables):
                        logits = inference(images)
                        loss = loss_function(
                            logits, tf.contrib.layers.one_hot_encoding(labels, 10))
                    reuse_variables = True
                    average_loss_tensor.append(loss)
                    grads = optimizer.compute_gradients(loss)
                    tower_grads.append(grads)

        grads = average_gradients(tower_grads)
        apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step)
        train_op = apply_gradient_op
        average_op = tf.reduce_mean(average_loss_tensor)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        init = tf.global_variables_initializer()
        sess = tf.Session(config=config)
        sess.run(init)

        coord = None
        threads = None
        if FLAGS.use_dataset:
            sess.run(initalizer)
        else:
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        real_batch_size = FLAGS.batch_size * FLAGS.num_gpus
        num_batches_per_epoch = int((EPOCH_SIZE + real_batch_size - 1) / real_batch_size)
        iterations = FLAGS.epochs * num_batches_per_epoch
        average_batch_time = 0.0
        epochs_info = []
        step = 0
        average_loss = 0.0
        for step in xrange(iterations):
            start_time = time.time()
            _, loss_v = sess.run([train_op, average_op])
            duration = time.time() - start_time
            average_batch_time += float(duration)
            assert not np.isnan(loss_v), 'Model diverged with loss = NaN'
            average_loss += loss_v
            if step % FLAGS.log_step == 0:
                examples_per_sec = real_batch_size / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)')
                print (format_str % (datetime.now(), step, loss_v, examples_per_sec, sec_per_batch))
            if step > 0 and step % (FLAGS.eval_step * num_batches_per_epoch) == 0:
                average_loss /= num_batches_per_epoch * FLAGS.eval_step
                print ('epoch: %d, loss: %.2f' % (step / num_batches_per_epoch, average_loss))
                epochs_info.append('%d:_:%s' % (step / (FLAGS.eval_step * num_batches_per_epoch), average_loss))
                average_loss = 0.0

        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)

        if not FLAGS.use_dataset:
            coord.request_stop()
            coord.join(threads)

        average_batch_time /= iterations
        print 'average_batch_time: ', average_batch_time
        print ('epoch_info: %s' % ','.join(epochs_info))
import tensorflow as tf
import time, os

from config import opt
from cifar_model import *

os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3'

with tf.device("/cpu:0"):
    with tf.Graph().as_default():
        with tf.variable_scope("cifar_conv_bn_model"):
            # Load dataset
            distorted_images, distorted_labels = cifar10_input.distorted_inputs(
                data_dir=opt.data_dir, batch_size=opt.batch_size)
            val_images, val_labels = cifar10_input.inputs(
                eval_data=True, data_dir=opt.data_dir, batch_size=opt.batch_size)

            # Computation
            x = tf.placeholder("float", [None, 24, 24, 3])
            y = tf.placeholder("int32", [None])
            keep_prob = tf.placeholder(tf.float32)  # dropout probability
            phase_train = tf.placeholder(tf.bool)  # training or testing

            # Model
            output = inference(x, keep_prob, phase_train)

            # Loss
            loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=output, labels=tf.cast(y, tf.int64)))

            # Optimizer
            optimizer = tf.train.AdamOptimizer(learning_rate=opt.lr).minimize(loss)

            init = tf.global_variables_initializer()

            # Accuracy
    '''Create a variable and add an L2 weight loss for it.'''
    var = tf.Variable(tf.truncated_normal(shape, stddev=stddev))
    if wl is not None:
        weight_loss = tf.multiply(tf.nn.l2_loss(var), wl, name='weight_loss')
        tf.add_to_collection('losses', weight_loss)
    return var


## Download the dataset.
cifar10.maybe_download_and_extract()
## Generate augmented (distorted) training data.
images_train, labels_train = cifar10_input.distorted_inputs(data_dir=data_dir,
                                                            batch_size=batch_size)
## Generate test data: cropped only, no augmentation.
images_test, labels_test = cifar10_input.inputs(eval_data=True,
                                                data_dir=data_dir,
                                                batch_size=batch_size)
## Create placeholders for the input data.
images_holder = tf.placeholder(dtype=tf.float32, shape=[batch_size, 24, 24, 3])
labels_holder = tf.placeholder(dtype=tf.int32, shape=[batch_size])

#================================ Build the forward network ================================
#======================================
# First convolutional layer
#======================================
w_conv1 = variable_with_weight_loss(shape=[5, 5, 3, 64], stddev=0.05)
b_conv1 = tf.Variable(tf.constant(0.0, shape=[64]))
conv_conv1 = tf.nn.conv2d(input=images_holder, filter=w_conv1,
                          strides=[1, 1, 1, 1], padding='SAME')
h_conv1 = tf.nn.relu(tf.nn.bias_add(conv_conv1, b_conv1))
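# The weight losses registered in the 'losses' collection above only take effect
# once they are summed together with the data loss. A minimal sketch of that
# pattern (the total_loss name is illustrative; the same tf.add_n-over-collection
# idiom appears in the other training snippets in this collection):
def total_loss(logits, labels):
    # Data term: sparse softmax cross-entropy averaged over the batch.
    labels = tf.cast(labels, tf.int64)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels, name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)
    # Total loss: data term plus every L2 weight loss added to the collection.
    return tf.add_n(tf.get_collection('losses'), name='total_loss')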
def inputs(eval_data):
    """Construct input for CIFAR evaluation using the Reader ops.

    Args:
      eval_data: bool, indicating if one should use the train or eval data set.

    Returns:
      images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
      labels: Labels. 1D tensor of [batch_size] size.

    Raises:
      ValueError: If no data_dir.
    """
    if not FLAGS.data_dir:
        raise ValueError('Please supply a data_dir')
    data_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
    images, labels = cifar10_input.inputs(eval_data=eval_data,
                                          data_dir=data_dir,
                                          batch_size=FLAGS.batch_size)
    if FLAGS.use_fp16:
        images = tf.cast(images, tf.float16)
        labels = tf.cast(labels, tf.float16)
    return images, labels


# def accuracy(logits, labels):
#     # Calculate the average cross entropy loss across the batch.
#     labels = tf.cast(labels, tf.int64)
#     accuracy = tf.metrics.accuracy(labels=labels, predictions=logits,
#                                    name='accuracy_per_example')
#     return accuracy


def inference(images):
    """Build the CIFAR-10 model.

    Args:
      images: Images returned from distorted_inputs() or inputs().

    Returns:
      Logits.
    """
    # We instantiate all variables using tf.get_variable() instead of
    # tf.Variable() in order to share variables across multiple GPU training runs.
    # If we only ran this model on a single GPU, we could simplify this function
    # by replacing all instances of tf.get_variable() with tf.Variable().
    #
    # conv1
    with tf.variable_scope('conv1') as scope:
        kernel = _variable_with_weight_decay('weights',
                                             shape=[5, 5, 3, 64],
                                             stddev=5e-2,
                                             wd=None)
        conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
        pre_activation = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)
        _activation_summary(conv1)

    # pool1
    pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                           padding='SAME', name='pool1')
    # norm1
    norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                      name='norm1')

    # conv2
    with tf.variable_scope('conv2') as scope:
        kernel = _variable_with_weight_decay('weights',
                                             shape=[5, 5, 64, 128],
                                             stddev=5e-2,
                                             wd=None)
        conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [128], tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(pre_activation, name=scope.name)
        _activation_summary(conv2)

    # pool2
    pool2 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                           padding='SAME', name='pool2')
    # norm2
    norm2 = tf.nn.lrn(pool2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                      name='norm2')

    # conv3
    with tf.variable_scope('conv3') as scope:
        kernel = _variable_with_weight_decay('weights',
                                             shape=[5, 5, 128, 128],
                                             stddev=5e-2,
                                             wd=None)
        conv = tf.nn.conv2d(norm2, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [128], tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv, biases)
        conv3 = tf.nn.relu(pre_activation, name=scope.name)
        _activation_summary(conv3)

    # norm3
    norm3 = tf.nn.lrn(conv3, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                      name='norm3')
    # pool3
    pool3 = tf.nn.max_pool(norm3, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                           padding='SAME', name='pool3')

    # local3
    with tf.variable_scope('local3') as scope:
        # Move everything into depth so we can perform a single matrix multiply.
        reshape = tf.reshape(pool3, [images.get_shape().as_list()[0], -1])
        dim = reshape.get_shape()[1].value
        weights = _variable_with_weight_decay('weights', shape=[dim, 384],
                                              stddev=0.04, wd=0.004)
        biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
        _activation_summary(local3)

    # local4
    with tf.variable_scope('local4') as scope:
        weights = _variable_with_weight_decay('weights', shape=[384, 192],
                                              stddev=0.04, wd=0.004)
        biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name=scope.name)
        _activation_summary(local4)

    # linear layer(WX + b),
    # We don't apply softmax here because
    # tf.nn.sparse_softmax_cross_entropy_with_logits accepts the unscaled logits
    # and performs the softmax internally for efficiency.
    with tf.variable_scope('softmax_linear') as scope:
        weights = _variable_with_weight_decay('weights', [192, NUM_CLASSES],
                                              stddev=1 / 192.0, wd=None)
        biases = _variable_on_cpu('biases', [NUM_CLASSES],
                                  tf.constant_initializer(0.0))
        softmax_linear = tf.add(tf.matmul(local4, weights), biases, name=scope.name)
        _activation_summary(softmax_linear)

    # tf.summary.scalar('accuracy', accuracy(softmax_linear, labels))
    return softmax_linear
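# Downstream, the unscaled logits returned by inference() feed both the
# sparse softmax cross-entropy loss mentioned in the comment above and the
# top-1 evaluation ops used in the eval scripts of this collection. A minimal
# evaluation sketch (variable names are illustrative; `labels` is assumed to be
# an int32 tensor of class indices):
top_k_op = tf.nn.in_top_k(logits, labels, 1)               # bool per example
accuracy = tf.reduce_mean(tf.cast(top_k_op, tf.float32))   # batch accuracy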
def evaluate_last():
    """Loads the model and runs evaluation."""
    with tf.Graph().as_default():
        # Get images and labels for CIFAR-10.
        model_dir = os.path.join(FLAGS.model_dir, FLAGS.name)
        eval_data = FLAGS.eval_data == "test"
        images, labels = data_input.inputs(eval_data=eval_data,
                                           data_dir=FLAGS.data_dir,
                                           batch_size=FLAGS.batch_size)
        # images, labels = data_input.distorted_inputs(eval_data=eval_data,
        #                                              data_dir=FLAGS.data_dir,
        #                                              batch_size=FLAGS.batch_size)

        # Generate placeholders for the images and labels.
        keep_prob = utils.placeholder_inputs(FLAGS.batch_size)

        # Build a Graph that computes predictions from the inference model.
        logits = model.inference(images, keep_prob)

        # Add to the Graph the Ops for loss calculation.
        loss = model.loss(logits, labels)

        # Calculate predictions.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = model.evaluation(logits, labels)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Create a session for running Ops on the Graph.
        sess = tf.Session()

        # Restore the moving average version of the learned variables for eval.
        # variable_averages = tf.train.ExponentialMovingAverage(
        #     cifar10.MOVING_AVERAGE_DECAY)
        # variables_to_restore = variable_averages.variables_to_restore()
        # saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        # summary_op = tf.merge_all_summaries()
        # graph_def = tf.get_default_graph().as_graph_def()
        # summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir,
        #                                         graph_def=graph_def)

        # Run the Op to initialize the variables.
        init = tf.initialize_all_variables()
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        print(model_dir)
        ckpt = tf.train.get_checkpoint_state(model_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("No checkpoints found! ")
            exit(1)

        print("Doing Evaluation with lots of data")
        utils.do_eval(
            sess=sess,
            eval_correct=eval_correct,
            keep_prob=keep_prob,
            num_examples=data_input.NUM_EXAMPLES_PER_EPOCH_FOR_EVAL,
        )
def variable_with_weight_loss(shape, stddev, wl):
    var = tf.Variable(tf.truncated_normal(shape, stddev=stddev))
    if wl is not None:
        weight_loss = tf.multiply(tf.nn.l2_loss(var), wl, name='weight_loss')
        tf.add_to_collection('losses', weight_loss)
    return var


cifar10.maybe_download_and_extract()
images_train, labels_train = cifar10_input.distorted_inputs(
    data_dir=data_dir, batch_size=batch_size)
images_test, labels_test = cifar10_input.inputs(eval_data=True,
                                                data_dir=data_dir,
                                                batch_size=batch_size)
image_holder = tf.placeholder(tf.float32, [batch_size, 24, 24, 3])
label_holder = tf.placeholder(tf.int32, [batch_size])

weight1 = variable_with_weight_loss(shape=[5, 5, 3, 64], stddev=5e-2, wl=0.0)
kernel1 = tf.nn.conv2d(image_holder, weight1, [1, 1, 1, 1], padding='SAME')
bias1 = tf.Variable(tf.constant(0.0, shape=[64]))
conv1 = tf.nn.relu(tf.nn.bias_add(kernel1, bias1))
pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                       padding='SAME')
def main():
    """Main entry point."""
    # Download the CIFAR-10 dataset and extract it.
    cifar10.maybe_download_and_extract()
    # distorted_inputs produces the (augmented) training data.
    images_train, labels_train = cifar10_input.distorted_inputs(data_dir=dataset_dir,
                                                                batch_size=batch_size)
    # Produce the test data.
    images_test, labels_test = cifar10_input.inputs(eval_data=True,
                                                    data_dir=dataset_dir,
                                                    batch_size=batch_size)
    # Create placeholders for the features and labels.
    image_holder = tf.placeholder(tf.float32, [batch_size, 24, 24, 3])
    label_holder = tf.placeholder(tf.int32, [batch_size])

    # Build the CNN and get its output.
    logitis = build_cnn_network(image_holder)
    # Compute the loss.
    total_loss = get_total_loss(logitis, label_holder)
    # Set up the optimizer.
    train_op = tf.train.AdamOptimizer(1e-3).minimize(total_loss)
    top_k_op = tf.nn.in_top_k(logitis, label_holder, 1)

    # Create the session.
    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()
    # Start the input threads; cifar10_input.distorted_inputs relies on queue runners.
    tf.train.start_queue_runners()

    # Train the model.
    for step in range(max_steps):
        start_time = time.time()
        image_batch, label_batch = sess.run([images_train, labels_train])
        _, loss_value = sess.run([train_op, total_loss],
                                 feed_dict={image_holder: image_batch,
                                            label_holder: label_batch})
        duration = time.time() - start_time
        if step % disp_step == 0:
            sample_per_sec = batch_size / duration
            sec_per_batch = float(duration)
            print('step %d, loss=%.2f (%.1f sample/sec; %.3f sec/batch)' % (
                step, loss_value, sample_per_sec, sec_per_batch
            ))

    # Evaluate the model.
    n_test_samples = 10000
    n_iter = int(math.ceil(n_test_samples / batch_size))
    true_count = 0
    total_sample_count = n_iter * batch_size
    step = 0
    while step < n_iter:
        image_batch, label_batch = sess.run([images_test, labels_test])
        predictions = sess.run([top_k_op],
                               feed_dict={image_holder: image_batch,
                                          label_holder: label_batch})
        true_count += np.sum(predictions)
        step += 1
    precision = true_count / total_sample_count
    print('top 1 precision: %.3f' % precision)
def train():
    print('[Dataset Configuration]')
    print('\tCIFAR-100 dir: %s' % FLAGS.data_dir)
    print('\tNumber of classes: %d' % FLAGS.num_classes)
    print('\tNumber of test images: %d' % FLAGS.num_test_instance)

    print('[Network Configuration]')
    print('\tBatch size: %d' % FLAGS.batch_size)
    #print('\tResidual blocks per group: %d' % FLAGS.num_residual_units)
    #print('\tNetwork width multiplier: %d' % FLAGS.k)

    print('[Testing Configuration]')
    print('\tCheckpoint path: %s' % FLAGS.ckpt_path)
    print('\tDataset: %s' % ('Training' if FLAGS.train_data else 'Test'))
    print('\tNumber of testing iterations: %d' % FLAGS.test_iter)
    print('\tOutput path: %s' % FLAGS.output)
    print('\tGPU memory fraction: %f' % FLAGS.gpu_fraction)
    print('\tLog device placement: %d' % FLAGS.log_device_placement)

    with tf.Graph().as_default():
        # The CIFAR-100 dataset
        with tf.variable_scope('test_image'):
            test_images, test_labels = data_input.inputs(
                not FLAGS.train_data, FLAGS.data_dir, FLAGS.batch_size)

        # The class labels
        with open(os.path.join(FLAGS.data_dir, 'batches.meta.txt')) as fd:
            classes = [temp.strip() for temp in fd.readlines()]

        # Build a Graph that computes the predictions from the inference model.
        images = tf.placeholder(tf.float32, [
            FLAGS.batch_size, data_input.IMAGE_SIZE, data_input.IMAGE_SIZE, 3
        ])
        labels = tf.placeholder(tf.int32, [FLAGS.batch_size])

        # Build model
        decay_step = FLAGS.lr_step_epoch * FLAGS.num_train_instance / FLAGS.batch_size
        hp = wrinc.HParams(batch_size=FLAGS.batch_size,
                           num_classes=FLAGS.num_classes,
                           initial_lr=FLAGS.initial_lr,
                           decay_step=decay_step,
                           lr_decay=FLAGS.lr_decay,
                           momentum=FLAGS.momentum)
        network = wrinc.wrinc(hp, images, labels, None)
        network.build_model()
        # network.build_train_op()  # NO training op

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_fraction),
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables(), max_to_keep=10000)
        if os.path.isdir(FLAGS.ckpt_path):
            ckpt = tf.train.get_checkpoint_state(FLAGS.ckpt_path)
            # Restores from checkpoint
            if ckpt and ckpt.model_checkpoint_path:
                print('\tRestore from %s' % ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print('No checkpoint file found in the dir [%s]' % FLAGS.ckpt_path)
                sys.exit(1)
        elif os.path.isfile(FLAGS.ckpt_path):
            print('\tRestore from %s' % FLAGS.ckpt_path)
            saver.restore(sess, FLAGS.ckpt_path)
        else:
            print('No checkpoint file found in the path [%s]' % FLAGS.ckpt_path)
            sys.exit(1)

        # Start queue runners
        tf.train.start_queue_runners(sess=sess)

        # Testing!
        result_ll = [[0, 0] for _ in range(FLAGS.num_classes)]  # Correct/wrong counts for each class
        test_loss = 0.0
        for i in range(FLAGS.test_iter):
            test_images_val, test_labels_val = sess.run(
                [test_images, test_labels])
            preds_val, loss_value, acc_value = sess.run(
                [network.preds, network.loss, network.acc],
                feed_dict={
                    network.is_train: False,
                    images: test_images_val,
                    labels: test_labels_val
                })
            test_loss += loss_value
            for j in range(FLAGS.batch_size):
                correct = 0 if test_labels_val[j] == preds_val[j] else 1
                result_ll[test_labels_val[j] % FLAGS.num_classes][correct] += 1
        test_loss /= FLAGS.test_iter

        # Summary display & output
        acc_list = [float(r[0]) / float(r[0] + r[1]) for r in result_ll]
        result_total = np.sum(np.array(result_ll), axis=0)
        acc_total = float(result_total[0]) / np.sum(result_total)

        print 'Class \t\t\tT\tF\tAcc.'
        format_str = '%-31s %7d %7d %.5f'
        for i in range(FLAGS.num_classes):
            print format_str % (classes[i], result_ll[i][0], result_ll[i][1], acc_list[i])
        print(format_str % ('(Total)', result_total[0], result_total[1], acc_total))

        # Output to file (if specified)
        if FLAGS.output.strip():
            with open(FLAGS.output, 'w') as fd:
                fd.write('Class \t\t\tT\tF\tAcc.\n')
                format_str = '%-31s %7d %7d %.5f\n'
                for i in range(FLAGS.num_classes):
                    t, f = result_ll[i]
                    fd.write(format_str % (classes[i].replace(' ', '-'), t, f, acc_list[i]))
                fd.write(format_str % ('(Total)', result_total[0], result_total[1], acc_total))
def __init__(self, eval=False):
    # pretty standard/simple DNN loosely based on alexnet since its
    # simple and old so it trains decently fast on a GTX860M
    # decrease learning rate over time
    # input -> 32x32x3
    # conv1 (f=5, s=1, k=64 relu) -> 32x32x64
    # pool1 (f=3, s=2) -> 16x16x64
    # conv2 (f=5, s=1, k=64 relu) -> 16x16x64
    # pool2 (f=3, s=2) 8x8x64
    # dropout (.5)
    # fc1 (384 relu) -> 1x384
    # fc2 (192 relu) -> 1x192
    # linear -> 1x10
    epochs = 100
    learning_rate = .001
    batch_size = 16
    early_stop = False
    num_train = 50000
    f = [3, 3, 3, 3]
    k = [32, 32, 64, 64, 384]
    # conv1
    w1 = self.weight('w1', [f[0], f[0], 3, k[0]])
    b1 = self.bias('b1', [k[0]])
    # conv2
    w2 = self.weight('w2', [f[1], f[1], k[0], k[1]])
    b2 = self.bias('b2', [k[1]])
    # conv3
    w3 = self.weight('w3', [f[2], f[2], k[1], k[2]])
    b3 = self.bias('b3', [k[2]])
    # conv4
    w4 = self.weight('w4', [f[3], f[3], k[2], k[3]])
    b4 = self.bias('b4', [k[3]])
    # fc1
    w5 = self.weight('w5', [8 * 8 * k[3], k[4]])
    b5 = self.bias('b5', [k[4]])
    # fc2
    w6 = self.weight('w6', [k[4], 10])
    b6 = self.bias('b6', [10])
    # linear
    # w5 = self.weight('w5', [k[3], 10])
    # b5 = self.bias('b5', [10])
    self.params = (w1, w2, w3, w4, w5, w6)

    if eval:
        images, labels = cifar10_input.inputs(True, 'cifar-10-batches-bin', 1000)
    else:
        images, labels = cifar10_input.distorted_inputs(
            'cifar-10-batches-bin', batch_size)

    s = [1, 1, 1, 1]
    global_step = tf.train.get_or_create_global_step()

    X_ = self.conv(images, w1, s[0], b1)
    X_ = self.conv(X_, w2, s[1], b2)
    X_ = self.pool(X_, 2, 2)
    X_ = tf.nn.dropout(X_, .25)
    # X_ = self.batch_norm(X_)
    X_ = self.conv(X_, w3, s[2], b3)
    X_ = self.conv(X_, w4, s[3], b4)
    X_ = self.pool(X_, 2, 2)
    X_ = tf.nn.dropout(X_, .25)
    # X_ = self.batch_norm(X_)
    X_ = tf.nn.relu(self.fc(X_, w5, b5))
    X_ = tf.nn.dropout(X_, .5)
    logits = self.fc(X_, w6, b6)
    pred = tf.nn.softmax(logits)

    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    # l2 = tf.reduce_sum([tf.reduce_sum(tf.pow(w, 2)) for w in self.params])
    # loss = loss + weight_decay * l2
    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    optim = tf.train.AdamOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    steps_per_epoch = num_train // batch_size
    saver = tf.train.Saver()
    checkpoint = 'checkpoints/model.v2.ckpt'

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            saver.restore(sess, checkpoint)
        except:
            pass

        if eval:
            total_accuracy = 0
            total_loss = 0
            for batch in range(10):
                a, l, g = sess.run([accuracy, loss, global_step])
                total_accuracy += a
                total_loss += l
            print('global_step {} (epoch {}): test accuracy={}, test loss={}'
                  .format(g, g // steps_per_epoch, total_accuracy / 10,
                          total_loss / 10))
            coord.request_stop()
            coord.join(threads)
            return

        with tqdm(range(steps_per_epoch * epochs)) as t:
            best = 0
            for step in t:
                epoch, step_in_epoch = divmod(step, steps_per_epoch)
                if step_in_epoch == 0:
                    saver.save(sess, checkpoint)
                    total_accuracy = 0
                    total_loss = 0
                a, l, o = sess.run([accuracy, loss, optim])
                total_accuracy += a
                total_loss += l
                # step_in_epoch + 1 batches have been accumulated so far
                # (avoids a divide-by-zero on the first step of each epoch).
                t.set_postfix(
                    epoch=epoch,
                    step=step_in_epoch,
                    acc=total_accuracy / (step_in_epoch + 1),
                    loss=total_loss / (step_in_epoch + 1),
                )
            if early_stop:
                saver.restore(sess, checkpoint)
        coord.request_stop()
        coord.join(threads)
def inputs(eval_data):
    if not FLAGS.data_dir:
        raise ValueError('Please supply a data_dir')
    data_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
    return cifar10_input.inputs(eval_data=eval_data,
                                data_dir=data_dir,
                                batch_size=FLAGS.batch_size)
from gcnn_lib.coarsening import coarsen
from gcnn_lib.coarsening import lmaxX
from gcnn_lib.coarsening import perm_data
from gcnn_lib.coarsening import rescale_L
import cifar10_input

DATA_DIR = "./data"
# mnist = input_data.read_data_sets("data/", one_hot=False)
cifar10_input.maybe_download_and_extract(DATA_DIR)
# train_data = mnist.train.images.astype(np.float32)
train_data, train_labels = cifar10_input.inputs(
    False, os.path.join(DATA_DIR, 'cifar-10-batches-bin'),
    cifar10_input.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN +
    cifar10_input.NUM_EXAMPLES_PER_EPOCH_FOR_VAL)
test_data, test_labels = cifar10_input.inputs(
    True, os.path.join(DATA_DIR, 'cifar-10-batches-bin'),
    cifar10_input.NUM_EXAMPLES_PER_EPOCH_FOR_EVAL)

#######
test_data = tf.contrib.layers.flatten(test_data)
val_data = tf.slice(train_data,
                    [cifar10_input.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN, 0, 0, 0],
                    [-1, -1, -1, -1])
val_labels = tf.slice(train_labels,
                      [cifar10_input.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN], [-1])
#test_data = tf.Session().run(test_data)

# rotate each image by a random angle
# def accuracy(labels, output):
#     labels = tf.to_int64(labels)
#     pred_result = tf.equal(labels, tf.argmax(output, 1))
#     accu = tf.reduce_mean(tf.cast(pred_result, tf.float32))
#     tf.summary.scalar('accuracy', accu)
#     return accu

merged = tf.summary.merge_all()

# Load a training batch of batch_size images, with augmentation: cropping, flipping, etc.
train_images, train_labels = cifar10_input.distorted_inputs(
    batch_size=batch_size, data_dir=data_dir)
# Load a test batch of batch_size images, without augmentation.
test_images, test_labels = cifar10_input.inputs(batch_size=batch_size,
                                                data_dir=data_dir,
                                                eval_data=True)


# Training
def training(max_steps, s_times, keeprob, display):
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        summary_writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
        # writer.close()
        for i in range(max_steps):
            for j in range(s_times):
                start = time.time()