import os

import numpy as np
import tensorflow as tf

import input_train_val_split   # local helper: train/val file lists and batching
import model                   # local helper: inference, losses, evaluation, train op


def run_training():
    # you need to change the directories to yours.
    train_dir = '/home/kevin/tensorflow/cats_vs_dogs/data/train/'
    logs_train_dir = '/home/kevin/tensorflow/cats_vs_dogs/logs/train/'
    logs_val_dir = '/home/kevin/tensorflow/cats_vs_dogs/logs/val/'

    train, train_label, val, val_label = input_train_val_split.get_files(train_dir, RATIO)
    train_batch, train_label_batch = input_train_val_split.get_batch(
        train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
    val_batch, val_label_batch = input_train_val_split.get_batch(
        val, val_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)

    x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    # int32, not int16: sparse label losses and tf.nn.in_top_k only accept
    # int32/int64 class ids
    y_ = tf.placeholder(tf.int32, shape=[BATCH_SIZE])

    logits = model.inference(x, BATCH_SIZE, N_CLASSES)
    loss = model.losses(logits, y_)
    acc = model.evaluation(logits, y_)
    train_op = model.trainning(loss, learning_rate)  # sic: spelling follows model.py

    with tf.Session() as sess:
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        summary_op = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
        val_writer = tf.summary.FileWriter(logs_val_dir, sess.graph)

        try:
            for step in np.arange(MAX_STEP):
                if coord.should_stop():
                    break
                tra_images, tra_labels = sess.run([train_batch, train_label_batch])
                _, tra_loss, tra_acc = sess.run([train_op, loss, acc],
                                                feed_dict={x: tra_images, y_: tra_labels})

                if step % 50 == 0:
                    print('Step %d, train loss = %.2f, train accuracy = %.2f%%'
                          % (step, tra_loss, tra_acc * 100.0))
                    # the summaries depend on the placeholders, so they need a feed too
                    summary_str = sess.run(summary_op,
                                           feed_dict={x: tra_images, y_: tra_labels})
                    train_writer.add_summary(summary_str, step)

                if step % 200 == 0 or (step + 1) == MAX_STEP:
                    val_images, val_labels = sess.run([val_batch, val_label_batch])
                    val_loss, val_acc = sess.run([loss, acc],
                                                 feed_dict={x: val_images, y_: val_labels})
                    print('** Step %d, val loss = %.2f, val accuracy = %.2f%% **'
                          % (step, val_loss, val_acc * 100.0))
                    summary_str = sess.run(summary_op,
                                           feed_dict={x: val_images, y_: val_labels})
                    val_writer.add_summary(summary_str, step)

                if step % 2000 == 0 or (step + 1) == MAX_STEP:
                    checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)

        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            coord.request_stop()
            coord.join(threads)
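# For reference, a minimal sketch of the two model.py helpers this loop relies
# on, assuming the usual TF1 pattern for sparse integer labels (hypothetical
# reconstruction -- the real model.py may differ):
def losses_sketch(logits, labels):
    # one int32 class id per example -> sparse softmax cross entropy
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels, name='xentropy_per_example')
    loss = tf.reduce_mean(cross_entropy, name='loss')
    tf.summary.scalar('loss', loss)  # this is why summary_op needs a feed_dict
    return loss


def evaluation_sketch(logits, labels):
    # fraction of examples whose top-1 prediction matches the label
    correct = tf.nn.in_top_k(logits, labels, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')
    tf.summary.scalar('accuracy', accuracy)
    return accuracy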
def run_training():
    # you need to change the directories to yours.
    train_dir = 'C:/MIGUEL/ML/dogs and cats/train/'
    logs_train_dir = 'C:/MIGUEL/ML/dogs and cats/logs/train/'
    logs_val_dir = 'C:/MIGUEL/ML/dogs and cats/logs/val/'

    train, train_label, val, val_label = input_train_val_split.get_files(train_dir, RATIO)
    train_batch, train_label_batch = input_train_val_split.get_batch(
        train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
    val_batch, val_label_batch = input_train_val_split.get_batch(
        val, val_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)

    x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3], name='x')
    y_ = tf.placeholder(tf.int32, shape=[BATCH_SIZE], name='y')
    # batch-norm switch: defaults to inference mode unless explicitly fed True
    training = tf.placeholder_with_default(False, shape=(), name='training')

    logits = model.inference(x, BATCH_SIZE, N_CLASSES, training)
    loss = model.losses(logits, y_)
    acc = model.evaluation(logits, y_)
    train_op = model.trainning(loss, learning_rate)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        saver = tf.train.Saver()
        summary_op = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
        val_writer = tf.summary.FileWriter(logs_val_dir, sess.graph)

        # assign ops that refresh the batch-norm moving mean/variance; fetched
        # explicitly in every training sess.run below
        bn_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # expose the endpoints needed to resume training after import_meta_graph
        # (bn_update_ops deliberately left out of the collection)
        for op in (x, y_, training, train_op, loss, acc, summary_op, logits):
            tf.add_to_collection("retrain_ops", op)

        try:
            for step in np.arange(MAX_STEP):
                if coord.should_stop():
                    break
                tra_images, tra_labels = sess.run([train_batch, train_label_batch])
                if step % 50 != 0:
                    _, lossValue, accValue, _ = sess.run(
                        [train_op, loss, acc, bn_update_ops],
                        feed_dict={x: tra_images, y_: tra_labels, training: True})
                else:  # step % 50 == 0: also write a training summary
                    _, lossValue, accValue, _, summary_str = sess.run(
                        [train_op, loss, acc, bn_update_ops, summary_op],
                        feed_dict={x: tra_images, y_: tra_labels, training: True})
                    train_writer.add_summary(summary_str, step)
                    print('Step %d, train loss = %.2f, train accuracy = %.2f%%'
                          % (step, lossValue, accValue * 100.0))

                if step % 200 == 0 or (step + 1) == MAX_STEP:
                    val_images, val_labels = sess.run([val_batch, val_label_batch])
                    # `training` keeps its default (False): BN uses moving statistics
                    val_loss, val_acc, summary_str = sess.run(
                        [loss, acc, summary_op],
                        feed_dict={x: val_images, y_: val_labels})
                    val_writer.add_summary(summary_str, step)
                    print('** Step %d, val loss = %.2f, val accuracy = %.2f%% **'
                          % (step, val_loss, val_acc * 100.0))

                if step % 20 == 0 or (step + 1) == MAX_STEP:
                    checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)

        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            coord.request_stop()
            coord.join(threads)
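# Alternative wiring (a sketch, not the code above): instead of fetching
# bn_update_ops in every sess.run call, the more common TF1 idiom attaches the
# batch-norm moving-average updates to the train op with a control dependency:
def build_train_op_with_bn_updates(loss, learning_rate):
    # UPDATE_OPS holds the assign ops that tf.layers.batch_normalization
    # registers for its moving mean/variance
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        return model.trainning(loss, learning_rate)
# With a train_op built this way, sess.run(train_op, ...) refreshes the BN
# statistics as a side effect, and bn_update_ops never needs to be fetched.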
import VGG     # local helper: VGG16N graph definition
import tools   # local helper: loss/accuracy/optimize/load_with_skip


def train():
    with tf.name_scope('input'):
        train, train_label, val, val_label = input_train_val_split.get_files(
            train_dir, RATIO)
        tra_image_batch, tra_label_batch = input_train_val_split.get_batch(
            train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
        val_image_batch, val_label_batch = input_train_val_split.get_batch(
            val, val_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)

    x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    y_ = tf.placeholder(tf.int16, shape=[BATCH_SIZE, N_CLASSES])  # one-hot labels

    logits = VGG.VGG16N(x, N_CLASSES, IS_PRETRAIN)
    loss = tools.loss(logits, y_)
    accuracy = tools.accuracy(logits, y_)
    my_global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = tools.optimize(loss, learning_rate, my_global_step)

    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # load the pretrained VGG16 weights, skipping fc8 so the final layer
        # can be retrained for the new classes
        tools.load_with_skip(pre_trained_weights, sess, ['fc8'])
        print("load weights done")

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        tra_summary_writer = tf.summary.FileWriter(train_log_dir, sess.graph)
        val_summary_writer = tf.summary.FileWriter(val_log_dir, sess.graph)

        try:
            for step in np.arange(MAX_STEP):
                if coord.should_stop():
                    break
                tra_images, tra_labels = sess.run([tra_image_batch, tra_label_batch])
                _, tra_loss, tra_acc = sess.run([train_op, loss, accuracy],
                                                feed_dict={x: tra_images, y_: tra_labels})

                if step % 2 == 0 or (step + 1) == MAX_STEP:
                    print('Step: %d, loss: %.4f, accuracy: %.4f%%'
                          % (step, tra_loss, tra_acc))
                    # run only summary_op here; rerunning train_op would take an
                    # extra gradient step on the same batch
                    summary_str = sess.run(summary_op,
                                           feed_dict={x: tra_images, y_: tra_labels})
                    tra_summary_writer.add_summary(summary_str, step)

                if step % 4 == 0 or (step + 1) == MAX_STEP:
                    val_images, val_labels = sess.run([val_image_batch, val_label_batch])
                    val_loss, val_acc = sess.run([loss, accuracy],
                                                 feed_dict={x: val_images, y_: val_labels})
                    print('** Step %d, val loss = %.2f, val accuracy = %.2f%% **'
                          % (step, val_loss, val_acc))
                    # summary only -- running train_op here would train on the
                    # validation batch
                    summary_str = sess.run(summary_op,
                                           feed_dict={x: val_images, y_: val_labels})
                    val_summary_writer.add_summary(summary_str, step)

                if step % 8 == 0 or (step + 1) == MAX_STEP:
                    checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)

        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            coord.request_stop()
            coord.join(threads)
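# A minimal sketch of what tools.load_with_skip is assumed to do, given that
# published VGG16 weight files (e.g. vgg16.npy) are a dict of
# {layer_name: [weights, biases]} numpy arrays (hypothetical reconstruction --
# the real tools.py may differ):
def load_with_skip_sketch(data_path, session, skip_layer):
    data_dict = np.load(data_path, encoding='latin1', allow_pickle=True).item()
    for key in data_dict:
        if key not in skip_layer:
            # variable scopes must match the layer names used in VGG.VGG16N
            with tf.variable_scope(key, reuse=True):
                for subkey, data in zip(('weights', 'biases'), data_dict[key]):
                    session.run(tf.get_variable(subkey).assign(data))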
BATCH_SIZE = 16
CAPACITY = 2000
RATIO = 0.2
MAX_STEP = 1000         # with current parameters, it is suggested to use MAX_STEP > 10k
learning_rate = 0.0001  # with current parameters, it is suggested to use learning rate < 0.0001


# In[4]:

# you need to change the directories to yours.
train_dir = 'C:/MIGUEL/ML/dogs and cats/train/'
logs_train_dir = 'C:/MIGUEL/ML/dogs and cats/logs/train/retrain/saveretrain'
logs_val_dir = 'C:/MIGUEL/ML/dogs and cats/logs/val/'

train, train_label, val, val_label = input_train_val_split.get_files(
    train_dir, RATIO)
train_batch, train_label_batch = input_train_val_split.get_batch(
    train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
val_batch, val_label_batch = input_train_val_split.get_batch(
    val, val_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)


# In[5]:

train_writer = tf.summary.FileWriter(logs_train_dir, tf.get_default_graph())
val_writer = tf.summary.FileWriter(logs_val_dir, tf.get_default_graph())


# In[6]:

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # a Saver must exist before restore; import_meta_graph rebuilds the saved
    # graph (including its "retrain_ops" collection) and returns one
    saver = tf.train.import_meta_graph(
        "C:/MIGUEL/ML/dogs and cats/logs/train/model.ckpt-40.meta")
    saver.restore(sess, "C:/MIGUEL/ML/dogs and cats/logs/train/model.ckpt-40")
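    # Hypothetical continuation (a sketch, not code from the original): pull the
    # endpoints that run_training() stored in the "retrain_ops" collection and
    # resume training; the starting step (41, after checkpoint 40) is an assumption.
    x, y_, training, train_op, loss, acc, summary_op, logits = \
        tf.get_collection("retrain_ops")
    try:
        for step in np.arange(41, MAX_STEP):
            if coord.should_stop():
                break
            tra_images, tra_labels = sess.run([train_batch, train_label_batch])
            _, lossValue, accValue = sess.run(
                [train_op, loss, acc],
                feed_dict={x: tra_images, y_: tra_labels, training: True})
            if step % 50 == 0:
                summary_str = sess.run(
                    summary_op,
                    feed_dict={x: tra_images, y_: tra_labels, training: True})
                train_writer.add_summary(summary_str, step)
                print('Step %d, train loss = %.2f, train accuracy = %.2f%%'
                      % (step, lossValue, accValue * 100.0))
    finally:
        coord.request_stop()
        coord.join(threads)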