def main():
    FLAGS = parser.parse_args()
    layers_per_block = [int(x) for x in FLAGS.layers_per_block.split(",")]
    try:
        image_paths, mask_paths = get_data_paths_list(FLAGS.train_images,
                                                      FLAGS.train_masks)
        eval_image_paths, eval_mask_paths = get_data_paths_list(
            FLAGS.val_images, FLAGS.val_masks)
    except FileNotFoundError:
        print("No images found in the specified directories")
        return

    assert len(image_paths) == len(
        mask_paths), "Number of train images and masks found is different"
    assert len(eval_image_paths) == len(
        eval_mask_paths
    ), "Number of validation images and masks found is different"
    assert len(
        image_paths
    ) // FLAGS.batch_size > 0, "Number of training images is less than the batch size"
    assert len(
        eval_image_paths
    ) // FLAGS.batch_size > 0, "Number of validation images is less than the batch size"

    train_eval = TrainEval(image_paths, mask_paths, eval_image_paths,
                           eval_mask_paths, FLAGS.ckpt_dir, FLAGS.num_classes)
    train_eval.train_eval(FLAGS.batch_size, FLAGS.growth_k, layers_per_block,
                          FLAGS.epochs, FLAGS.learning_rate)
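# main() reads its settings from a module-level argparse parser that is not
# shown here. A minimal sketch of such a parser follows; the flag names mirror
# the FLAGS attributes used above, but the defaults and help strings are
# illustrative assumptions, not the repository's actual values.
import argparse

parser = argparse.ArgumentParser(description="Train the segmentation network")
parser.add_argument("--train_images", type=str, help="Directory of training images")
parser.add_argument("--train_masks", type=str, help="Directory of training masks")
parser.add_argument("--val_images", type=str, help="Directory of validation images")
parser.add_argument("--val_masks", type=str, help="Directory of validation masks")
parser.add_argument("--ckpt_dir", type=str, help="Where to write checkpoints")
parser.add_argument("--num_classes", type=int, default=2, help="Number of classes")
parser.add_argument("--batch_size", type=int, default=8, help="Batch size")
parser.add_argument("--growth_k", type=int, default=16, help="Dense-block growth rate")
parser.add_argument("--layers_per_block", type=str, default="4,5,7,10,12",
                    help="Comma-separated number of layers per dense block")
parser.add_argument("--epochs", type=int, default=50, help="Number of training epochs")
parser.add_argument("--learning_rate", type=float, default=1e-4, help="Learning rate")

if __name__ == "__main__":
    main()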
def train(self, train_path, val_path, save_dir, batch_size, epochs,
          learning_rate):
    """
    Trains the segmentation network (DeepLab v3 here) on the specified training
    data and periodically validates on the validation data.

    Args:
        train_path: Directory where the training data is present.
        val_path: Directory where the validation data is present.
        save_dir: Directory where to save the model and training summaries.
        batch_size: Batch size to use for training.
        epochs: Number of epochs (complete passes over the dataset) to train for.
        learning_rate: Learning rate for the optimizer.

    Returns:
        None
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    train_image_path = os.path.join(train_path, 'images')
    train_mask_path = os.path.join(train_path, 'masks_spleen')
    val_image_path = os.path.join(val_path, 'images')
    val_mask_path = os.path.join(val_path, 'masks_spleen')

    assert os.path.exists(train_image_path), "No training image folder found"
    assert os.path.exists(train_mask_path), "No training mask folder found"
    assert os.path.exists(val_image_path), "No validation image folder found"
    assert os.path.exists(val_mask_path), "No validation mask folder found"

    train_image_paths, train_mask_paths = get_data_paths_list(
        train_image_path, train_mask_path)
    val_image_paths, val_mask_paths = get_data_paths_list(
        val_image_path, val_mask_path)

    assert len(train_image_paths) == len(
        train_mask_paths), "Number of images and masks don't match in train folder"
    assert len(val_image_paths) == len(
        val_mask_paths), "Number of images and masks don't match in validation folder"

    self.num_train_images = len(train_image_paths)
    self.num_val_images = len(val_image_paths)

    print("Loading Data")
    train_data, train_queue_init = utility.data_batch(
        train_image_paths, train_mask_paths, batch_size)
    train_image_tensor, train_mask_tensor = train_data
    eval_data, eval_queue_init = utility.data_batch(
        val_image_paths, val_mask_paths, batch_size)
    eval_image_tensor, eval_mask_tensor = eval_data
    print("Loading Data Finished")

    image_ph = tf.placeholder(tf.float32, shape=[None, 256, 256, 1])
    mask_ph = tf.placeholder(tf.int32, shape=[None, 256, 256, 1])
    training = tf.placeholder(tf.bool, shape=[])

    if self.logits is None:
        self.logits = network.deeplab_v3(image_ph, is_training=True, reuse=False)
        # self.logits = self.model(image_ph, training)

    loss = tf.reduce_mean(self.xentropy_loss(self.logits, mask_ph))

    with tf.variable_scope("mean_iou_train"):
        iou, iou_update = self.calculate_iou(mask_ph, self.logits)

    merged = tf.summary.merge_all()

    optimizer = tf.train.AdamOptimizer(learning_rate)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        opt = optimizer.minimize(loss)

    running_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                     scope="mean_iou_train")
    reset_iou = tf.variables_initializer(var_list=running_vars)

    saver = tf.train.Saver(max_to_keep=30)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    print(self.num_train_images)

    with tf.Session(config=config) as sess:
        print("Initializing Variables")
        sess.run([
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        ])
        print("Initializing Variables Finished")

        # Only restore if a checkpoint actually exists in save_dir.
        latest_ckpt = tf.train.latest_checkpoint(save_dir)
        if latest_ckpt is not None:
            saver.restore(sess, latest_ckpt)
            print("Checkpoint restored")

        for epoch in range(epochs):
            print("Epoch: ", epoch)
            writer = tf.summary.FileWriter(os.path.dirname(save_dir),
                                           sess.graph)
            print("Epoch queue init start")
            sess.run([train_queue_init, eval_queue_init])
            print("Epoch queue init ends")
            total_train_cost, total_val_cost = 0, 0
            total_train_iou, total_val_iou = 0, 0

            for train_step in range(
                    (13 * self.num_train_images) // batch_size - 1):
                image_batch, mask_batch, _ = sess.run(
                    [train_image_tensor, train_mask_tensor, reset_iou])
                # print(np.max(image_batch), np.min(image_batch))
                feed_dict = {
                    image_ph: image_batch,
                    mask_ph: mask_batch,
                    training: True
                }
                cost, _, _, summary = sess.run([loss, opt, iou_update, merged],
                                               feed_dict=feed_dict)
                train_iou = sess.run(iou, feed_dict=feed_dict)
                total_train_cost += cost
                total_train_iou += train_iou
                writer.add_summary(summary, train_step)
                if train_step % 50 == 0:
                    print("Step: ", train_step, "Cost: ", cost,
                          "IoU:", train_iou)

            for val_step in range(self.num_val_images // batch_size):
                image_batch, mask_batch, _ = sess.run(
                    [eval_image_tensor, eval_mask_tensor, reset_iou])
                feed_dict = {
                    image_ph: image_batch,
                    mask_ph: mask_batch,
                    training: True
                }
                eval_cost, _ = sess.run([loss, iou_update],
                                        feed_dict=feed_dict)
                eval_iou = sess.run(iou, feed_dict=feed_dict)
                total_val_cost += eval_cost
                total_val_iou += eval_iou

            # Average over the number of batches actually processed.
            print("Epoch: {0}, training loss: {1}, validation loss: {2}".format(
                epoch, total_train_cost / (train_step + 1),
                total_val_cost / (val_step + 1)))
            print("Epoch: {0}, training iou: {1}, val iou: {2}".format(
                epoch, total_train_iou / (train_step + 1),
                total_val_iou / (val_step + 1)))

            print("Saving model...")
            saver.save(sess, save_dir, global_step=epoch)
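# The training loop above calls two helpers, self.xentropy_loss and
# self.calculate_iou, that are defined elsewhere in the repository. A minimal
# sketch of how they could look follows; the exact signatures and the
# self.num_classes attribute are assumptions, not the repository's code.
import tensorflow as tf

def xentropy_loss(self, logits, labels):
    # Per-pixel sparse softmax cross-entropy; masks arrive as [N, H, W, 1].
    labels = tf.squeeze(tf.cast(labels, tf.int32), axis=-1)
    return tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                          logits=logits)

def calculate_iou(self, labels, logits):
    # Running mean IoU; tf.metrics.mean_iou returns (metric, update_op),
    # matching the (iou, iou_update) pair used above, and creates its running
    # counters as local variables inside the enclosing "mean_iou_train" scope.
    predictions = tf.argmax(logits, axis=-1)
    labels = tf.squeeze(labels, axis=-1)
    return tf.metrics.mean_iou(labels=labels,
                               predictions=predictions,
                               num_classes=self.num_classes)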
def train(self, train_path, val_path, save_dir, batch_size, epochs,
          learning_rate, prior_model):
    """
    Trains the Tiramisu on the specified training data and periodically
    validates on the validation data.

    Args:
        train_path: Directory where the training data is present.
        val_path: Directory where the validation data is present.
        save_dir: Directory where to save the model and training summaries.
        batch_size: Batch size to use for training.
        epochs: Number of epochs (complete passes over the dataset) to train for.
        learning_rate: Learning rate for the optimizer.
        prior_model: Path to a checkpoint to restore before training, or an
            empty string to train from scratch.

    Returns:
        None
    """
    train_image_path = os.path.join(train_path, 'images')
    train_mask_path = os.path.join(train_path, 'masks')
    val_image_path = os.path.join(val_path, 'images')
    val_mask_path = os.path.join(val_path, 'masks')

    assert os.path.exists(train_image_path), "No training image folder found"
    assert os.path.exists(train_mask_path), "No training mask folder found"
    assert os.path.exists(val_image_path), "No validation image folder found"
    assert os.path.exists(val_mask_path), "No validation mask folder found"

    train_image_paths, train_mask_paths = get_data_paths_list(
        train_image_path, train_mask_path)
    val_image_paths, val_mask_paths = get_data_paths_list(
        val_image_path, val_mask_path)

    assert len(train_image_paths) == len(
        train_mask_paths), "Number of images and masks don't match in train folder"
    assert len(val_image_paths) == len(
        val_mask_paths), "Number of images and masks don't match in validation folder"

    self.num_train_images = len(train_image_paths)
    self.num_val_images = len(val_image_paths)

    train_data, train_queue_init = utility.data_batch(
        train_image_paths, train_mask_paths, batch_size)
    train_image_tensor, train_mask_tensor = train_data
    eval_data, eval_queue_init = utility.data_batch(
        val_image_paths, val_mask_paths, batch_size)
    eval_image_tensor, eval_mask_tensor = eval_data

    image_ph = tf.placeholder(tf.float32, shape=[None, 256, 256, 3])
    mask_ph = tf.placeholder(tf.int32, shape=[None, 256, 256, 1])
    training = tf.placeholder(tf.bool, shape=[])

    if self.logits is None:
        self.logits = self.model(image_ph, training)

    loss = tf.reduce_mean(self.xentropy_loss(self.logits, mask_ph))

    with tf.variable_scope("mean_iou_train"):
        iou, iou_update = self.calculate_iou(mask_ph, self.logits)

    optimizer = tf.train.AdamOptimizer(learning_rate)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        opt = optimizer.minimize(loss)

    running_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                     scope="mean_iou_train")
    reset_iou = tf.variables_initializer(var_list=running_vars)

    with tf.Session() as sess:
        saver = tf.train.Saver(max_to_keep=20)
        # Initialize first, then restore, so a restored checkpoint is not
        # overwritten by the initializers.
        sess.run([
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        ])
        if prior_model != "":
            saver.restore(sess, prior_model)

        for epoch in range(epochs):
            writer = tf.summary.FileWriter(os.path.dirname(save_dir),
                                           sess.graph)
            sess.run([train_queue_init, eval_queue_init])
            total_train_cost, total_val_cost = 0, 0
            total_train_iou, total_val_iou = 0, 0

            for train_step in range(self.num_train_images // batch_size):
                image_batch, mask_batch, _ = sess.run(
                    [train_image_tensor, train_mask_tensor, reset_iou])
                feed_dict = {
                    image_ph: image_batch,
                    mask_ph: mask_batch,
                    training: True
                }
                cost, _, _ = sess.run([loss, opt, iou_update],
                                      feed_dict=feed_dict)
                train_iou = sess.run(iou, feed_dict=feed_dict)
                total_train_cost += cost
                total_train_iou += train_iou
                if train_step % 50 == 0:
                    print("Step: ", train_step, "Cost: ", cost,
                          "IoU:", train_iou)

            for val_step in range(self.num_val_images // batch_size):
                image_batch, mask_batch, _ = sess.run(
                    [eval_image_tensor, eval_mask_tensor, reset_iou])
                feed_dict = {
                    image_ph: image_batch,
                    mask_ph: mask_batch,
                    training: True
                }
                eval_cost, _ = sess.run([loss, iou_update],
                                        feed_dict=feed_dict)
                eval_iou = sess.run(iou, feed_dict=feed_dict)
                total_val_cost += eval_cost
                total_val_iou += eval_iou

            # Average over the number of batches actually processed.
            print("Epoch: {0}, training loss: {1}, validation loss: {2}".format(
                epoch, total_train_cost / (train_step + 1),
                total_val_cost / (val_step + 1)))
            print("Epoch: {0}, training iou: {1}, val iou: {2}".format(
                epoch, total_train_iou / (train_step + 1),
                total_val_iou / (val_step + 1)))

            print("Saving model...")
            saver.save(sess, save_dir, global_step=epoch)
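# utility.data_batch is used by every train() variant above: it returns an
# (image_tensor, mask_tensor) pair plus an initializer that the loops re-run at
# the start of each epoch. A minimal sketch with tf.data follows; the PNG
# decoding, resizing, normalization, and shuffle-only augmentation are
# assumptions about the pipeline, not the repository's actual implementation
# (note that the first variant above expects single-channel images, the others
# three-channel).
import tensorflow as tf

def data_batch(image_paths, mask_paths, batch_size, train_flag=False):
    def _parse(image_path, mask_path):
        # Decode, resize and scale one image/mask pair (assumed PNG inputs).
        image = tf.image.decode_png(tf.read_file(image_path), channels=3)
        image = tf.image.resize_images(image, [256, 256])
        image = tf.cast(image, tf.float32) / 255.0
        mask = tf.image.decode_png(tf.read_file(mask_path), channels=1)
        mask = tf.image.resize_images(
            mask, [256, 256], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        mask = tf.cast(mask, tf.int32)
        return image, mask

    dataset = tf.data.Dataset.from_tensor_slices((image_paths, mask_paths))
    dataset = dataset.map(_parse)
    if train_flag:
        dataset = dataset.shuffle(buffer_size=len(image_paths))
    dataset = dataset.batch(batch_size)
    iterator = dataset.make_initializable_iterator()
    # The caller runs iterator.initializer once per epoch to restart the data.
    return iterator.get_next(), iterator.initializer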
def train(self, train_path, val_path, save_dir, batch_size, epochs,
          learning_rate, learning_policy):
    """
    Trains the Tiramisu on the specified training data and periodically
    validates on the validation data.

    Args:
        train_path: Directory where the training data is present.
        val_path: Directory where the validation data is present.
        save_dir: Directory where to save the model and training summaries.
        batch_size: Batch size to use for training.
        epochs: Number of epochs (complete passes over the dataset) to train for.
        learning_rate: Learning rate for the optimizer.
        learning_policy: Learning rate schedule; 'step' for exponential decay,
            'poly' for polynomial decay, anything else keeps a constant rate.

    Returns:
        None
    """
    train_image_path = os.path.join(train_path, 'images')
    train_mask_path = os.path.join(train_path, 'masks')
    val_image_path = os.path.join(val_path, 'images')
    val_mask_path = os.path.join(val_path, 'masks')

    assert os.path.exists(train_image_path), "No training image folder found"
    assert os.path.exists(train_mask_path), "No training mask folder found"
    assert os.path.exists(val_image_path), "No validation image folder found"
    assert os.path.exists(val_mask_path), "No validation mask folder found"

    train_image_paths, train_mask_paths = get_data_paths_list(
        train_image_path, train_mask_path)
    val_image_paths, val_mask_paths = get_data_paths_list(
        val_image_path, val_mask_path)

    assert len(train_image_paths) == len(
        train_mask_paths), "Number of images and masks don't match in train folder"
    assert len(val_image_paths) == len(
        val_mask_paths), "Number of images and masks don't match in validation folder"

    self.num_train_images = len(train_image_paths)
    self.num_val_images = len(val_image_paths)

    train_data, train_queue_init = utility.data_batch(
        train_image_paths, train_mask_paths, batch_size, train_flag=True)
    train_image_tensor, train_mask_tensor = train_data
    eval_data, eval_queue_init = utility.data_batch(
        val_image_paths, val_mask_paths, batch_size, train_flag=True)
    eval_image_tensor, eval_mask_tensor = eval_data

    image_ph = tf.placeholder(tf.float32, shape=[None, 256, 256, 3])
    mask_ph = tf.placeholder(tf.int32, shape=[None, 256, 256, 1])
    training = tf.placeholder(tf.bool, shape=[])
    global_step = tf.Variable(0, trainable=False)

    if self.logits is None:
        self.logits = self.model(image_ph, training)

    regularizer = tf.contrib.layers.l2_regularizer(scale=0.001)
    # reg_term = tf.contrib.layers.apply_regularization(regularizer)
    loss = tf.reduce_mean(self.xentropy_loss(self.logits, mask_ph))

    with tf.variable_scope("mean_iou_train", regularizer=regularizer):
        iou, iou_update = self.calculate_iou(mask_ph, self.logits)

    initial_learning_rate = learning_rate
    if learning_policy == 'step':
        learning_rate = tf.train.exponential_decay(initial_learning_rate,
                                                   global_step=global_step,
                                                   decay_steps=10,
                                                   decay_rate=0.9)
    elif learning_policy == 'poly':
        learning_rate = tf.train.polynomial_decay(initial_learning_rate,
                                                  global_step=global_step,
                                                  decay_steps=10,
                                                  end_learning_rate=0.0001,
                                                  power=1.0,
                                                  cycle=False)
    else:
        learning_rate = initial_learning_rate

    # RMSProp
    # optimizer = tf.train.RMSPropOptimizer(0.001, 0.9)
    # SGD
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    # Momentum
    # optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
    # Adam
    # optimizer = tf.train.AdamOptimizer(learning_rate)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        # Pass global_step so the decay schedules above actually advance.
        opt = optimizer.minimize(loss, global_step=global_step)

    running_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                     scope="mean_iou_train")
    reset_iou = tf.variables_initializer(var_list=running_vars)
    saver = tf.train.Saver(max_to_keep=20)

    with tf.Session() as sess:
        sess.run([
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        ])
        total_train_losses = []
        total_train_ious = []
        total_val_losses = []
        total_val_ious = []
        steps = []

        for epoch in range(epochs):
            writer = tf.summary.FileWriter(os.path.dirname(save_dir),
                                           sess.graph)
            sess.run([train_queue_init, eval_queue_init])
            total_train_cost, total_val_cost = 0, 0
            total_train_iou, total_val_iou = 0, 0

            for train_step in range(self.num_train_images // batch_size):
                image_batch, mask_batch, _ = sess.run(
                    [train_image_tensor, train_mask_tensor, reset_iou])
                feed_dict = {
                    image_ph: image_batch,
                    mask_ph: mask_batch,
                    training: True
                }
                cost, _, _ = sess.run([loss, opt, iou_update],
                                      feed_dict=feed_dict)
                train_iou = sess.run(iou, feed_dict=feed_dict)
                total_train_cost += cost
                total_train_iou += train_iou
                if train_step % 50 == 0:
                    print("Step: ", train_step, "Cost: ", cost,
                          "IoU:", train_iou)

            for val_step in range(self.num_val_images // batch_size):
                image_batch, mask_batch, _ = sess.run(
                    [eval_image_tensor, eval_mask_tensor, reset_iou])
                feed_dict = {
                    image_ph: image_batch,
                    mask_ph: mask_batch,
                    training: True
                }
                eval_cost, _ = sess.run([loss, iou_update],
                                        feed_dict=feed_dict)
                eval_iou = sess.run(iou, feed_dict=feed_dict)
                total_val_cost += eval_cost
                total_val_iou += eval_iou

            # Average over the number of batches actually processed.
            print("Epoch: {0}, training loss: {1}, validation loss: {2}".format(
                epoch, total_train_cost / (train_step + 1),
                total_val_cost / (val_step + 1)))
            print("Epoch: {0}, training iou: {1}, val iou: {2}".format(
                epoch, total_train_iou / (train_step + 1),
                total_val_iou / (val_step + 1)))

            total_train_losses.append(total_train_cost / (train_step + 1))
            total_val_losses.append(total_val_cost / (val_step + 1))
            total_train_ious.append(total_train_iou / (train_step + 1))
            total_val_ious.append(total_val_iou / (val_step + 1))
            steps.append(epoch)

            print("Saving model...")
            saver.save(sess, save_dir, global_step=epoch)

        # Plot the loss/IoU curves once training is done
        # (plot, xlabel, etc. come from matplotlib.pyplot).
        plot(steps, total_train_losses, color='r', label='train_loss')
        plot(steps, total_val_losses, color='g', label='val_loss')
        plot(steps, total_train_ious, color='k', label='train_iou')
        plot(steps, total_val_ious, color='b', label='val_iou')
        xlabel('epoch')
        ylabel('value')
        title('Loss-Iou')
        legend(loc='best')
        savefig('./result.jpg')
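# The 'step' and 'poly' policies above only decay if global_step is actually
# incremented, which is why minimize() receives global_step. A small
# standalone sketch of that interaction follows; the toy loss and the decay
# settings are illustrative, not the repository's configuration.
import tensorflow as tf

global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(0.01, global_step=global_step,
                                           decay_steps=10, decay_rate=0.9)
loss = tf.square(tf.Variable(5.0))  # toy loss just to drive the optimizer
opt = tf.train.GradientDescentOptimizer(learning_rate).minimize(
    loss, global_step=global_step)  # global_step advances by 1 per step

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(30):
        sess.run(opt)
    # After 30 steps the rate has decayed to 0.01 * 0.9 ** (30 / 10).
    print(sess.run([global_step, learning_rate]))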