def main():
    """Create the multi-scale model and start the training.

    The network is applied to the input batch at scales 1.0, 0.75 and 0.5,
    sharing weights through variable-scope reuse.  The two smaller score
    maps are resized to the full-scale map and the element-wise maximum of
    the three forms a fused prediction.  The objective is the sum of the
    mean cross-entropy of the fused and the three per-scale predictions
    plus L2 weight decay.  Gradients are accumulated over
    ``args.grad_update_every`` batches before each optimiser update.
    """
    args = get_arguments()

    h, w = (int(side) for side in args.input_size.split(','))
    input_size = (h, w)
    num_classes = args.num_classes

    tf.set_random_seed(args.random_seed)

    # Coordinator shared by the input queue threads.
    coord = tf.train.Coordinator()

    # Input pipeline: the full-size batch plus two down-scaled copies.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_dir,
            args.data_list,
            input_size,
            args.random_scale,
            args.random_mirror,
            args.ignore_label,
            IMG_MEAN,
            coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)
        image_batch075 = tf.image.resize_images(image_batch, [int(h * 0.75), int(w * 0.75)])
        image_batch05 = tf.image.resize_images(image_batch, [int(h * 0.5), int(w * 0.5)])

    # One network per scale; the second and third reuse the first one's variables.
    model_options = dict(is_training=args.is_training, num_classes=num_classes)
    with tf.variable_scope('', reuse=False):
        net = DeepLabResNetModel({'data': image_batch}, **model_options)
    with tf.variable_scope('', reuse=True):
        net075 = DeepLabResNetModel({'data': image_batch075}, **model_options)
    with tf.variable_scope('', reuse=True):
        net05 = DeepLabResNetModel({'data': image_batch05}, **model_options)
    # For a small batch size, it is better to keep
    # the statistics of the BN layers (running means and variances)
    # frozen, and to not update the values provided by the pre-trained model.
    # If is_training=True, the statistics will be updated during the training.
    # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
    # if they are presented in var_list of the optimiser definition.

    # Per-scale score maps and their fusion by element-wise maximum.
    raw_output100 = net.layers['fc1_voc12']
    raw_output075 = net075.layers['fc1_voc12']
    raw_output05 = net05.layers['fc1_voc12']
    upscaled = [tf.image.resize_images(scores, tf.shape(raw_output100)[1:3,])
                for scores in (raw_output075, raw_output05)]
    raw_output = tf.reduce_max(tf.stack([raw_output100] + upscaled), axis=0)

    # Variables to restore: everything, unless the 'fc' layers are to be skipped.
    # Running means and variances are not trainable, hence global_variables().
    restore_var = [var for var in tf.global_variables()
                   if not ('fc' in var.name and args.not_restore_last)]
    # BN beta/gamma are excluded from optimisation.
    all_trainable = [var for var in tf.trainable_variables()
                     if not ('beta' in var.name or 'gamma' in var.name)]
    fc_trainable = [var for var in all_trainable if 'fc' in var.name]
    conv_trainable = [var for var in all_trainable if 'fc' not in var.name]  # lr * 1.0
    fc_w_trainable = [var for var in fc_trainable if 'weights' in var.name]  # lr * 10.0
    fc_b_trainable = [var for var in fc_trainable if 'biases' in var.name]  # lr * 20.0
    assert(len(all_trainable) == len(fc_trainable) + len(conv_trainable))
    assert(len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))
    # Fixed ordering used for gradients and their accumulators.
    ordered_trainable = conv_trainable + fc_w_trainable + fc_b_trainable

    # Flatten the score maps to [num_pixels, num_classes].
    raw_prediction, raw_prediction100, raw_prediction075, raw_prediction05 = [
        tf.reshape(scores, [-1, num_classes])
        for scores in (raw_output, raw_output100, raw_output075, raw_output05)]

    # Labels processed for the spatial size of each score map ([batch_size, h, w]).
    label_proc, label_proc075, label_proc05 = [
        prepare_label(label_batch, tf.stack(scores.get_shape()[1:3]),
                      num_classes=num_classes, one_hot=False)
        for scores in (raw_output, raw_output075, raw_output05)]

    raw_gt, raw_gt075, raw_gt05 = [
        tf.reshape(labels, [-1,])
        for labels in (label_proc, label_proc075, label_proc05)]

    # Keep only the pixels whose label is a valid class id (< num_classes).
    indices, indices075, indices05 = [
        tf.squeeze(tf.where(tf.less_equal(flat_labels, num_classes - 1)), 1)
        for flat_labels in (raw_gt, raw_gt075, raw_gt05)]

    gt, gt075, gt05 = [
        tf.cast(tf.gather(flat_labels, keep), tf.int32)
        for flat_labels, keep in ((raw_gt, indices),
                                  (raw_gt075, indices075),
                                  (raw_gt05, indices05))]

    prediction, prediction100, prediction075, prediction05 = [
        tf.gather(flat_scores, keep)
        for flat_scores, keep in ((raw_prediction, indices),
                                  (raw_prediction100, indices),
                                  (raw_prediction075, indices075),
                                  (raw_prediction05, indices05))]

    # Pixel-wise softmax loss for the fused and the per-scale predictions.
    loss, loss100, loss075, loss05 = [
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
        for logits, labels in ((prediction, gt),
                               (prediction100, gt),
                               (prediction075, gt075),
                               (prediction05, gt05))]
    l2_losses = [args.weight_decay * tf.nn.l2_loss(var)
                 for var in tf.trainable_variables() if 'weights' in var.name]
    reduced_loss = tf.reduce_mean(loss) + tf.reduce_mean(loss100) + tf.reduce_mean(loss075) + tf.reduce_mean(loss05) + tf.add_n(l2_losses)

    # Full-resolution class map, used for visualisation only.
    raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # Image summary: input, ground truth and prediction side by side.
    images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images, num_classes], tf.uint8)
    preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, num_classes], tf.uint8)
    summary_panels = tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary])
    total_summary = tf.summary.image('images', summary_panels,
                                     max_outputs=args.save_num_images)
    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())

    # Polynomial learning-rate decay driven by the fed step number.
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))

    opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
    opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum)
    opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum)

    # One non-trainable accumulator per optimised variable.
    accum_grads = [tf.Variable(tf.zeros_like(var.initialized_value()), trainable=False)
                   for var in ordered_trainable]

    # Resets the accumulators before the next group of batches.
    zero_op = [accumulator.assign(tf.zeros_like(accumulator)) for accumulator in accum_grads]

    grads = tf.gradients(reduced_loss, ordered_trainable)

    # Each batch contributes its gradient divided by the number of accumulated batches.
    accum_grads_op = [accumulator.assign_add(grad / args.grad_update_every)
                      for accumulator, grad in zip(accum_grads, grads)]

    # Split the accumulators back into the three learning-rate groups.
    conv_end = len(conv_trainable)
    fc_w_end = conv_end + len(fc_w_trainable)
    grads_conv = accum_grads[:conv_end]
    grads_fc_w = accum_grads[conv_end:fc_w_end]
    grads_fc_b = accum_grads[fc_w_end:]

    train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
    train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
    train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))
    train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)

    # Session set-up and variable initialisation.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # Saver used for writing checkpoints.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

    # Optionally load pre-trained weights.
    if args.restore_from is not None:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, args.restore_from)

    # Start the input queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    for step in range(args.num_steps):
        start_time = time.time()
        feed_dict = {step_ph: step}

        # Clear the accumulators, then add up the gradients of several batches.
        sess.run(zero_op, feed_dict=feed_dict)
        batch_losses = [
            sess.run([accum_grads_op, reduced_loss], feed_dict=feed_dict)[1]
            for _ in range(args.grad_update_every)]
        # Report the loss averaged over the accumulated batches.
        loss_value = sum(batch_losses) / args.grad_update_every

        # Apply the accumulated gradients; periodically also log and checkpoint.
        if step % args.save_pred_every != 0:
            sess.run(train_op, feed_dict=feed_dict)
        else:
            fetched = sess.run([image_batch, label_batch, total_summary, train_op],
                               feed_dict=feed_dict)
            summary_writer.add_summary(fetched[2], step)
            save(saver, sess, args.snapshot_dir, step)

        duration = time.time() - start_time
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))

    coord.request_stop()
    coord.join(threads)
def main():
    """Create the multi-scale model and start the training.

    The network is evaluated on the input batch at scales 1.0, 0.75 and 0.5
    with shared weights.  The fused prediction is the element-wise maximum
    of the three score maps after resizing them to the full-scale map.  The
    loss sums the mean cross-entropy of the fused and the per-scale
    predictions plus L2 weight decay, and is minimised with three momentum
    optimisers using different learning-rate multipliers.
    """
    args = get_arguments()

    h, w = (int(side) for side in args.input_size.split(','))
    input_size = (h, w)

    # Coordinator shared by the input queue threads.
    coord = tf.train.Coordinator()

    # Input pipeline: the full-size batch plus two down-scaled copies.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_dir,
            args.data_list,
            input_size,
            args.random_scale,
            coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)
        image_batch075 = tf.image.resize_images(image_batch, [int(h * 0.75), int(w * 0.75)])
        image_batch05 = tf.image.resize_images(image_batch, [int(h * 0.5), int(w * 0.5)])

    # One network per scale; the second and third reuse the first one's variables.
    with tf.variable_scope('', reuse=False):
        net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training)
    with tf.variable_scope('', reuse=True):
        net075 = DeepLabResNetModel({'data': image_batch075}, is_training=args.is_training)
    with tf.variable_scope('', reuse=True):
        net05 = DeepLabResNetModel({'data': image_batch05}, is_training=args.is_training)
    # For a small batch size, it is better to keep
    # the statistics of the BN layers (running means and variances)
    # frozen, and to not update the values provided by the pre-trained model.
    # If is_training=True, the statistics will be updated during the training.
    # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
    # if they are presented in var_list of the optimiser definition.

    # Per-scale score maps and their fusion by element-wise maximum.
    raw_output100 = net.layers['fc1_voc12']
    raw_output075 = net075.layers['fc1_voc12']
    raw_output05 = net05.layers['fc1_voc12']
    upscaled = [tf.image.resize_images(scores, tf.shape(raw_output100)[1:3,])
                for scores in (raw_output075, raw_output05)]
    raw_output = tf.reduce_max(tf.stack([raw_output100] + upscaled), axis=0)

    # Running means and variances are not trainable, so every global variable
    # is restored (and later saved).
    restore_var = tf.global_variables()
    # BN beta/gamma are excluded from optimisation.
    all_trainable = [var for var in tf.trainable_variables()
                     if not ('beta' in var.name or 'gamma' in var.name)]
    fc_trainable = [var for var in all_trainable if 'fc' in var.name]
    conv_trainable = [var for var in all_trainable if 'fc' not in var.name]  # lr * 1.0
    fc_w_trainable = [var for var in fc_trainable if 'weights' in var.name]  # lr * 10.0
    fc_b_trainable = [var for var in fc_trainable if 'biases' in var.name]  # lr * 20.0
    assert(len(all_trainable) == len(fc_trainable) + len(conv_trainable))
    assert(len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))

    # Flatten the score maps to [num_pixels, n_classes].
    raw_prediction, raw_prediction100, raw_prediction075, raw_prediction05 = [
        tf.reshape(scores, [-1, n_classes])
        for scores in (raw_output, raw_output100, raw_output075, raw_output05)]

    # Labels processed for the spatial size of each score map ([batch_size, h, w]).
    label_proc, label_proc075, label_proc05 = [
        prepare_label(label_batch, tf.pack(scores.get_shape()[1:3]), one_hot=False)
        for scores in (raw_output, raw_output075, raw_output05)]

    raw_gt, raw_gt075, raw_gt05 = [
        tf.reshape(labels, [-1,])
        for labels in (label_proc, label_proc075, label_proc05)]

    # Keep only the pixels whose label is a valid class id (< n_classes).
    indices, indices075, indices05 = [
        tf.squeeze(tf.where(tf.less_equal(flat_labels, n_classes - 1)), 1)
        for flat_labels in (raw_gt, raw_gt075, raw_gt05)]

    gt, gt075, gt05 = [
        tf.cast(tf.gather(flat_labels, keep), tf.int32)
        for flat_labels, keep in ((raw_gt, indices),
                                  (raw_gt075, indices075),
                                  (raw_gt05, indices05))]

    prediction, prediction100, prediction075, prediction05 = [
        tf.gather(flat_scores, keep)
        for flat_scores, keep in ((raw_prediction, indices),
                                  (raw_prediction100, indices),
                                  (raw_prediction075, indices075),
                                  (raw_prediction05, indices05))]

    # Pixel-wise softmax loss for the fused and the per-scale predictions.
    loss, loss100, loss075, loss05 = [
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
        for logits, labels in ((prediction, gt),
                               (prediction100, gt),
                               (prediction075, gt075),
                               (prediction05, gt05))]
    l2_losses = [args.weight_decay * tf.nn.l2_loss(var)
                 for var in tf.trainable_variables() if 'weights' in var.name]
    reduced_loss = tf.reduce_mean(loss) + tf.reduce_mean(loss100) + tf.reduce_mean(loss075) + tf.reduce_mean(loss05) + tf.add_n(l2_losses)

    # Full-resolution class map, used for visualisation only.
    raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # Image summary: input, ground truth and prediction side by side.
    images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images], tf.uint8)
    labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images], tf.uint8)
    preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images], tf.uint8)
    summary_panels = tf.concat(2, [images_summary, labels_summary, preds_summary])
    total_summary = tf.summary.image('images', summary_panels,
                                     max_outputs=args.save_num_images)
    summary_writer = tf.summary.FileWriter(args.snapshot_dir)

    # Polynomial learning-rate decay driven by the fed step number.
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))

    opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
    opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum)
    opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum)

    # One backward pass; the gradient list is then split per learning-rate group.
    grads = tf.gradients(reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable)
    conv_end = len(conv_trainable)
    fc_w_end = conv_end + len(fc_w_trainable)
    grads_conv = grads[:conv_end]
    grads_fc_w = grads[conv_end:fc_w_end]
    grads_fc_b = grads[fc_w_end:]

    train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
    train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
    train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))
    train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)

    # Session set-up and variable initialisation.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # Saver used for writing checkpoints.
    saver = tf.train.Saver(var_list=restore_var, max_to_keep=10)

    # Optionally load pre-trained weights.
    if args.restore_from is not None:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, args.restore_from)

    # Start the input queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    for step in range(args.num_steps):
        start_time = time.time()
        feed_dict = {step_ph: step}

        if step % args.save_pred_every != 0:
            loss_value, _ = sess.run([reduced_loss, train_op], feed_dict=feed_dict)
        else:
            # Periodic step: also fetch the summary and write a checkpoint.
            fetched = sess.run(
                [reduced_loss, image_batch, label_batch, pred, total_summary, train_op],
                feed_dict=feed_dict)
            loss_value, summary = fetched[0], fetched[4]
            summary_writer.add_summary(summary, step)
            save(saver, sess, args.snapshot_dir, step)

        duration = time.time() - start_time
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))

    coord.request_stop()
    coord.join(threads)
def main():
    """Create the model and fine-tune its last layer.

    Only variables whose name contains 'fc1_voc12' are optimised (Adam);
    the loss is the mean softmax cross-entropy between the flattened score
    map and the labels produced by ``prepare_label``.  Every
    ``args.save_pred_every`` steps an image summary and a checkpoint are
    written to ``args.snapshot_dir``.
    """
    args = get_arguments()

    h, w = (int(side) for side in args.input_size.split(','))
    input_size = (h, w)

    tf.set_random_seed(args.random_seed)

    # Coordinator shared by the input queue threads.
    coord = tf.train.Coordinator()

    # Input pipeline.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_dir,
            args.data_list,
            input_size,
            args.random_scale,
            args.random_mirror,
            args.ignore_label,
            IMG_MEAN,
            coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    # Network.
    net = DeepLabResNetModel({'data': image_batch},
                             is_training=args.is_training,
                             num_classes=args.num_classes)
    # For a small batch size, it is better to keep
    # the statistics of the BN layers (running means and variances)
    # frozen, and to not update the values provided by the pre-trained model.
    # If is_training=True, the statistics will be updated during the training.
    # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
    # if they are presented in var_list of the optimiser definition.

    # Score map of the final layer.
    raw_output = net.layers['fc1_voc12']

    # Restore every global variable (running means and variances are not
    # trainable), or everything except the 'fc' layers when requested.
    restore_var = [var for var in tf.global_variables()
                   if not ('fc' in var.name and args.not_restore_last)]
    # Fine-tune only the last layers.
    trainable = [var for var in tf.trainable_variables() if 'fc1_voc12' in var.name]

    # Flatten scores and labels to [num_pixels, num_classes].
    flat_shape = [-1, args.num_classes]
    prediction = tf.reshape(raw_output, flat_shape)
    label_proc = prepare_label(label_batch,
                               tf.stack(raw_output.get_shape()[1:3]),
                               num_classes=args.num_classes)
    gt = tf.reshape(label_proc, flat_shape)

    # Pixel-wise softmax loss.
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
    reduced_loss = tf.reduce_mean(loss)

    # Full-resolution class map, used for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # Image summary: input, ground truth and prediction side by side.
    images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8)
    preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8)
    summary_panels = tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary])
    total_summary = tf.summary.image('images', summary_panels,
                                     max_outputs=args.save_num_images)
    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())

    # Optimiser restricted to the fine-tuned variables.
    optimiser = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    optim = optimiser.minimize(reduced_loss, var_list=trainable)

    # Session set-up and variable initialisation.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # Saver used for writing checkpoints.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=40)

    # Optionally load pre-trained weights.
    if args.restore_from is not None:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, args.restore_from)

    # Start the input queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    for step in range(args.num_steps):
        start_time = time.time()

        if step % args.save_pred_every != 0:
            loss_value, _ = sess.run([reduced_loss, optim])
        else:
            # Periodic step: also fetch the summary and write a checkpoint.
            fetched = sess.run([reduced_loss, image_batch, label_batch, pred, total_summary, optim])
            loss_value, summary = fetched[0], fetched[4]
            summary_writer.add_summary(summary, step)
            save(saver, sess, args.snapshot_dir, step)

        duration = time.time() - start_time
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))

    coord.request_stop()
    coord.join(threads)
def main():
    """Create the model and start the training.

    All trainable variables are optimised with Adam on the mean softmax
    cross-entropy between the flattened score map and the labels produced
    by ``prepare_label``.  Every ``args.save_pred_every`` steps a figure
    with ``args.save_num_images`` rows (input, ground-truth mask,
    prediction) is written to ``args.save_dir`` and a checkpoint is written
    to ``args.snapshot_dir``.
    """
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(args.data_dir,
                             args.data_list,
                             input_size,
                             RANDOM_SCALE,
                             coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    # Create network.
    net = DeepLabResNetModel({'data': image_batch})

    # Predictions, flattened to [num_pixels, n_classes].
    raw_output = net.layers['fc1_voc12']
    prediction = tf.reshape(raw_output, [-1, n_classes])
    label_proc = prepare_label(label_batch, tf.pack(raw_output.get_shape()[1:3]))
    gt = tf.reshape(label_proc, [-1, n_classes])

    # Pixel-wise softmax loss.
    loss = tf.nn.softmax_cross_entropy_with_logits(prediction, gt)
    reduced_loss = tf.reduce_mean(loss)

    # Processed predictions: class map at the input resolution.
    raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # Define loss and optimisation parameters.
    optimiser = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    trainable = tf.trainable_variables()
    optim = optimiser.minimize(reduced_loss, var_list=trainable)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.initialize_all_variables()

    sess.run(init)

    # Saver for storing checkpoints of the model.
    # NOTE(review): only trainable variables are saved and restored here, so
    # any non-trainable variables the model defines are not checkpointed.
    saver = tf.train.Saver(var_list=trainable, max_to_keep=40)
    if args.restore_from is not None:
        load(saver, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()

        if step % args.save_pred_every == 0:
            loss_value, images, labels, preds, _ = sess.run(
                [reduced_loss, image_batch, label_batch, pred, optim])
            fig, axes = plt.subplots(args.save_num_images, 3, figsize=(16, 12))
            # `range` (not the Python-2-only `xrange`) keeps this runnable on
            # both Python 2 and Python 3.
            for i in range(args.save_num_images):
                axes.flat[i * 3].set_title('data')
                # Undo the mean subtraction and reverse the channel order for display.
                axes.flat[i * 3].imshow(
                    (images[i] + IMG_MEAN)[:, :, ::-1].astype(np.uint8))

                axes.flat[i * 3 + 1].set_title('mask')
                axes.flat[i * 3 + 1].imshow(decode_labels(labels[i, :, :, 0]))

                axes.flat[i * 3 + 2].set_title('pred')
                axes.flat[i * 3 + 2].imshow(decode_labels(preds[i, :, :, 0]))
            # Join path components so the figure lands inside save_dir even
            # when the directory is given without a trailing separator.
            plt.savefig(os.path.join(args.save_dir, str(start_time) + ".png"))
            plt.close(fig)
            save(saver, sess, args.snapshot_dir, step)
        else:
            loss_value, _ = sess.run([reduced_loss, optim])
        duration = time.time() - start_time
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(
            step, loss_value, duration))
    coord.request_stop()
    coord.join(threads)
def main():
    """Create the model, fine-tune its last layer and track validation mIoU.

    Only variables whose name contains 'fc1_voc12' are optimised (Adam).
    Pixels whose label is not a valid class id are excluded from the loss;
    when ``args.class_weights`` is given, each remaining pixel's loss is
    weighted by the weight of its ground-truth class.

    When ``args.val_list`` is given, a second, weight-sharing copy of the
    network is fed from a separate validation reader, and every
    ``args.save_pred_every`` steps the streaming mean IoU is updated over
    ``args.val_size / args.batch_size`` validation batches, printed, and
    passed to ``save`` together with the checkpoint.
    """
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    tf.set_random_seed(args.random_seed)

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_dir,
            args.data_list,
            input_size,
            args.random_scale,
            args.random_mirror,
            args.ignore_label,
            IMG_MEAN,
            coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    # Create network.
    net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training, num_classes=args.num_classes)
    # For a small batch size, it is better to keep
    # the statistics of the BN layers (running means and variances)
    # frozen, and to not update the values provided by the pre-trained model.
    # If is_training=True, the statistics will be updated during the training.
    # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
    # if they are presented in var_list of the optimiser definition.

    # Predictions.
    raw_output = net.layers['fc1_voc12']
    # Which variables to load. Running means and variances are not trainable,
    # thus all_variables() should be restored.
    # Restore all variables, or all except the last ones.
    restore_var = [v for v in tf.global_variables() if 'fc' not in v.name or not args.not_restore_last]
    trainable = [v for v in tf.trainable_variables() if 'fc1_voc12' in v.name]  # Fine-tune only the last layers.

    # Flatten predictions and labels, then drop every pixel whose label is
    # not a valid class id (i.e. label >= num_classes).
    raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
    label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False)  # [batch_size, h, w]
    raw_gt = tf.reshape(label_proc, [-1,])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    prediction = tf.gather(raw_prediction, indices)

    # Pixel-wise softmax loss.
    if not args.class_weights:
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
    else:
        # Weight each pixel's loss by the weight of its ground-truth class.
        # The lookup uses the already filtered labels so that ignored labels
        # never index past the end of the class-weight table.
        pixel_weights = tf.gather(args.class_weights, gt)
        loss = tf.losses.sparse_softmax_cross_entropy(logits=prediction, labels=gt, weights=pixel_weights)
    reduced_loss = tf.reduce_mean(loss)

    # Processed predictions.
    raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # Image summary.
    images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8)
    preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8)

    total_summary = tf.summary.image('images',
                                     tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]),
                                     max_outputs=args.save_num_images)  # Concatenate row-wise.
    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())

    # Define loss and optimisation parameters.
    optimiser = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    optim = optimiser.minimize(reduced_loss, var_list=trainable)

    # Prep val data
    if args.val_list:
        val_steps = int(args.val_size / args.batch_size)
        with tf.name_scope("get_val"):
            # No random scaling or mirroring for validation.
            reader_val = ImageReader(
                args.data_dir,
                args.val_list,
                input_size,
                False,
                False,
                args.ignore_label,
                IMG_MEAN,
                coord)
            # Validation batches must come from the validation reader.
            val_image_batch, val_label_batch = reader_val.dequeue(args.batch_size)

        # Score the validation images themselves with a copy of the network
        # that reuses the training weights, so that predictions and labels
        # refer to the same images.  is_training=False keeps the BN
        # statistics untouched during evaluation.
        with tf.variable_scope('', reuse=True):
            val_net = DeepLabResNetModel({'data': val_image_batch}, is_training=False, num_classes=args.num_classes)

        # Val predictions.
        val_raw_output = val_net.layers['fc1_voc12']
        val_raw_output = tf.image.resize_bilinear(val_raw_output, tf.shape(val_image_batch)[1:3,])
        val_raw_output = tf.argmax(val_raw_output, dimension=3)
        val_pred = tf.expand_dims(val_raw_output, dim=3)  # Create 4-d tensor.

        # mIoU
        val_pred = tf.reshape(val_pred, [-1,])
        val_gt = tf.reshape(val_label_batch, [-1,])
        val_weights = tf.cast(tf.less_equal(val_gt, args.num_classes - 1), tf.int32)  # Ignoring all labels greater than or equal to n_classes.
        # NOTE(review): the streaming metric is never reset, so the reported
        # value accumulates over all validation rounds of the run.
        mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(val_pred, val_gt, num_classes=args.num_classes, weights=val_weights)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)
    if args.val_list:
        # The streaming metric keeps its state in local variables.
        sess.run(tf.local_variables_initializer())

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=20)

    # Load variables if the checkpoint is provided.
    if args.restore_from is not None:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()

        if step % args.save_pred_every == 0:
            loss_value, images, labels, preds, summary, _ = sess.run([reduced_loss, image_batch, label_batch, pred, total_summary, optim])
            summary_writer.add_summary(summary, step)

            # Print val jaccard loss
            if args.val_list:
                for vstep in range(val_steps):
                    sess.run(update_op)
                viou = mIoU.eval(session=sess)
                print('Mean IoU: {:.6f}'.format(viou))
                save(saver, sess, args.snapshot_dir, step, val_iou=viou)
            else:
                save(saver, sess, args.snapshot_dir, step)
        else:
            loss_value, _ = sess.run([reduced_loss, optim])
        duration = time.time() - start_time
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))
    coord.request_stop()
    coord.join(threads)
def main():
    """Create the model and fine-tune its last layer, resuming if possible.

    The visible GPU is selected from ``args.GPU``.  Only variables whose
    name contains 'fc1_voc12' are optimised (Adam), and a ``global_step``
    variable counts the optimiser updates.  If ``args.snapshot_dir`` already
    holds a checkpoint it is restored; otherwise weights are loaded from
    ``args.restore_from`` when given.  Loss and image summaries are written
    every 100 global steps, checkpoints every ``args.save_pred_every``
    global steps, and once more after the last step.
    """
    args = get_arguments()

    # setup used GPU
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.GPU

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    tf.set_random_seed(args.random_seed)

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(args.data_dir, args.data_list, input_size,
                             args.random_scale, args.random_mirror,
                             args.ignore_label, IMG_MEAN, coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    # Create network.
    net = DeepLabResNetModel({'data': image_batch},
                             is_training=args.is_training,
                             num_classes=args.num_classes)
    # For a small batch size, it is better to keep
    # the statistics of the BN layers (running means and variances)
    # frozen, and to not update the values provided by the pre-trained model.
    # If is_training=True, the statistics will be updated during the training.
    # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
    # if they are presented in var_list of the optimiser definition.

    # Predictions.
    raw_output = net.layers['fc1_voc12']
    # Which variables to load. Running means and variances are not trainable,
    # thus all_variables() should be restored.
    # Restore all variables, or all except the last ones.
    restore_var = [
        v for v in tf.global_variables()
        if 'fc' not in v.name or not args.not_restore_last
    ]
    trainable = [v for v in tf.trainable_variables()
                 if 'fc1_voc12' in v.name]  # Fine-tune only the last layers.

    # Flatten scores and labels to [num_pixels, num_classes].
    prediction = tf.reshape(raw_output, [-1, args.num_classes])
    label_proc = prepare_label(label_batch,
                               tf.stack(raw_output.get_shape()[1:3]),
                               num_classes=args.num_classes)
    gt = tf.reshape(label_proc, [-1, args.num_classes])

    # Pixel-wise softmax loss.
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
    reduced_loss = tf.reduce_mean(loss)

    # Processed predictions.
    raw_output_up = tf.image.resize_bilinear(raw_output,
                                             tf.shape(image_batch)[1:3, ])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # Define loss and optimisation parameters.
    # global_step is incremented by every optimiser update and is stored in
    # checkpoints, so the step count continues after a resume.
    global_step = tf.Variable(0, dtype=tf.int32, trainable=False,
                              name='global_step')
    optimiser = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    optim = optimiser.minimize(reduced_loss, var_list=trainable,
                               global_step=global_step)

    # Image summary.
    images_summary = tf.py_func(inv_preprocess,
                                [image_batch, args.save_num_images, IMG_MEAN],
                                tf.uint8)
    labels_summary = tf.py_func(
        decode_labels,
        [label_batch, args.save_num_images, args.num_classes], tf.uint8)
    preds_summary = tf.py_func(decode_labels,
                               [pred, args.save_num_images, args.num_classes],
                               tf.uint8)

    image_summary = tf.summary.image(
        'images',
        tf.concat(axis=2,
                  values=[images_summary, labels_summary, preds_summary]),
        max_outputs=args.save_num_images)  # Concatenate row-wise.
    loss_summary = tf.summary.scalar("loss", reduced_loss)
    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())
    print("Setting up summary op...")
    total_summary = tf.summary.merge_all()

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=40)

    # Prefer resuming from the latest checkpoint in snapshot_dir; fall back
    # to the explicitly provided weights.
    ckpt = tf.train.get_checkpoint_state(args.snapshot_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
    elif args.restore_from is not None:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Initial status
    loss_value, summary, itr = sess.run(
        [reduced_loss, total_summary, global_step])
    print('step {:d} \t loss = {:.3f}'.format(itr, loss_value))
    summary_writer.add_summary(summary, itr)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()

        _, itr = sess.run([optim, global_step])

        # save summary file
        if itr % 100 == 0:
            duration = time.time() - start_time
            # The reported loss is evaluated on a freshly dequeued batch.
            loss_value, summary = sess.run([reduced_loss, total_summary])
            summary_writer.add_summary(summary, itr)
            print('step {:d} \t loss = {:.3f} ({:.3f} sec/step)'.format(
                itr, loss_value, duration))

        # save checkpoint
        if itr % args.save_pred_every == 0:
            save(saver, sess, args.snapshot_dir, itr)

    # final status
    loss_value, summary, itr = sess.run(
        [reduced_loss, total_summary, global_step])
    print('step {:d} \t loss = {:.3f}'.format(itr, loss_value))
    # Pass the evaluated integer step, as the in-loop checkpoint call does,
    # rather than the global_step tensor.
    save(saver, sess, args.snapshot_dir, itr)
    summary_writer.add_summary(summary, itr)

    coord.request_stop()
    coord.join(threads)
def main():
    """Build the DeepLab-ResNet model and run the training loop.

    Reads every setting from ``get_arguments()``, builds the input pipeline
    and the network, defines a softmax cross-entropy loss plus L2 weight
    decay, and trains with three momentum optimisers (base learning rate for
    the convolutional layers, x10 for the final-layer weights, x20 for the
    final-layer biases). The learning rate follows a polynomial decay driven
    by the current step. Every ``args.save_pred_every`` steps an image
    summary is written and a checkpoint is stored in ``args.snapshot_dir``.
    """
    # Parse the training arguments.
    args = get_arguments()

    # Parse the input size, given as "height,width".
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    # Seed the graph-level random generator.
    tf.set_random_seed(args.random_seed)

    # Create the queue coordinator.
    coord = tf.train.Coordinator()

    # Load the reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_dir,
            args.data_list,
            input_size,
            args.random_scale,
            args.random_mirror,
            args.ignore_label,
            IMG_MEAN,
            coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    # Build the DeepLab-ResNet network.
    net = DeepLabResNetModel({'data': image_batch},
                             is_training=args.is_training,
                             num_classes=args.num_classes)

    # Raw network output.
    raw_output = net.layers['fc1_voc12']

    # Group the variables: which ones to restore, and which ones to train
    # with which learning-rate multiplier.
    restore_var = [v for v in tf.global_variables()
                   if 'fc' not in v.name or not args.not_restore_last]
    all_trainable = [v for v in tf.trainable_variables()
                     if 'beta' not in v.name and 'gamma' not in v.name]
    fc_trainable = [v for v in all_trainable if 'fc' in v.name]
    conv_trainable = [v for v in all_trainable if 'fc' not in v.name]  # lr * 1.0
    fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name]  # lr * 10.0
    fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name]  # lr * 20.0
    assert(len(all_trainable) == len(fc_trainable) + len(conv_trainable))
    assert(len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))

    # Keep only the pixels whose ground-truth label is a valid class
    # (label <= num_classes - 1); everything else is ignored by the loss.
    raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
    label_proc = prepare_label(label_batch,
                               tf.stack(raw_output.get_shape()[1:3]),
                               num_classes=args.num_classes,
                               one_hot=False)  # [batch_size, h, w]
    raw_gt = tf.reshape(label_proc, [-1,])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    prediction = tf.gather(raw_prediction, indices)

    # Pixel-wise loss: softmax cross-entropy + L2 weight decay.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
    l2_losses = [args.weight_decay * tf.nn.l2_loss(v)
                 for v in tf.trainable_variables() if 'weights' in v.name]
    reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

    # Processed predictions, used for visualisation only.
    raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # Image summary: input, ground truth and prediction side by side.
    images_summary = tf.py_func(
        inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(
        decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8)
    preds_summary = tf.py_func(
        decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8)
    total_summary = tf.summary.image(
        'images',
        tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]),
        max_outputs=args.save_num_images)  # Concatenate row-wise.
    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())

    # Learning rate with polynomial decay:
    # base_lr * (1 - step / num_steps) ** power.
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(
        base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))

    # One optimiser per variable group so each gets its own multiplier.
    opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
    opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum)
    opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum)

    # Compute all gradients in one pass, then split them per group.
    grads = tf.gradients(reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable)
    grads_conv = grads[:len(conv_trainable)]
    grads_fc_w = grads[len(conv_trainable):(len(conv_trainable) + len(fc_w_trainable))]
    grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]

    # Gradient-application ops, grouped into a single training op.
    train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
    train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
    train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))
    train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)

    # Set up the tf session.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # Initialise all variables.
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

    # Load variables if a checkpoint is provided.
    if args.restore_from is not None:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    try:
        # Iterate over training steps.
        for step in range(args.num_steps):
            start_time = time.time()
            feed_dict = {step_ph: step}

            if step % args.save_pred_every == 0:
                # Train, and also record the image summary and a checkpoint.
                # Only the values that are actually used are fetched.
                loss_value, summary, _ = sess.run(
                    [reduced_loss, total_summary, train_op],
                    feed_dict=feed_dict)
                summary_writer.add_summary(summary, step)
                save(saver, sess, args.snapshot_dir, step)
            else:
                # Train only.
                loss_value, _ = sess.run([reduced_loss, train_op],
                                         feed_dict=feed_dict)
            duration = time.time() - start_time
            print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))
    finally:
        # Flush pending summaries and stop the input threads even when a
        # training step raises.
        summary_writer.close()
        coord.request_stop()
        coord.join(threads)
def main():
    """Create the model and start the evaluation process.

    Builds the input pipeline and a DeepLab-ResNet graph in inference mode
    (``is_training=False``), restores weights from ``args.restore_from`` when
    it is given, accumulates a streaming mean IoU over ``args.num_steps``
    batches and prints the final value.
    """
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_dir,
            args.data_list,
            input_size,
            False,  # No random scale.
            False,  # No random mirror.
            args.ignore_label,
            IMG_MEAN,
            coord,
            ADV_FLAG,
            MASK_FLAG,
            args.eps,
            args.attack,
            args.targeted)
        image_batch, label_batch = reader.dequeue(args.batch_size)
    image_name_list = reader.image_list
    print(image_name_list[0])

    # Create network.
    net = DeepLabResNetModel({'data': image_batch},
                             is_training=False,
                             num_classes=args.num_classes)

    # Which variables to load.
    restore_var = tf.global_variables()

    # Predictions.
    raw_output = net.layers['fc1_voc12']

    # NOTE(review): the tensors from raw_prediction to prediction below are
    # not consumed by the mIoU computation or by any sess.run call in this
    # function. They are kept in case prepare_label has side effects on the
    # graph -- confirm and remove.
    raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
    label_proc = prepare_label(label_batch,
                               tf.stack(raw_output.get_shape()[1:3]),
                               num_classes=args.num_classes,
                               one_hot=False)  # [batch_size, h, w]
    raw_gt = tf.reshape(label_proc, [-1, ])
    indices_0 = tf.squeeze(
        tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1)
    gt_0 = tf.cast(tf.gather(raw_gt, indices_0), tf.int32)
    prediction = tf.gather(raw_prediction, indices_0)

    # Upsample the logits to the input resolution and take the arg-max class.
    raw_output = tf.image.resize_bilinear(raw_output,
                                          tf.shape(image_batch)[1:3, ])
    raw_output = tf.argmax(raw_output, dimension=3)
    pred = tf.expand_dims(raw_output, dim=3)  # Create 4-d tensor.

    # mIoU over the pixels whose label is a valid class.
    pred = tf.reshape(pred, [-1, ])
    gt = tf.reshape(label_batch, [-1, ])
    # Ignore all labels >= num_classes.
    indices = tf.squeeze(tf.where(tf.less_equal(gt, args.num_classes - 1)), 1)
    gt = tf.cast(tf.gather(gt, indices), tf.int32)
    pred = tf.gather(pred, indices)
    mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(
        pred, gt, num_classes=args.num_classes)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)
    # The streaming metric keeps its accumulators in local variables.
    sess.run(tf.local_variables_initializer())

    # Load weights.
    loader = tf.train.Saver(var_list=restore_var)
    if args.restore_from is not None:
        load(loader, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    try:
        # Iterate over evaluation steps. Only the metric update is run: the
        # per-batch predictions, images and labels were fetched but never
        # used, so they are no longer copied out of the session.
        for step in range(args.num_steps):
            sess.run(update_op)
            if step % 100 == 0:
                print('step {:d}'.format(step))
        print('Mean IoU: {:.3f}'.format(mIoU.eval(session=sess)))
    finally:
        # Stop the input threads even when an evaluation step raises.
        coord.request_stop()
        coord.join(threads)
def main():
    """Create the model and start the training.

    Reads every setting from ``get_arguments()``, restricts TensorFlow to the
    GPU(s) listed in ``args.GPU``, builds the input pipeline and a
    DeepLab-ResNet graph on an upscaled copy of the image batch, and trains
    it with three momentum optimisers (base learning rate for the
    convolutional layers, x10 for the final-layer weights, x20 for the
    final-layer biases).

    Weights are restored, in order of preference, from the latest checkpoint
    in ``args.snapshot_dir``, from ``args.snapshot_dir`` itself when it is a
    file, or from the pre-trained model ``args.restore_from``. Scalar and
    image summaries are written every 100 global steps and a checkpoint every
    ``args.save_pred_every`` global steps, plus one at the end.
    """
    args = get_arguments()

    # Restrict TensorFlow to the GPU(s) listed in args.GPU.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.GPU

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    tf.set_random_seed(args.random_seed)

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(args.data_dir, args.data_list, input_size,
                             args.random_scale, args.random_mirror,
                             args.ignore_label, IMG_MEAN, coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)
        image_batch_up = tf.image.resize_bilinear(
            image_batch, [h * args.up_scale, w * args.up_scale])

    # Create network.
    net = DeepLabResNetModel({'data': image_batch_up},
                             is_training=args.is_training,
                             num_classes=args.num_classes)
    # For a small batch size, it is better to keep
    # the statistics of the BN layers (running means and variances)
    # frozen, and to not update the values provided by the pre-trained model.
    # If is_training=True, the statistics will be updated during the training.
    # Note that is_training=False still updates BN parameters gamma (scale)
    # and beta (offset) if they are presented in var_list of the optimiser
    # definition.

    # Predictions.
    raw_output = net.layers['fc1_voc12']
    # Which variables to load. Running means and variances are not trainable,
    # thus all_variables() should be restored.
    restore_var = [
        v for v in tf.global_variables()
        if 'fc' not in v.name or not args.not_restore_last
    ]
    all_trainable = [
        v for v in tf.trainable_variables()
        if 'beta' not in v.name and 'gamma' not in v.name
    ]
    fc_trainable = [v for v in all_trainable if 'fc' in v.name]
    conv_trainable = [v for v in all_trainable if 'fc' not in v.name]  # lr * 1.0
    fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name]  # lr * 10.0
    fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name]  # lr * 20.0
    assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable))
    assert (len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))

    # Predictions: ignoring all predictions with labels greater or equal
    # than n_classes.
    raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
    label_proc = prepare_label(label_batch,
                               tf.stack(raw_output.get_shape()[1:3]),
                               num_classes=args.num_classes,
                               one_hot=False)  # [batch_size, h, w]
    raw_gt = tf.reshape(label_proc, [-1, ])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    prediction = tf.gather(raw_prediction, indices)

    # Pixel-wise softmax loss.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction,
                                                          labels=gt)
    l2_losses = [
        args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'weights' in v.name
    ]
    # Build the two loss terms once and reuse them for the total loss, the
    # summaries and the status reports. Calling tf.reduce_mean(loss) inside
    # the training loop would add a new op to the graph on every report.
    entropy_loss_op = tf.reduce_mean(loss)
    l2_loss_op = tf.add_n(l2_losses)
    reduced_loss = entropy_loss_op + l2_loss_op

    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output,
                                             tf.shape(image_batch)[1:3, ])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # Define loss and optimisation parameters.
    base_lr = tf.constant(args.learning_rate)
    # step_ph is still fed on every training step, but the polynomial decay
    # base_lr * (1 - step / num_steps) ** power is currently disabled and the
    # learning rate is constant.
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = base_lr
    global_step = tf.Variable(0,
                              dtype=tf.int32,
                              trainable=False,
                              name='global_step')

    opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
    opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum)
    opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum)

    grads = tf.gradients(reduced_loss,
                         conv_trainable + fc_w_trainable + fc_b_trainable)
    grads_conv = grads[:len(conv_trainable)]
    grads_fc_w = grads[len(conv_trainable):(len(conv_trainable) +
                                            len(fc_w_trainable))]
    grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]

    # Only one of the three apply ops increments global_step, so it advances
    # by one per training step.
    train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable),
                                             global_step=global_step)
    train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
    train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))

    train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)

    # Image summary.
    images_summary = tf.py_func(inv_preprocess,
                                [image_batch, args.save_num_images, IMG_MEAN],
                                tf.uint8)
    labels_summary = tf.py_func(
        decode_labels, [label_batch, args.save_num_images, args.num_classes],
        tf.uint8)
    preds_summary = tf.py_func(decode_labels,
                               [pred, args.save_num_images, args.num_classes],
                               tf.uint8)
    image_summary = tf.summary.image(
        'images',
        tf.concat(axis=2,
                  values=[images_summary, labels_summary, preds_summary]),
        max_outputs=args.save_num_images)  # Concatenate row-wise.
    loss_summary = tf.summary.scalar("loss", reduced_loss)
    entropy_summary = tf.summary.scalar("entropy", entropy_loss_op)
    l2_loss_summary = tf.summary.scalar("L2_loss", l2_loss_op)
    learning_rate_summary = tf.summary.scalar(
        "learning_rate", learning_rate)  # summary recording learning rate
    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())

    print("Setting up summary op...")
    total_summary = tf.summary.merge_all()

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=2)

    # Load weights from a saved checkpoint or the initial pre-trained model.
    # snapshot_dir may exist without holding a checkpoint (e.g. a fresh run
    # directory), so the pre-trained model must remain reachable as a
    # fallback in that case.
    checkpoint_path = None
    if os.path.isdir(args.snapshot_dir):
        # Search for the latest checkpoint at the given path.
        ckpt = tf.train.get_checkpoint_state(args.snapshot_dir)
        if ckpt and ckpt.model_checkpoint_path:
            checkpoint_path = ckpt.model_checkpoint_path
    elif os.path.isfile(args.snapshot_dir):
        # snapshot_dir points directly at a checkpoint file.
        checkpoint_path = args.snapshot_dir

    if checkpoint_path is not None:
        load(saver, sess, checkpoint_path)
    elif args.restore_from is not None:
        loader = tf.train.Saver(
            var_list=restore_var)  # loader for part of pre-trained model
        load(loader, sess, args.restore_from)
    else:
        print("No model found at{}".format(args.restore_from))

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    status_fetches = [reduced_loss, entropy_loss_op, total_summary, global_step]

    try:
        # Initial status
        loss_value, entropy_loss, summary, itr = sess.run(status_fetches)
        print('step {:d} \t loss = {:.3f}, entropy_loss = {:.3f})'.format(
            itr, loss_value, entropy_loss))
        summary_writer.add_summary(summary, itr)

        # Iterate over training steps.
        for step in range(args.num_steps):
            start_time = time.time()
            feed_dict = {step_ph: step}

            _, itr = sess.run([train_op, global_step], feed_dict=feed_dict)

            # save summary file
            if itr % 100 == 0:
                duration = time.time() - start_time
                loss_value, entropy_loss, summary, itr = sess.run(
                    status_fetches)
                summary_writer.add_summary(summary, itr)
                print(
                    'step {:d} \t loss = {:.3f}, entropy_loss = {:.3f}, ({:.3f} sec/step)'
                    .format(itr, loss_value, entropy_loss, duration))

            # save checkpoint
            if itr % args.save_pred_every == 0:
                save(saver, sess, args.snapshot_dir, global_step)

        # final status
        loss_value, entropy_loss, summary, itr = sess.run(status_fetches)
        print('step {:d} \t loss = {:.3f}, entropy_loss = {:.3f}'.format(
            itr, loss_value, entropy_loss))
        save(saver, sess, args.snapshot_dir, global_step)
        summary_writer.add_summary(summary, itr)
    finally:
        # Flush pending summaries (including the final status) and stop the
        # input threads even when a training step raises.
        summary_writer.close()
        coord.request_stop()
        coord.join(threads)
def main():
    """Create the model and start the training.

    Reads every setting from ``get_arguments()``, builds the input pipeline
    and a DeepLab-ResNet graph, and fine-tunes only the variables whose name
    contains ``fc1_voc12`` with Adam on a pixel-wise softmax cross-entropy
    loss. Every ``args.save_pred_every`` steps an image summary is written
    and a checkpoint is stored in ``args.snapshot_dir``.
    """
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    tf.set_random_seed(args.random_seed)

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_dir,
            args.data_list,
            input_size,
            args.random_scale,
            args.random_mirror,
            coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    # Create network.
    net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training)
    # For a small batch size, it is better to keep
    # the statistics of the BN layers (running means and variances)
    # frozen, and to not update the values provided by the pre-trained model.
    # If is_training=True, the statistics will be updated during the training.
    # Note that is_training=False still updates BN parameters gamma (scale)
    # and beta (offset) if they are presented in var_list of the optimiser
    # definition.

    # Predictions.
    raw_output = net.layers['fc1_voc12']
    # Which variables to load. Running means and variances are not trainable,
    # thus all_variables() should be restored. This list is taken before the
    # optimiser is created, so it holds the model variables only.
    restore_var = tf.global_variables()
    # Fine-tune only the last layers.
    trainable = [v for v in tf.trainable_variables() if 'fc1_voc12' in v.name]

    prediction = tf.reshape(raw_output, [-1, n_classes])
    label_proc = prepare_label(
        label_batch, tf.stack(raw_output.get_shape()[1:3]))
    gt = tf.reshape(label_proc, [-1, n_classes])

    # Pixel-wise softmax loss. The arguments are passed by keyword:
    # TensorFlow 1.x rejects positional logits/labels for this function.
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
    reduced_loss = tf.reduce_mean(loss)

    # Processed predictions.
    raw_output_up = tf.image.resize_bilinear(
        raw_output, tf.shape(image_batch)[1:3, ])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # Image summary.
    images_summary = tf.py_func(
        inv_preprocess, [image_batch, args.save_num_images], tf.uint8)
    labels_summary = tf.py_func(
        decode_labels, [label_batch, args.save_num_images], tf.uint8)
    preds_summary = tf.py_func(
        decode_labels, [pred, args.save_num_images], tf.uint8)

    total_summary = tf.summary.image(
        'images',
        tf.concat([images_summary, labels_summary, preds_summary], axis=2),
        max_outputs=args.save_num_images)  # Concatenate row-wise.
    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())

    # Define loss and optimisation parameters.
    optimiser = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    optim = optimiser.minimize(reduced_loss, var_list=trainable)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=restore_var, max_to_keep=40)

    # Load variables if the checkpoint is provided.
    if args.restore_from is not None:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    try:
        # Iterate over training steps.
        for step in range(args.num_steps):
            start_time = time.time()

            if step % args.save_pred_every == 0:
                # Train, and also record the image summary and a checkpoint.
                # Only the values that are actually used are fetched.
                loss_value, summary, _ = sess.run(
                    [reduced_loss, total_summary, optim])
                summary_writer.add_summary(summary, step)
                save(saver, sess, args.snapshot_dir, step)
            else:
                loss_value, _ = sess.run([reduced_loss, optim])
            duration = time.time() - start_time
            print(
                'step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))
    finally:
        # Flush pending summaries and stop the input threads even when a
        # training step raises.
        summary_writer.close()
        coord.request_stop()
        coord.join(threads)