def main():
    """Build the model and convert the loaded .npy weights into a .ckpt checkpoint."""
    args = get_arguments()

    # Default image.
    image_batch = tf.constant(0, tf.float32, shape=[1, 321, 321, 3])

    # Create network.
    net = DeepLabResNetModel({'data': image_batch})
    var_list = tf.global_variables()

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        init = tf.global_variables_initializer()
        sess.run(init)

        # Load the .npy weights.
        net.load(args.npy_path, sess)

        # Saver for converting the loaded weights into .ckpt.
        saver = tf.train.Saver(var_list=var_list, write_version=1)
        save(saver, sess, args.save_dir)
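# A minimal sketch (not part of the original scripts) showing how the .ckpt
# produced by the conversion above could be restored later. The `ckpt_dir`
# argument is an assumption; tf.train.Saver and tf.train.latest_checkpoint are
# standard TF 1.x API.
def restore_converted_weights(sess, var_list, ckpt_dir):
    loader = tf.train.Saver(var_list=var_list)
    ckpt_path = tf.train.latest_checkpoint(ckpt_dir)
    loader.restore(sess, ckpt_path)
    print('Restored model parameters from {}'.format(ckpt_path))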
def main(): """Create the model and start the training.""" args = get_arguments() h, w = map(int, args.input_size.split(',')) input_size = (h, w) tf.set_random_seed(args.random_seed) # Create queue coordinator. coord = tf.train.Coordinator() # Load reader. with tf.name_scope("create_inputs"): reader = ImageReader(args.data_dir, args.data_list, input_size, args.random_scale, args.random_mirror, args.ignore_label, IMG_MEAN, coord) image_batch, label_batch = reader.dequeue(args.batch_size) image_batch075 = tf.image.resize_images( image_batch, [int(h * 0.75), int(w * 0.75)]) image_batch05 = tf.image.resize_images( image_batch, [int(h * 0.5), int(w * 0.5)]) # Create network. with tf.variable_scope('', reuse=False): net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training, num_classes=args.num_classes) with tf.variable_scope('', reuse=True): net075 = DeepLabResNetModel({'data': image_batch075}, is_training=args.is_training, num_classes=args.num_classes) with tf.variable_scope('', reuse=True): net05 = DeepLabResNetModel({'data': image_batch05}, is_training=args.is_training, num_classes=args.num_classes) # For a small batch size, it is better to keep # the statistics of the BN layers (running means and variances) # frozen, and to not update the values provided by the pre-trained model. # If is_training=True, the statistics will be updated during the training. # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset) # if they are presented in var_list of the optimiser definition. # Predictions. raw_output100 = net.layers['fc1_voc12'] raw_output075 = net075.layers['fc1_voc12'] raw_output05 = net05.layers['fc1_voc12'] raw_output = tf.reduce_max(tf.stack([ raw_output100, tf.image.resize_images(raw_output075, tf.shape(raw_output100)[1:3, ]), tf.image.resize_images(raw_output05, tf.shape(raw_output100)[1:3, ]) ]), axis=0) # Which variables to load. Running means and variances are not trainable, # thus all_variables() should be restored. 
restore_var = [ v for v in tf.global_variables() if 'fc' not in v.name or not args.not_restore_last ] all_trainable = [ v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name ] fc_trainable = [v for v in all_trainable if 'fc' in v.name] conv_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0 fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name] # lr * 10.0 fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name] # lr * 20.0 assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable)) assert (len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable)) # Predictions: ignoring all predictions with labels greater or equal than n_classes raw_prediction = tf.reshape(raw_output, [-1, args.num_classes]) raw_prediction100 = tf.reshape(raw_output100, [-1, args.num_classes]) raw_prediction075 = tf.reshape(raw_output075, [-1, args.num_classes]) raw_prediction05 = tf.reshape(raw_output05, [-1, args.num_classes]) label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False) # [batch_size, h, w] label_proc075 = prepare_label(label_batch, tf.stack(raw_output075.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False) label_proc05 = prepare_label(label_batch, tf.stack(raw_output05.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False) raw_gt = tf.reshape(label_proc, [ -1, ]) raw_gt075 = tf.reshape(label_proc075, [ -1, ]) raw_gt05 = tf.reshape(label_proc05, [ -1, ]) indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1) indices075 = tf.squeeze( tf.where(tf.less_equal(raw_gt075, args.num_classes - 1)), 1) indices05 = tf.squeeze( tf.where(tf.less_equal(raw_gt05, args.num_classes - 1)), 1) gt = tf.cast(tf.gather(raw_gt, indices), tf.int32) gt075 = tf.cast(tf.gather(raw_gt075, indices075), tf.int32) gt05 = tf.cast(tf.gather(raw_gt05, indices05), tf.int32) prediction = tf.gather(raw_prediction, indices) prediction100 = tf.gather(raw_prediction100, indices) prediction075 = tf.gather(raw_prediction075, indices075) prediction05 = tf.gather(raw_prediction05, indices05) # Pixel-wise softmax loss. loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt) loss100 = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=prediction100, labels=gt) loss075 = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=prediction075, labels=gt075) loss05 = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=prediction05, labels=gt05) l2_losses = [ args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name ] reduced_loss = tf.reduce_mean(loss) + tf.reduce_mean( loss100) + tf.reduce_mean(loss075) + tf.reduce_mean(loss05) + tf.add_n( l2_losses) # Processed predictions: for visualisation. raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) raw_output_up = tf.argmax(raw_output_up, dimension=3) pred = tf.expand_dims(raw_output_up, dim=3) # Image summary. images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8) labels_summary = tf.py_func( decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8) preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8) total_summary = tf.summary.image( 'images', tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]), max_outputs=args.save_num_images) # Concatenate row-wise. 
summary_writer = tf.summary.FileWriter(args.snapshot_dir, graph=tf.get_default_graph()) # Define loss and optimisation parameters. base_lr = tf.constant(args.learning_rate) step_ph = tf.placeholder(dtype=tf.float32, shape=()) learning_rate = tf.scalar_mul( base_lr, tf.pow((1 - step_ph / args.num_steps), args.power)) opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum) opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum) opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum) # Define a variable to accumulate gradients. accum_grads = [ tf.Variable(tf.zeros_like(v.initialized_value()), trainable=False) for v in conv_trainable + fc_w_trainable + fc_b_trainable ] # Define an operation to clear the accumulated gradients for next batch. zero_op = [v.assign(tf.zeros_like(v)) for v in accum_grads] # Compute gradients. grads = tf.gradients(reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable) # Accumulate and normalise the gradients. accum_grads_op = [ accum_grads[i].assign_add(grad / args.grad_update_every) for i, grad in enumerate(grads) ] grads_conv = accum_grads[:len(conv_trainable)] grads_fc_w = accum_grads[len(conv_trainable):(len(conv_trainable) + len(fc_w_trainable))] grads_fc_b = accum_grads[(len(conv_trainable) + len(fc_w_trainable)):] # Apply the gradients. train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable)) train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable)) train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable)) train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b) # Set up tf session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() sess.run(init) # Saver for storing checkpoints of the model. saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10) # Load variables if the checkpoint is provided. if args.snapshot_dir is not None: loader = tf.train.Saver(var_list=restore_var) load(loader, sess, args.snapshot_dir) # Start queue threads. threads = tf.train.start_queue_runners(coord=coord, sess=sess) # Iterate over training steps. for step in range(args.num_steps): start_time = time.time() feed_dict = {step_ph: step} loss_value = 0 # Clear the accumulated gradients. sess.run(zero_op, feed_dict=feed_dict) # Accumulate gradients. for i in range(args.grad_update_every): _, l_val = sess.run([accum_grads_op, reduced_loss], feed_dict=feed_dict) loss_value += l_val # Normalise the loss. loss_value /= args.grad_update_every # Apply gradients. if step % args.save_pred_every == 0: images, labels, summary, _ = sess.run( [image_batch, label_batch, total_summary, train_op], feed_dict=feed_dict) summary_writer.add_summary(summary, step) save(saver, sess, args.snapshot_dir, step) else: sess.run(train_op, feed_dict=feed_dict) duration = time.time() - start_time print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format( step, loss_value, duration)) coord.request_stop() coord.join(threads)
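# Isolated, hedged sketch of the gradient-accumulation pattern used in the
# training loop above: zero the buffers, add gradients (normalised by the number
# of sub-batches) several times, then apply them once. `loss`, `train_vars`,
# `opt` and `accum_steps` stand for whatever the surrounding script provides;
# only standard TF 1.x ops are used.
def build_accumulation_ops(loss, train_vars, opt, accum_steps):
    accum = [tf.Variable(tf.zeros_like(v.initialized_value()), trainable=False)
             for v in train_vars]
    zero_op = [a.assign(tf.zeros_like(a)) for a in accum]
    grads = tf.gradients(loss, train_vars)
    accum_op = [a.assign_add(g / accum_steps) for a, g in zip(accum, grads)]
    apply_op = opt.apply_gradients(zip(accum, train_vars))
    return zero_op, accum_op, apply_op

# Typical driver, mirroring the loop above: run zero_op once, run accum_op on
# `accum_steps` fresh batches, then run apply_op.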
def main():
    """Create the model and start the training."""
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    tf.set_random_seed(args.random_seed)

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(args.data_dir, args.data_list, input_size,
                             args.random_scale, args.random_mirror, coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    # Create network.
    net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training)
    # For a small batch size, it is better to keep
    # the statistics of the BN layers (running means and variances)
    # frozen, and to not update the values provided by the pre-trained model.
    # If is_training=True, the statistics will be updated during the training.
    # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
    # if they are present in var_list of the optimiser definition.

    # Predictions.
    raw_output = net.layers['fc1_voc12']
    # Which variables to load. Running means and variances are not trainable,
    # thus all_variables() should be restored.
    restore_var = tf.global_variables()
    all_trainable = [
        v for v in tf.trainable_variables()
        if 'beta' not in v.name and 'gamma' not in v.name
    ]
    fc_trainable = [v for v in all_trainable if 'fc' in v.name]
    conv_trainable = [v for v in all_trainable if 'fc' not in v.name]  # lr * 1.0
    fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name]  # lr * 10.0
    fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name]   # lr * 20.0
    assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable))
    assert (len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))

    # Restore everything but the last layer and the modified conv1.
    vars_restore_gist = [
        v for v in tf.global_variables()
        if 'fc' not in v.name and 'mod_conv1' not in v.name
    ]

    # Predictions: ignoring all predictions with labels greater or equal than n_classes.
    raw_prediction = tf.reshape(raw_output, [-1, n_classes])
    label_proc = prepare_label(label_batch,
                               tf.stack(raw_output.get_shape()[1:3]),
                               one_hot=False)  # [batch_size, h, w]
    raw_gt = tf.reshape(label_proc, [-1, ])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, n_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    prediction = tf.gather(raw_prediction, indices)

    # Pixel-wise softmax loss.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction,
                                                          labels=gt)
    l2_losses = [
        args.weight_decay * tf.nn.l2_loss(v)
        for v in tf.trainable_variables() if 'weights' in v.name
    ]
    reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output,
                                             tf.shape(image_batch)[1:3, ])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # Image summary.
    images_summary = tf.py_func(inv_preprocess,
                                [image_batch[:, :, :, 0:3], args.save_num_images],
                                tf.uint8)
    labels_summary = tf.py_func(decode_labels,
                                [label_batch, args.save_num_images],
                                tf.uint8)
    preds_summary = tf.py_func(decode_labels,
                               [pred, args.save_num_images],
                               tf.uint8)
    total_summary = tf.summary.image(
        'images',
        tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]),
        max_outputs=args.save_num_images)  # Concatenate row-wise.
    # summary_writer = tf.summary.FileWriter(args.snapshot_dir,
    #                                        graph=tf.get_default_graph())

    # Define loss and optimisation parameters.
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(
        base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))
    tf.summary.scalar('learning_rate', learning_rate)

    opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
    opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum)
    opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum)

    grads = tf.gradients(reduced_loss,
                         conv_trainable + fc_w_trainable + fc_b_trainable)
    grads_conv = grads[:len(conv_trainable)]
    grads_fc_w = grads[len(conv_trainable):(len(conv_trainable) +
                                            len(fc_w_trainable))]
    grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]

    train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
    train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
    train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))
    train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    # Log variables.
    summary_writer = tf.summary.FileWriter(args.snapshot_dir, sess.graph)  # MG
    tf.summary.scalar("reduced_loss", reduced_loss)  # MG
    for v in conv_trainable + fc_w_trainable + fc_b_trainable:
        # Add a histogram for every trainable variable.
        tf.summary.histogram(v.name.replace(":", "_"), v)
    merged_summary_op = tf.summary.merge_all()  # MG

    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=restore_var, max_to_keep=100)

    # Modified conv1: restore the RGB channels from the pre-trained model.
    c4_conv1 = [v for v in tf.global_variables()
                if 'mod_conv1/weights' in v.name][0]
    ori_conv1 = tf.get_variable('conv1/weights', shape=[7, 7, 3, 64])
    conv_loader = tf.train.Saver([ori_conv1])
    load(conv_loader, sess, args.restore_from)
    conv1_splits = tf.split(c4_conv1, 4, axis=2)
    assign_op = c4_conv1.assign(tf.concat([ori_conv1] + conv1_splits[3:], axis=2))
    sess.run(assign_op)

    # Load variables if the checkpoint is provided.
    if args.restore_from is not None:
        loader = tf.train.Saver(var_list=vars_restore_gist)
        load(loader, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()
        feed_dict = {step_ph: step}

        if step % args.save_pred_every == 0:
            loss_value, images, labels, preds, summary, _ = sess.run(
                [reduced_loss, image_batch, label_batch, pred,
                 merged_summary_op, train_op],
                feed_dict=feed_dict)
            summary_writer.add_summary(summary, step)
            save(saver, sess, args.snapshot_dir, step)
        else:
            loss_value, _ = sess.run([reduced_loss, train_op],
                                     feed_dict=feed_dict)

        duration = time.time() - start_time
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(
            step, loss_value, duration))

    coord.request_stop()
    coord.join(threads)
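# Standalone numpy sketch (not from the original script) of the conv1
# channel-widening idea applied above: copy a pre-trained [7, 7, 3, 64] RGB
# kernel into the first three input channels of a [7, 7, 4, 64] kernel and keep
# the remaining channel as it was initialised. Names are illustrative only.
import numpy as np

def widen_conv1_kernel(pretrained_rgb, initialised_4ch):
    widened = initialised_4ch.copy()
    widened[:, :, :3, :] = pretrained_rgb  # reuse RGB filters, keep 4th channel
    return widened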
def main(): """Create the model and start the training.""" args = get_arguments() h, w = map(int, args.input_size.split(',')) input_size = (h, w) tf.set_random_seed(args.random_seed) # Create queue coordinator. coord = tf.train.Coordinator() # Load reader. with tf.name_scope("create_inputs"): reader = ImageReader(args.data_dir, args.data_list, input_size, args.random_scale, args.random_mirror, args.ignore_label, IMG_MEAN, coord) image_batch, label_batch = reader.dequeue(args.batch_size) # Create network. net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training, num_classes=args.num_classes) # For a small batch size, it is better to keep # the statistics of the BN layers (running means and variances) # frozen, and to not update the values provided by the pre-trained model. # If is_training=True, the statistics will be updated during the training. # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset) # if they are presented in var_list of the optimiser definition. # Predictions. raw_output = net.layers['fc1_voc12'] # Which variables to load. Running means and variances are not trainable, # thus all_variables() should be restored. restore_var = [ v for v in tf.global_variables() if 'fc' not in v.name or not args.not_restore_last ] all_trainable = [ v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name ] fc_trainable = [v for v in all_trainable if 'fc' in v.name] conv_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0 fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name] # lr * 10.0 fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name] # lr * 20.0 assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable)) assert (len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable)) # Predictions: ignoring all predictions with labels greater or equal than n_classes raw_prediction = tf.reshape(raw_output, [-1, args.num_classes]) label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False) # [batch_size, h, w] raw_gt = tf.reshape(label_proc, [ -1, ]) indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1) gt = tf.cast(tf.gather(raw_gt, indices), tf.int32) prediction = tf.gather(raw_prediction, indices) # Pixel-wise softmax loss. if not args.class_weights: loss = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=prediction, labels=gt) # Multiply logits by appropriate class weight else: raw_weights = tf.gather(args.class_weights, tf.cast(raw_gt, tf.int32)) weights = tf.gather(raw_weights, indices) loss = tf.losses.sparse_softmax_cross_entropy(logits=prediction, labels=gt, weights=weights) l2_losses = [ args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name ] reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses) # Processed predictions: for visualisation. raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) raw_output_up = tf.argmax(raw_output_up, dimension=3) pred = tf.expand_dims(raw_output_up, dim=3) # Image summary. 
images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8) labels_summary = tf.py_func( decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8) preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8) total_summary = tf.summary.image( 'images', tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]), max_outputs=args.save_num_images) # Concatenate row-wise. summary_writer = tf.summary.FileWriter(args.snapshot_dir, graph=tf.get_default_graph()) # Define loss and optimisation parameters. base_lr = tf.constant(args.learning_rate) step_ph = tf.placeholder(dtype=tf.float32, shape=()) learning_rate = tf.scalar_mul( base_lr, tf.pow((1 - step_ph / args.num_steps), args.power)) opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum) opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum) opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum) grads = tf.gradients(reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable) grads_conv = grads[:len(conv_trainable)] grads_fc_w = grads[len(conv_trainable):(len(conv_trainable) + len(fc_w_trainable))] grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):] train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable)) train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable)) train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable)) train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b) # Prep val data if args.val_list: val_steps = int(args.val_size / args.batch_size) with tf.name_scope("get_val"): reader_val = ImageReader(args.data_dir, args.val_list, input_size, False, False, args.ignore_label, IMG_MEAN, coord) val_image_batch, val_label_batch = reader.dequeue(args.batch_size) # Val predictions. val_raw_output = tf.image.resize_bilinear( raw_output, tf.shape(val_image_batch)[1:3, ]) val_raw_output = tf.argmax(val_raw_output, dimension=3) val_pred = tf.expand_dims(val_raw_output, dim=3) # Create 4-d tensor. # mIoU val_pred = tf.reshape(val_pred, [ -1, ]) val_gt = tf.reshape(val_label_batch, [ -1, ]) weights = tf.cast( tf.less_equal(val_gt, args.num_classes - 1), tf.int32 ) # Ignoring all labels greater than or equal to n_classes. mIoU, update_op = tf.contrib.metrics.streaming_mean_iou( val_pred, val_gt, num_classes=args.num_classes, weights=weights) # Set up tf session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() sess.run(init) if args.val_list: sess.run(tf.local_variables_initializer()) # Saver for storing checkpoints of the model. saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=20) # Load variables if the checkpoint is provided. if args.restore_from is not None: loader = tf.train.Saver(var_list=restore_var) load(loader, sess, args.restore_from) # Start queue threads. threads = tf.train.start_queue_runners(coord=coord, sess=sess) # Iterate over training steps. 
for step in range(args.num_steps): start_time = time.time() feed_dict = {step_ph: step} if step % args.save_pred_every == 0: loss_value, images, labels, preds, summary, _ = sess.run( [ reduced_loss, image_batch, label_batch, pred, total_summary, train_op ], feed_dict=feed_dict) summary_writer.add_summary(summary, step) # Print val jaccard loss if args.val_list: for vstep in range(val_steps): val_preds, _ = sess.run([val_pred, update_op]) viou = mIoU.eval(session=sess) print('Mean IoU: {:.6f}'.format(viou)) save(saver, sess, args.snapshot_dir, step, val_iou=viou) else: save(saver, sess, args.snapshot_dir, step) else: loss_value, _ = sess.run([reduced_loss, train_op], feed_dict=feed_dict) duration = time.time() - start_time print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format( step, loss_value, duration)) coord.request_stop() coord.join(threads)
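# All of the training scripts in this section decay the learning rate with the
# "poly" policy built above: lr(step) = base_lr * (1 - step / num_steps) ** power.
# A plain-Python restatement of that schedule (the example values below are
# illustrative, not taken from the scripts):
def poly_learning_rate(base_lr, step, num_steps, power):
    return base_lr * (1.0 - float(step) / num_steps) ** power

# e.g. poly_learning_rate(2.5e-4, 10000, 20000, 0.9) ~= 1.34e-4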
def main(): """Create the model and start the evaluation process.""" args = get_arguments() print(args) os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_mask try: os.makedirs('eval/mhdout') except: pass event_end = Event() queue_proc = Queue() with open(args.data_list, 'r') as f: list_of_all_lines = f.readlines() f.seek(0) dict = {} for line in f: if re.match(".*\\/(.*)\\.mhd.*", line).group(1) not in dict: dict[re.match(".*\\/(.*)\\.mhd.*", line).group(1)] = [] dict[re.match(".*\\/(.*)\\.mhd.*", line).group(1)].append(line) with tf.Graph().as_default(): # Create queue coordinator. coord = tf.train.Coordinator() # Load reader. with tf.name_scope("create_inputs"): reader = ImageReader( args.data_dir, args.data_list, (512, 512), # No defined input size. False, # No random scale. False, # No random mirror. args.ignore_label, IMG_MEAN, coord, shuffle=False) image_batch, label_batch = reader.dequeue(args.batch_size) # Create network. net = DeepLabResNetModel({'data': image_batch}, is_training=False, num_classes=args.num_classes) # Which variables to load. restore_var = tf.global_variables() # Predictions. raw_output = net.layers['fc1_voc12'] raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) raw_output = tf.argmax(raw_output, dimension=3) pred = tf.expand_dims(raw_output, dim=3) # Create 4-d tensor. # mIoU pred = tf.reshape(pred, [ -1, ]) gt = tf.reshape(label_batch, [ -1, ]) # weights = tf.cast(tf.less_equal(gt, args.num_classes - 1), # tf.int32) # Ignoring all labels greater than or equal to n_classes. correct_pred = tf.equal(tf.cast(pred, tf.uint8), gt) accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) accuracy_per_class = [] for i in xrange(0, args.num_classes): curr_class = tf.constant(i, tf.uint8) accuracy_per_class.append( tf.reduce_mean( tf.cast( tf.gather(correct_pred, tf.where(tf.equal(gt, curr_class))), tf.float32))) sess = tf.Session() sess.run( tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())) # Load weights. loader = tf.train.Saver(var_list=restore_var) if args.restore_from is not None: load(loader, sess, args.restore_from) # Start queue threads. proc = Process(target=saving_process, args=(queue_proc, event_end, args.num_classes, args.data_dir, args.post_processing)) proc.start() threads = tf.train.start_queue_runners(coord=coord, sess=sess) acc_per_class = np.zeros(args.num_classes) for sublist in [ list_of_all_lines[i:i + args.batch_size] for i in xrange(0, len(list_of_all_lines), args.batch_size) ]: preds, labels, acc, acc_per_class[0], acc_per_class[1], \ acc_per_class[2], acc_per_class[3], acc_per_class[4] = sess.run( [raw_output, label_batch, accuracy, accuracy_per_class[0], accuracy_per_class[1], accuracy_per_class[2], accuracy_per_class[3], accuracy_per_class[4]]) for i, thing in enumerate(sublist): regex_match = re.match(".*\\/(.*)\\.mhd_([0-9]+).*", thing) # print(regex_match.group(1) + ' ' + str(regex_match.group(2))) queue_proc.put( (regex_match.group(1), int(regex_match.group(2)), preds[i], labels[i], acc_per_class, acc, len(dict[regex_match.group(1)]))) coord.request_stop() coord.join(threads) event_end.set() proc.join()
def main():
    """Create the model and start the evaluation process."""
    args = get_arguments()

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_dir,
            args.data_list,
            None,   # No defined input size.
            False,  # args preprocessing: random_scale, crop, mirror
            coord)
        image, label = reader.image, reader.label
    image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(
        label, dim=0)  # Add one batch dimension.

    # Create network.
    net = DeepLabResNetModel({'data': image_batch})

    # Which variables to load.
    trainable = tf.trainable_variables()

    # Predictions.
    raw_output = net.layers['fc1_voc12']
    raw_output = tf.image.resize_bilinear(raw_output,
                                          tf.shape(image_batch)[1:3, ])
    raw_output = tf.argmax(raw_output, dimension=3)
    pred = tf.expand_dims(raw_output, dim=3)  # Create 4-d tensor.

    # mIoU
    mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred, label_batch,
                                                            num_classes=21)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    sess.run(init)
    sess.run(tf.local_variables_initializer())

    # Load weights.
    saver = tf.train.Saver(var_list=trainable)
    if args.restore_from is not None:
        load(saver, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over evaluation steps.
    for step in range(args.num_steps):
        preds, _ = sess.run([pred, update_op])
        # Optionally save the decoded prediction:
        # img = decode_labels(preds[0, :, :, 0])
        # im = Image.fromarray(img)
        # im.save(args.save_dir + str(step) + '.png')
        if step % 100 == 0:
            print('step {:d} \t'.format(step))
    print('Mean IoU: {:.3f}'.format(mIoU.eval(session=sess)))
    coord.request_stop()
    coord.join(threads)
def deeplabProcessing(gpuId): """Create the model and start the evaluation process.""" print("Starting worker on GPU " + str(gpuId) + "...") def printWorker(msg): print(str(timestampMs()) + " [gpu-worker-" + str(gpuId) + "] " + msg) printWorker("Waiting for segmentation requests...") initialized = False while (not quit): fileId = requestQueue.get() # will block if fileId == "quit" + str(gpuId): printWorker("Received quit command") break printWorker("Received request for DL segmentaiton: " + fileId) printWorker("Requests queue size: " + str(requestQueue.qsize())) t1 = timestampMs() #datetime.datetime.now() imgPath = os.path.join(uploadPath, fileId) # Prepare image. imgRGB = tf.image.decode_jpeg(tf.read_file(imgPath), channels=3) # Convert RGB to BGR. img_r, img_g, img_b = tf.split(imgRGB, 3, axis=2) imgBGR = tf.cast(tf.concat([img_b, img_g, img_r], 2), dtype=tf.float32) # Extract mean. imgBGR -= IMG_MEAN printWorker("Will create network") # Create network. net = DeepLabResNetModel({'data': tf.expand_dims(imgBGR, dim=0)}, is_training=False) tf.get_variable_scope().reuse_variables() printWorker("Network created") # Which variables to load. restore_var = tf.global_variables() # Predictions. raw_output = net.layers['fc1_voc12'] raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(imgBGR)[0:2, ]) printWorker("Predictions") # CRF. raw_output_up = tf.nn.softmax(raw_output_up) raw_output_up = tf.py_func( dense_crf, [raw_output_up, tf.expand_dims(imgRGB, dim=0)], tf.float32) printWorker("CRF") raw_output_up = tf.argmax(raw_output_up, dimension=3) pred = tf.expand_dims(raw_output_up, dim=3) if not initialized: printWorker("Setup tf session") # Set up TF session and initialize variables. config = tf.ConfigProto(device_count={'GPU': gpuId}) config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() sess.run(init) printWorker("TF session initialized") # Load weights. loader = tf.train.Saver(var_list=restore_var) load(loader, sess, weightsModelPath) initialized = True # Perform inference. preds = sess.run(pred) msk = decode_labels(preds) im = Image.fromarray(msk[0]) maskPath = os.path.join(resultsPath, fileId) + ".png" im.save(maskPath) originalFile = os.path.join(uploadPath, fileId) os.remove(originalFile) t2 = timestampMs() #datetime.datetime.now() printWorker("Processing took " + str(t2 - t1) + "ms. Result is at " + maskPath)
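# Hedged numpy equivalent (not the original worker code) of the preprocessing
# performed above before inference: swap RGB to BGR and subtract the dataset
# mean. IMG_MEAN is assumed to be the BGR mean vector used by the scripts.
import numpy as np

def preprocess_bgr(img_rgb, img_mean):
    img_bgr = img_rgb[:, :, ::-1].astype(np.float32)  # RGB -> BGR
    return img_bgr - img_mean                         # subtract dataset mean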
def main(): """Create the model and start the training.""" args = get_arguments() print(args) if args.not_restore_last: try: shutil.rmtree(args.snapshot_dir) except Exception as e: print(e) os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_mask h, w = map(int, args.input_size.split(',')) input_size = (h, w) tf.set_random_seed(args.random_seed) # Create queue coordinator. coord = tf.train.Coordinator() # Load reader. mode = tf.placeholder(tf.bool, shape=()) with tf.name_scope("create_inputs"): reader = ImageReader( args.data_dir, args.data_list, input_size, args.random_scale, args.random_mirror, args.ignore_label, IMG_MEAN, coord) image_batch_train, label_batch_train = reader.dequeue(args.batch_size) with tf.name_scope("val_inputs"): reader = ImageReader( args.data_dir, args.val_data_list, input_size, args.random_scale, args.random_mirror, args.ignore_label, IMG_MEAN, coord) image_batch_val, label_batch_val = reader.dequeue(args.batch_size) image_batch = tf.cond(mode, lambda: image_batch_train, lambda: image_batch_val) label_batch = tf.cond(mode, lambda: label_batch_train, lambda: label_batch_val) # Create network. net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training, num_classes=args.num_classes) # For a small batch size, it is better to keep # the statistics of the BN layers (running means and variances) # frozen, and to not update the values provided by the pre-trained model. # If is_training=True, the statistics will be updated during the training. # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset) # if they are presented in var_list of the optimiser definition. # Predictions. raw_output = net.layers['fc1_voc12'] # Which variables to load. Running means and variances are not trainable, # thus all_variables() should be restored. restore_var = [v for v in tf.global_variables() if 'fc' not in v.name or not args.not_restore_last] all_trainable = [v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name] fc_trainable = [v for v in all_trainable if 'fc' in v.name] conv_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0 fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name] # lr * 10.0 fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name] # lr * 20.0 assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable)) assert (len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable)) # Predictions: ignoring all predictions with labels greater or equal than n_classes raw_prediction = tf.reshape(raw_output, [-1, args.num_classes]) label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False) # [batch_size, h, w] raw_gt = tf.reshape(label_proc, [-1, ]) indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1) gt = tf.cast(tf.gather(raw_gt, indices), tf.int32) prediction = tf.gather(raw_prediction, indices) output_op = tf.cast(tf.argmax(prediction, axis=-1), tf.int32) correct_pred = tf.equal(output_op, gt) accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) # Pixel-wise softmax loss. 
loss = [] accuracy_per_class = [] softmax_weights_per_class = tf.constant(LUNA16_softmax_weights, dtype=tf.float32) for i in xrange(0, args.num_classes): curr_class = tf.constant(i, tf.int32) loss.append(softmax_weights_per_class[i] * tf.losses.sparse_softmax_cross_entropy(logits=prediction, labels=gt, weights=tf.where( tf.equal(gt, curr_class), tf.zeros_like(gt), tf.ones_like(gt)))) accuracy_per_class.append( tf.reduce_mean(tf.cast(tf.gather(correct_pred, tf.where(tf.equal(gt, curr_class))), tf.float32))) l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name] reduced_loss = tf.reduce_mean(tf.stack(loss)) + tf.add_n(l2_losses) # Processed predictions: for visualisation. raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) raw_output_up = tf.argmax(raw_output_up, dimension=3) pred = tf.expand_dims(raw_output_up, dim=3) # Image summary. reduced_loss_train = tf.Variable(0, trainable=False, dtype=tf.float32) accuracy_train = tf.Variable(0, trainable=False, dtype=tf.float32) reduced_loss_val = tf.Variable(0, trainable=False, dtype=tf.float32) accuracy_val = tf.Variable(0, trainable=False, dtype=tf.float32) reduced_loss_train = tf.cond(mode, lambda: tf.assign(reduced_loss_train, reduced_loss), lambda: reduced_loss_train) accuracy_train = tf.cond(mode, lambda: tf.assign(accuracy_train, accuracy), lambda: accuracy_train) reduced_loss_val = tf.cond(mode, lambda: reduced_loss_val, lambda: tf.assign(reduced_loss_val, reduced_loss)) accuracy_val = tf.cond(mode, lambda: accuracy_val, lambda: tf.assign(accuracy_val, accuracy)) accuracy_per_class_train = [] accuracy_per_class_val = [] for i in xrange(0, args.num_classes): temp_train_var = tf.Variable(0, trainable=False, dtype=tf.float32) temp_val_var = tf.Variable(0, trainable=False, dtype=tf.float32) accuracy_per_class_train.append( tf.cond(mode, lambda: tf.assign(temp_train_var, accuracy_per_class[i]), lambda: temp_train_var)) accuracy_per_class_val.append( tf.cond(mode, lambda: temp_val_var, lambda: tf.assign(temp_val_var, accuracy_per_class[i]))) accuracy_output = tf.cond(mode, lambda: accuracy_train, lambda: accuracy_val) loss_output = tf.cond(mode, lambda: reduced_loss_train, lambda: reduced_loss_val) tf.summary.scalar("Loss", loss_output, collections=['all']) tf.summary.scalar("Accuracy", accuracy_output, collections=['all']) images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8) labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8) preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8) counter_no_reset = tf.Variable(tf.zeros([2, args.num_classes]), trainable=False, dtype=tf.float32) counter = tf.Variable(tf.zeros([2, args.num_classes]), trainable=False, dtype=tf.float32) counter_no_reset_val = tf.Variable(tf.zeros([2, args.num_classes]), trainable=False, dtype=tf.float32) counter_val = tf.Variable(tf.zeros([2, args.num_classes]), trainable=False, dtype=tf.float32) step_ph = tf.placeholder(dtype=tf.float32, shape=()) counter, counter_no_reset = tf.cond(mode, lambda: tf.py_func(update_IoU, [tf.squeeze(pred, axis=-1), tf.squeeze(label_batch, axis=-1), counter, counter_no_reset, args.num_classes, args.batch_size, step_ph, args.save_pred_every], [tf.float32, tf.float32]), lambda: [counter, counter_no_reset]) counter_val, counter_no_reset_val = tf.cond(mode, lambda: [counter_val, counter_no_reset_val], lambda: 
tf.py_func(update_IoU, [tf.squeeze(pred, axis=-1), tf.squeeze(label_batch, axis=-1), counter_val, counter_no_reset_val, args.num_classes, args.batch_size, step_ph, args.save_pred_every], [tf.float32, tf.float32])) eps = tf.constant(1e-10, dtype=tf.float32) IoU_summary = counter[0] / tf.add(eps, counter[1]) IoU_summary_no_reset = counter_no_reset[0] / tf.add(eps, counter_no_reset[1]) Val_IoU_summary = counter_val[0] / tf.add(eps, counter_val[1]) Val_IoU_summary_no_reset = counter_no_reset_val[0] / tf.add(eps, counter_no_reset_val[1]) mIoU = tf.reduce_mean(IoU_summary) mIoU_no_reset = tf.reduce_mean(IoU_summary_no_reset) Val_mIoU = tf.reduce_mean(Val_IoU_summary) Val_mIoU_no_reset = tf.reduce_mean(Val_IoU_summary_no_reset) IoU_summary_output_intermed = tf.cond(mode, lambda: IoU_summary, lambda: Val_IoU_summary) IoU_summary_no_reset_output_intermed = tf.cond(mode, lambda: IoU_summary_no_reset, lambda: Val_IoU_summary_no_reset) accuracy_per_class_output_intermed = tf.cond(mode, lambda: accuracy_per_class_train, lambda: accuracy_per_class_val) class_number = tf.placeholder(tf.int32, shape=()) IoU_summary_output = tf.gather(IoU_summary_output_intermed, class_number) IoU_summary_no_reset_output = tf.gather(IoU_summary_no_reset_output_intermed, class_number) accuracy_per_class_output = tf.gather(accuracy_per_class_output_intermed, class_number) tf.summary.scalar("IoU per class", IoU_summary_output, collections=['per_class']) tf.summary.scalar("IoU (no reset) per class", IoU_summary_no_reset_output, collections=['per_class']) tf.summary.scalar("Accuracy per class", accuracy_per_class_output, collections=['per_class']) mIoU_output = tf.cond(mode, lambda: mIoU, lambda: Val_mIoU) mIoU_no_reset_output = tf.cond(mode, lambda: mIoU_no_reset, lambda: Val_mIoU_no_reset) tf.summary.scalar("mIoU", mIoU_output, collections=['all']) tf.summary.scalar("mIoU no reset", mIoU_no_reset_output, collections=['all']) tf.summary.image('images', tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]), max_outputs=args.save_num_images, collections=['all']) # Concatenate row-wise. summary_writer_train = tf.summary.FileWriter(os.path.join(args.snapshot_dir, 'train_all'), graph=tf.get_default_graph()) summary_writer_val = tf.summary.FileWriter(os.path.join(args.snapshot_dir, 'val_all'), graph=tf.get_default_graph()) summary_writer_per_class_val = [] summary_writer_per_class_train = [] for i in xrange(args.num_classes): summary_writer_per_class_train.append( tf.summary.FileWriter(os.path.join(args.snapshot_dir, 'train_class_' + str(i)), graph=tf.get_default_graph())) summary_writer_per_class_val.append( tf.summary.FileWriter(os.path.join(args.snapshot_dir, 'val_class_' + str(i)), graph=tf.get_default_graph())) # Define loss and optimisation parameters. 
base_lr = tf.constant(args.learning_rate) learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / args.num_steps), args.power)) tf.summary.scalar("learning_rate", learning_rate, collections=['all']) all_summary = tf.summary.merge_all('all') per_class_summary = tf.summary.merge_all('per_class') opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum) opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum) opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum) grads = tf.gradients(reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable) grads_conv = grads[:len(conv_trainable)] grads_fc_w = grads[len(conv_trainable): (len(conv_trainable) + len(fc_w_trainable))] grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):] train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable)) train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable)) train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable)) train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b) # Set up tf session and initialize variables. sess = tf.Session() init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) sess.run(init) # Saver for storing checkpoints of the model. saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10) # Load variables if the checkpoint is provided. if args.restore_from is not None: loader = tf.train.Saver(var_list=restore_var) load(loader, sess, args.restore_from) # Start queue threads. threads = tf.train.start_queue_runners(coord=coord, sess=sess) # Iterate over training steps. for step in xrange(1, args.num_steps + 1): start_time = time.time() # mode False -> val, mode True -> train if step % args.save_pred_every == 0: feed_dict = {step_ph: step, mode: False, class_number: step % args.num_classes} acc, loss_value, mI, mINR, _, _, _, summary_v_this_class, summary_v = sess.run( [accuracy_output, loss_output, mIoU_output, mIoU_no_reset_output, accuracy_per_class_output, IoU_summary_output, IoU_summary_no_reset_output, per_class_summary, all_summary], feed_dict=feed_dict) save(saver, sess, args.snapshot_dir, step) summary_writer_val.add_summary(summary_v, step) summary_writer_per_class_val[step % args.num_classes].add_summary(summary_v_this_class, step) duration = time.time() - start_time print( 'step {:d} \t Val_loss = {:.3f}, Val_acc = {:.3f}, Val_mIoU = {:.6f}, Val_mIoU_no_reset = {:.6f}, ({:.3f} sec/step)'.format( step, loss_value, acc, mI, mINR, duration)) else: feed_dict = {step_ph: step, mode: True, class_number: step % args.num_classes} acc, loss_value, mI, mINR, _, _, _, summary_t_this_class, summary_t, _ = sess.run( [accuracy_output, loss_output, mIoU_output, mIoU_no_reset_output, accuracy_per_class_output, IoU_summary_output, IoU_summary_no_reset_output, per_class_summary, all_summary, train_op], feed_dict=feed_dict) summary_writer_train.add_summary(summary_t, step) summary_writer_per_class_train[step % args.num_classes].add_summary(summary_t_this_class, step) duration = time.time() - start_time print( 'step {:d} \t loss = {:.3f}, acc = {:.3f}, mIoU = {:.6f}, mIoU_no_reset = {:.6f}, ({:.3f} sec/step)'.format( step, loss_value, acc, mI, mINR, duration)) coord.request_stop() # tboard_proc.kill() coord.join(threads)
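# `update_IoU` is called above through tf.py_func but is not shown in this
# section. The counters it maintains are read back as IoU = counter[0] /
# counter[1] per class, so a plausible (purely illustrative) accumulation step
# could look like the following; this is an assumption, not the repository's
# helper.
import numpy as np

def accumulate_iou(pred, label, counter, num_classes):
    # counter: float32 array of shape [2, num_classes];
    # row 0 accumulates per-class intersections, row 1 per-class unions.
    for c in range(num_classes):
        pred_c = (pred == c)
        label_c = (label == c)
        counter[0, c] += np.logical_and(pred_c, label_c).sum()
        counter[1, c] += np.logical_or(pred_c, label_c).sum()
    return counter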
def main(): """Create the model and start the evaluation process.""" args = get_arguments() h, w = map(int, args.input_size.split(',')) input_size = (h, w) # Create queue coordinator. coord = tf.train.Coordinator() # Load reader. with tf.name_scope("create_inputs"): reader = ImageReader( args.data_dir, args.data_list, input_size, # No defined input size. False, # No random scale. False, # No random mirror. args.ignore_label, IMG_MEAN, coord) # image, label = reader.image, reader.label image_batch, label_batch = reader.dequeue(args.batch_size) # image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0) # Add one batch dimension. image_name_list = reader.image_list x = tf.placeholder(tf.float32, shape=(BATCH_SIZE, h, w, 3)) y = tf.placeholder(tf.float32, shape=(BATCH_SIZE, h, w, 1)) # Create network. net = DeepLabResNetModel({'data': x}, is_training=False, num_classes=args.num_classes) # Which variables to load. restore_var = tf.global_variables() # Predictions. raw_output = net.layers['fc1_voc12'] raw_prediction = tf.reshape(raw_output, [-1, args.num_classes]) label_proc = prepare_label(y, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False) # [batch_size, h, w] raw_gt = tf.reshape(label_proc, [ -1, ]) indices_0 = tf.squeeze( tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1) gt_0 = tf.cast(tf.gather(raw_gt, indices_0), tf.int32) prediction = tf.gather(raw_prediction, indices_0) loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt_0) img_mean_t = tf.convert_to_tensor(IMG_MEAN, dtype=tf.float32) if args.attack == 'fgs': x_adv = fgs(x, loss, args.eps, img_mean_t, input_size, BATCH_SIZE, MASK_FLAG, args.targeted) elif args.attack == 'ifgs': x_adv = fgs(x, loss, args.beta, img_mean_t, input_size, BATCH_SIZE, MASK_FLAG, args.targeted) raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) raw_output = tf.argmax(raw_output, dimension=3) pred = tf.expand_dims(raw_output, dim=3) # Create 4-d tensor. # mIoU pred = tf.reshape(pred, [ -1, ]) gt = tf.reshape(label_batch, [ -1, ]) indices = tf.squeeze(tf.where(tf.less_equal(gt, args.num_classes - 1)), 1) ## ignore all labels >= num_classes gt = tf.cast(tf.gather(gt, indices), tf.int32) pred = tf.gather(pred, indices) mIoU, update_op = tf.contrib.metrics.streaming_mean_iou( pred, gt, num_classes=args.num_classes) # Set up tf session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() sess.run(init) sess.run(tf.local_variables_initializer()) # Load weights. loader = tf.train.Saver(var_list=restore_var) if args.restore_from is not None: load(loader, sess, args.restore_from) # Start queue threads. threads = tf.train.start_queue_runners(coord=coord, sess=sess) # Iterate over evaluation steps. 
image_list, label_list = read_labeled_image_list(args.data_dir, args.data_list) for step in range(args.num_steps): X = image_converter(image_list, step) if args.targeted: Y = np.zeros((BATCH_SIZE, 321, 321, 1)) else: Y = label_converter(label_list, step) if args.attack == 'fgs': preds, _, X_adv = sess.run([pred, update_op, x_adv], feed_dict={ x: X, y: Y }) elif args.attack == 'ifgs': X_adv = X for i in range(args.iter): preds, _, X_adv, loss_v = sess.run( [pred, update_op, x_adv, loss], feed_dict={ x: X_adv, y: Y }) r = np.clip(X_adv - X, -args.eps, args.eps) X_adv = X + r # preds, _ = sess.run([pred, update_op], feed_dict={x: X_adv}) if SAVE_FLAG is not None: image_saver(X_adv, X, image_name_list[step], args.attack, args.eps, args.targeted) if step % 100 == 0: print('step {:d}'.format(step)) print('Mean IoU: {:.3f}'.format(mIoU.eval(session=sess))) coord.request_stop() coord.join(threads)
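# The `fgs` helper used above is not shown in this section; presumably it
# implements the usual fast-gradient-sign step, sketched here with standard
# TF 1.x ops as an assumption rather than the repository's implementation:
# x_adv = x + eps * sign(dL/dx).
def fgs_step(x, loss, eps):
    grad = tf.gradients(loss, x)[0]
    return x + eps * tf.sign(grad)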
def main(): """Create the model and start the evaluation process.""" args = get_arguments() num_steps = file_len(os.path.join(args.img_path, args.data_list)) # Create queue coordinator. coord = tf.train.Coordinator() # Load reader. with tf.name_scope("create_inputs"): reader = ImageReader( os.path.join(args.img_path, "texture"), os.path.join(args.img_path, args.data_list), None, # No defined input size. False, # No random scale. False, # No random mirror. 255, IMG_MEAN, coord, ) image, label = reader.image, reader.label title = reader.queue[0] image_batch, label_batch = ( tf.expand_dims(image, axis=0), tf.expand_dims(label, axis=0), ) # Add one batch dimension. # Create network. net = DeepLabResNetModel({"data": image_batch}, is_training=False, num_classes=args.num_classes) # Which variables to load. restore_var = tf.global_variables() # Predictions. raw_output = net.layers["fc1_voc12"] before_argmax = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) raw_output_up = tf.argmax(before_argmax, dimension=3) pred = tf.expand_dims(raw_output_up, axis=3) hw_only = pred[0, :, :, 0] class_0 = tf.where(tf.equal(hw_only, 0)) class_1 = tf.where(tf.equal(hw_only, 1)) class_2 = tf.where(tf.equal(hw_only, 2)) class_3 = tf.where(tf.equal(hw_only, 3)) class_4 = tf.where(tf.equal(hw_only, 4)) class_5 = tf.where(tf.equal(hw_only, 5)) class_6 = tf.where(tf.equal(hw_only, 6)) # Set up TF session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() sess.run(init) # Load weights. loader = tf.train.Saver(var_list=restore_var) load(loader, sess, args.model_weights) # Start queue threads. threads = tf.train.start_queue_runners(coord=coord, sess=sess) start_time = time.time() os.makedirs(os.path.join(args.img_path, args.body_dir), exist_ok=True) os.makedirs(os.path.join(args.img_path, args.vis_dir), exist_ok=True) # write the header rois_file = os.path.join(args.img_path, "rois.csv") if os.path.isfile(rois_file): print(f"The rois file {rois_file} already exists...") ans = None while all(ans != choice for choice in ("a", "o", "q")): ans = input("Do you want to (a)ppend, (o)verwrite, or (q)uit? ") if ans == "o": print("Overwriting existing rois file...") write_header(rois_file) elif ans == "q": sys.exit(1) else: write_header(rois_file) # Perform inference. 
t = trange(num_steps, desc="Inference progress", unit="img") for step in t: # run through the model jpg_path, c0, c1, c2, c3, c4, c5, c6, raw_output_up_ = sess.run([ title, class_0, class_1, class_2, class_3, class_4, class_5, class_6, raw_output_up, ]) # == First, save the body segmentation == if not args.no_body: # convert to a 2D compressed matrix, because we have a lot of 0's for the # background compressed = sparse.csr_matrix(np.squeeze(raw_output_up_)) fname = os.path.splitext(os.path.basename(str(jpg_path)))[0] out = os.path.join(args.img_path, args.body_dir, fname) sparse.save_npz(out, compressed) # == Next, save the ROIs == if not args.no_rois: img_id = extract_nums_only(fname) for c in (c0, c1, c2, c3, c4, c5, c6): try: min_x = np.min(c[:, 1]) except ValueError: min_x = None try: min_y = np.min(c[:, 0]) except ValueError: min_y = None try: max_x = np.max(c[:, 1]) except ValueError: max_x = None try: max_y = np.max(c[:, 0]) except ValueError: max_y = None # write out the stuff with open(rois_file, "a") as f: f.write(",".join((img_id, str(min_x), str(min_y), str(max_x), str(max_y), "\n"))) # Save an image of the mask for our own reference every 1000 steps if not args.no_vis and step % args.visualize_step == 0: preds = np.expand_dims(raw_output_up_, axis=3) msk = decode_labels(preds, num_classes=args.num_classes) # the mask im = Image.fromarray(msk[0]) # # Save the mask separately # jpg_path = str(jpg_path).split('/')[-1].split('.')[0] # out = os.path.join(args.vis_dir, jpg_path + '.png') # im.save(out) # Save the mask with background img_orig = Image.open(jpg_path) # create the final result using the mask and the original img = np.array(im) * 0.9 + np.array(img_orig) * 0.7 # clip surpassed colors img[img > 255] = 255 img = Image.fromarray(np.uint8(img)) out = os.path.join(args.img_path, args.vis_dir, fname + ".png") img.save(out) # # print('Image processed {}.png'.format(jpg_path)) t.set_description("Finished " + fname) total_time = time.time() - start_time print( f"The output files have been saved to {args.img_path}/{args.body_dir}") print(f"It took {total_time / num_steps} sec on each image.")
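# Hedged numpy sketch of the per-class ROI extraction written to rois.csv above:
# given the [N, 2] (row, col) coordinates of one class, take the min/max along
# each axis, returning None for absent classes just as the ValueError handling
# above does.
import numpy as np

def class_roi(coords):
    if coords.size == 0:
        return None, None, None, None
    min_x, max_x = np.min(coords[:, 1]), np.max(coords[:, 1])
    min_y, max_y = np.min(coords[:, 0]), np.max(coords[:, 0])
    return min_x, min_y, max_x, max_y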
    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = InferenceImageReader(
            args.input_dir,
            IMG_MEAN,
            coord,
            RESIZE_TO)
        image_orig = reader.image

    for rots in range(4):
        image = tf.image.rot90(image_orig, k=rots)
        image_batch = tf.expand_dims(image, dim=0)

        # Create network.
        net = DeepLabResNetModel({'data': image_batch},
                                 is_training=False,
                                 num_classes=args.num_classes)
        tf.get_variable_scope().reuse_variables()

        # Which variables to load.
        restore_var = tf.global_variables()

        # Predictions.
        raw_output = net.layers['fc1_voc12']
        raw_output = tf.image.resize_bilinear(raw_output,
                                              tf.shape(image_batch)[1:3, ])

        # CRF.
        if args.crf:
            inv_image = tf.py_func(inv_preprocess, [image_batch, 1, IMG_MEAN],
                                   tf.uint8)
            raw_output = tf.py_func(dense_crf,
                                    [tf.nn.softmax(raw_output), inv_image],
                                    tf.float32)

        # Rotate to original
def main():
    """Create the model and start the evaluation process."""
    args = get_arguments()

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_dir,
            args.data_list,
            None,   # No defined input size.
            False,  # No random scale.
            False,  # No random mirror.
            coord)
        image, label = reader.image, reader.label
    image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(
        label, dim=0)  # Add one batch dimension.
    h_orig, w_orig = tf.to_float(tf.shape(image_batch)[1]), tf.to_float(
        tf.shape(image_batch)[2])
    image_batch075 = tf.image.resize_images(
        image_batch,
        tf.stack([
            tf.to_int32(tf.multiply(h_orig, 0.75)),
            tf.to_int32(tf.multiply(w_orig, 0.75))
        ]))
    image_batch05 = tf.image.resize_images(
        image_batch,
        tf.stack([
            tf.to_int32(tf.multiply(h_orig, 0.5)),
            tf.to_int32(tf.multiply(w_orig, 0.5))
        ]))

    # Create network.
    with tf.variable_scope('', reuse=False):
        net = DeepLabResNetModel({'data': image_batch}, is_training=False)
    with tf.variable_scope('', reuse=True):
        net075 = DeepLabResNetModel({'data': image_batch075}, is_training=False)
    with tf.variable_scope('', reuse=True):
        net05 = DeepLabResNetModel({'data': image_batch05}, is_training=False)

    # Which variables to load.
    restore_var = tf.global_variables()

    # Predictions.
    raw_output100 = net.layers['fc1_voc12']
    raw_output075 = tf.image.resize_images(net075.layers['fc1_voc12'],
                                           tf.shape(raw_output100)[1:3, ])
    raw_output05 = tf.image.resize_images(net05.layers['fc1_voc12'],
                                          tf.shape(raw_output100)[1:3, ])
    raw_output = tf.reduce_max(tf.stack(
        [raw_output100, raw_output075, raw_output05]), axis=0)
    pred = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ])

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    sess.run(init)
    sess.run(tf.local_variables_initializer())

    # Load weights.
    loader = tf.train.Saver(var_list=restore_var)
    if args.restore_from is not None:
        load(loader, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Get the colour palette.
    palette = voc_colour_map()

    with open(args.data_list, 'r') as f:
        image_names = [line for line in f]

    # Iterate over evaluation steps.
    for step in range(args.num_steps):
        preds = sess.run(pred)
        preds = np.argmax(preds, axis=3).squeeze().astype(np.uint8)
        im = Image.fromarray(preds)
        im.putpalette(palette)
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)
        mask_name = image_names[step].strip("\n").rsplit('/', 1)[1].replace(
            'jpg', 'png')
        im.save(args.save_dir + "/" + mask_name)

    print('The segmentation masks have been saved to {}'.format(args.save_dir))

    coord.request_stop()
    coord.join(threads)
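# `voc_colour_map` is used above to build the palette for the indexed PNG masks
# but is not shown in this section. The conventional PASCAL VOC palette is
# generated with the bit-reversal scheme sketched below; this may or may not
# match the repository's own helper.
import numpy as np

def voc_colour_map_sketch(n=256):
    def bitget(value, idx):
        return (value >> idx) & 1

    cmap = np.zeros((n, 3), dtype=np.uint8)
    for i in range(n):
        r = g = b = 0
        c = i
        for j in range(8):
            r |= bitget(c, 0) << (7 - j)
            g |= bitget(c, 1) << (7 - j)
            b |= bitget(c, 2) << (7 - j)
            c >>= 3
        cmap[i] = (r, g, b)
    return cmap.flatten().tolist()  # flat palette, as expected by PIL putpalette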
def main(): args, preds = get_arguments(), [] # Create queue coordinator. coord = tf.train.Coordinator() # Load reader. with tf.name_scope("create_inputs"): reader = ImageReader( args.data_dir, args.target_list, None, # No defined input size. False, # No random scale. False, # No random mirror. args.ignore_label, IMG_MEAN, coord) image_orig = reader.image for rots in range(4): image = tf.image.rot90(image_orig, k=rots) image_batch = tf.expand_dims(image, dim=0) # Add one batch dimension. # Create network. net = DeepLabResNetModel({'data': image_batch}, is_training=False, num_classes=args.num_classes) tf.get_variable_scope().reuse_variables() # Which variables to load. restore_var = tf.global_variables() # Predictions. raw_output = net.layers['fc1_voc12'] raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) # CRF. if args.crf: inv_image = tf.py_func(inv_preprocess, [image_batch, 1, IMG_MEAN], tf.uint8) raw_output = tf.py_func(dense_crf, [tf.nn.softmax(raw_output), inv_image], tf.float32) # Rotate to original raw_output = tf.image.rot90(tf.squeeze(raw_output), k=(4 - rots)) raw_output = tf.expand_dims(raw_output, dim=0) preds.append(raw_output) if not args.augment: break pred = tf.reduce_mean(tf.concat(preds, axis=0), axis=0) if args.heatmap < 0: pred = tf.argmax(tf.expand_dims(pred, dim=0), dimension=3) pred = tf.cast(tf.expand_dims(pred, dim=3), tf.int32) else: pred = tf.expand_dims(pred[:, :, args.heatmap], dim=0) pred = tf.cast(pred, tf.int32) # Set up tf session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() sess.run(init) sess.run(tf.local_variables_initializer()) # Load weights. loader = tf.train.Saver(var_list=restore_var) load(loader, sess, args.restore_from) # Start queue threads. threads = tf.train.start_queue_runners(coord=coord, sess=sess) if not os.path.exists(args.save_dir): os.makedirs(args.save_dir) # Iterate over training steps. for step in tqdm(range(args.num_steps)): preds, img_path = sess.run([pred, reader.queue[0]]) if args.heatmap < 0: preds = decode_labels(preds, num_classes=args.num_classes) im = Image.fromarray(preds[0]) else: pr = np.zeros((1, preds.shape[1], preds.shape[2], 3)) preds += abs(np.min(preds)) preds *= 255 / np.max(preds) pr[:, :, :, 0] = preds pr[:, :, :, 1] = preds pr[:, :, :, 2] = preds im = Image.fromarray(pr[0].astype('uint8')) img_name = os.path.split(img_path)[-1] im.save(os.path.join(args.save_dir + img_name)) coord.request_stop() coord.join(threads)
def main(): """Create the model and start the evaluation process.""" args = get_arguments() os.environ['CUDA_VISIBLE_DEVICES'] = '1' # Prepare image. img_list, sample_list = read_image_list() ch = 4 if cfg.ONLY_POS else 5 # Create network. with tf.device('/cpu:0'): input_imgs = tf.placeholder(tf.float32, shape=[None, None, None, ch], name='input_img') net = DeepLabResNetModel({'data': input_imgs}, is_training=False) # Which variables to load. restore_var = tf.global_variables() # Predictions. raw_output = tf.sigmoid(net.layers['fc1_voc12']) raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(input_imgs)[1:3, ]) preds = tf.greater_equal(tf.nn.softmax(raw_output_up), 0.5) preds = tf.cast(preds, tf.uint8) # pred = tf.expand_dims(raw_output_up, dim=3) # Set up TF session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True config.allow_soft_placement = True sess = tf.Session(config=config) sess.run(tf.global_variables_initializer()) print('before run inference run.\n') # Load weights. loader = tf.train.Saver(var_list=restore_var) load(loader, sess, args.model_weights) if not os.path.exists(args.save_dir): os.makedirs(args.save_dir) import pdb pdb.set_trace() for img_name, sample_name in zip(img_list, sample_list): rgbPath = os.path.join(cfg.RGB_PATH, img_name + cfg.RGB_EXT) pnPath = os.path.join(cfg.PNSAMPLE_PATH, sample_name + cfg.PNSAMPLE_EXT) labelPath = os.path.join(cfg.GT_PATH, sample_name + cfg.GT_EXT) # Perform inference. rgbimg = cv2.imread(rgbPath) pnmaps = tiff_imread(pnPath) pnmaps = pnmaps[0, ...] if cfg.ONLY_POS else np.transpose( pnmaps, [1, 2, 0]) pnmaps = pnmaps[..., np.newaxis] if cfg.ONLY_POS else pnmaps start_time = time.time() inData = np.concatenate((rgbimg, pnmaps), axis=2) inData = np.float32(inData) - cfg.IMG_MEAN[ 0:4] if cfg.ONLY_POS else np.float32(inData) - cfg.IMG_MEAN rOutput = sess.run(raw_output_up, feed_dict={input_imgs: inData[np.newaxis, ...]}) rOutput = rOutput.squeeze() img = np.zeros(rgbimg.shape) img[:, :, 0] = rOutput * 255 img = rgbimg[..., [2, 0, 1]] * 0.6 + img * 0.5 img[img > 255] = 255 img = img.astype(np.uint8) # image.imsave(args.save_dir+ sample_fname +'.png', preds.squeeze()*200, cmap = cm.gray, vmin=0, vmax=255 ) image.imsave(args.save_dir + sample_name + '_color.png', img) #, cmap = cm.gray, vmin=0, vmax=255 ) duration = time.time() - start_time print( 'The output file {} has been saved to {}. -- time: {:.3f} sec/({:d}, {:d})' .format(sample_name, args.save_dir, duration, img.shape[0], img.shape[1]))
def val(): def get_arguments(): """Parse all the arguments provided from the CLI. Returns: A list of parsed arguments. """ parser = argparse.ArgumentParser(description="DeepLabLFOV Network") parser.add_argument( "--data-dir", type=str, default=DATA_DIRECTORY, help="Path to the directory containing the PASCAL VOC dataset.") parser.add_argument( "--data-list", type=str, default=VAL_DATA_LIST_PATH, help="Path to the file listing the images in the dataset.") parser.add_argument( "--ignore-label", type=int, default=IGNORE_LABEL, help="The index of the label to ignore during the training.") parser.add_argument( "--num-classes", type=int, default=NUM_CLASSES, help="Number of classes to predict (including background).") parser.add_argument("--num-steps", type=int, default=VAL_NUM_STEPS, help="Number of images in the validation set.") parser.add_argument("--restore-from", type=str, default=SNAPSHOT_DIR, help="Where restore model parameters from.") return parser.parse_args() def load(saver, sess, ckpt_path): '''Load trained weights. Args: saver: TensorFlow saver object. sess: TensorFlow session. ckpt_path: path to checkpoint file with parameters. ''' if os.path.isdir(ckpt_path): ckpt = tf.train.get_checkpoint_state(ckpt_path) ckpt_path = ckpt.model_checkpoint_path saver.restore(sess, ckpt_path) print("Restored model parameters from {}".format(ckpt_path)) with tf.variable_scope(name_or_scope='', reuse=tf.AUTO_REUSE): """Create the model and start the evaluation process.""" args = get_arguments() # Create queue coordinator. coord = tf.train.Coordinator() # Load reader. with tf.name_scope("create_inputs"): reader = ImageReader( args.data_dir, args.data_list, [321, 321], # No defined input size. False, # No random scale. False, # No random mirror. args.ignore_label, IMG_MEAN, coord) image, label = reader.image, reader.label image_batch, label_batch = tf.expand_dims( image, dim=0), tf.expand_dims(label, dim=0) # Add one batch dimension. # Create network. net = DeepLabResNetModel({'data': image_batch}, is_training=False, num_classes=args.num_classes) # Which variables to load. restore_var = tf.global_variables() # Predictions. raw_output = net.layers['fc1_voc12'] raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) raw_output = tf.argmax(raw_output, dimension=3) pred = tf.expand_dims(raw_output, dim=3) # Create 4-d tensor. # mIoU pred = tf.reshape(pred, [ -1, ]) gt = tf.reshape(label_batch, [ -1, ]) # tensorflow 1.3.0 conflict # weights = tf.cast(tf.less_equal(gt, args.num_classes - 1), tf.int32) # Ignoring all labels greater than or equal to n_classes. # mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred, gt, num_classes=args.num_classes, weights=weights) indices = tf.squeeze(tf.where(tf.less_equal(gt, args.num_classes - 1)), 1) # ignore all labels >= num_classes gt = tf.cast(tf.gather(gt, indices), tf.int32) pred = tf.gather(pred, indices) mIoU, update_op = tf.contrib.metrics.streaming_mean_iou( pred, gt, num_classes=args.num_classes) # Set up tf session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() sess.run(init) sess.run(tf.local_variables_initializer()) # Load weights. 
        loader = tf.train.Saver(var_list=restore_var)
        if args.restore_from is not None:
            load(loader, sess, args.restore_from)

        # Start queue threads.
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)

        # Iterate over validation steps.
        for step in range(args.num_steps):
            preds, _ = sess.run([pred, update_op])
            if step % 100 == 0:
                print('step {:d}'.format(step))
        print('Mean IoU: {:.3f}'.format(mIoU.eval(session=sess)))

        coord.request_stop()
        coord.join(threads)
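# --- Illustrative sketch (assumption, not from the original script) ----------
# What tf.contrib.metrics.streaming_mean_iou accumulates under the hood, written
# out in NumPy: a running confusion matrix over the valid (non-void) pixels,
# from which the per-class IoU is derived once all images have been seen.
import numpy as np

def mean_iou(pred_maps, gt_maps, num_classes):
    conf = np.zeros((num_classes, num_classes), dtype=np.int64)
    for pred, gt in zip(pred_maps, gt_maps):
        valid = gt < num_classes  # drop ignore_label (e.g. 255) and out-of-range ids
        idx = num_classes * gt[valid].astype(np.int64) + pred[valid].astype(np.int64)
        conf += np.bincount(idx, minlength=num_classes ** 2).reshape(
            num_classes, num_classes)
    inter = np.diag(conf)
    union = conf.sum(axis=0) + conf.sum(axis=1) - inter
    iou = inter / np.maximum(union, 1)
    return iou[union > 0].mean()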
def main(): """Create the model and start the training.""" args = get_arguments() # Load reader. h, w = map(int, cfg.INPUT_SIZE.split(',')) c = 4 if cfg.ONLY_POS else 5 reader_option = {"resize":True, "resize_size":[h,w]} train_dataset_reader = BatchDataset(reader_option) num_file = train_dataset_reader.get_image_number() num_steps = args.num_epochs * num_file # Create network. os.environ['CUDA_VISIBLE_DEVICES'] = '0' image_batch = tf.placeholder(tf.float32, shape=[args.batch_size, h, w, c], name='input') label_batch = tf.placeholder(tf.uint8, shape=[args.batch_size, h, w, 1], name='label') net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training) # For a small batch size, it is better to keep # the statistics of the BN layers (running means and variances) # frozen, and to not update the values provided by the pre-trained model. # If is_training=True, the statistics will be updated during the training. # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset) # if they are presented in var_list of the optimiser definition. # Predictions. raw_output = net.layers['fc1_voc12'] # Which variables to load. Running means and variances are not trainable, # thus all_variables() should be restored. restore_var = tf.global_variables() all_trainable = [v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name] fc_trainable = [v for v in all_trainable if 'fc' in v.name] # lr * 10.0 conv1_trainable = [v for v in all_trainable if 'conv1' in v.name] # lr * 20.0 conv_trainable = [v for v in all_trainable if 'fc' not in v.name and 'conv1' not in v.name] # lr * 1.0 assert(len(all_trainable) == len(fc_trainable) + len(conv1_trainable) + len(conv_trainable)) # Predictions: ignoring all predictions with labels greater or equal than n_classes raw_prediction = tf.reshape(raw_output, [-1, cfg.num_classes]) label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), one_hot=False) # [batch_size, h, w] raw_gt = tf.reshape(label_proc, [-1,]) raw_prediction = tf.reshape(raw_prediction, [-1,]) gt = tf.cast(raw_gt, tf.float32) prediction = tf.cast(raw_prediction, tf.float32) # Pixel-wise softmax loss. # loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt) loss = tf.nn.weighted_cross_entropy_with_logits(targets=gt, logits=prediction, pos_weight=5, name='weighted_sigmoid') l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name] reduced_loss = tf.reduce_mean(loss)*20 + tf.add_n(l2_losses) ''' # Processed predictions: for visualisation. raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,]) raw_output_up = tf.argmax(raw_output_up, dimension=3) pred = tf.expand_dims(raw_output_up, dim=3) # Image summary. images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images], tf.uint8) labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images], tf.uint8) preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images], tf.uint8) total_summary = tf.summary.image('images', tf.concat([images_summary, labels_summary, preds_summary], axis=2), max_outputs=args.save_num_images) # Concatenate row-wise. summary_writer = tf.summary.FileWriter(args.snapshot_dir) ''' # Define loss and optimisation parameters. 
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    # Poly learning-rate decay, as in the other training scripts. The original
    # expression, tf.pow(args.power, step_ph // num_steps), kept the rate
    # constant for the whole run because step_ph // num_steps is 0 until the end.
    learning_rate = tf.scalar_mul(base_lr,
                                  tf.pow((1 - step_ph / num_steps), args.power))

    opt_conv = tf.train.MomentumOptimizer(learning_rate * 1.0, args.momentum)
    opt_fc = tf.train.MomentumOptimizer(learning_rate * 5.0, args.momentum)
    opt_conv1 = tf.train.MomentumOptimizer(learning_rate * 5.0, args.momentum)

    grads = tf.gradients(reduced_loss,
                         conv_trainable + fc_trainable + conv1_trainable)
    grads_conv = grads[:len(conv_trainable)]
    grads_fc = grads[len(conv_trainable):(len(conv_trainable) + len(fc_trainable))]
    grads_conv1 = grads[(len(conv_trainable) + len(fc_trainable)):]

    train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
    train_op_fc = opt_fc.apply_gradients(zip(grads_fc, fc_trainable))
    train_op_conv1 = opt_conv1.apply_gradients(zip(grads_conv1, conv1_trainable))
    train_op = tf.group(train_op_conv, train_op_fc, train_op_conv1)

    # Evaluation (yjl: check the update operation). The original lines referenced
    # undefined tensors (raw_predictions, n_classes, weights); for the
    # single-channel sigmoid output used here, threshold the probability at 0.5.
    pred = tf.cast(tf.greater_equal(tf.sigmoid(prediction), 0.5), tf.int32)
    mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(
        pred, tf.cast(gt, tf.int32), num_classes=2)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=restore_var,
                           max_to_keep=2)  # keep_checkpoint_every_n_hours=1.0

    # Load variables if the checkpoint is provided.
    # load_var_list = [v for v in restore_var if ('conv1' not in v.name) and ('fc1_voc12' not in v.name)]
    if args.restore_from is not None:
        loader = tf.train.Saver(var_list=restore_var)
        # loader = tf.train.Saver(var_list=load_var_list)
        load(loader, sess, args.restore_from)

    # pdb.set_trace()  # debugging breakpoint disabled so training runs unattended

    # Iterate over training steps.
    for step in range(num_steps):
        start_time = time.time()
        images, labels = train_dataset_reader.next_batch(args.batch_size)
        feed_dict = {image_batch: images, label_batch: labels, step_ph: step}

        if step % args.save_pred_every == 0:
            save(saver, sess, args.snapshot_dir, step)

        loss_value, inf_loss, l2_loss, _ = sess.run(
            [reduced_loss, loss, l2_losses, train_op], feed_dict=feed_dict)
        duration = time.time() - start_time
        epoch = int(step * args.batch_size / num_file)
        print('epoch {:d} / step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(
            epoch, step, loss_value, duration))
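# --- Illustrative sketch (assumption, not from the original script) ----------
# The "poly" learning-rate decay used by the DeepLab-style training scripts in
# this codebase, written as a plain function:
#     lr = base_lr * (1 - step / max_steps) ** power
def poly_lr(base_lr, step, max_steps, power=0.9):
    return base_lr * (1.0 - float(step) / max_steps) ** power

# Example: poly_lr(2.5e-4, 0, 20000) == 2.5e-4, and the rate decays smoothly
# towards 0 as step approaches max_steps.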
def main(): """Create the model and start the evaluation process.""" args = get_arguments() # Prepare image. rgbimg = cv2.imread(args.img_path) pnmaps = tiff_imread(args.pnmap_path) pnmaps = pnmaps[0, ...] if ONLY_POS else np.transpose(pnmaps, [1, 2, 0]) pnmaps = pnmaps[..., np.newaxis] if ONLY_POS else pnmaps img = np.concatenate((rgbimg, pnmaps), axis=2) h, w, ch = img.shape # Extract mean. img = np.float32(img) - IMG_MEAN[0:4] if ONLY_POS else np.float32( img) - IMG_MEAN # Create network. input_img = tf.placeholder(tf.float32, shape=[None, h, w, ch], name='input_img') net = DeepLabResNetModel({'data': tf.expand_dims(img, dim=0)}, is_training=False) # Which variables to load. restore_var = tf.global_variables() # Predictions. raw_output = net.layers['fc1_voc12'] raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(img)[0:2, ]) # pred = tf.expand_dims(raw_output_up, dim=3) # Set up TF session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True config.allow_soft_placement = True sess = tf.Session(config=config) sess.run(tf.global_variables_initializer()) print('before run inference run.\n') # Load weights. loader = tf.train.Saver(var_list=restore_var) load(loader, sess, args.model_weights) # pdb.set_trace() # Perform inference. start_time = time.time() preds = sess.run(raw_output_up, feed_dict={input_img: img[np.newaxis, ...]}) preds = preds.squeeze() # msk = decode_labels(preds) # im = Image.fromarray(msk[0]) if not os.path.exists(args.save_dir): os.makedirs(args.save_dir) # cv2.imwrite(args.save_dir+'mask.png', preds) image.imsave(args.save_dir + 'mask.png', preds) # ,cmap = cm.grey, vmin=0, vmax=255 ) duration = time.time() - start_time print('The output file has been saved to {} -- time: {:.4f} sec'.format( args.save_dir + 'mask.png', duration))
def _build_model(self): net = DeepLabResNetModel({'data': self._images}, is_training=self.training, num_classes=self.num_classes) self.logits = tf.squeeze(net.layers['conv6'], [1, 2])
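# --- Illustrative sketch (assumption, not from the original class) -----------
# The squeezed 'conv6' logits above have shape [batch, num_classes]; a typical
# companion method would turn them into a classification loss and predictions.
# The method name and the labels tensor are hypothetical, and tensorflow is
# assumed to be imported as tf, as in the rest of the file.
def _build_loss(self, labels):
    # labels: [batch] integer class ids.
    self.loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits,
                                                       labels=labels))
    self.predictions = tf.argmax(self.logits, axis=1)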
def main(): # get arguments args = get_arguments() # setup used GPU os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" os.environ["CUDA_VISIBLE_DEVICES"] = args.GPU """Create the model and start the evaluation process.""" # data reader. # input image input_img = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3], name="input_image") # img = tf.image.decode_jpeg(tf.read_file(args.img_path), channels=3) # Convert RGB to BGR. img_r, img_g, img_b = tf.split(axis=3, num_or_size_splits=3, value=input_img) img = tf.cast(tf.concat(axis=3, values=[img_b, img_g, img_r]), dtype=tf.float32) # Extract mean. img -= IMG_MEAN img_upscale = tf.image.resize_bilinear( img, [IMAGE_SIZE * args.up_scale, IMAGE_SIZE * args.up_scale]) # Create network. net = DeepLabResNetModel({'data': img}, is_training=False, num_classes=args.num_classes) # Which variables to load. restore_var = tf.global_variables() # Predictions. res5c_relu = net.layers['res5c_relu'] fc1_voc12_c0 = net.layers['fc1_voc12_c0'] fc1_voc12_c1 = net.layers['fc1_voc12_c1'] fc1_voc12_c2 = net.layers['fc1_voc12_c2'] fc1_voc12_c3 = net.layers['fc1_voc12_c3'] raw_output = net.layers['fc1_voc12'] raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(img)[1:3, ]) # raw_output_up_argmax = tf.argmax(raw_output_up, dimension=3) # pred = tf.expand_dims(raw_output_up_argmax, dim=3) pmap = tf.nn.softmax(raw_output_up, name="probability_map") # Set up TF session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() sess.run(init) # Load weights. loader = tf.train.Saver(var_list=restore_var) if os.path.isdir(args.restore_from): # search checkpoint at given path ckpt = tf.train.get_checkpoint_state(args.restore_from) if ckpt and ckpt.model_checkpoint_path: # load checkpoint file load(loader, sess, ckpt.model_checkpoint_path) print("Model restored from {}".format(ckpt.model_checkpoint_path)) else: print("No model found at{}".format(args.restore_from)) elif os.path.isfile(args.restore_from): # load checkpoint file load(loader, sess, args.restore_from) else: print("No model found at{}".format(args.restore_from)) '''Perform validation on large images.''' # preds, scoremap, pmap, cnn_out, fc0, fc1, fc2, fc3 = sess.run([pred, raw_output, raw_output_up, res5c_relu, fc1_voc12_c0, fc1_voc12_c1, fc1_voc12_c2, fc1_voc12_c3], feed_dict={input_img}) if not os.path.exists(args.save_dir): os.makedirs(args.save_dir) # gaussian weight kernel gfilter = gauss2D(shape=[IMAGE_SIZE, IMAGE_SIZE], sigma=(IMAGE_SIZE - 1) / 4) seg_metric = SegMetric(1) for valid_file in valid_list: print("Validate image {}".format(valid_file[0:-2])) valid_image = misc.imread( os.path.join(args.img_path, valid_file.format('.png'))) valid_truth = (misc.imread( os.path.join(args.img_path, valid_file.format('_truth.png'))) / 255).astype(np.uint8) image_shape = valid_truth.shape valid_patches = patchify(valid_image, IMAGE_SIZE, valid_stride) """divided patches into smaller batch for validation""" pred_pmap = valid_in_batch(valid_patches, sess, pmap, input_img, step=valid_batch_size) # pred_pmap = np.ones(valid_patches.shape[0:-1]) print("Stiching patches") pred_pmap_weighted = pred_pmap * gfilter[None, :, :] pred_pmap_weighted_large = unpatchify(pred_pmap_weighted, image_shape, valid_stride) gauss_mask_large = unpatchify( np.ones(pred_pmap.shape) * gfilter[None, :, :], image_shape, valid_stride) pred_pmap_weighted_large_normalized = np.nan_to_num( pred_pmap_weighted_large / 
gauss_mask_large) pred_binary = (pred_pmap_weighted_large_normalized > 0.5).astype( np.uint8) # mean IoU seg_metric.add_image_pair(pred_binary, valid_truth) print("mean_IU: {:.4f}".format(mean_IU(pred_binary, valid_truth))) # print("Save validation prediction") misc.imsave( os.path.join(args.save_dir, '{}_valid_pred.png'.format(valid_file[0:-2])), pred_binary) misc.imsave( os.path.join(args.save_dir, '{}_valid_pred_255.png'.format(valid_file[0:-2])), pred_binary * 255) misc.toimage(pred_pmap_weighted_large_normalized.astype(np.float32), high=1.0, low=0.0, cmin=0.0, cmax=1.0, mode='F').save( os.path.join( args.save_dir, '{}_valid_pmap.tif'.format(valid_file[0:-2]))) # # Plot PR curve # precision, recall, thresholds = precision_recall_curve(valid_truth.flatten(), pred_pmap_weighted_large_normalized.flatten(), 1) # plt.figure() # plt.plot(recall, precision, lw=2, color='navy', # label='Precision-Recall curve') # plt.xlabel('Recall') # plt.ylabel('Precision') # plt.ylim([0.0, 1.05]) # plt.xlim([0.0, 1.0]) # plt.title('Precision-Recal') # # plt.legend(loc="lower left") # plt.savefig(os.path.join(args.save_dir, '{}_PR_curve.png'.format(valid_file[0:-2]))) # msk = decode_labels(preds, num_classes=args.num_classes) # im = Image.fromarray(msk[0]) # im.save(args.save_dir + 'pred.png') print("Overal mean IoU: {:.4f}".format(seg_metric.mean_IU())) print('The output file has been saved to {}'.format(args.save_dir))
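# --- Illustrative sketch (assumption, not from the original script) ----------
# A gauss2D helper of the kind used above to down-weight patch borders before
# stitching overlapping predictions (the MATLAB fspecial('gaussian') recipe);
# the real helper is defined elsewhere in the repository.
import numpy as np

def gauss2D(shape=(321, 321), sigma=80.0):
    m, n = [(s - 1) / 2.0 for s in shape]
    y, x = np.ogrid[-m:m + 1, -n:n + 1]
    kernel = np.exp(-(x * x + y * y) / (2.0 * sigma * sigma))
    kernel[kernel < np.finfo(kernel.dtype).eps * kernel.max()] = 0
    return kernel / kernel.sum() if kernel.sum() != 0 else kernel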
def inference_for_keypoints(image_dir, image_list, checkpoint, output_dir): # Create directory for output. if not os.path.exists(output_dir): os.system('mkdir ' + output_dir) # Create queue coordinator. coord = tf.train.Coordinator() # Load reader. with tf.name_scope("create_inputs"): reader = ImageReader( image_dir, image_list, None, # No defined input size. False, # No random scale. False, # No random mirror. IGNORE_LABEL, IMG_MEAN, coord) image, label = reader.image, reader.label # Add one batch dimension. image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0) # Create network. net = DeepLabResNetModel({'data': image_batch}, is_training=False, num_classes=NUM_CLASSES) restore_var = tf.global_variables() # Predictions. raw_output = net.layers['fc1_voc12'] raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) raw_output = tf.argmax(raw_output, dimension=3) pred = tf.expand_dims(raw_output, dim=3) # Create 4-d tensor. # Set up tf session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() sess.run(init) sess.run(tf.local_variables_initializer()) # Load weights. tf.train.Saver(var_list=restore_var).restore(sess, checkpoint) print('Successfully restored model parameters from {}'.format(checkpoint)) # Start queue threads. threads = tf.train.start_queue_runners(coord=coord, sess=sess) # Read list text file. with open(image_list, 'r') as txtfile: files = txtfile.readlines() # Iterate for all images. start = time.time() for idx, file in enumerate(files): # Print status. if (idx + 1) % 1000 == 0 or (idx + 1) == len(files): print(str(idx + 1) + ' / ' + str(len(files))) # Inference for human keypoints, save the label image. preds = sess.run(pred) with warnings.catch_warnings(): warnings.simplefilter("ignore") io.imsave(output_dir + '/' + file.split('.')[0] + '.png', np.uint8(np.squeeze(preds))) print('Successfully processed all the images in %.2f seconds.' % (time.time() - start)) coord.request_stop() coord.join(threads)
def main(): """Create the model and start the evaluation process.""" args = get_arguments() # Prepare image. img = tf.image.decode_jpeg(tf.read_file(args.img_path), channels=3) # Convert RGB to BGR. img_r, img_g, img_b = tf.split(axis=2, num_or_size_splits=3, value=img) img = tf.cast(tf.concat(axis=2, values=[img_b, img_g, img_r]), dtype=tf.float32) # Extract mean. img -= IMG_MEAN # Create network. net = DeepLabResNetModel({'data': tf.expand_dims(img, dim=0)}, is_training=False, num_classes=args.num_classes) # Which variables to load. restore_var = tf.global_variables() # Predictions. raw_output = net.layers['fc1_voc12'] raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(img)[0:2, ]) raw_output_up_squeeze = tf.squeeze(raw_output_up, axis=0) raw_output_up_squeeze = tf.nn.softmax(raw_output_up_squeeze, ) # raw_output_up = tf.argmax(raw_output_up, dimension=3) # print(raw_output_up.get_shape()) #(1, ?, ?) # pred = tf.expand_dims(raw_output_up, dim=3) # print(pred.get_shape()) #(1, ?, ?, 1) # Set up TF session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() sess.run(init) # Load weights. loader = tf.train.Saver(var_list=restore_var) load(loader, sess, args.model_weights) # Perform inference. start = time.time() processed_probabilities = sess.run(raw_output_up_squeeze) img = tf.image.decode_jpeg(tf.read_file(args.img_path), channels=3) # Convert RGB to BGR. img_r, img_g, img_b = tf.split(axis=2, num_or_size_splits=3, value=img) img = tf.cast(tf.concat(axis=2, values=[img_b, img_g, img_r]), dtype=tf.float32) img = sess.run(img) #---------------------------------CRF import sys import pydensecrf.densecrf as dcrf from pydensecrf.utils import compute_unary, create_pairwise_bilateral, \ create_pairwise_gaussian, softmax_to_unary import skimage.io as io softmax = processed_probabilities.transpose((2, 0, 1)) # The input should be the negative of the logarithm of probability values # Look up the definition of the softmax_to_unary for more information unary = softmax_to_unary(softmax) # The inputs should be C-continious -- we are using Cython wrapper unary = np.ascontiguousarray(unary) #(21,n) d = dcrf.DenseCRF(img.shape[0] * img.shape[1], 21) d.setUnaryEnergy(unary) # This potential penalizes small pieces of segmentation that are # spatially isolated -- enforces more spatially consistent segmentations feats = create_pairwise_gaussian(sdims=(10, 10), shape=img.shape[:2]) d.addPairwiseEnergy(feats, compat=3, kernel=dcrf.DIAG_KERNEL, normalization=dcrf.NORMALIZE_SYMMETRIC) # This creates the color-dependent features -- # because the segmentation that we get from CNN are too coarse # and we can use local color features to refine them feats = create_pairwise_bilateral(sdims=(50, 50), schan=(20, 20, 20), img=img, chdim=2) d.addPairwiseEnergy(feats, compat=10, kernel=dcrf.DIAG_KERNEL, normalization=dcrf.NORMALIZE_SYMMETRIC) #迭代次数,对于IMG_1702(2592*1456)这张图,迭代5 16.807087183s 迭代20 37.5700438023s Q = d.inference(5) res = np.argmax(Q, axis=0).reshape((img.shape[0], img.shape[1])) #----------------------------------- # res = tf.expand_dims(res, dim=3) res = res[np.newaxis, :, :, np.newaxis] msk = decode_labels(res, num_classes=args.num_classes) im = Image.fromarray(msk[0]) if not os.path.exists(args.save_dir): os.makedirs(args.save_dir) im.save(args.save_dir + '16_crf.png') end = time.time() print('{}'.format(end - start))
def main(): """Create the model and start the evaluation process.""" args = get_arguments() # Create queue coordinator. coord = tf.train.Coordinator() # Load reader. with tf.name_scope("create_inputs"): reader = ImageReader( args.data_dir, args.data_list, None, # No defined input size. False, # No random scale. False, # No random mirror. args.ignore_label, IMG_MEAN, coord) image, label = reader.image, reader.label image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims( label, dim=0) # Add one batch dimension. # Create network. net = DeepLabResNetModel({'data': image_batch}, is_training=False, num_classes=args.num_classes) # Which variables to load. restore_var = tf.global_variables() # Predictions. raw_output = net.layers['fc1_voc12'] raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) raw_output = tf.argmax(raw_output, dimension=3) pred = tf.expand_dims(raw_output, dim=3) # Create 4-d tensor. # mIoU pred = tf.reshape(pred, [ -1, ]) gt = tf.reshape(label_batch, [ -1, ]) weights = tf.cast( tf.less_equal(gt, args.num_classes - 1), tf.int32) # Ignoring all labels greater than or equal to n_classes. mIoU, update_op = tf.contrib.metrics.streaming_mean_iou( pred, gt, num_classes=args.num_classes, weights=weights) # Set up tf session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() sess.run(init) sess.run(tf.local_variables_initializer()) # Load weights. loader = tf.train.Saver(var_list=restore_var) if args.restore_from is not None: load(loader, sess, args.restore_from) # Start queue threads. threads = tf.train.start_queue_runners(coord=coord, sess=sess) # Iterate over training steps. for step in range(args.num_steps): preds, _ = sess.run([pred, update_op]) if step % 100 == 0: print('step {:d}'.format(step)) print('Mean IoU: {:.3f}'.format(mIoU.eval(session=sess))) coord.request_stop() coord.join(threads)
def main(): """Create the model and start the evaluation process.""" args = get_arguments() num_steps = file_len(args.data_list) # Create queue coordinator. coord = tf.train.Coordinator() # Load reader. with tf.name_scope("create_inputs"): reader = ImageReader( args.img_path, args.data_list, None, # No defined input size. False, # No random scale. False, # No random mirror. 255, IMG_MEAN, coord) image, label = reader.image, reader.label title = reader.queue[0] image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims( label, dim=0) # Add one batch dimension. # Create network. net = DeepLabResNetModel({'data': image_batch}, is_training=False, num_classes=args.num_classes) # Which variables to load. restore_var = tf.global_variables() # Predictions. raw_output = net.layers['fc1_voc12'] raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) raw_output_up = tf.argmax(raw_output_up, dimension=3) pred = tf.expand_dims(raw_output_up, dim=3) # Set up TF session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() sess.run(init) # Load weights. loader = tf.train.Saver(var_list=restore_var) load(loader, sess, args.model_weights) # Start queue threads. threads = tf.train.start_queue_runners(coord=coord, sess=sess) start_time = time.time() if not os.path.exists(args.save_dir): os.makedirs(args.save_dir) # Perform inference. for step in range(num_steps): preds, jpg_path = sess.run([pred, title]) msk = decode_labels(preds, num_classes=args.num_classes) im = Image.fromarray(msk[0]) img_o = Image.open(jpg_path) jpg_path = jpg_path.decode() jpg_path = jpg_path.split('/')[-1].split('.')[0] img = np.array(im) * 0.9 + np.array(img_o) * 0.7 img[img > 255] = 255 img = Image.fromarray(np.uint8(img)) img.save(args.save_dir + jpg_path + '.png') print('Image processed {}.png'.format(jpg_path)) total_time = time.time() - start_time print('The output files have been saved to {}'.format(args.save_dir)) print('It took {} sec on each image.'.format(total_time / num_steps))
def main(): """Create the model and start the evaluation process.""" args = get_arguments() # Create queue coordinator. coord = tf.train.Coordinator() # Load reader. with tf.name_scope("create_inputs"): reader = ImageReader_MultiClass_Loss( args.data_dir, args.data_list, None, # No defined input size. RANDOM_SEED, False, # No random scale. False, # No random mirror. coord) image, l2_catg, binary_catg, hinge_catg = reader.image, reader.l2_catg, reader.binary_catg, reader.hinge_catg image_batch = tf.expand_dims(image, dim=0) binary_catg_batch = tf.expand_dims(binary_catg, dim=0) # Create network. net = DeepLabResNetModel({'data': image_batch}, is_training=False) # Which variables to load. restore_var = tf.global_variables() # Predictions. raw_output = net.layers['fc1_voc12'] # Do the global average pooling raw_output_bcgd_rmvd = raw_output[:, :, :, 1:] g_avg_pool = tf.reduce_mean(tf.reduce_mean(raw_output_bcgd_rmvd, axis=1, keep_dims=True),\ axis=2, keep_dims=True) # Avg across the width and height dimension -> [Bx21] g_avg_pool_sqzd = tf.squeeze(g_avg_pool, axis=[1, 2]) pred = tf.nn.softmax(g_avg_pool_sqzd) # Get the class activation map raw_output_up = tf.image.resize_bilinear(raw_output_bcgd_rmvd, tf.shape(image_batch)[1:3, ]) raw_output_up = raw_output_up - tf.reduce_min(tf.reduce_min( raw_output_up, axis=1, keep_dims=True), axis=2, keep_dims=True) + EPSILON raw_output_up = raw_output_up / tf.reduce_max(tf.reduce_max( raw_output_up, axis=1, keep_dims=True), axis=2, keep_dims=True) cam_m_1 = tf.argmax(raw_output_up, dimension=3) + 1 raw_output_catgs_rmvd = raw_output_up * tf.expand_dims( tf.expand_dims(binary_catg_batch, 1), 2) cam_m_2 = tf.argmax(raw_output_catgs_rmvd, dimension=3) + 1 cam = tf.cast(tf.equal(cam_m_1, cam_m_2), tf.int64) * cam_m_1 cam_batch = tf.expand_dims(cam, dim=3) # Set up tf session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() sess.run(init) sess.run(tf.local_variables_initializer()) # Load weights. loader = tf.train.Saver(var_list=restore_var) if args.restore_from is not None: load(loader, sess, args.restore_from) # Start queue threads. threads = tf.train.start_queue_runners(coord=coord, sess=sess) # Iterate over training steps. for step in range(args.num_steps): preds, images, cams, bin_catg = sess.run( [pred, image_batch, cam_batch, binary_catg]) """ print(bin_catg) print(np.unique(np.unique(cams))) """ img = inv_preprocess(images) attMap = decode_labels(cams) output_dir = './output_maps_binary_without_norm/' img_name = output_dir + str(step) + '.jpg' map_name = output_dir + str(step) + '.png' misc.imsave(img_name, img[0, :, :, :]) misc.imsave(map_name, attMap[0, :, :, :]) coord.request_stop() coord.join(threads)
def main(): """Create the model and start the evaluation process.""" args = get_arguments() print(args) os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_mask try: os.makedirs('eval/niiout') except: pass event_end = Event() queue_proc = Queue() with open(args.data_list, 'r') as f: list_of_all_lines = f.readlines() f.seek(0) dict = {} for line in f: if re.match(".*\\/(.*)\\.nii.*", line).group(1) not in dict: dict[re.match(".*\\/(.*)\\.nii.*", line).group(1)] = [] dict[re.match(".*\\/(.*)\\.nii.*", line).group(1)].append(line.rsplit()[0]) with tf.Graph().as_default(): # Create queue coordinator. coord = tf.train.Coordinator() # Load reader. with tf.name_scope("create_inputs"): reader = ImageReader( args.data_dir, args.data_list, (512, 512), # No defined input size. False, # No random scale. False, # No random mirror. args.ignore_label, IMG_MEAN, coord, shuffle=False) image_batch, _ = reader.dequeue(args.batch_size) # Create network. net = DeepLabResNetModel({'data': image_batch}, is_training=False, num_classes=args.num_classes) # Which variables to load. restore_var = tf.global_variables() # Predictions. raw_output = net.layers['fc1_voc12'] raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) raw_output = tf.argmax(raw_output, dimension=3) sess = tf.Session() sess.run( tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())) # Load weights. loader = tf.train.Saver(var_list=restore_var) if args.restore_from is not None: load(loader, sess, args.restore_from) # Start queue threads. proc = Process(target=saving_process, args=(queue_proc, event_end, args.data_dir, args.post_processing)) proc.start() threads = tf.train.start_queue_runners(coord=coord, sess=sess) for sublist in [ list_of_all_lines[i:i + args.batch_size] for i in xrange(0, len(list_of_all_lines), args.batch_size) ]: preds = sess.run([raw_output])[0] for i, thing in enumerate(sublist): regex_match = re.match(".*\\/(.*)\\.nii_([0-9]+).*", thing) # print(regex_match.group(1) + ' ' + str(regex_match.group(2))) queue_proc.put( (regex_match.group(1), int(regex_match.group(2)), preds[i], len(dict[regex_match.group(1)]))) coord.request_stop() coord.join(threads) event_end.set() proc.join()
def main(): """Create the model and start the evaluation process.""" args = get_arguments() #image list/ label list f = open(DATA_ID_PATH, 'r') maskslist = [] for line in f: mask = line.strip("\n") maskslist.append(mask) # Create queue coordinator. coord = tf.train.Coordinator() # Load reader. with tf.name_scope("create_inputs"): reader = ImageReader( args.data_dir, args.data_list, None, # No defined input size. False, # No random scale. False, # No random mirror. args.ignore_label, IMG_MEAN, coord) image, label = reader.image, reader.label image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims( label, dim=0) # Add one batch dimension. h_orig, w_orig = tf.to_float(tf.shape(image_batch)[1]), tf.to_float( tf.shape(image_batch)[2]) image_batch075 = tf.image.resize_images( image_batch, tf.stack([ tf.to_int32(tf.multiply(h_orig, 0.75)), tf.to_int32(tf.multiply(w_orig, 0.75)) ])) image_batch05 = tf.image.resize_images( image_batch, tf.stack([ tf.to_int32(tf.multiply(h_orig, 0.5)), tf.to_int32(tf.multiply(w_orig, 0.5)) ])) # Create network. with tf.variable_scope('', reuse=False): net = DeepLabResNetModel({'data': image_batch}, is_training=False, num_classes=args.num_classes) with tf.variable_scope('', reuse=True): net075 = DeepLabResNetModel({'data': image_batch075}, is_training=False, num_classes=args.num_classes) with tf.variable_scope('', reuse=True): net05 = DeepLabResNetModel({'data': image_batch05}, is_training=False, num_classes=args.num_classes) # Which variables to load. restore_var = tf.global_variables() # Predictions. raw_output100 = net.layers['fc1_voc12'] raw_output075 = tf.image.resize_images(net075.layers['fc1_voc12'], tf.shape(raw_output100)[1:3, ]) raw_output05 = tf.image.resize_images(net05.layers['fc1_voc12'], tf.shape(raw_output100)[1:3, ]) raw_output = tf.reduce_max(tf.stack( [raw_output100, raw_output075, raw_output05]), axis=0) raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) raw_output = tf.argmax(raw_output, dimension=3) pred = tf.expand_dims(raw_output, dim=3) # Create 4-d tensor. # # mIoU first convert pred and gt to vector,then compute mIoU # pred = tf.reshape(pred, [-1, ]) # gt = tf.reshape(label_batch, [-1, ]) # # tensorflow 1.3.0 conflict # # weights = tf.cast(tf.less_equal(gt, args.num_classes - 1), tf.int32) # Ignoring all labels greater than or equal to n_classes. # # mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred, gt, num_classes=args.num_classes, weights=weights) # indices = tf.squeeze(tf.where(tf.less_equal(gt, args.num_classes - 1)), 1) # ignore all labels >= num_classes # gt = tf.cast(tf.gather(gt, indices), tf.int32) # pred = tf.gather(pred, indices) # mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred, gt, num_classes=args.num_classes) # Set up tf session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() sess.run(init) sess.run(tf.local_variables_initializer()) # Load weights. loader = tf.train.Saver(var_list=restore_var) if args.restore_from is not None: load(loader, sess, args.restore_from) # Start queue threads. threads = tf.train.start_queue_runners(coord=coord, sess=sess) # Iterate over training steps. 
start = time.time() for step in range(args.num_steps): # preds, _ = sess.run([pred, update_op]) once_start = time.time() preds = sess.run(pred) once_end = time.time() print('image %d, net forward cost %d s' % (step + 1, once_end - once_start)) # save predicted label.png # msk = decode_labels(preds, num_classes=args.num_classes) mask = preds msk = np.array(mask[0, :, :, 0], dtype=np.uint8) im = Image.fromarray(msk) # im = Image.fromarray(msk[0]) if not os.path.exists(args.save_dir): os.makedirs(args.save_dir) im.save(args.save_dir + maskslist[step] + '.png') print('The output file has been saved to {}'.format(args.save_dir + maskslist[step] + '.png')) end = time.time() print('image %d, postprocessing cost %d s' % (step + 1, end - once_start)) avgfps = (step + 1) / (end - start) print('frame %d , %s /s' % (step + 1, avgfps)) if step % 100 == 0: print('step {:d}'.format(step)) # print('Mean IoU: {:.3f}'.format(mIoU.eval(session=sess))) coord.request_stop() coord.join(threads)
def main(data_dir=DATA_DIRECTORY, data_list=DATA_LIST_PATH, start_step=START_STEP, num_steps=NUM_STEPS,\ global_step=GLOBAL_STEP, restore_from=RESTORE_FROM, snapshot_dir=SNAPSHOT_DIR,\ base_learning_rate=LEARNING_RATE, n_classes=NUM_CLASSES, adapt=False, input_size=(321,321)): """Create the model and start the training.""" graph = tf.Graph() with graph.as_default(): tf.set_random_seed(RANDOM_SEED) # Create queue coordinator. coord = tf.train.Coordinator() # Load reader for training. with tf.name_scope("create_inputs"): reader = ImageReader_Segment(data_dir, data_list, input_size, RANDOM_SEED, RANDOM_SCALE, RANDOM_MIRROR, n_classes, adapt, coord) image_batch, label_batch, catg_batch = reader.dequeue(BATCH_SIZE) # Create network. net = DeepLabResNetModel({'data': image_batch}, is_training=False) # For a small batch size, it is better to keep # the statistics of the BN layers (running means and variances) # frozen, and to not update the values provided by the pre-trained model. # If is_training=True, the statistics will be updated during the training. # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset) # if they are presented in var_list of the optimiser definition. # Predictions. raw_output_seg = net.layers['fc1_voc12'] raw_output_classfc = net.layers['fc1_voc12_d0'] # Which variables to load. Running means and variances are not trainable, # thus all_variables() should be restored. restore_var = tf.global_variables() all_trainable = [ v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name ] fc_trainable = [v for v in all_trainable if 'fc' in v.name] fc_d_trainable = [v for v in fc_trainable if '_d0' in v.name] conv_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0 fc_c_w_trainable = [ v for v in fc_trainable if 'weights' in v.name and '_d0' not in v.name ] # lr * 10.0 fc_c_b_trainable = [ v for v in fc_trainable if 'biases' in v.name and '_d0' not in v.name ] # lr * 20.0 fc_d_w_trainable = [v for v in fc_d_trainable if 'weights' in v.name] # lr * 10.0 fc_d_b_trainable = [v for v in fc_d_trainable if 'biases' in v.name] # lr * 20.0 assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable)) assert(len(fc_trainable) == len(fc_c_w_trainable) + len(fc_c_b_trainable) +\ len(fc_d_w_trainable) + len(fc_d_b_trainable)) # Add histogram of all variables for v in conv_trainable + fc_trainable: tf.summary.histogram(v.name.replace(":", "_"), v) # Do the global average pooling g_avg_pool = tf.reduce_mean(tf.reduce_mean(raw_output_classfc, axis=1, keep_dims=True),\ axis=2, keep_dims=True) # Avg across the width and height dimension -> [Bx1x1x20] g_avg_pool_sqzd = tf.squeeze(g_avg_pool, axis=[1, 2]) # Resize the label batch to the size of predictions label_proc = tf.image.resize_nearest_neighbor( label_batch, tf.stack(raw_output_seg.get_shape()[1:3])) label_proc = tf.squeeze(label_proc, axis=3) # Change the shapes of ground truth and predictions raw_gt = tf.reshape(label_proc, [ -1, ]) raw_prediction = tf.reshape(raw_output_seg, [-1, n_classes]) # Classification loss classfc_loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=g_avg_pool_sqzd, labels=catg_batch)) # Pixel-wise softmax loss. 
seg_loss = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits=raw_prediction, labels=raw_gt)) # L2 loss l2_losses = [ WEIGHT_DECAY * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name ] # L2 and classification loss l2_classfc_loss = tf.add_n(l2_losses) + classfc_loss # Combined loss reduced_loss = seg_loss + l2_classfc_loss # Add loss to summary tf.summary.scalar("loss", reduced_loss) # Processed predictions: for visualisation. raw_output_up = tf.image.resize_bilinear(raw_output_seg, tf.shape(image_batch)[1:3, ]) raw_output_up = tf.argmax(raw_output_up, dimension=3) pred = tf.expand_dims(raw_output_up, dim=3) # Image summary. images_summary = tf.py_func(inv_preprocess, [image_batch, SAVE_NUM_IMAGES], tf.uint8) preds_summary = tf.py_func(decode_labels, [pred, SAVE_NUM_IMAGES], tf.uint8) total_summary = tf.summary.image( 'images', tf.concat(axis=2, values=[images_summary, preds_summary]), max_outputs=SAVE_NUM_IMAGES) # Concatenate row-wise. merged_summary = tf.summary.merge_all() summary_writer = tf.summary.FileWriter(snapshot_dir, graph=graph) # Define loss and optimisation parameters. base_lr = tf.constant(base_learning_rate) step_ph = tf.placeholder(dtype=tf.float32, shape=()) learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / num_steps), POWER)) opt_conv = tf.train.MomentumOptimizer(learning_rate, MOMENTUM) opt_fc_c_w = tf.train.MomentumOptimizer(learning_rate * 10.0, MOMENTUM) opt_fc_c_b = tf.train.MomentumOptimizer( learning_rate * 20.0, MOMENTUM) # reducing the learning rate opt_fc_d_w = tf.train.MomentumOptimizer(learning_rate * 10.0, MOMENTUM) opt_fc_d_b = tf.train.MomentumOptimizer( learning_rate * 20.0, MOMENTUM) # reducing the learning rate grads_pixel_loss = tf.gradients( seg_loss, conv_trainable + fc_c_w_trainable + fc_c_b_trainable) grads_l2_classfc_loss = tf.gradients(l2_classfc_loss, conv_trainable + fc_c_w_trainable \ + fc_c_b_trainable + fc_d_w_trainable + fc_d_b_trainable) grads_conv = grads_pixel_loss[:len( conv_trainable)] + grads_l2_classfc_loss[:len(conv_trainable)] grads_fc_c_w = grads_pixel_loss[len(conv_trainable) : (len(conv_trainable) + len(fc_c_w_trainable))] \ + grads_l2_classfc_loss[len(conv_trainable) : (len(conv_trainable) + len(fc_c_w_trainable))] grads_fc_c_b = grads_pixel_loss[(len(conv_trainable) + len(fc_c_w_trainable)):] \ + grads_l2_classfc_loss[(len(conv_trainable) + len(fc_c_w_trainable)) : \ (len(conv_trainable) + len(fc_c_w_trainable) + len(fc_c_b_trainable))] grads_fc_d_w = grads_l2_classfc_loss[(len(conv_trainable) + len(fc_c_w_trainable) + len(fc_c_b_trainable)) : \ (len(conv_trainable) + len(fc_c_w_trainable) + len(fc_c_b_trainable) + \ len(fc_d_w_trainable))] grads_fc_d_b = grads_l2_classfc_loss[(len(conv_trainable) + len(fc_c_w_trainable) + len(fc_c_b_trainable) + \ len(fc_d_w_trainable)):] train_op_conv = opt_conv.apply_gradients( zip(grads_conv, conv_trainable)) train_op_fc_c_w = opt_fc_c_w.apply_gradients( zip(grads_fc_c_w, fc_c_w_trainable)) train_op_fc_c_b = opt_fc_c_b.apply_gradients( zip(grads_fc_c_b, fc_c_b_trainable)) train_op_fc_d_w = opt_fc_d_w.apply_gradients( zip(grads_fc_d_w, fc_d_w_trainable)) train_op_fc_d_b = opt_fc_d_b.apply_gradients( zip(grads_fc_d_b, fc_d_b_trainable)) train_op = tf.group(train_op_conv, train_op_fc_c_w, train_op_fc_c_b, train_op_fc_d_w, train_op_fc_d_b) # Set up tf session and initialize variables. 
config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config, graph=graph) as sess: # Initialize the model parameters tf.global_variables_initializer().run() # Saver for storing checkpoints of the model. saver = tf.train.Saver(var_list=restore_var, max_to_keep=10) # Load variables if the checkpoint is provided. if restore_from is not None: loader = tf.train.Saver(var_list=restore_var) load(loader, sess, restore_from) # Start queue threads. threads = tf.train.start_queue_runners(coord=coord, sess=sess) # Iterate over training steps. for step in range(start_step + 1, num_steps): start_time = time.time() feed_dict = {step_ph: step} if step % SAVE_PRED_EVERY == 0: loss_value, summary, _ = sess.run( [reduced_loss, merged_summary, train_op], feed_dict=feed_dict) summary_writer.add_summary(summary, (step + global_step)) save(saver, sess, snapshot_dir, (step + global_step)) else: loss_value, lr, _ = sess.run( [reduced_loss, learning_rate, train_op], feed_dict=feed_dict) duration = time.time() - start_time print('step {:d} global_step {:d} \t loss = {:.3f} lr = {:.5f} ({:.3f} sec/step)'.\ format(step, step + global_step, loss_value, lr, duration)) coord.request_stop() coord.join(threads)
def main(): """Create the model and start the training.""" args = get_arguments() h, w = map(int, args.input_size.split(',')) input_size = (h, w) tf.set_random_seed(args.random_seed) # Create queue coordinator. coord = tf.train.Coordinator() # Load reader. with tf.name_scope("create_inputs"): reader = ImageReader(args.data_dir, args.data_list, input_size, args.random_scale, args.random_mirror, args.ignore_label, IMG_MEAN, coord) image_batch, label_batch = reader.dequeue(args.batch_size) # Create network. net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training, num_classes=args.num_classes) # For a small batch size, it is better to keep # the statistics of the BN layers (running means and variances) # frozen, and to not update the values provided by the pre-trained model. # If is_training=True, the statistics will be updated during the training. # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset) # if they are presented in var_list of the optimiser definition. # Predictions. raw_output = net.layers['fc1_voc12'] # Which variables to load. Running means and variances are not trainable, # thus all_variables() should be restored. # Restore all variables, or all except the last ones. restore_var = [v for v in tf.global_variables() if 'fc' not in v.name] trainable = [v for v in tf.trainable_variables() if 'fc' in v.name] # Fine-tune only the last layers. prediction = tf.reshape(raw_output, [-1, args.num_classes]) label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes) gt = tf.reshape(label_proc, [-1, args.num_classes]) # Pixel-wise softmax loss. loss = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=gt) reduced_loss = tf.reduce_mean(loss) # Processed predictions. raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) raw_output_up = tf.argmax(raw_output_up, dimension=3) pred = tf.expand_dims(raw_output_up, dim=3) # Image summary. images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8) labels_summary = tf.py_func( decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8) preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8) total_summary = tf.summary.image( 'images', tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]), max_outputs=args.save_num_images) # Concatenate row-wise. loss_summary = tf.summary.scalar('training_loss', reduced_loss) overall_summary = tf.summary.merge([total_summary, loss_summary]) summary_writer = tf.summary.FileWriter(args.snapshot_dir, graph=tf.get_default_graph()) # Define loss and optimisation parameters. optimiser = tf.train.AdamOptimizer(learning_rate=args.learning_rate) optim = optimiser.minimize(reduced_loss, var_list=trainable) # Set up tf session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() sess.run(init) # Saver for storing checkpoints of the model. saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=40) # Load variables if the checkpoint is provided. if args.restore_from is not None: loader = tf.train.Saver(var_list=restore_var) load(loader, sess, args.restore_from) # Start queue threads. threads = tf.train.start_queue_runners(coord=coord, sess=sess) # Iterate over training steps. 
for step in range(args.num_steps): start_time = time.time() if step % args.save_pred_every == 0: loss_value, images, labels, preds, summary, _ = sess.run([ reduced_loss, image_batch, label_batch, pred, overall_summary, optim ]) summary_writer.add_summary(summary, step) save(saver, sess, args.snapshot_dir, step) else: loss_value, _, summary = sess.run( [reduced_loss, optim, loss_summary]) summary_writer.add_summary(summary, step) duration = time.time() - start_time print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format( step, loss_value, duration)) coord.request_stop() coord.join(threads)
def main(): """Create the model and start the evaluation process.""" args = get_arguments() # Create queue coordinator. coord = tf.train.Coordinator() # Load validation # Load reader. with tf.name_scope("create_inputs"): reader = ImageReader( args.data_dir, args.data_list, None, # No defined input size. False, # No random scale. False, # No random mirror. args.ignore_label, IMG_MEAN, coord) image, label, file, mask = reader.image, reader.label, reader.image_list, reader.label_list image_batch, label_batch, file_batch, mask_batch = tf.expand_dims(image, dim=0), \ tf.expand_dims(label, dim=0), \ tf.expand_dims(file, dim=0), \ tf.expand_dims(mask, dim=0)# Add one batch dimension. # Create network. net = DeepLabResNetModel({'data': image_batch}, is_training=False, num_classes=args.num_classes) # Which variables to load. restore_var = tf.global_variables() # Predictions. raw_output = net.layers['fc1_voc12'] raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) probabilities = tf.nn.softmax(raw_output) raw_output = tf.argmax(raw_output, dimension=3) preds = tf.expand_dims(raw_output, dim=3) # Create 4-d tensor. # mIoU # pred = tf.reshape(preds, [-1,]) # gt = tf.reshape(label_batch, [-1,]) # weights = tf.cast(tf.less_equal(gt, args.num_classes - 1), tf.int32) # Ignoring all labels greater than or equal to n_classes. # mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred, gt, num_classes=args.num_classes, weights=weights) file_name = file_batch mask_link = mask_batch # Set up tf session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() sess.run(init) sess.run(tf.local_variables_initializer()) # Load weights. loader = tf.train.Saver(var_list=restore_var) if args.restore_from is not None: load(loader, sess, args.restore_from) # Start queue threads. threads = tf.train.start_queue_runners(coord=coord, sess=sess) # Iterate over training steps. result_data = {} total_miuo_score = 0 nb = 0 global OUT_DIR global IMAGE global IMAGE_PATH for step in range(args.num_steps): predict, probmap, fb, mk = sess.run( [preds, probabilities, file_name, mask_link]) print('step {:d}'.format(step)) if IS_POD: IMAGE = fb[0][step].decode('utf8').replace( "D:\Data\POD/JpegImages/", "").replace(".jpg", "") IMAGE_PATH = "D:\Data\POD\JpegImages\\" + IMAGE + ".jpg" labels, bounding_boxs, confidence_score, miuo_score, pixel_score = \ get_bounding_boxs(probmap) result_data[IMAGE] = { "labels": labels, "confidence_score": confidence_score, "boxes": bounding_boxs, "pixel_accuracy": pixel_score, "mIoU": miuo_score } # Draw bouding boxes to image # drw_img(cv2.imread(IMAGE_PATH), labels, bounding_boxs) OUT_DIR = "mask_pod/" else: # print(mk[0][step].decode('utf8')) IMAGE = mk[0][step].decode('utf8').replace( "D:\Data\PLAD/MaskImage/", "") result_data[IMAGE] = probabilities_map_evaluate( probmap, mk[0][step].decode('utf8')) OUT_DIR = "mask_plad/" print(result_data) # Draw mask image msk = decode_labels(predict, num_classes=args.num_classes) im = Image.fromarray(msk[0]) im.save(OUT_DIR + IMAGE + '.png') # with open('predicted_boxes.json', 'w') as outfile: # json.dump(result_data, outfile) # print('Mean IoU: {:.3f}'.format(mIoU.eval(session=sess))) coord.request_stop() coord.join(threads)
def main(): """Create the model and start the training.""" args = get_arguments() h, w = map(int, args.input_size.split(',')) input_size = (h, w) tf.set_random_seed(args.random_seed) # Create queue coordinator. coord = tf.train.Coordinator() # Load reader. with tf.name_scope("create_inputs"): reader = ImageReader(args.data_dir, args.data_list, input_size, args.random_scale, args.random_mirror, args.ignore_label, IMG_MEAN, coord) image_batch, label_batch = reader.dequeue(args.batch_size) # Create network. sys.stdout.flush() sys.stderr.flush() net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training, num_classes=args.num_classes) sys.stdout.flush() sys.stderr.flush() # For a small batch size, it is better to keep # the statistics of the BN layers (running means and variances) # frozen, and to not update the values provided by the pre-trained model. # If is_training=True, the statistics will be updated during the training. # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset) # if they are presented in var_list of the optimiser definition. # Predictions. raw_output = net.layers['fc1_voc12'] # Which variables to load. Running means and variances are not trainable, # thus all_variables() should be restored. all_trainable = [ v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name ] fc_trainable = [v for v in all_trainable if 'fc' in v.name] conv_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0 fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name] # lr * 10.0 fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name] # lr * 20.0 if args.freeze_convolutions: print("freezing backbone") all_trainable = [v for v in all_trainable if v not in conv_trainable] conv_trainable = [] assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable)) assert (len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable)) print("training {} variables out of total {} trainable variables".format( len(all_trainable), len(tf.trainable_variables()))) # Predictions: ignoring all predictions with labels greater or equal than n_classes raw_prediction = tf.reshape(raw_output, [-1, args.num_classes]) label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False) # [batch_size, h, w] raw_gt = tf.reshape(label_proc, [ -1, ]) indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1) gt = tf.cast(tf.gather(raw_gt, indices), tf.int32) prediction = tf.gather(raw_prediction, indices) # Pixel-wise softmax loss. loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt) l2_losses = [ args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name ] reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses) # Processed predictions: for visualisation. raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) raw_output_up = tf.argmax(raw_output_up, dimension=3) pred = tf.expand_dims(raw_output_up, dim=3) # Image summary. 
    images_summary = tf.py_func(inv_preprocess,
                                [image_batch, args.save_num_images, IMG_MEAN],
                                tf.uint8)
    labels_summary = tf.py_func(decode_labels,
                                [label_batch, args.save_num_images, args.num_classes],
                                tf.uint8)
    preds_summary = tf.py_func(decode_labels,
                               [pred, args.save_num_images, args.num_classes],
                               tf.uint8)

    if not os.path.exists(args.snapshot_dir):
        os.mkdir(args.snapshot_dir)

    total_summary = tf.summary.image(
        'images',
        tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]),
        max_outputs=args.save_num_images)  # Concatenate row-wise.
    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())

    # Define loss and optimisation parameters: "poly" learning-rate decay
    # (a numeric sketch of this schedule follows after this function).
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(base_lr,
                                  tf.pow((1 - step_ph / args.num_steps), args.power))

    opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
    opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum)
    opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum)

    grads = tf.gradients(reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable)
    grads_conv = grads[:len(conv_trainable)]
    grads_fc_w = grads[len(conv_trainable):(len(conv_trainable) + len(fc_w_trainable))]
    grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]

    train_op_group_list = []
    if len(conv_trainable) > 0:
        train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
        train_op_group_list.append(train_op_conv)
    train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
    train_op_group_list.append(train_op_fc_w)
    train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))
    train_op_group_list.append(train_op_fc_b)
    train_op = tf.group(*train_op_group_list)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    sess.run(init)

    # Encode the remaining command-line flags into the snapshot directory name
    # (a worked example of the resulting name follows after this function).
    snapshot_dir = args.snapshot_dir
    skip_next_arg_print = False
    for i in range(1, len(sys.argv)):
        if skip_next_arg_print:
            skip_next_arg_print = False
            continue
        if sys.argv[i].startswith('--snapshot-dir') or \
                sys.argv[i].startswith('--data-dir') or \
                sys.argv[i].startswith('--restore-from'):
            skip_next_arg_print = True
            continue
        if sys.argv[i].startswith('--'):
            words = sys.argv[i].replace('--', '').split('-')
            snapshot_dir += '_' + ''.join([w[0] for w in words]) + "="
        else:
            snapshot_dir += sys.argv[i].replace('/', '-')

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=50)

    start_from_step = 0
    # Load variables if the checkpoint is provided.
    if args.restore_from is not None:
        if args.restore_from == RESTORE_FROM and os.path.exists(snapshot_dir) \
                and len(os.listdir(snapshot_dir)) >= 3:
            restore_path = tf.train.latest_checkpoint(snapshot_dir)
            print("Auto-restoring weights from " + str(restore_path))
        else:
            restore_path = args.restore_from
        try:
            start_from_step = int(restore_path.split("-")[-1])
        except ValueError:
            start_from_step = 0
        print("Auto-starting from step " + str(start_from_step) +
              " (detected from the checkpoint file name)")
        # Restore only the tensors that exist both in the checkpoint and in memory,
        # optionally skipping the last (fc) layers.
        vars_in_checkpoint = get_tensors_in_checkpoint_file(file_name=restore_path)
        loadable_tensors = match_loaded_and_memory_tensors(vars_in_checkpoint)
        loadable_tensors = [
            v for v in loadable_tensors
            if 'fc' not in v.name or not args.not_restore_last
        ]
        loader = tf.train.Saver(var_list=loadable_tensors)
        load(loader, sess, restore_path)

    sys.stdout.flush()
    sys.stderr.flush()

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Save the run configuration next to the checkpoints.
    os.makedirs(snapshot_dir, exist_ok=True)
    with open(os.path.join(snapshot_dir, "args.json"), "w") as f:
        f.write(json.dumps(args.__dict__, indent=2))

    def should_print(step):
        """Log frequently at the start of training and less often later on."""
        if step < 100:
            return True
        elif step < 1000:
            return step % 100 == 0
        elif step < 10000:
            return step % 1000 == 0
        else:
            return step % 10000 == 0

    loss_sum = 0
    num_loss_sum = 0

    # Iterate over training steps.
    for step in range(start_from_step, args.num_steps):
        start_time = time.time()
        feed_dict = {step_ph: step}

        if step % args.save_pred_every == 0 or step == args.num_steps - 1:
            loss_value, imgs, lbls, preds, summary, _ = sess.run(
                [reduced_loss, image_batch, label_batch, pred, total_summary, train_op],
                feed_dict=feed_dict)
            summary_writer.add_summary(summary, step)
            save(saver, sess, snapshot_dir, step)
        else:
            loss_value, _ = sess.run([reduced_loss, train_op], feed_dict=feed_dict)

        loss_sum += loss_value
        num_loss_sum += 1
        duration = time.time() - start_time
        if should_print(step) or should_print(step - start_from_step) or step == args.num_steps - 1:
            print('{:2.2f}% step {:d}/{:d} \t loss = {:.3f} ({:.3f} sec/step)'.format(
                float(step / args.num_steps) * 100., step, args.num_steps,
                loss_sum / num_loss_sum, duration))
            loss_sum = 0
            num_loss_sum = 0
            sys.stdout.flush()
            sys.stderr.flush()

    coord.request_stop()
    coord.join(threads)
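# A minimal numeric sketch (not from the original scripts) of the "poly" learning-rate
# schedule that the training main() above builds with tf.scalar_mul/tf.pow:
# lr(step) = base_lr * (1 - step / num_steps) ** power. The values below are
# illustrative only, not taken from the actual training configuration.
def poly_lr(base_lr, step, num_steps, power):
    """Polynomially decay base_lr from its initial value down to 0 at num_steps."""
    return base_lr * (1.0 - float(step) / num_steps) ** power

# For example, with base_lr=2.5e-4, num_steps=20000 and power=0.9:
#   poly_lr(2.5e-4, 0, 20000, 0.9)      -> 2.5e-4   (start of training)
#   poly_lr(2.5e-4, 10000, 20000, 0.9)  -> ~1.3e-4  (halfway)
#   poly_lr(2.5e-4, 20000, 20000, 0.9)  -> 0.0      (end of training)
# The fc weight/bias groups then use 10x and 20x this value, as in the three
# MomentumOptimizer instances above.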
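# A small, self-contained sketch (the helper name is ours) of the snapshot-directory
# naming scheme used in the training main() above: every flag except --snapshot-dir,
# --data-dir and --restore-from is abbreviated to the initials of its hyphen-separated
# words, followed by the value that was passed for it.
def encode_args_in_dir_name(snapshot_dir, argv):
    skip_next = False
    for token in argv[1:]:
        if skip_next:
            skip_next = False
            continue
        if token.startswith(('--snapshot-dir', '--data-dir', '--restore-from')):
            skip_next = True  # Skip the flag and its value.
            continue
        if token.startswith('--'):
            words = token.replace('--', '').split('-')
            snapshot_dir += '_' + ''.join(w[0] for w in words) + '='
        else:
            snapshot_dir += token.replace('/', '-')
    return snapshot_dir

# encode_args_in_dir_name('./snapshots', ['train.py', '--batch-size', '4', '--learning-rate', '1e-4'])
# -> './snapshots_bs=4_lr=1e-4'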
def main(): """Create the model and start the evaluation process.""" args = get_arguments() # remove_huge_images(args.data_list, args.img_path) num_steps = file_len(args.data_list) # Create queue coordinator. coord = tf.train.Coordinator() # Load reader. print(args.img_path, ' ', file_len(args.data_list)) with tf.name_scope("create_inputs"): reader = ImageReader( args.img_path, args.data_list, None, # No defined input size. False, # No random scale. False, # No random mirror. 255, IMG_MEAN, coord) image, label = reader.image, reader.label title = reader.queue[0] image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims( label, dim=0) # Add one batch dimension. # Create network. net = DeepLabResNetModel({'data': image_batch}, is_training=False, num_classes=args.num_classes) # Which variables to load. restore_var = tf.global_variables() # Predictions. fc1_voc12_layer = net.layers['fc1_voc12'] raw_output_up = tf.image.resize_bilinear(fc1_voc12_layer, tf.shape(image_batch)[1:3, ]) # uncomment to see only stock segmentation # raw_output_up = tf.slice(raw_output_up, [0,0,0,0], [-1,-1,-1,7]) raw_output_up = tf.argmax(raw_output_up, dimension=3) pred = tf.expand_dims(raw_output_up, dim=3) # Set up TF session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() sess.run(init) # Load weights. loader = tf.train.Saver(var_list=restore_var) load(loader, sess, args.model_weights) # Start queue threads. threads = tf.train.start_queue_runners(coord=coord, sess=sess) start_time = time.time() os.makedirs(args.save_dir, exist_ok=True) path_parts = args.img_path.split("/") if path_parts[-1].strip() == "": path_parts = path_parts[:-1] if path_parts[0] == "": path_parts[0] = "/" bottleneck_dir = os.path.join(*path_parts[:-1], path_parts[-1] + "_hp_bottlenecks") os.makedirs(bottleneck_dir, exist_ok=True) # Perform inference. for step in range(num_steps): jpg_name = None try: preds, jpg_path, fc1_voc12_val = sess.run( [pred, title, fc1_voc12_layer]) msk = decode_labels(preds, num_classes=args.num_classes) im = Image.fromarray(msk[0]) img_o = Image.open(jpg_path) jpg_path = str(jpg_path) jpg_name = Path(jpg_path).name.split('.')[0] img = np.array(im) * 0.9 + np.array(img_o) * 0.7 img[img > 255] = 255 img = Image.fromarray(np.uint8(img)) img.save(os.path.join(args.save_dir, str(jpg_name + '.png'))) img_bgr = cv2.cvtColor(np.array(img_o), cv2.COLOR_BGR2RGB) cv2.imwrite( os.path.join(args.save_dir, "stacked_" + str(jpg_name + '.png')), np.hstack([img_bgr, im])) bottleneck_path = os.path.join(bottleneck_dir, jpg_name + "_hp_bottleneck.h5") with h5py.File(bottleneck_path, "w") as bottleneck_file: bottleneck_file.create_dataset("fc1_voc12", data=fc1_voc12_val) print('Image processed {}.png'.format(jpg_name)) print( 'Wrote human parsing bottleneck to {}'.format(bottleneck_path)) except Exception as e: print(e) print('Image failed: ', jpg_name) total_time = time.time() - start_time print('The output files have been saved to {}'.format(args.save_dir)) print('It took {} sec on each image.'.format(total_time / num_steps))