def main(image_src="src.png", image_dst="dst.png"):
    """Compute a homography from two 4-point sets, warp, and show the result.

    For each input file a ``PointExtractor`` is created, ``set_points(4)`` is
    called on it and its current points are read back. The two point sets are
    passed to ``get_homography``; the transform, together with both image
    arrays, goes to ``warp_image`` and the output array is displayed through
    an ``ImageReader`` named "result".

    Args:
        image_src: Filename of the source image.
        image_dst: Filename of the destination image.
    """
    extractors = {}
    point_sets = {}
    # Source is processed completely before the destination is opened.
    for tag, filename in (("src", image_src), ("dst", image_dst)):
        extractor = PointExtractor(image_filename=filename,
                                   name=tag,
                                   indicator_radius=10)
        extractor.set_points(4)
        extractors[tag] = extractor
        point_sets[tag] = extractor.get_current_points()

    homography = get_homography(point_sets["src"], point_sets["dst"])
    warped = warp_image(extractors["src"].image_array,
                        extractors["dst"].image_array,
                        homography)

    ImageReader(image_array=warped, name="result").imshow()
def main():
    """Evaluate the selected model and print its mean IoU.

    Builds the eval-mode network from the command-line arguments, masks out
    pixels whose label equals ``cfg.param['ignore_label']``, accumulates
    ``tf.metrics.mean_iou`` for ``cfg.param['eval_steps']`` steps and prints
    the final value.
    """
    cli = get_arguments()
    config = Config(dataset=cli.dataset,
                    is_training=False,
                    filter_scale=cli.filter_scale)

    network_cls = model_config[cli.model]
    network = network_cls(image_reader=ImageReader(cfg=config, mode='eval'),
                          cfg=config,
                          mode='eval')

    # Flatten predictions and labels so they can be filtered by index.
    flat_predictions = tf.reshape(network.output, [-1, ])
    flat_labels = tf.reshape(network.labels, [-1, ])

    # Indices of every pixel that is NOT the ignore label.
    keep = tf.squeeze(
        tf.where(tf.not_equal(flat_labels, config.param['ignore_label'])), 1)
    kept_labels = tf.cast(tf.gather(flat_labels, keep), tf.int32)
    kept_predictions = tf.gather(flat_predictions, keep)

    miou_value, miou_update = tf.metrics.mean_iou(
        predictions=kept_predictions,
        labels=kept_labels,
        num_classes=config.param['num_classes'])

    network.create_session()
    network.restore(config.model_paths[cli.model])

    # Each run of the update op folds one more evaluation step into the metric.
    for _ in trange(config.param['eval_steps'], desc='evaluation', leave=True):
        network.sess.run(miou_update)

    print('mIoU: {}'.format(network.sess.run(miou_value)))
def test_setup(self):
    """Build the evaluation graph: reader, network, and streaming metrics.

    Sets ``self.coord``, ``self.image_batch``, ``self.label_batch``,
    ``self.pred``, the accuracy / mIoU value and update ops,
    ``self.confusion_matrix`` and ``self.loader``. Exits the process with
    status -1 when ``self.conf.encoder_name`` is not one of ``res101``,
    ``res50`` or ``deeplab``.
    """
    # Coordinator handed to the reader.
    self.coord = tf.train.Coordinator()

    # Input pipeline over the validation list.
    with tf.name_scope("create_inputs"):
        data_reader = ImageReader(
            self.conf.data_dir,
            self.conf.valid_data_list,
            None,  # the images have different sizes
            False,  # no data-aug
            False,  # no data-aug
            self.conf.ignore_label,
            IMG_MEAN,
            self.coord)
        single_image = data_reader.image  # [h, w, 3 or 1]
        single_label = data_reader.label
    # Prepend a batch axis: [1, h, w, 3 or 1]
    expanded_image = tf.expand_dims(single_image, dim=0)
    expanded_label = tf.expand_dims(single_label, dim=0)
    self.image_batch, self.label_batch = expanded_image, expanded_label

    # Pick the network from the configured encoder name.
    encoder = self.conf.encoder_name
    if encoder == 'deeplab':
        net = Deeplab_v2(self.image_batch, self.conf.num_classes, False)
    elif encoder in ('res101', 'res50'):
        net = ResNet_segmentation(self.image_batch, self.conf.num_classes,
                                  False, encoder)
    else:
        print('encoder_name ERROR!')
        print("Please input: res101, res50, or deeplab")
        sys.exit(-1)

    # Upsample the network output to the input size, then take the arg-max
    # over axis 3 and flatten.
    upsampled = tf.image.resize_bilinear(net.outputs,
                                         tf.shape(self.image_batch)[1:3, ])
    class_map = tf.expand_dims(tf.argmax(upsampled, axis=3), dim=3)
    self.pred = tf.reshape(class_map, [-1, ])

    # Flattened ground truth.
    flat_labels = tf.reshape(self.label_batch, [-1, ])
    print("gt in test setup ", flat_labels)

    # Labels >= num_classes get weight 0 and are replaced by 0 so that the
    # metric ops only ever see in-range class ids.
    in_range = tf.less_equal(flat_labels, self.conf.num_classes - 1)
    metric_weights = tf.cast(in_range, tf.int32)
    # fix for tf 1.3.0
    flat_labels = tf.where(in_range, flat_labels, tf.cast(in_range, tf.uint8))

    # Pixel accuracy
    self.accu, self.accu_update_op = tf.contrib.metrics.streaming_accuracy(
        self.pred, flat_labels, weights=metric_weights)

    # mIoU
    self.mIoU, self.mIou_update_op = tf.contrib.metrics.streaming_mean_iou(
        self.pred,
        flat_labels,
        num_classes=self.conf.num_classes,
        weights=metric_weights)

    # Confusion matrix
    self.confusion_matrix = tf.contrib.metrics.confusion_matrix(
        self.pred,
        flat_labels,
        num_classes=self.conf.num_classes,
        weights=metric_weights)

    # Saver used to load the checkpoint.
    self.loader = tf.train.Saver(var_list=tf.global_variables())
def predict_setup(self):
    """Build the prediction graph and make sure the output folders exist.

    Sets ``self.coord``, ``self.pred`` (uint8 class map with a trailing
    channel axis) and ``self.loader``. Exits the process with status -1 when
    ``self.conf.encoder_name`` is not one of ``res101``, ``res50`` or
    ``deeplab``.

    Output directories (``out_dir``, ``out_dir/prediction`` and, when
    ``self.conf.visual`` is set, ``out_dir/visual_prediction``) are each
    created if missing, so a pre-existing ``out_dir`` that lacks a
    sub-directory is repaired instead of being left incomplete.
    """
    # Create queue coordinator.
    self.coord = tf.train.Coordinator()

    # Load reader
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            self.conf.data_dir,
            self.conf.test_data_list,
            None,  # the images have different sizes
            False,  # no data-aug
            False,  # no data-aug
            self.conf.ignore_label,
            IMG_MEAN,
            self.coord)
        image = reader.image  # [h, w, 3 or 1]
    # Add one batch dimension [1, h, w, 3 or 1]
    image_batch = tf.expand_dims(image, dim=0)

    # Create network
    if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
        print('encoder_name ERROR!')
        print("Please input: res101, res50, or deeplab")
        sys.exit(-1)
    elif self.conf.encoder_name == 'deeplab':
        net = Deeplab_v2(image_batch, self.conf.num_classes, False)
    else:
        net = ResNet_segmentation(image_batch, self.conf.num_classes, False,
                                  self.conf.encoder_name)

    # Predictions: upsample to the input size, arg-max over axis 3.
    raw_output = net.outputs
    raw_output = tf.image.resize_bilinear(raw_output,
                                          tf.shape(image_batch)[1:3, ])
    raw_output = tf.argmax(raw_output, axis=3)
    self.pred = tf.cast(tf.expand_dims(raw_output, dim=3), tf.uint8)

    # Create directories. Every path is checked on its own so that a partial
    # layout left by an earlier run does not cause missing folders later.
    required_dirs = [self.conf.out_dir, self.conf.out_dir + '/prediction']
    if self.conf.visual:
        required_dirs.append(self.conf.out_dir + '/visual_prediction')
    for path in required_dirs:
        if not os.path.isdir(path):
            os.makedirs(path)

    # Loader for loading the checkpoint
    self.loader = tf.train.Saver(var_list=tf.global_variables())
def main(): """Create the model and start the training.""" args = get_arguments() """ Get configurations here. We pass some arguments from command line to init configurations, for training hyperparameters, you can set them in TrainConfig Class. Note: we set filter scale to 1 for pruned model, 2 for non-pruned model. The filters numbers of non-pruned model is two times larger than prunde model, e.g., [h, w, 64] <-> [h, w, 32]. """ cfg = TrainConfig(dataset=args.dataset, is_training=True, random_scale=args.random_scale, random_mirror=args.random_mirror, filter_scale=args.filter_scale) cfg.display() # Setup training network and training samples train_reader = ImageReader(cfg=cfg, mode='train') train_net = ICNet_BN(image_reader=train_reader, cfg=cfg, mode='train') """
def main(image_first="1st.jpg", image_second="2nd.jpg"):
    """Pair up four points from two images by gradient histogram and draw links.

    A ``PointExtractor`` is created for each image and asked for four points.
    A histogram is computed with ``get_grad_hist`` for every point; each of
    the first image's four histograms is then paired with the second-image
    histogram giving the smallest ``variance_check`` value. Both extractors
    are saved, the two image arrays are concatenated along axis 1, and a line
    is drawn between every pair on the combined image before it is shown.

    Args:
        image_first: Filename of the first image.
        image_second: Filename of the second image.
    """
    first = PointExtractor(image_filename=image_first, name="test1")
    first.set_points(4)
    second = PointExtractor(image_filename=image_second, name="test2")
    second.set_points(4)

    first_hists = [
        get_grad_hist(first.image_array, first.points[k])
        for k in range(len(first.points))
    ]
    second_hists = [
        get_grad_hist(second.image_array, second.points[k])
        for k in range(len(second.points))
    ]

    # For each first-image point, the index of the best second-image match.
    matches = []
    for a in range(4):
        scores = [
            variance_check(first_hists[a], second_hists[b]) for b in range(4)
        ]
        matches.append((a, scores.index(min(scores))))

    first.save()
    second.save()

    side_by_side = np.concatenate((first.image_array, second.image_array),
                                  axis=1)
    canvas = ImageReader(image_array=side_by_side, name="concatenated")

    # Second-image points are shifted by the first image's extent on axis 1
    # because that image sits after it in the concatenated array.
    shift = first.image_array.shape[1]
    for first_idx, second_idx in matches:
        target = second.points[second_idx]
        print(target)
        shifted_target = (target[0] + shift, target[1])
        canvas.draw_line(first.points[first_idx], shifted_target)

    canvas.imshow()
def main():
    """Create the model and start the training.

    Builds a ``TrainConfig`` from the command line, applies optional CLI
    overrides, sets up a training network and a weight-sharing validation
    network, then runs a momentum-SGD loop with a poly learning-rate
    schedule. Every ``cfg.EVAL_EVERY`` steps the validation loss is fetched
    too; a new best value triggers a checkpoint, and training stops early
    once the validation loss has failed to improve more than
    ``max_non_decreasing_val_loss`` times.
    """
    args = get_arguments()

    # Get configurations here. We pass some arguments from command line to
    # init configurations; for training hyperparameters, you can set them in
    # the TrainConfig class.
    #
    # Note: we set filter scale to 1 for the pruned model, 2 for the
    # non-pruned model. The filter numbers of the non-pruned model are two
    # times larger than the pruned model, e.g., [h, w, 64] <-> [h, w, 32].
    cfg = TrainConfig(dataset=args.dataset,
                      is_training=True,
                      random_scale=args.random_scale,
                      random_mirror=args.random_mirror,
                      filter_scale=args.filter_scale)

    # Optional command-line overrides of the configuration.
    if args.num_classes is not None:
        cfg.param["num_classes"] = args.num_classes
    if args.data_dir is not None:
        cfg.param["data_dir"] = args.data_dir
    if args.val_list is not None:
        cfg.param["eval_list"] = args.val_list
    if args.train_list is not None:
        cfg.param["train_list"] = args.train_list
    if args.ignore_label is not None:
        cfg.param["ignore_label"] = args.ignore_label
    if args.eval_size is not None:
        # The "AxB" string is split on "x" and stored in reversed order.
        cfg.param["eval_size"] = [
            int(x.strip()) for x in args.eval_size.split("x")[::-1]
        ]
    if args.training_size is not None:
        cfg.TRAINING_SIZE = [
            int(x.strip()) for x in args.training_size.split("x")[::-1]
        ]
    if args.batch_size is not None:
        cfg.BATCH_SIZE = args.batch_size
    if args.learning_rate is not None:
        cfg.LEARNING_RATE = args.learning_rate
    if args.restore_from is not None:
        cfg.model_weight = args.restore_from
    if args.snapshot_dir is not None:
        cfg.SNAPSHOT_DIR = args.snapshot_dir

    if args.restore_from == "scratch":
        # Imported lazily: only needed when the image mean is computed here.
        from tqdm import tqdm
        import cv2
        import joblib as joblib

        if not args.img_mean:
            print(
                "Calculating img mean for custom dataset. To prevent this, specify it with --img-mean next time"
            )
            image_files, _ = read_labeled_image_list(cfg.param["data_dir"],
                                                     cfg.param["train_list"])
            means = joblib.Parallel(n_jobs=6)(
                joblib.delayed(calc_mean)(image_file, cv2)
                for image_file in tqdm(image_files, desc="calc img mean"))
            cfg.IMG_MEAN = np.mean(means, axis=0).tolist()
        else:
            cfg.IMG_MEAN = [float(x.strip()) for x in args.img_mean.split(",")]

    cfg.display()

    # Setup training network and training samples
    train_reader = ImageReader(cfg=cfg, mode='train')
    train_net = ICNet_BN(image_reader=train_reader, cfg=cfg, mode='train')

    loss_sub4, loss_sub24, loss_sub124, reduced_loss = create_losses(
        train_net, train_net.labels, cfg)

    # Setup validation network and validation samples. reuse=True makes it
    # share the variables created by the training network above.
    with tf.variable_scope('', reuse=True):
        val_reader = ImageReader(cfg, mode='eval')
        val_net = ICNet_BN(image_reader=val_reader, cfg=cfg, mode='train')

        val_loss_sub4, val_loss_sub24, val_loss_sub124, val_reduced_loss = create_losses(
            val_net, val_net.labels, cfg)

    # Using Poly learning rate policy
    base_lr = tf.constant(cfg.LEARNING_RATE)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(
        base_lr, tf.pow((1 - step_ph / cfg.TRAINING_STEPS), cfg.POWER))

    # Set restore variable
    restore_var = tf.global_variables()
    # beta/gamma variables are only trained when --train-beta-gamma is set.
    all_trainable = [
        v for v in tf.trainable_variables()
        if ('beta' not in v.name and 'gamma' not in v.name)
        or args.train_beta_gamma
    ]

    # Gets moving_mean and moving_variance update operations from
    # tf.GraphKeys.UPDATE_OPS
    if args.update_mean_var == False:
        update_ops = None
    else:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        opt_conv = tf.train.MomentumOptimizer(learning_rate, cfg.MOMENTUM)
        grads = tf.gradients(reduced_loss, all_trainable)
        train_op = opt_conv.apply_gradients(zip(grads, all_trainable))

    # Create session & restore weights (Here we only need to use train_net to
    # create session since we reuse it)
    train_net.create_session()
    if args.initializer:
        train_net.set_initializer(initializer_algorithm=args.initializer)
    train_net.initialize_variables()
    # Weights are restored unless training explicitly starts from scratch.
    if args.restore_from != "scratch":
        train_net.restore(cfg.model_weight, restore_var)
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=20)

    total_parameters = 0
    for variable in tf.trainable_variables():
        # shape is an array of tf.Dimension
        shape = variable.get_shape()
        variable_parameters = 1
        for dim in shape:
            variable_parameters *= dim.value
        total_parameters += variable_parameters
    print("Total trainable parameters: " + str(total_parameters))

    # Iterate over training steps.
    val_loss_value = 10.0  # placeholder shown in the log before the first eval
    min_val_loss = float("inf")
    stagnation = 0
    max_non_decreasing_val_loss = int(
        np.ceil(args.early_stopping_patience * len(train_reader.image_list) /
                (cfg.BATCH_SIZE * cfg.EVAL_EVERY)))
    print(
        "Maximum times that val loss can stagnate before early stopping is applied: "
        + str(max_non_decreasing_val_loss))

    for step in range(cfg.TRAINING_STEPS):
        start_time = time.time()
        feed_dict = {step_ph: step}

        if step % cfg.EVAL_EVERY == 0:
            # Evaluation step: the validation loss is fetched in the same run
            # as the training op.
            loss_value, loss1, loss2, loss3, val_loss_value, _ = train_net.sess.run(
                [
                    reduced_loss, loss_sub4, loss_sub24, loss_sub124,
                    val_reduced_loss, train_op
                ],
                feed_dict=feed_dict)
            if val_loss_value < min_val_loss:
                print("New best val loss {:.3f}. Saving weights...".format(
                    val_loss_value))
                train_net.save(
                    saver,
                    cfg.SNAPSHOT_DIR,
                    step,
                    model_name="val{:.3f}model.ckpt".format(val_loss_value))
                min_val_loss = val_loss_value
                stagnation = 0
            else:
                stagnation += 1
        else:
            loss_value, loss1, loss2, loss3, _ = train_net.sess.run(
                [reduced_loss, loss_sub4, loss_sub24, loss_sub124, train_op],
                feed_dict=feed_dict)

        duration = time.time() - start_time
        print('step {:d} \t total loss = {:.3f}, sub4 = {:.3f}, sub24 = {:.3f}, sub124 = {:.3f}, val_loss: {:.3f} ({:.3f} sec/step)'
              .format(step, loss_value, loss1, loss2, loss3, val_loss_value,
                      duration))

        if stagnation > max_non_decreasing_val_loss:
            print("Early stopping")
            break
def main(model_log_dir, check_point, weight):
    """Compare the network's masks with an ensemble and save the figures.

    For 10 evaluation samples the network prediction is blended with the
    matching entry of ``valid.npy`` (0.4 * network + 0.6 * stored result,
    thresholded at 0.5). A four-panel figure (input, ground truth, network
    result, ensemble result) is written to ``<checkpoint dir>/eval_ensemble``
    and then displayed.

    Args:
        model_log_dir: Sub-directory of ``LOG_PATH`` holding the checkpoint.
        check_point: Integer step used to build the ``model.ckpt-<step>`` path.
        weight: Currently unused; the blend weights are fixed at 0.4 / 0.6.

    Raises:
        AssertionError: If ``cfg.dataset`` is not ``'others'``.
    """
    tf.reset_default_graph()
    args = get_arguments()
    cfg = Config(dataset=args.dataset,
                 is_training=False,
                 filter_scale=args.filter_scale,
                 eval_path_log=os.path.join(LOG_PATH, model_log_dir))
    cfg.model_paths['others'] = os.path.join(LOG_PATH, model_log_dir,
                                             'model.ckpt-%d' % check_point)

    model = model_config[args.model]

    reader = ImageReader(cfg=cfg, mode='eval')
    net = model(image_reader=reader, cfg=cfg, mode='eval')
    # NOTE(review): no create_session()/restore() call is visible in this
    # function; presumably the model prepares ``net.sess`` itself - confirm.

    # Keep only predictions whose label is not the ignore label.
    pred_flatten = tf.reshape(net.output, [-1, ])
    label_flatten = tf.reshape(net.labels, [-1, ])

    mask = tf.not_equal(label_flatten, cfg.param['ignore_label'])
    indices = tf.squeeze(tf.where(mask), 1)
    pred = tf.gather(pred_flatten, indices)

    tnet_result = np.load(file=os.path.join(TNET_LOG_PATH, 'valid.npy'))
    assert cfg.dataset == 'others'

    # The output folder does not depend on the sample, so prepare it once.
    save_comparation_path = os.path.dirname(
        cfg.model_paths['others']) + '/eval_ensemble'
    if not os.path.exists(save_comparation_path):
        os.mkdir(save_comparation_path)

    for i in trange(10, desc='evaluation', leave=True):
        icnet_res, raw_input, labels = net.sess.run(
            [pred, net.images, net.labels])

        # Weighted blend of the two predictions, binarised at 0.5.
        tnet_result_i = tnet_result[i]
        ensemble = 0.4 * icnet_res + 0.6 * tnet_result_i
        ensemble[ensemble >= 0.5] = 1
        ensemble[ensemble < 0.5] = 0

        ensemble = np.array(np.reshape(ensemble, cfg.param['eval_size']),
                            dtype=np.uint8) * 255
        ensemble_fig = Image.fromarray(ensemble.astype(np.uint8))

        raw_input = np.squeeze(raw_input)
        n_input = _extract_mean_revert(raw_input, IMG_MEAN, swap_channel=True)
        n_input = n_input.astype(np.uint8)
        input_image = Image.fromarray(n_input, 'RGB')

        icnet = np.array(np.reshape(icnet_res, cfg.param['eval_size']),
                         dtype=np.uint8) * 255
        icnet = Image.fromarray(icnet.astype(np.uint8))

        labels = np.squeeze(labels) * 255
        labels = Image.fromarray(labels.astype(np.uint8))

        fig, ax1 = plt.subplots(figsize=(10, 8))

        plot1 = plt.subplot(141)
        plot1.set_title("Input Image", fontsize=10)
        plt.imshow(input_image)
        plt.axis('off')

        plot2 = plt.subplot(142)
        plot2.set_title("Ground Truth Mask", fontsize=10)
        plt.imshow(labels, cmap='gray')
        plt.axis('off')

        plot3 = plt.subplot(143)
        plot3.set_title("Our Result", fontsize=10)
        plt.imshow(icnet, cmap='gray')
        plt.axis('off')

        plot4 = plt.subplot(144)
        plot4.set_title("Ensemble's Result", fontsize=10)
        plt.imshow(ensemble_fig, cmap='gray')
        plt.axis('off')

        # Save BEFORE show(): once the window opened by show() is closed the
        # current figure may be empty, which would write a blank image.
        plt.savefig(os.path.join(save_comparation_path, 'eval_%d_img.png' % i))
        plt.show()
        # Release the figure so ten of them do not pile up in memory.
        plt.close(fig)

    # Close a lingering default session, if any, so the next call starts
    # clean (the original called the non-existent ``sess._exit__``).
    sess = tf.get_default_session()
    if sess:
        sess.close()
def predict_setup(self):
    """Build the prediction graph and make sure the output folders exist.

    Sets ``self.coord``, ``self.image_batch``, ``self.label_batch``,
    ``self.sp_batch``, ``self.pred`` (uint8 class map with a trailing channel
    axis) and ``self.loader``.

    Only the full-resolution network is built; the earlier multi-scale
    variant (additional 0.75x and 0.5x inputs fused with a max) is disabled,
    so the ``reduce_max`` below runs over a single tensor.

    Output directories are each created if missing, so a pre-existing
    ``out_dir`` that lacks a sub-directory is repaired.
    """
    # NOTE(review): the input setup mirrors test_setup() and reads
    # self.conf.valid_data_list rather than a test list - confirm intended.

    # Create queue coordinator.
    self.coord = tf.train.Coordinator()

    # Load reader
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            self.conf.data_dir,
            self.conf.valid_data_list,
            None,  # the images have different sizes
            False,  # no data-aug
            False,  # no data-aug
            self.conf.ignore_label,
            IMG_MEAN,
            self.coord)
        image, label, sp = reader.image, reader.label, reader.sp  # [h, w, 3 or 1]
    # Add one batch dimension [1, h, w, 3 or 1]
    self.image_batch, self.label_batch, self.sp_batch = (
        tf.expand_dims(image, dim=0),
        tf.expand_dims(label, dim=0),
        tf.expand_dims(sp, dim=0))

    # Create network. Superpixels are only passed when the CRF type is not
    # the plain 'crf' variant.
    with tf.variable_scope('', reuse=False):
        if self.conf.crf_type == 'crf':
            net = Deeplab_v2(self.image_batch,
                             self.conf.num_classes,
                             False,
                             rescale075=False,
                             rescale05=False,
                             crf_type=self.conf.crf_type)
        else:
            net = Deeplab_v2(self.image_batch,
                             self.conf.num_classes,
                             False,
                             rescale075=False,
                             rescale05=False,
                             crf_type=self.conf.crf_type,
                             superpixels=self.sp_batch)

    # Predictions: upsample to the input size, arg-max over axis 3.
    raw_output100 = net.outputs
    raw_output = tf.reduce_max(tf.stack([raw_output100]), axis=0)
    raw_output = tf.image.resize_bilinear(
        raw_output, tf.shape(self.image_batch)[1:3, ])
    raw_output = tf.argmax(raw_output, axis=3)
    self.pred = tf.cast(tf.expand_dims(raw_output, dim=3), tf.uint8)

    # Create directories. Every path is checked on its own so that a partial
    # layout left by an earlier run does not cause missing folders later.
    required_dirs = [self.conf.out_dir, self.conf.out_dir + '/prediction']
    if self.conf.visual:
        required_dirs.append(self.conf.out_dir + '/visual_prediction')
    required_dirs.append(self.conf.out_dir + '/resized_decoding')
    for path in required_dirs:
        if not os.path.isdir(path):
            os.makedirs(path)

    # Loader for loading the checkpoint
    self.loader = tf.train.Saver(var_list=tf.global_variables())
def test_setup(self):
    """Build the evaluation graph: reader, network, and streaming metrics.

    Sets ``self.coord``, ``self.image_batch``, ``self.label_batch``,
    ``self.sp_batch``, ``self.pred``, the accuracy / mIoU value and update
    ops, ``self.confusion_matrix`` and ``self.loader``.

    Only the full-resolution network is built; the earlier multi-scale
    variant (additional 0.75x and 0.5x inputs fused with a max) is disabled,
    so the ``reduce_max`` below runs over a single tensor.
    """
    # Create queue coordinator.
    self.coord = tf.train.Coordinator()

    # Load reader
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            self.conf.data_dir,
            self.conf.valid_data_list,
            None,  # the images have different sizes
            False,  # no data-aug
            False,  # no data-aug
            self.conf.ignore_label,
            IMG_MEAN,
            self.coord)
        image, label, sp = reader.image, reader.label, reader.sp  # [h, w, 3 or 1]
    # Add one batch dimension [1, h, w, 3 or 1]
    self.image_batch, self.label_batch, self.sp_batch = (
        tf.expand_dims(image, dim=0),
        tf.expand_dims(label, dim=0),
        tf.expand_dims(sp, dim=0))

    # Create network. Superpixels are only passed when the CRF type is not
    # the plain 'crf' variant.
    with tf.variable_scope('', reuse=False):
        if self.conf.crf_type == 'crf':
            net = Deeplab_v2(self.image_batch,
                             self.conf.num_classes,
                             False,
                             rescale075=False,
                             rescale05=False,
                             crf_type=self.conf.crf_type)
        else:
            net = Deeplab_v2(self.image_batch,
                             self.conf.num_classes,
                             False,
                             rescale075=False,
                             rescale05=False,
                             crf_type=self.conf.crf_type,
                             superpixels=self.sp_batch)

    # Predictions: upsample to the input size, arg-max over axis 3, flatten.
    raw_output100 = net.outputs
    raw_output = tf.reduce_max(tf.stack([raw_output100]), axis=0)
    raw_output = tf.image.resize_bilinear(
        raw_output, tf.shape(self.image_batch)[1:3, ])
    raw_output = tf.argmax(raw_output, axis=3)
    pred = tf.expand_dims(raw_output, dim=3)
    self.pred = tf.reshape(pred, [-1, ])

    # Labels
    gt = tf.reshape(self.label_batch, [-1, ])
    # Ignoring all labels greater than or equal to n_classes: they get
    # weight 0 and are replaced by 0 so the metric ops only see valid ids.
    temp = tf.less_equal(gt, self.conf.num_classes - 1)
    weights = tf.cast(temp, tf.int32)

    # fix for tf 1.3.0
    gt = tf.where(temp, gt, tf.cast(temp, tf.uint8))

    # Pixel accuracy
    self.accu, self.accu_update_op = tf.contrib.metrics.streaming_accuracy(
        self.pred, gt, weights=weights)

    # mIoU
    self.mIoU, self.mIou_update_op = tf.contrib.metrics.streaming_mean_iou(
        self.pred, gt, num_classes=self.conf.num_classes, weights=weights)

    # confusion matrix
    self.confusion_matrix = tf.contrib.metrics.confusion_matrix(
        self.pred, gt, num_classes=self.conf.num_classes, weights=weights)

    # Loader for loading the checkpoint
    self.loader = tf.train.Saver(var_list=tf.global_variables())
print("estimate y for ground height: {}".format(home_points[0][1])) knee_point_height = abs(batter_keypoint_heights[0][1] - home_points[0][1]) print("batter knee point height in pixels: {}".format( abs(batter_keypoint_heights[0][1] - home_points[0][1]))) print("high right point for strike zone: {}".format( (strike_zone_top_height, home_points[0][0]))) print("low left point for strike zone: {}".format( (batter_keypoint_heights[0][1], home_points[1][0]))) high_right = (int(home_points[0][0]), int(strike_zone_top_height)) low_left = (home_points[1][0], batter_keypoint_heights[0][1]) new_points = [high_right, low_left] point_extraction.revert() point_extraction.destroy_window() display_image = ImageReader(image_array=point_extraction.image_array, name="result") display_image.draw_line((high_right[0], high_right[1]), (high_right[0], low_left[1])) display_image.draw_line((high_right[0], high_right[1]), (low_left[0], high_right[1])) display_image.draw_line((low_left[0], low_left[1]), (low_left[0], high_right[1])) display_image.draw_line((low_left[0], low_left[1]), (high_right[0], low_left[1])) display_image.imshow() actual_strike_bot = abs(low_left[1] - home_points[1][1]) * ( actual_distance / pixel_distance) actual_strike_height = abs(low_left[1] - high_right[1]) * ( actual_distance / pixel_distance) actual_strike_width = abs(low_left[0] - high_right[0]) * (actual_distance /
def main():
    """Create the model and start the evaluation process.

    Each sample is stacked with a copy reversed along axis 1, run through the
    network at six scales (0.50 - 1.75), and the resized outputs are
    averaged. The output for the reversed copy is reversed back (with three
    channel pairs swapped) and averaged with the output for the original.
    Streaming mIoU / accuracy are accumulated for parsing and recall /
    precision for edges; per-step parsing images are written to
    ``dataset/parse_cihp`` and the final metrics are printed.
    """
    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(DATA_DIR, LIST_PATH, DATA_ID_LIST, None, False,
                             False, False, coord)
        image, label, edge_gt = reader.image, reader.label, reader.edge
        image_rev = tf.reverse(image, tf.stack([1]))
        image_list = reader.image_list

    # Batch of two: the sample and its reversed copy.
    image_batch = tf.stack([image, image_rev])
    label_batch = tf.expand_dims(label, dim=0)  # Add one batch dimension.
    edge_gt_batch = tf.expand_dims(edge_gt, dim=0)
    h_orig, w_orig = tf.to_float(tf.shape(image_batch)[1]), tf.to_float(
        tf.shape(image_batch)[2])

    # Rescaled copies of the batch; 1.00 is the unscaled batch itself.
    batches = {1.00: image_batch}
    for factor in (0.50, 0.75, 1.25, 1.50, 1.75):
        batches[factor] = tf.image.resize_images(
            image_batch,
            tf.stack([
                tf.to_int32(tf.multiply(h_orig, factor)),
                tf.to_int32(tf.multiply(w_orig, factor))
            ]))

    # Create network. The unscaled network comes first and creates the
    # variables; every other scale reuses them.
    nets = {}
    for position, factor in enumerate((1.00, 0.50, 0.75, 1.25, 1.50, 1.75)):
        with tf.variable_scope('', reuse=position > 0):
            nets[factor] = PGNModel({'data': batches[factor]},
                                    is_training=False,
                                    n_classes=N_CLASSES)

    out_size = tf.shape(image_batch)[1:3, ]
    parsing_scales = (0.50, 0.75, 1.00, 1.25, 1.50, 1.75)
    edge_scales = (1.00, 1.25, 1.50, 1.75)

    # combine resize: bring every scale back to the input size and average.
    parsing_out1 = tf.reduce_mean(tf.stack([
        tf.image.resize_images(nets[s].layers['parsing_fc'], out_size)
        for s in parsing_scales
    ]), axis=0)
    parsing_out2 = tf.reduce_mean(tf.stack([
        tf.image.resize_images(nets[s].layers['parsing_rf_fc'], out_size)
        for s in parsing_scales
    ]), axis=0)
    edge_out2 = tf.reduce_mean(tf.stack([
        tf.image.resize_images(nets[s].layers['edge_rf_fc'], out_size)
        for s in edge_scales
    ]), axis=0)

    raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2]), axis=0)
    head_output, tail_output = tf.unstack(raw_output, num=2, axis=0)

    # Output for the reversed copy: swap channel pairs (14,15), (16,17) and
    # (18,19) - presumably left/right classes, confirm - then reverse it back
    # along axis 1 so it lines up with the output for the original.
    tail_list = tf.unstack(tail_output, num=20, axis=2)
    tail_list_rev = list(tail_list[:14]) + [
        tail_list[15], tail_list[14],
        tail_list[17], tail_list[16],
        tail_list[19], tail_list[18],
    ]
    tail_output_rev = tf.stack(tail_list_rev, axis=2)
    tail_output_rev = tf.reverse(tail_output_rev, tf.stack([1]))

    raw_output_all = tf.reduce_mean(tf.stack([head_output, tail_output_rev]),
                                    axis=0)
    raw_output_all = tf.expand_dims(raw_output_all, dim=0)
    pred_scores = tf.reduce_max(raw_output_all, axis=3)
    raw_output_all = tf.argmax(raw_output_all, axis=3)
    pred_all = tf.expand_dims(raw_output_all, dim=3)  # Create 4-d tensor.

    # Same original/reversed averaging for the edge branch (no channel swap).
    raw_edge = tf.reduce_mean(tf.stack([edge_out2]), axis=0)
    head_output, tail_output = tf.unstack(raw_edge, num=2, axis=0)
    tail_output_rev = tf.reverse(tail_output, tf.stack([1]))
    raw_edge_all = tf.reduce_mean(tf.stack([head_output, tail_output_rev]),
                                  axis=0)
    raw_edge_all = tf.expand_dims(raw_edge_all, dim=0)
    pred_edge = tf.sigmoid(raw_edge_all)
    res_edge = tf.cast(tf.greater(pred_edge, 0.5), tf.int32)

    # prepare ground truth
    preds = tf.reshape(pred_all, [-1, ])
    gt = tf.reshape(label_batch, [-1, ])
    # Ignoring all labels greater than or equal to n_classes.
    weights = tf.cast(tf.less_equal(gt, N_CLASSES - 1), tf.int32)
    mIoU, update_op_iou = tf.contrib.metrics.streaming_mean_iou(
        preds, gt, num_classes=N_CLASSES, weights=weights)
    macc, update_op_acc = tf.contrib.metrics.streaming_accuracy(
        preds, gt, weights=weights)

    # precision and recall
    recall, update_op_recall = tf.contrib.metrics.streaming_recall(
        res_edge, edge_gt_batch)
    precision, update_op_precision = tf.contrib.metrics.streaming_precision(
        res_edge, edge_gt_batch)

    update_op = tf.group(update_op_iou, update_op_acc, update_op_recall,
                         update_op_precision)

    # Which variables to load.
    restore_var = tf.global_variables()
    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)
    sess.run(tf.local_variables_initializer())

    # Load weights.
    loader = tf.train.Saver(var_list=restore_var)
    if RESTORE_FROM is not None:
        if load(loader, sess, RESTORE_FROM):
            print(" [*] Load SUCCESS")
        else:
            print(" [!] Load failed...")

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Start from an empty output directory.
    parsing_dir = 'dataset/parse_cihp'
    if os.path.exists(parsing_dir):
        shutil.rmtree(parsing_dir)
    if not os.path.exists(parsing_dir):
        os.makedirs(parsing_dir)

    target_size = (192, 256)  # (width, height) of the saved images

    try:
        # Iterate over evaluation steps.
        for step in range(NUM_STEPS):
            parsing_, scores, edge_, _ = sess.run(
                [pred_all, pred_scores, pred_edge, update_op])
            if step % 100 == 0:
                print('step {:d}'.format(step))
                print(image_list[step])

            # Colour visualisation, resized. The file name is fixed, so each
            # step overwrites the previous one.
            msk = decode_labels(parsing_, num_classes=N_CLASSES)
            parsing_im = Image.fromarray(msk[0])
            parsing_im.save('dataset/parse_cihp/person_vis.png')
            im = Image.open('dataset/parse_cihp/person_vis.png')
            # LANCZOS is the same filter as the ANTIALIAS alias, which was
            # removed in Pillow 10.
            im = im.resize(target_size, Image.LANCZOS)
            im.save('dataset/parse_cihp/person_vis.png')

            # Raw class-index map, resized the same way.
            cv2.imwrite('dataset/parse_cihp/person.png', parsing_[0, :, :, 0])
            im = Image.open('dataset/parse_cihp/person.png')
            im = im.resize(target_size, Image.LANCZOS)
            im.save('dataset/parse_cihp/person.png')

        res_mIou = mIoU.eval(session=sess)
        res_macc = macc.eval(session=sess)
        res_recall = recall.eval(session=sess)
        res_precision = precision.eval(session=sess)
        # Report 0 instead of nan when both precision and recall are 0.
        pr_sum = res_precision + res_recall
        f1 = 2 * res_precision * res_recall / pr_sum if pr_sum > 0 else 0.0
        print('Mean IoU: {:.4f}, Mean Acc: {:.4f}'.format(res_mIou, res_macc))
        print('Recall: {:.4f}, Precision: {:.4f}, F1 score: {:.4f}'.format(
            res_recall, res_precision, f1))
    finally:
        # Always stop the queue threads, even when a step fails.
        coord.request_stop()
        coord.join(threads)
def train(args):
    """Adversarially train a segmentation generator against a discriminator.

    Builds the input queue, the generator/discriminator graph, the losses
    (cross-entropy plus a critic term for the generator; critic difference
    plus a gradient penalty for the discriminator), summaries and savers,
    then runs ``args.num_steps`` training iterations.

    Args:
        args: namespace providing at least ``random_seed``, ``d_name``,
            ``g_name``, ``lamb``, ``learning_rate``, ``power``, ``is_val``,
            ``data_dir``, ``img_size``, ``random_scale``, ``random_mirror``,
            ``random_crop``, ``ignore_label``, ``batch_size``,
            ``num_classes``, ``num_steps``, ``save_num_images``,
            ``save_pred_every``, ``log_dir``, ``restore_from`` and
            ``baseweight_from`` (a mapping with keys ``'d_vgg'`` and ``'g'``).

    Returns:
        None. Checkpoints are written via ``save_weight`` and summaries to
        ``args.log_dir``.
    """
    ## set hyperparameters
    img_mean = np.array((104.00698793, 116.66876762, 122.67891434),
                        dtype=np.float32)
    tf.set_random_seed(args.random_seed)
    coord = tf.train.Coordinator()
    print("d_model_name:", args.d_name)
    print("lambda:", args.lamb)
    print("learning_rate:", args.learning_rate)
    print("is_val:", args.is_val)
    print("---------------------------------")

    ## load data
    with tf.name_scope("create_inputs"):
        reader = ImageReader(args.data_dir, args.img_size, args.random_scale,
                             args.random_mirror, args.random_crop,
                             args.ignore_label, args.is_val, img_mean, coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)
    print("Data is ready!")

    ## load model
    g_net = choose_generator(args.g_name, image_batch)
    score_map = g_net.get_output()
    fk_batch = tf.nn.softmax(score_map, dim=-1)
    gt_batch = tf.one_hot(label_batch, args.num_classes, dtype=tf.float32)
    x_batch = tf.train.batch([
        (reader.image + img_mean) / 255.,
    ], args.batch_size, dynamic_pad=True)  # normalization
    d_fk_net, d_gt_net = choose_discriminator(args.d_name, fk_batch, gt_batch,
                                              x_batch)
    d_fk_pred = d_fk_net.get_output()  # fake segmentation result in d
    d_gt_pred = d_gt_net.get_output()  # ground-truth result in d

    label, logits = convert_to_calculateloss(score_map, args.num_classes,
                                             label_batch)
    predict_label = tf.argmax(logits, axis=1)
    predict_batch = g_net.topredict(score_map, tf.shape(image_batch)[1:3])
    print("The model has been created!")

    ## get all kinds of variables list
    g_restore_var = [
        v for v in tf.global_variables() if 'discriminator' not in v.name
    ]
    vgg_restore_var = [
        v for v in tf.global_variables()
        if 'discriminator' in v.name and 'image' in v.name
    ]
    g_var = [
        v for v in tf.trainable_variables() if 'discriminator' not in v.name
    ]
    d_var = [
        v for v in tf.trainable_variables()
        if 'discriminator' in v.name and 'image' not in v.name
    ]
    # Batch-norm parameters are deliberately left trainable here.
    g_trainable_var = g_var
    d_trainable_var = d_var

    ## set loss
    mce_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label,
                                                       logits=logits))
    g_bce_loss = -tf.reduce_mean(d_fk_pred)
    g_loss = mce_loss + args.lamb * g_bce_loss

    fk_score_var = tf.reduce_mean(d_fk_pred)
    gt_score_var = tf.reduce_mean(d_gt_pred)
    d_loss = fk_score_var - gt_score_var

    # Gradient penalty on random interpolations between real and fake maps.
    alpha = tf.random_uniform(shape=tf.shape(gt_batch), minval=0., maxval=1.)
    differences = fk_batch - gt_batch
    interpolates = gt_batch + (alpha * differences)
    gradients = tf.gradients(
        Discriminator_add_vgg({
            'seg': interpolates,
            'data': x_batch
        }, reuse=True).get_output(), [interpolates])[0]
    slopes = tf.sqrt(
        tf.reduce_sum(tf.square(gradients), reduction_indices=[1, 2, 3]))
    gradient_penalty = tf.reduce_mean((slopes - 1.)**2)
    # NOTE(review): the penalty is added with weight 1. The original code
    # defined an unused local ``LAMBDA = 10`` -- confirm whether the penalty
    # was meant to be scaled by it before changing training behaviour.
    d_loss += gradient_penalty

    # Streaming metrics; their local variables are reset after each summary.
    mce_loss_var, mce_loss_op = tf.metrics.mean(mce_loss)
    g_bce_loss_var, g_bce_loss_op = tf.metrics.mean(g_bce_loss)
    g_loss_var, g_loss_op = tf.metrics.mean(g_loss)
    d_loss_var, d_loss_op = tf.metrics.mean(d_loss)
    iou_var, iou_op = tf.metrics.mean_iou(label, predict_label,
                                          args.num_classes)
    accuracy_var, acc_op = tf.metrics.accuracy(label, predict_label)
    metrics_op = tf.group(mce_loss_op, g_bce_loss_op, g_loss_op, d_loss_op,
                          iou_op, acc_op)

    ## set optimizer
    iterstep = tf.placeholder(dtype=tf.float32,
                              shape=[],
                              name='iteration_step')

    base_lr = tf.constant(args.learning_rate, dtype=tf.float32, shape=[])
    lr = tf.scalar_mul(base_lr,
                       tf.pow((1 - iterstep / args.num_steps),
                              args.power))  # learning rate reduce with the time

    # These gradient lists are only consumed by the histogram summaries below.
    g_gradients = tf.train.AdamOptimizer(learning_rate=lr).compute_gradients(
        g_loss, g_trainable_var)
    d_gradients = tf.train.AdamOptimizer(
        learning_rate=lr * 10).compute_gradients(d_loss, d_trainable_var)
    grad_fk_oi = tf.gradients(d_fk_pred, fk_batch, name='grad_fk_oi')[0]
    grad_gt_oi = tf.gradients(d_gt_pred, gt_batch, name='grad_gt_oi')[0]
    grad_fk_img_oi = tf.gradients(d_fk_pred,
                                  image_batch,
                                  name='grad_fk_img_oi')[0]
    grad_gt_img_oi = tf.gradients(d_gt_pred,
                                  image_batch,
                                  name='grad_gt_img_oi')[0]

    train_g_op = tf.train.AdamOptimizer(learning_rate=lr).minimize(
        g_loss, var_list=g_trainable_var)
    train_d_op = tf.train.AdamOptimizer(learning_rate=lr * 10).minimize(
        d_loss, var_list=d_trainable_var)

    ## set summary
    vs_image = tf.py_func(inv_preprocess,
                          [image_batch, args.save_num_images, img_mean],
                          tf.uint8)
    vs_label = tf.py_func(
        decode_labels,
        [label_batch, args.save_num_images, args.num_classes], tf.uint8)
    vs_predict = tf.py_func(
        decode_labels,
        [predict_batch, args.save_num_images, args.num_classes], tf.uint8)
    tf.summary.image(name='image collection_train',
                     tensor=tf.concat(axis=2,
                                      values=[vs_image, vs_label, vs_predict]),
                     max_outputs=args.save_num_images)
    tf.summary.scalar('fk_score', tf.reduce_mean(d_fk_pred))
    tf.summary.scalar('gt_score', tf.reduce_mean(d_gt_pred))
    tf.summary.scalar('g_loss_train', g_loss_var)
    tf.summary.scalar('d_loss_train', d_loss_var)
    tf.summary.scalar('mce_loss_train', mce_loss_var)
    tf.summary.scalar('g_bce_loss_train', -1. * g_bce_loss_var)
    tf.summary.scalar('iou_train', iou_var)
    tf.summary.scalar('accuracy_train', accuracy_var)
    tf.summary.scalar('grad_fk_oi', tf.reduce_mean(tf.abs(grad_fk_oi)))
    tf.summary.scalar('grad_gt_oi', tf.reduce_mean(tf.abs(grad_gt_oi)))
    tf.summary.scalar('grad_fk_img_oi',
                      tf.reduce_mean(tf.abs(grad_fk_img_oi)))
    tf.summary.scalar('grad_gt_img_oi',
                      tf.reduce_mean(tf.abs(grad_gt_img_oi)))

    for grad, var in g_gradients + d_gradients:
        tf.summary.histogram(var.op.name + "/gradients", grad)

    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name + "/values", var)

    summary_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(args.log_dir,
                                           graph=tf.get_default_graph(),
                                           max_queue=3)

    ## set session
    # The variable may legitimately be unset; this print is informational only
    # and must not abort the run with a KeyError.
    print("GPU index:" +
          str(os.environ.get('CUDA_VISIBLE_DEVICES', 'not set')))
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    global_init = tf.global_variables_initializer()
    local_init = tf.local_variables_initializer()
    sess.run(global_init)
    sess.run(local_init)

    ## set saver
    saver_all = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=2)
    trained_step = 0
    if os.path.exists(args.restore_from + 'checkpoint'):
        # Resume a previous run.
        trained_step = load_weight(args.restore_from, saver_all, sess)
    else:
        # Fresh run: initialise from the base weights.
        load_weight(args.baseweight_from['d_vgg'], vgg_restore_var, sess)
        saver_g = tf.train.Saver(var_list=g_restore_var, max_to_keep=2)
        load_weight(args.baseweight_from['g'], saver_g, sess)

    threads = tf.train.start_queue_runners(sess, coord)
    print("all setting has been done,training start!")

    ## start training
    def auto_setting_train_steps(mode):
        """Return ``(d_train_steps, g_train_steps)`` for the given mode."""
        if mode == 0:
            return 5, 1
        elif mode == 1:
            return 1, 5
        else:
            return 1, 1

    d_train_steps = 5
    g_train_steps = 1
    # Consecutive-step counters for the three score regimes tested below.
    flags = [0] * 3

    for step in range(args.num_steps):
        now_step = int(
            trained_step) + step if trained_step is not None else step
        feed_dict = {iterstep: now_step}
        for _ in range(d_train_steps):
            _, _ = sess.run([train_d_op, metrics_op], feed_dict)

        for _ in range(g_train_steps):
            g_loss_, mce_loss_, g_bce_loss_, d_loss_, _, _ = sess.run([
                g_loss_var, mce_loss_var, g_bce_loss_var, d_loss_var,
                train_g_op, metrics_op
            ], feed_dict)

        ########################
        fk_score_, gt_score_ = sess.run([fk_score_var, gt_score_var],
                                        feed_dict)
        if fk_score_ > 0.48 and fk_score_ < 0.52:
            flags[0] += 1
            flags[1] = flags[2] = 0
        elif gt_score_ - fk_score_ > 0.3:
            flags[1] += 1
            flags[0] = flags[2] = 0
        else:
            flags[2] += 1
            flags[0] = flags[1] = 0

        # Once one regime has persisted for more than 100 consecutive steps,
        # switch the D/G update ratio to the schedule for that regime.
        if max(flags) > 100:
            d_train_steps, g_train_steps = auto_setting_train_steps(
                flags.index(max(flags)))
        ########################

        if step > 0 and step % args.save_pred_every == 0:
            save_weight(args.restore_from, saver_all, sess, now_step)

        if step % 50 == 0 or step == args.num_steps - 1:
            print('step={} d_loss={} g_loss={} mce_loss={} g_bce_loss_={}'.
                  format(now_step, d_loss_, g_loss_, mce_loss_, g_bce_loss_))
            summary_str = sess.run(summary_op, feed_dict)
            summary_writer.add_summary(summary_str, now_step)
            # Reset the streaming-metric accumulators for the next window.
            sess.run(local_init)

    ## end training
    coord.request_stop()
    coord.join(threads)
    # Flush any events still queued in the writer before returning.
    summary_writer.close()
    print('end....')
def train(args):
    """Train the segmentation generator alone with a cross-entropy loss.

    Builds the input queue, the generator graph, streaming metrics, summaries
    and savers, then runs ``args.num_steps`` momentum-SGD iterations.

    Args:
        args: namespace providing at least ``random_seed``, ``g_name``,
            ``lambd``, ``learning_rate``, ``power``, ``momentum``, ``is_val``,
            ``data_dir``, ``img_size``, ``random_scale``, ``random_mirror``,
            ``random_crop``, ``ignore_label``, ``batch_size``,
            ``num_classes``, ``num_steps``, ``save_num_images``,
            ``save_pred_every``, ``log_dir``, ``restore_from`` and
            ``baseweight_from`` (a mapping with keys ``'res50'`` and
            ``'vgg16'``).

    Returns:
        None. Checkpoints are written via ``save_weight`` and summaries to
        ``args.log_dir``.
    """
    ## set hyperparameters
    img_mean = np.array((104.00698793, 116.66876762, 122.67891434),
                        dtype=np.float32)
    tf.set_random_seed(args.random_seed)
    coord = tf.train.Coordinator()
    print("g_model_name:", args.g_name)
    print("lambda:", args.lambd)
    print("learning_rate:", args.learning_rate)
    print("is_val:", args.is_val)
    print("---------------------------------")

    ## load data
    with tf.name_scope("create_inputs"):
        reader = ImageReader(args.data_dir, args.img_size, args.random_scale,
                             args.random_mirror, args.random_crop,
                             args.ignore_label, args.is_val, img_mean, coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)
    print("Data is ready!")

    ## load model
    g_net = choose_generator(args.g_name, image_batch)
    score_map = g_net.get_output()  # [batch_size, h, w, num_classes]

    label, logits = convert_to_calculateloss(score_map, args.num_classes,
                                             label_batch)
    predict_label = tf.argmax(logits, axis=1)
    predict_batch = g_net.topredict(score_map, tf.shape(image_batch)[1:3])
    print("The model has been created!")

    ## get all kinds of variables list
    if '50' not in args.g_name:  # aim at vgg16
        g_restore_var = [
            v for v in tf.global_variables()
            if 'generator' in v.name and 'image' in v.name
        ]
        g_trainable_var = [
            v for v in tf.trainable_variables()
            if 'generator' in v.name and 'upscore' not in v.name
        ]
    else:  # aim at resnet50
        g_restore_var = [
            v for v in tf.global_variables() if 'fc' not in v.name
        ]
        # Exclude the batch-norm scale/offset variables from training. The
        # previous predicate joined the two tests with ``or``, which holds for
        # every name that does not contain both substrings, so it excluded
        # nothing.
        g_trainable_var = [
            v for v in tf.trainable_variables()
            if 'beta' not in v.name and 'gamma' not in v.name
        ]

    ## set loss
    mce_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label,
                                                       logits=logits))
    # l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
    # g_loss = tf.reduce_mean(mce_loss) + tf.add_n(l2_losses)
    g_loss = mce_loss  # don't add the penalization

    # Streaming metrics; their local variables are reset after each summary.
    g_loss_var, g_loss_op = tf.metrics.mean(g_loss)
    iou_var, iou_op = tf.metrics.mean_iou(label, predict_label,
                                          args.num_classes)
    accuracy_var, acc_op = tf.metrics.accuracy(label, predict_label)
    metrics_op = tf.group(g_loss_op, iou_op, acc_op)

    ## set optimizer
    iterstep = tf.placeholder(dtype=tf.float32,
                              shape=[],
                              name='iteration_step')

    base_lr = tf.constant(args.learning_rate, dtype=tf.float32, shape=[])
    lr = tf.scalar_mul(base_lr,
                       tf.pow((1 - iterstep / args.num_steps),
                              args.power))  # learning rate reduce with the time

    # g_gradients = tf.train.MomentumOptimizer(learning_rate=lr, momentum=args.momentum).compute_gradients(g_loss,
    #                                                                                                      g_trainable_var)
    train_g_op = tf.train.MomentumOptimizer(learning_rate=lr,
                                            momentum=args.momentum).minimize(
                                                g_loss,
                                                var_list=g_trainable_var)
    train_all_op = train_g_op

    ## set summary
    vs_image = tf.py_func(inv_preprocess,
                          [image_batch, args.save_num_images, img_mean],
                          tf.uint8)
    vs_label = tf.py_func(
        decode_labels,
        [label_batch, args.save_num_images, args.num_classes], tf.uint8)
    vs_predict = tf.py_func(
        decode_labels,
        [predict_batch, args.save_num_images, args.num_classes], tf.uint8)
    tf.summary.image(name='image collection_train',
                     tensor=tf.concat(axis=2,
                                      values=[vs_image, vs_label, vs_predict]),
                     max_outputs=args.save_num_images)

    tf.summary.scalar('g_loss_train', g_loss_var)
    tf.summary.scalar('iou_train', iou_var)
    tf.summary.scalar('accuracy_train', accuracy_var)
    # for grad, var in g_gradients:
    #     tf.summary.histogram(var.op.name + "/gradients", grad)
    #
    # for var in tf.trainable_variables():
    #     tf.summary.histogram(var.op.name + "/values", var)

    summary_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(args.log_dir,
                                           graph=tf.get_default_graph(),
                                           max_queue=10)

    ## set session
    # The variable may legitimately be unset; this print is informational only
    # and must not abort the run with a KeyError.
    print("GPU index:" +
          str(os.environ.get('CUDA_VISIBLE_DEVICES', 'not set')))
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    global_init = tf.global_variables_initializer()
    local_init = tf.local_variables_initializer()
    sess.run(global_init)
    sess.run(local_init)

    ## set saver
    saver_all = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=5)
    trained_step = 0
    if os.path.exists(args.restore_from + 'checkpoint'):
        # Resume a previous run.
        trained_step = load_weight(args.restore_from, saver_all, sess)
    else:
        # Fresh run: initialise from the base weights of the chosen backbone.
        if '50' in args.g_name:
            saver_g = tf.train.Saver(var_list=g_restore_var)
            load_weight(args.baseweight_from['res50'], saver_g, sess)
        elif 'vgg' in args.g_name:
            load_weight(args.baseweight_from['vgg16'], g_restore_var, sess)

    threads = tf.train.start_queue_runners(sess, coord)
    print("all setting has been done,training start!")

    ## start training
    for step in range(args.num_steps):
        now_step = int(
            trained_step) + step if trained_step is not None else step
        feed_dict = {iterstep: now_step}

        _, _, g_loss_ = sess.run([train_all_op, metrics_op, g_loss],
                                 feed_dict)

        if step > 0 and step % args.save_pred_every == 0:
            save_weight(args.restore_from, saver_all, sess, now_step)

        if step % 50 == 0 or step == args.num_steps - 1:
            print('step={} g_loss={}'.format(now_step, g_loss_))
            summary_str = sess.run(summary_op, feed_dict)
            summary_writer.add_summary(summary_str, now_step)
            # Reset the streaming-metric accumulators for the next window.
            sess.run(local_init)

    ## end training
    coord.request_stop()
    coord.join(threads)
    # Flush any events still queued in the writer before returning.
    summary_writer.close()
    print('end....')
def main(model_log_dir, check_point):
    """Evaluate a checkpoint and its weighted ensembles with stored results.

    Restores ``model.ckpt-<check_point>`` from ``LOG_PATH/model_log_dir``,
    accumulates mean IoU over ``cfg.param['eval_steps']`` batches for the
    network alone and for each weighting in ``weight_list`` against the
    per-step values loaded from ``TNET_LOG_PATH/valid.npy``, prints the
    results and writes them to ``eval.json`` next to the checkpoint.

    Args:
        model_log_dir: directory name under ``LOG_PATH`` holding the model.
        check_point: integer step used to build the checkpoint file name.

    Raises:
        AssertionError: if ``cfg.dataset`` is not ``'others'``.
    """
    tf.reset_default_graph()
    args = get_arguments()
    cfg = Config(dataset=args.dataset,
                 is_training=False,
                 filter_scale=args.filter_scale,
                 eval_path_log=os.path.join(LOG_PATH, model_log_dir))
    cfg.model_paths['others'] = os.path.join(LOG_PATH, model_log_dir,
                                             'model.ckpt-%d' % check_point)

    model = model_config[args.model]

    reader = ImageReader(cfg=cfg, mode='eval')
    net = model(image_reader=reader, cfg=cfg, mode='eval')

    # mIoU: flatten, then drop every pixel carrying the ignore label.
    pred_flatten = tf.reshape(net.output, [
        -1,
    ])
    label_flatten = tf.reshape(net.labels, [
        -1,
    ])

    mask = tf.not_equal(label_flatten, cfg.param['ignore_label'])
    indices = tf.squeeze(tf.where(mask), 1)
    gt = tf.cast(tf.gather(label_flatten, indices), tf.int32)
    pred = tf.gather(pred_flatten, indices)

    tnet_result = np.load(file=os.path.join(TNET_LOG_PATH, 'valid.npy'))
    weight_list = [[0.4, 0.6], [0.5, 0.5], [0.6, 0.4]]
    ensemble_pred_list = []
    # Fed once per step with ``tnet_result[i]``.
    ensemble_input = tf.placeholder(dtype=pred.dtype, shape=[None])

    for weight in weight_list:
        # Second half of the last axis of the upsampled logits, weighted.
        ensemble_pred = tf.split(
            net.logits_up, 2,
            axis=len(net.logits_up.get_shape()) - 1)[1] * weight[0]
        ensemble_pred = tf.gather(tf.reshape(ensemble_pred, [
            -1,
        ]), indices)
        ensemble_pred = ensemble_pred + tf.cast(
            ensemble_input, tf.float32) * tf.constant(weight[1])
        ensemble_pred = tf.round(ensemble_pred)
        ensemble_pred_list.append(ensemble_pred)

    ensemble_mIoU_list = []
    ensemble_update_op_list = []

    assert cfg.dataset == 'others'
    mIoU, update_op = tf.metrics.mean_iou(predictions=pred,
                                          labels=gt,
                                          num_classes=cfg.param['num_classes'])
    for ensemble_pred in ensemble_pred_list:
        ensemble_mIoU, ensemble_update_op = tf.metrics.mean_iou(
            predictions=ensemble_pred,
            labels=gt,
            num_classes=cfg.param['num_classes'])
        ensemble_mIoU_list.append(ensemble_mIoU)
        ensemble_update_op_list.append(ensemble_update_op)

    net.create_session()
    net.restore(cfg.model_paths[args.model])
    duration = 0
    for i in trange(cfg.param['eval_steps'], desc='evaluation', leave=True):
        start = time.time()
        feed_dict = {ensemble_input: tnet_result[i]}
        _ = net.sess.run([update_op] + ensemble_update_op_list,
                         feed_dict=feed_dict)
        end = time.time()
        duration += (end - start)

    final_mIou = net.sess.run(mIoU)
    ensemble_final_mIou_list = net.sess.run(ensemble_mIoU_list)

    print('total time:{} mean inference time:{} mIoU: {}'.format(
        duration, duration / cfg.param['eval_steps'], final_mIou))
    for weight, ensemble_iou in zip(weight_list, ensemble_final_mIou_list):
        print(weight, ensemble_iou)

    Config.save_to_json(dict={
        'FINAL_MIOU': float(final_mIou),
        "EVAL_STEPS": cfg.param['eval_steps'],
        "ENSEMBLE_WEIGHT": weight_list,
        "ENSEMBLE_MIOU": [float(x) for x in ensemble_final_mIou_list]
    },
                        path=os.path.dirname(cfg.model_paths['others']),
                        file_name='eval.json',
                        mode='eval')

    # Leave any session context that is still the default, so a later call
    # starts clean. The previous code called the misspelled ``_exit__``,
    # which raised AttributeError whenever a default session existed.
    sess = tf.get_default_session()
    if sess is not None:
        sess.__exit__(None, None, None)
# Settings and input tensors for a training graph. This is a fragment:
# `coord` is not defined in the visible code and must come from the
# surrounding scope.
img_size = [135, 135]
crop_size = [128, 128]
random_scale = False
random_mirror = True
random_crop = True
batch_size = 8
learning_rate = 0.00001
power = 0.9
num_steps = 300000
restore_from = './weights/dvn/20171119/'
g_weight_from = ''
d_weight_from = ''
data_dir = '/data/rui.wu/irfan/gan_seg/dvn/data/'
is_train = True

# Build the reader and dequeue one batch of images and labels.
with tf.name_scope("create_inputs"):
    reader = ImageReader(data_dir, img_size, crop_size, random_scale,
                         random_mirror, random_crop, is_train, coord)
    image_batch, label_batch = reader.dequeue(batch_size)
print("Data is ready!")

## load model
label_batch = tf.cast(label_batch, tf.uint8)
image_batch = tf.cast(image_batch, tf.float32)
# b = tf.zeros(label_batch.get_shape())
# a = tf.ones(label_batch.get_shape())
# label_batch_b = tf.where(tf.greater(label_batch, 0.5), a, b)

# Placeholders fed at run time; the 128x128 spatial size matches crop_size.
real_iou = tf.placeholder(tf.float32, [batch_size, 1])
train_seg = tf.placeholder(tf.float32, [batch_size, 128, 128, 1])
train_image = tf.placeholder(tf.float32, [batch_size, 128, 128, 3])
# uint8 copy of the segmentation placeholder with the size-1 last axis removed.
train_seg_new = tf.cast(train_seg, tf.uint8)
train_seg_new = tf.squeeze(train_seg_new, squeeze_dims=[3])
def build(self): config = self.__dict__.copy() num_labels = self.num_labels #for segmentation (pixel labels) ignore_label = 255 #for segmentation (pixel labels) random_seed = 1234 generator = self.resnetG discriminator = self.resnetD GEN_A2B_NAME = 'GEN_A2B' GEN_B2A_NAME = 'GEN_B2A' DIS_A_NAME = 'DIS_A' DIS_B_NAME = 'DIS_B' global_step = tf.train.get_or_create_global_step() slim.add_model_variable(global_step) global_step_update = tf.assign_add(global_step, 1, name='global_step_update') def resize_and_onehot(tensor, shape, depth): with tf.device('/device:CPU:0'): onehot_tensor = tf.one_hot(tf.squeeze( tf.image.resize_nearest_neighbor( tf.cast(tensor, tf.int32), shape), -1), depth=depth) return onehot_tensor def convert_to_labels(onehot_seg, crop_size=None): fake_segments_output = onehot_seg print ('%s | ' % fake_segments_output.device, fake_segments_output) if crop_size: fake_segments_output = tf.image.resize_bilinear(fake_segments_output, crop_size) #tf.shape(source_segments_batch)[1:3]) fake_segments_output = tf.argmax(fake_segments_output, axis=-1) # generate segment indices matrix fake_segments_output = tf.expand_dims(fake_segments_output, dim=-1) # Create 4-d tensor. return fake_segments_output target_data_queue = [] tf.set_random_seed(random_seed) coord = tf.train.Coordinator() with tf.name_scope("create_inputs"): for i, data in enumerate([config['source_data']] + config['target_data']): reader = ImageReader( data['data_dir'], data['data_list'], config['crop_size'], # Original size: [1024, 2048] random_scale=config['random_scale'], random_mirror=True, ignore_label=ignore_label, img_mean=0, # set IMG_MEAN to centralize image pixels (set NONE for automatic choosing) img_channel_format='RGB', # Default: BGR in deeplab_v2. 
See here: https://github.com/zhengyang-wang/Deeplab-v2--ResNet-101--Tensorflow/issues/30 coord=coord, rgb_label=False) data_queue = reader.dequeue(config['batch_size']) if i == 0: # ---[ source: training data source_images_batch = data_queue[0] #A: 3 chaanels source_segments_batch = data_queue[1] #B: 1-label channels source_images_batch = tf.cast(source_images_batch, tf.float32) / 127.5 - 1. source_images_batch = tf.image.resize_bilinear(source_images_batch, config['resize']) #A: 3 chaanels source_segments_batch = tf.image.resize_nearest_neighbor(source_segments_batch, config['resize']) #B: 1-label channels source_segments_batch = tf.cast(tf.one_hot(tf.squeeze(source_segments_batch, -1), depth=num_labels), tf.float32) - 0.5 #B: 19 channels else: # ---[ target: validation data / testing data target_images_batch = data_queue[0] #A: 3 chaanels target_segments_batch = data_queue[1] #B: 1-label channels target_images_batch = tf.cast(target_images_batch, tf.float32) / 127.5 - 1. target_images_batch = tf.image.resize_bilinear(target_images_batch, config['resize']) #A: 3 chaanels target_segments_batch = tf.image.resize_nearest_neighbor(target_segments_batch, config['resize']) #B: 1-label channels target_segments_batch = tf.cast(tf.one_hot(tf.squeeze(target_segments_batch, -1), depth=num_labels), tf.float32) - 0.5 #B: 19 channels target_data_queue.append([target_images_batch, target_segments_batch]) size_list = cuttool(config['batch_size'], config['gpus']) source_images_batches = tf.split(source_images_batch, size_list) source_segments_batches = tf.split(source_segments_batch, size_list) fake_1_segments_output = [None] * len(size_list) fake_2_segments_output = [None] * len(size_list) fake_1_images_output = [None] * len(size_list) fake_2_images_output = [None] * len(size_list) d_real_img_output = [None] * len(size_list) d_fake_img_output = [None] * len(size_list) d_real_seg_output = [None] * len(size_list) d_fake_seg_output = [None] * len(size_list) for gid, 
(source_images_batch, source_segments_batch) in \ enumerate(zip(source_images_batches, source_segments_batches)): # ---[ Generator A2B & B2A with tf.device('/device:GPU:{}'.format((gid-1) % config['gpus'])): fake_seg = generator(source_images_batch, output_channel=num_labels, reuse=tf.AUTO_REUSE, phase_train=True, scope=GEN_A2B_NAME) fake_seg = tf.nn.softmax(fake_seg) - 0.5 fake_img_ = generator(fake_seg, output_channel=3, reuse=tf.AUTO_REUSE, phase_train=True, scope=GEN_B2A_NAME) fake_img_ = tf.nn.tanh(fake_img_) fake_img = generator(source_segments_batch, output_channel=3, reuse=tf.AUTO_REUSE, phase_train=True, scope=GEN_B2A_NAME) fake_img = tf.nn.tanh(fake_img) fake_seg_ = generator(fake_img, output_channel=num_labels, reuse=tf.AUTO_REUSE, phase_train=True, scope=GEN_A2B_NAME) fake_seg_ = tf.nn.softmax(fake_seg_) - 0.5 # ---[ Discriminator A & B with tf.device('/device:GPU:{}'.format((gid-1) % config['gpus'])): d_real_img = discriminator(source_images_batch, reuse=tf.AUTO_REUSE, phase_train=True, scope=DIS_A_NAME) d_fake_img = discriminator(fake_img, reuse=tf.AUTO_REUSE, phase_train=True, scope=DIS_A_NAME) d_real_seg = discriminator(source_segments_batch, reuse=tf.AUTO_REUSE, phase_train=True, scope=DIS_B_NAME) d_fake_seg = discriminator(fake_seg, reuse=tf.AUTO_REUSE, phase_train=True, scope=DIS_B_NAME) #d_fake_img_val = discriminator(fake_img_val, reuse=tf.AUTO_REUSE, phase_train=False, scope=DIS_A_NAME) #d_fake_seg_val = discriminator(fake_seg_val, reuse=tf.AUTO_REUSE, phase_train=False, scope=DIS_B_NAME) fake_1_segments_output [gid] = fake_seg fake_2_segments_output [gid] = fake_seg_ fake_1_images_output [gid] = fake_img fake_2_images_output [gid] = fake_img_ d_real_img_output [gid] = d_real_img d_fake_img_output [gid] = d_fake_img d_real_seg_output [gid] = d_real_seg d_fake_seg_output [gid] = d_fake_seg source_images_batch = tf.concat(source_images_batches, axis=0) #-1~1 source_segments_batch = tf.concat(source_segments_batches, axis=0) #onehot: -0.5~+0.5 
fake_1_segments_output = tf.concat(fake_1_segments_output, axis=0) ; print('fake_1_segments_output', fake_1_segments_output) fake_2_segments_output = tf.concat(fake_2_segments_output, axis=0) ; print('fake_2_segments_output', fake_2_segments_output) fake_1_images_output = tf.concat(fake_1_images_output , axis=0) ; print('fake_1_images_output ', fake_1_images_output ) fake_2_images_output = tf.concat(fake_2_images_output , axis=0) ; print('fake_2_images_output ', fake_2_images_output ) d_real_img_output = tf.concat(d_real_img_output , axis=0) d_fake_img_output = tf.concat(d_fake_img_output , axis=0) d_real_seg_output = tf.concat(d_real_seg_output , axis=0) d_fake_seg_output = tf.concat(d_fake_seg_output , axis=0) source_data_color = [ (1.+source_images_batch ) / 2. , # source_images_batch_color sgtools.decode_labels(tf.cast(convert_to_labels(source_segments_batch + 0.5), tf.int32), num_labels), # source_segments_batch_colo sgtools.decode_labels(tf.cast(convert_to_labels(fake_1_segments_output + 0.5), tf.int32), num_labels), # fake_1_segments_output_col sgtools.decode_labels(tf.cast(convert_to_labels(fake_2_segments_output + 0.5), tf.int32), num_labels), # fake_2_segments_output_col (1.+fake_1_images_output ) / 2. , # fake_1_images_output_color (1.+fake_2_images_output ) / 2. 
, # fake_2_images_output_color ] # ---[ Validation Model target_data_color_queue = [] for target_data in target_data_queue: with tf.device('/device:GPU:{}'.format((2) % config['gpus'])): fake_seg = generator(val_images_holder, output_channel=num_labels, reuse=tf.AUTO_REUSE, phase_train=False, scope=GEN_A2B_NAME) fake_seg = tf.nn.softmax(fake_seg) - 0.5 fake_img_ = generator(fake_seg, output_channel=3, reuse=tf.AUTO_REUSE, phase_train=False, scope=GEN_B2A_NAME) fake_img_ = tf.nn.tanh(fake_img_) fake_img = generator(val_segments_holder, output_channel=3, reuse=tf.AUTO_REUSE, phase_train=False, scope=GEN_B2A_NAME) fake_img = tf.nn.tanh(fake_img) fake_seg_ = generator(fake_img, output_channel=num_labels, reuse=tf.AUTO_REUSE, phase_train=False, scope=GEN_A2B_NAME) fake_seg_ = tf.nn.softmax(fake_seg) - 0.5 target_data_color_queue.append([ (1.+target_images_batch ) / 2. , # target_images_batch_color sgtools.decode_labels(tf.cast(convert_to_labels(target_segments_batch + 0.5), tf.int32), num_labels) , # target_segments_batch_color sgtools.decode_labels(tf.cast(convert_to_labels(fake_seg + 0.5), tf.int32), num_labels) , # val_fake_1_segments_output_color sgtools.decode_labels(tf.cast(convert_to_labels(fake_seg_ + 0.5), tf.int32), num_labels) , # val_fake_2_segments_output_color (1.+val_fake_1_images_output ) / 2. , # val_fake_1_images_output_color (1.+val_fake_2_images_output ) / 2. 
, # val_fake_2_images_output_color ]) # ---[ Segment-level loss: pixelwise loss # d_seg_batch = tf.image.resize_nearest_neighbor(seg_gt, tf.shape(_d_real['segment'])[1:3]) # d_seg_batch = tf.squeeze(d_seg_batch, -1) # d_seg_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=d_seg_batch, logits=_d_real['segment'], name='segment_pixelwise_loss') # pixel-wise loss # d_seg_loss = tf.reduce_mean(d_seg_loss) # d_seg_loss = tf.identity(d_seg_loss, name='d_seg_loss') # ---[ GAN Loss: crite loss #d_loss_old = - (tf.reduce_mean(d_source_output['critic']) - tf.reduce_mean(d_target_output['critic'])) #g_loss = - (tf.reduce_mean(d_target_output['critic'])) ## gradient penalty #LAMBDA = 10 ##alpha = tf.placeholder(tf.float32, shape=[None], name='alpha') #alpha = tf.random_uniform([config['batch_size']], 0.0, 1.0, dtype=tf.float32) #for _ in source_segments_batch.shape[1:]: #alpha = tf.expand_dims(alpha, axis=1) #shape=[None,1,1,1] #interpolates = alpha * source_segments_batch + (1.-alpha) * target_segments_output #print ('source_segments_batch:', source_segments_batch) #print ('target_segments_output:',target_segments_output) #print ('interpolates:', interpolates) #interpolates = resize_and_onehot(interpolates, target_raw_segments_output.shape.as_list()[1:3], num_labels) #print ('interpolates:', interpolates) #_d_intp = discriminator(interpolates, reuse=True, phase_train=True, scope=DIS_NAME) #intp_grads = tf.gradients(_d_intp['critic'], [interpolates])[0] #slopes = tf.sqrt(tf.reduce_sum(tf.square(intp_grads), reduction_indices=[1])) #L2-distance #grads_penalty = tf.reduce_mean(tf.square(slopes-1), name='grads_penalty') #d_loss = d_loss_old + LAMBDA * grads_penalty def sigmoid_cross_entropy(labels, logits): return tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits) ) def least_square(labels, logits): return tf.reduce_mean( (labels - logits) ** 2 ) if config['loss_mode'] == 'lsgan': # ---[ GAN loss: LSGAN loss (chi-square, or called 
least-square) loss_func = least_square else: # ---[ GAN loss: sigmoid BCE loss loss_func = sigmoid_cross_entropy # ---[ LOSS _img_recovery = config['L1_lambda'] * tf.reduce_mean( tf.abs(source_images_batch - fake_2_images_output)) #_seg_recovery = config['L1_lambda'] * tf.reduce_mean( tf.abs(source_segments_batch - fake_1_segments_output)) #r1.0: error #_seg_recovery = config['L1_lambda'] * tf.reduce_mean( tf.abs(source_segments_batch - fake_2_segments_output)) #r2.0 _seg_recovery = config['L1_lambda'] * tf.reduce_mean( tf.abs(source_segments_batch_color - fake_2_segments_output_color)) #r2.0.5: not sure because, in theory, no gradient if using decode_labels() g_loss_a2b = \ loss_func( labels=tf.ones_like(d_fake_seg_output), logits=d_fake_seg_output ) + \ _img_recovery + _seg_recovery g_loss_b2a = \ loss_func( labels=tf.ones_like(d_fake_img_output), logits=d_fake_img_output ) + \ _img_recovery + _seg_recovery g_loss = \ loss_func( labels=tf.ones_like(d_fake_seg_output), logits=d_fake_seg_output ) + \ loss_func( labels=tf.ones_like(d_fake_img_output), logits=d_fake_img_output ) + \ _img_recovery + _seg_recovery da_loss = \ loss_func( labels=tf.ones_like(d_real_img_output), logits=d_real_img_output ) + \ loss_func( labels=tf.zeros_like(d_fake_img_output), logits=d_fake_img_output ) db_loss = \ loss_func( labels=tf.ones_like(d_real_seg_output), logits=d_real_seg_output ) + \ loss_func( labels=tf.zeros_like(d_fake_seg_output), logits=d_fake_seg_output ) d_loss = \ (da_loss + db_loss) / 2. 
# D will output [BATCH_SIZE, 32, 32, 1] num_da_real_img_acc = tf.size( tf.where(tf.reduce_mean(tf.nn.sigmoid(d_real_img_output), axis=[1,2,3]) > 0.5)[:,0], name='num_da_real_img_acc' ) num_da_fake_img_acc = tf.size( tf.where(tf.reduce_mean(tf.nn.sigmoid(d_fake_img_output), axis=[1,2,3]) < 0.5)[:,0], name='num_da_fake_img_acc' ) num_db_real_seg_acc = tf.size( tf.where(tf.reduce_mean(tf.nn.sigmoid(d_real_seg_output), axis=[1,2,3]) > 0.5)[:,0], name='num_db_real_seg_acc' ) num_db_fake_seg_acc = tf.size( tf.where(tf.reduce_mean(tf.nn.sigmoid(d_fake_seg_output), axis=[1,2,3]) < 0.5)[:,0], name='num_db_fake_seg_acc' ) ## limit weights to 0 #g_weight_regularizer = [0.0001 * tf.nn.l2_loss(v) for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, GEN_NAME) if 'weight' in v.name] #g_weight_regularizer = tf.add_n(g_weight_regularizer, name='g_weight_regularizer_loss') #g_loss += g_weight_regularizer #d_weight_regularizer = [0.0001 * tf.nn.l2_loss(v) for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, DIS_NAME) if 'weight' in v.name] #d_weight_regularizer = tf.add_n(d_weight_regularizer, name='d_weight_regularizer_loss') #d_loss += d_weight_regularizer d_loss = tf.identity(d_loss, name='d_loss') g_loss = tf.identity(g_loss, name='g_loss') ## --- Training Set Validation --- # Predictions. #pred_gt = tf.reshape(target_segments_batch, [-1,]) #pred = tf.reshape(target_segments_output, [-1,]) #indices = tf.squeeze(tf.where(tf.not_equal(pred_gt, ignore_label)), 1) #pred_gt = tf.cast(tf.gather(pred_gt, indices), tf.int32) #pred = tf.cast(tf.gather(pred, indices), tf.int32) ## mIoU ### Allowing to use indices matrices in mean_iou() with `num_classes=indices.max()` #weights = tf.cast(tf.less_equal(pred_gt, num_labels), tf.int32) # Ignoring all labels greater than or equal to n_classes. 
#mIoU, mIoU_update_op = tf.metrics.mean_iou(pred, pred_gt, num_classes=num_labels, weights=weights) # ---[ Variables g_a2b_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, GEN_A2B_NAME) g_b2a_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, GEN_B2A_NAME) d_a_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, DIS_A_NAME) d_b_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, DIS_B_NAME) g_vars = g_a2b_vars + g_b2a_vars d_vars = d_a_vars + d_b_vars print_list(g_a2b_vars, GEN_A2B_NAME) print_list(g_b2a_vars, GEN_B2A_NAME) print_list(d_a_vars, DIS_A_NAME) print_list(d_b_vars, DIS_B_NAME) # ---[ Optimizer ## `colocate_gradients_with_ops = True` to reduce GPU MEM utils, and fasten training speed OPT_NAME = 'Optimizer' g_opts = []; d_opts = [] update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): with tf.variable_scope(OPT_NAME): #with tf.device('/device:GPU:{}'.format(config['gpus']-1)): if True: if len(g_vars) > 0: g_opt = tf.train.AdamOptimizer(learning_rate=config['g_lr'], beta1=0.5, beta2=0.9).minimize(g_loss, var_list=g_vars, colocate_gradients_with_ops=True) g_opts.append(g_opt) if len(d_vars) > 0: d_opt = tf.train.AdamOptimizer(learning_rate=config['d_lr'], beta1=0.5, beta2=0.9).minimize(d_loss, var_list=d_vars, colocate_gradients_with_ops=True) d_opts.append(d_opt) g_opt = tf.group(*g_opts) d_opt = tf.group(*d_opts) opt_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, OPT_NAME) print_list(opt_vars, OPT_NAME) # --- [ Summary scalars = [d_loss, g_loss] #scalars += [mIoU] scalars += [num_da_real_img_acc, num_da_fake_img_acc, num_db_real_seg_acc, num_db_fake_seg_acc] scalars += [g_loss_a2b, g_loss_b2a, da_loss, db_loss] writer, summarys = create_summary(summary_dir=config['summary_dir'], name=config['suffix'], scalar = scalars, ) ''' Training ''' with tf.Session(config=GpuConfig) as sess: sess.run(tf.global_variables_initializer()) #DONOT put it after ``saver.restore`` 
sess.run(tf.local_variables_initializer()) #DONOT put it after ``saver.restore`` saver = tf.train.Saver(g_vars + d_vars, max_to_keep=1) #g_saver = tf.train.Saver(g_vars, max_to_keep=1) #d_saver = tf.train.Saver(d_vars, max_to_keep=1) #if self.ckpt: #saver.restore(sess, self.ckpt) #print ("Training starts at %d iteration..." % sess.run(global_step)) feeds = {} # Start queue threads. threads = tf.train.start_queue_runners(coord=coord, sess=sess) inside_epoch = int(config['print_epoch']) if config['print_epoch'] < config['max_epoch'] else int(config['max_epoch'] / 1) outside_epoch = int(config['max_epoch'] / inside_epoch) start = int(sess.run(global_step) / inside_epoch) if start >= outside_epoch: raise ValueError("initial iteration:%d >= max iteration:%d. please reset '--max_epoch' value." % (sess.run(global_step), config['max_epoch'])) start_time = time.time() for epo in range(start, outside_epoch): bar = IncrementalBar('[epoch {:<4d}/{:<4d}]'.format(epo, outside_epoch), max=inside_epoch) for epi in range(inside_epoch): iters = sess.run(global_step) # save summary if epo == 0: save_summarys = sess.run(summarys, feed_dict=feeds) writer.add_summary(save_summarys, iters) for _ in range(config['d_epoch']): sess.run(d_opt, feed_dict=feeds) if iters > self.pretrain_D_epoch: for _ in range(config['g_epoch']): sess.run(g_opt, feed_dict=feeds) sess.run(global_step_update) bar.next() duration = time.time() - start_time disc_loss, gen_loss = \ sess.run([d_loss, g_loss], feed_dict=feeds) na_real, na_fake, nb_real, nb_fake = \ sess.run([num_da_real_img_acc, num_da_fake_img_acc, num_db_real_seg_acc, num_db_fake_seg_acc], feed_dict=feeds) #sess.run(mIoU_update_op, feed_dict=feeds) #miou = sess.run(mIoU, feed_dict=feeds) print (' -', 'DLoss: %-8.2e' % disc_loss, #'(W: %-8.2e)' % disc_wloss, 'GLoss: %-8.2e' % gen_loss, #'(W: %-8.2e)' % gen_wloss, '|', '[Da_img] #real: %d, #fake: %d' % (na_real, na_fake), '[Db_seg] #real: %d, #fake: %d' % (nb_real, nb_fake), '|', #'[train_mIoU] %.2f' 
% miou, '[ETA] %s' % format_time(duration) ) bar.finish() iters = sess.run(global_step) # save checkpoint if epo % 2 == 0: saver_path = os.path.join(config['ckpt_dir'], '{}.ckpt'.format(config['name'])) saver.save(sess, save_path=saver_path, global_step=global_step) # save summary if epo % 1 == 0: save_summarys = sess.run(summarys, feed_dict=feeds) writer.add_summary(save_summarys, iters) # output samples if epo % 5 == 0: img_gt, seg_gt, seg_1, seg_2, img_1, img_2 = sess.run(source_data_color) print ("Range %10s:" % "seg_gt", seg_gt.min(), seg_gt.max()) print ("Range %10s:" % "seg_1", seg_1.min(), seg_1.max()) print ("Range %10s:" % "seg_2", seg_2.min(), seg_2.max()) print ("Range %10s:" % "img_gt", img_gt.min(), img_gt.max()) print ("Range %10s:" % "img_1", img_1.min(), img_1.max()) print ("Range %10s:" % "img_2", img_2.min(), img_2.max()) _output = np.concatenate([img_gt, seg_gt, seg_1, img_1, img_2, seg_2], axis=0) save_visualization(_output, save_path=os.path.join(config['result_dir'], 'tr-{}.jpg'.format(iters)), size=[3, 2*config['batch_size']]) #seg_output = np.concatenate([seg_gt, seg_2, seg_1], axis=0) #img_output = np.concatenate([img_gt, img_2, img_1], axis=0) #save_visualization(seg_output, save_path=os.path.join(config['result_dir'], 'tr-seg-1gt_2mapback_3map-{}.jpg'.format(iters)), size=[3, config['batch_size']]) #save_visualization(img_output, save_path=os.path.join(config['result_dir'], 'tr-img-1gt_2mapback_3map-{}.jpg'.format(iters)), size=[3, config['batch_size']]) for i,target_data_color in enumerate(target_data_color_queue): val_img_gt, val_seg_gt, val_seg_1, val_seg_2, val_img_1, val_img_2 = sess.run(target_data_color) print ("Val Range %10s:" % "seg_gt", val_seg_gt.min(), val_seg_gt.max()) print ("Val Range %10s:" % "seg_1", val_seg_1.min(), val_seg_1.max()) print ("Val Range %10s:" % "seg_2", val_seg_2.min(), val_seg_2.max()) print ("Val Range %10s:" % "img_gt", val_img_gt.min(), val_img_gt.max()) print ("Val Range %10s:" % "img_1", 
val_img_1.min(), val_img_1.max()) print ("Val Range %10s:" % "img_2", val_img_2.min(), val_img_2.max()) _output = np.concatenate([val_img_gt, val_seg_gt, val_seg_1, val_img_1, val_img_2, val_seg_2], axis=0) save_visualization(_output, save_path=os.path.join(config['result_dir'], 'val{}-{}.jpg'.format(i,iters)), size=[3, 2*config['batch_size']]) #val_seg_output = np.concatenate([val_seg_gt, val_seg_2, val_seg_1], axis=0) #val_img_output = np.concatenate([val_img_gt, val_img_2, val_img_1], axis=0) #save_visualization(seg_output, save_path=os.path.join(config['result_dir'], 'val{}-seg-1gt_2mapback_3map-{}.jpg'.format(i,iters)), size=[3, config['batch_size']]) #save_visualization(img_output, save_path=os.path.join(config['result_dir'], 'val{}-img-1gt_2mapback_3map-{}.jpg'.format(i,iters)), size=[3, config['batch_size']]) writer.flush() writer.close()
N_CLASSES = 20
DATA_DIR = './datasets/CIHP2'
LIST_PATH = './datasets/CIHP2/list/val.txt'
DATA_ID_LIST = './datasets/CIHP2/list/val_id.txt'
# One evaluation step per line of the id list.
with open(DATA_ID_LIST, 'r') as f:
    NUM_STEPS = sum(1 for _ in f)
RESTORE_FROM = 'checkpoint'

"""Create the model and start the evaluation process."""
# Create queue coordinator.
coord = tf.train.Coordinator()

# Load reader.
with tf.name_scope("create_inputs"):
    reader = ImageReader(DATA_DIR, LIST_PATH, DATA_ID_LIST,
                         None, False, False, False, coord)
    image = reader.image
    label = reader.label
    edge_gt = reader.edge
    # Copy of the image reversed along axis 1 (presumably a horizontal
    # flip of an [h, w, c] tensor -- confirm against ImageReader).
    image_rev = tf.reverse(image, tf.stack([1]))
    image_list = reader.image_list

# The network input holds the image and its reversed copy.
image_batch = tf.stack([image, image_rev])
# Add one batch dimension.
label_batch = tf.expand_dims(label, dim=0)
edge_gt_batch = tf.expand_dims(edge_gt, dim=0)

# Original spatial size as floats, so it can be multiplied by a scale.
h_orig = tf.to_float(tf.shape(image_batch)[1])
w_orig = tf.to_float(tf.shape(image_batch)[2])


def _scaled_batch(factor):
    """Return ``image_batch`` resized to ``factor`` times its original size."""
    new_h = tf.to_int32(tf.multiply(h_orig, factor))
    new_w = tf.to_int32(tf.multiply(w_orig, factor))
    return tf.image.resize_images(image_batch, tf.stack([new_h, new_w]))


# Multi-scale copies of the input batch.
image_batch050 = _scaled_batch(0.50)
image_batch075 = _scaled_batch(0.75)
image_batch125 = _scaled_batch(1.25)
image_batch150 = _scaled_batch(1.50)
image_batch175 = _scaled_batch(1.75)

# Create network.
def predict_setup(self):
    """Build the multi-scale prediction graph and create the output folders.

    Reads the test images through ``ImageReader``, runs ``Deeplab_v2`` on
    the input at its original size and at 0.75x and 0.5x of it (sharing
    variables through ``reuse=True``), fuses the three score maps with an
    element-wise maximum and takes the arg-max over axis 3.

    Sets on ``self``:
        coord: the ``tf.train.Coordinator`` handed to the reader.
        resized_decoder100: ``net.decoding`` of the full-scale network.
        pred: ``uint8`` arg-max prediction with a trailing axis added.
        loader: ``tf.train.Saver`` over all global variables.

    Side effects:
        Creates ``conf.out_dir`` and its sub-folders when they are missing.
    """
    # Create queue coordinator.
    self.coord = tf.train.Coordinator()

    # Load reader
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            self.conf.data_dir,
            self.conf.test_data_list,
            None,  # the images have different sizes
            False,  # no data-aug
            False,  # no data-aug
            self.conf.ignore_label,
            IMG_MEAN,
            self.coord)
        image, label = reader.image, reader.label  # [h, w, 3 or 1]
    # Add one batch dimension [1, h, w, 3 or 1]
    image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0)

    # Down-scaled copies of the input for the 0.75x and 0.5x branches.
    h_orig, w_orig = tf.to_float(tf.shape(image_batch)[1]), tf.to_float(
        tf.shape(image_batch)[2])
    image_batch_075 = tf.image.resize_images(
        image_batch,
        tf.stack([
            tf.to_int32(tf.multiply(h_orig, 0.75)),
            tf.to_int32(tf.multiply(w_orig, 0.75))
        ]))
    image_batch_05 = tf.image.resize_images(
        image_batch,
        tf.stack([
            tf.to_int32(tf.multiply(h_orig, 0.5)),
            tf.to_int32(tf.multiply(w_orig, 0.5))
        ]))

    # Create network: the first instance creates the variables, the two
    # rescaled instances reuse them.
    with tf.variable_scope('', reuse=False):
        net = Deeplab_v2(image_batch, self.conf.num_classes, False,
                         rescale075=False, rescale05=False,
                         crf_type=self.conf.crf_type)
    with tf.variable_scope('', reuse=True):
        net075 = Deeplab_v2(image_batch_075, self.conf.num_classes, False,
                            rescale075=True, rescale05=False,
                            crf_type=self.conf.crf_type)
    with tf.variable_scope('', reuse=True):
        net05 = Deeplab_v2(image_batch_05, self.conf.num_classes, False,
                           rescale075=False, rescale05=True,
                           crf_type=self.conf.crf_type)

    # predictions
    # Network raw output
    raw_output100 = net.outputs
    self.resized_decoder100 = net.decoding
    raw_output075 = net075.outputs
    raw_output05 = net05.outputs
    # Bring the smaller score maps to the full-scale map size, then keep the
    # element-wise maximum over the three scales.
    raw_output = tf.reduce_max(tf.stack([
        raw_output100,
        tf.image.resize_images(raw_output075, tf.shape(raw_output100)[1:3, ]),
        tf.image.resize_images(raw_output05, tf.shape(raw_output100)[1:3, ])
    ]), axis=0)
    raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ])
    raw_output = tf.argmax(raw_output, axis=3)
    self.pred = tf.cast(tf.expand_dims(raw_output, dim=3), tf.uint8)

    # Create directory.
    # Every folder is checked on its own: testing only out_dir would leave a
    # pre-existing out_dir without its sub-folders.
    out_dirs = [
        self.conf.out_dir,
        self.conf.out_dir + '/prediction',
        # NOTE(review): created regardless of conf.visual because
        # resized_decoder100 is always built; an unused empty folder is
        # harmless -- confirm against the prediction loop.
        self.conf.out_dir + '/resized_decoding',
    ]
    if self.conf.visual:
        out_dirs.append(self.conf.out_dir + '/visual_prediction')
    for out_dir in out_dirs:
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

    # Loader for loading the checkpoint
    self.loader = tf.train.Saver(var_list=tf.global_variables())
def main():
    """Create the model and start the evaluation process.

    Steps, in order:
      1. Read the label names for ``args.classes`` and create the output
         directory hierarchy (exits with status 1 if the label file cannot
         be read).
      2. Pick the window of images ``[buffer_end - interval_size,
         buffer_end)`` from ``args.input_dir`` ordered by
         ``os.path.getctime`` and write the image names to
         ``DATA_LIST_PATH``.
      3. Build the three-scale (1.0, 0.75, 1.25) JPPNet graph on the image
         and its reversed copy, and average the parsing outputs.
      4. Run the prediction once per listed image, turn each prediction into
         masks and optionally write them with ``cv2.imwrite``.

    Returns:
        -1 when the selected window contains no image file, otherwise None.
    """
    LABELS = []
    ignore_labels = []
    if args.classes == 'lip':
        ignore_labels = [0, 1, 2, 6, 9, 11, 16, 17]

    # grab the labels from user file
    try:
        with open(
                BASE / 'datasets' / 'labels' /
                '{}_labels.txt'.format(args.classes), 'r') as f:
            for line in f:
                LABELS.append(line.strip('\n'))
    except Exception as e:
        print("{} No Label file for dataset {}".format(e, args.classes))
        sys.exit(1)
    make_dir_heirarchy(args.output_dir, LABELS)

    pattern_output_dir = ''
    # PIPELINE: make output dir for pattern to classify after pipeline finished
    if args.pattern:
        pattern_output_dir = args.output_dir + '_pattern'
        make_dir_heirarchy(pattern_output_dir, LABELS)

    # buffer end is the current index of the last element in the buffer when
    # the job ran; interval size is the size of the buffer jpp will categorize
    interval_size = int(args.interval_size)
    buffer_end = int(args.buffer_size)

    # sort the directory contents by when they were created in the dir
    # image paths are relative paths !!
    DATA_DIRECTORY = args.input_dir
    image_list = os.listdir(DATA_DIRECTORY)
    full_list = [os.path.join(DATA_DIRECTORY, i) for i in image_list]
    # sort files by time created
    time_sorted_list = sorted(full_list, key=os.path.getctime)
    # relative sorted path
    sorted_filename_list = [os.path.basename(i) for i in time_sorted_list]
    # only get images up to the buffer size
    DATA_LIST = sorted_filename_list[max(0, buffer_end - interval_size):buffer_end]

    # create label file for data loader and get number of IMAGES
    # image is concatenated with DATA_DIRECTORY in ImageReader init function!
    # filter videos (mainly for security when not using fullbody detection)
    NUM_STEPS = 0
    with open(DATA_LIST_PATH, 'w+') as f:
        for image in DATA_LIST:
            if image.split('.')[-1] in [
                    'jpg', 'png', 'jpeg', 'JPG', 'PNG', 'JPEG'
            ]:
                f.write('/' + image + '\n')
                NUM_STEPS += 1

    if NUM_STEPS == 0:
        print("Exiting: No Images Found")
        return -1
    print("CLASSIFYING {} IMAGES".format(NUM_STEPS))

    #############################
    # LOAD NETWORK & DATA
    ############################
    # Create queue coordinator.
    coord = tf.train.Coordinator()
    h, w = INPUT_SIZE
    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(DATA_DIRECTORY, DATA_LIST_PATH, None, False,
                             False, coord, buffer_end)
        image = reader.image
        # Copy of the image reversed along axis 1; its prediction is
        # reversed back and averaged with the direct one further down.
        image_rev = tf.reverse(image, tf.stack([1]))
        image_list = reader.image_list

    image_batch_origin = tf.stack([image, image_rev])
    image_batch = tf.image.resize_images(image_batch_origin, [int(h), int(w)])
    image_batch075 = tf.image.resize_images(
        image_batch_origin, [int(h * 0.75), int(w * 0.75)])
    image_batch125 = tf.image.resize_images(
        image_batch_origin, [int(h * 1.25), int(w * 1.25)])

    # Create network: the first instance creates the variables, the other
    # two scales reuse them.
    with tf.variable_scope('', reuse=False):
        net_100 = JPPNetModel({'data': image_batch}, is_training=False,
                              n_classes=N_CLASSES)
    with tf.variable_scope('', reuse=True):
        net_075 = JPPNetModel({'data': image_batch075}, is_training=False,
                              n_classes=N_CLASSES)
    with tf.variable_scope('', reuse=True):
        net_125 = JPPNetModel({'data': image_batch125}, is_training=False,
                              n_classes=N_CLASSES)

    # parsing net
    parsing_fea1_100 = net_100.layers['res5d_branch2b_parsing']
    parsing_fea1_075 = net_075.layers['res5d_branch2b_parsing']
    parsing_fea1_125 = net_125.layers['res5d_branch2b_parsing']

    parsing_out1_100 = net_100.layers['fc1_human']
    parsing_out1_075 = net_075.layers['fc1_human']
    parsing_out1_125 = net_125.layers['fc1_human']

    # pose net
    resnet_fea_100 = net_100.layers['res4b22_relu']
    resnet_fea_075 = net_075.layers['res4b22_relu']
    resnet_fea_125 = net_125.layers['res4b22_relu']

    with tf.variable_scope('', reuse=False):
        pose_out1_100, pose_fea1_100 = pose_net(resnet_fea_100, 'fc1_pose')
        pose_out2_100, pose_fea2_100 = pose_refine(
            pose_out1_100, parsing_out1_100, pose_fea1_100, name='fc2_pose')
        parsing_out2_100, parsing_fea2_100 = parsing_refine(
            parsing_out1_100, pose_out1_100, parsing_fea1_100,
            name='fc2_parsing')
        parsing_out3_100, parsing_fea3_100 = parsing_refine(
            parsing_out2_100, pose_out2_100, parsing_fea2_100,
            name='fc3_parsing')

    with tf.variable_scope('', reuse=True):
        pose_out1_075, pose_fea1_075 = pose_net(resnet_fea_075, 'fc1_pose')
        pose_out2_075, pose_fea2_075 = pose_refine(
            pose_out1_075, parsing_out1_075, pose_fea1_075, name='fc2_pose')
        parsing_out2_075, parsing_fea2_075 = parsing_refine(
            parsing_out1_075, pose_out1_075, parsing_fea1_075,
            name='fc2_parsing')
        parsing_out3_075, parsing_fea3_075 = parsing_refine(
            parsing_out2_075, pose_out2_075, parsing_fea2_075,
            name='fc3_parsing')

    with tf.variable_scope('', reuse=True):
        pose_out1_125, pose_fea1_125 = pose_net(resnet_fea_125, 'fc1_pose')
        pose_out2_125, pose_fea2_125 = pose_refine(
            pose_out1_125, parsing_out1_125, pose_fea1_125, name='fc2_pose')
        parsing_out2_125, parsing_fea2_125 = parsing_refine(
            parsing_out1_125, pose_out1_125, parsing_fea1_125,
            name='fc2_parsing')
        parsing_out3_125, parsing_fea3_125 = parsing_refine(
            parsing_out2_125, pose_out2_125, parsing_fea2_125,
            name='fc3_parsing')

    # Resize every scale to the size of the original batch and average the
    # three scales for each refinement stage.
    parsing_out1 = tf.reduce_mean(tf.stack([
        tf.image.resize_images(parsing_out1_100, tf.shape(image_batch_origin)[1:3, ]),
        tf.image.resize_images(parsing_out1_075, tf.shape(image_batch_origin)[1:3, ]),
        tf.image.resize_images(parsing_out1_125, tf.shape(image_batch_origin)[1:3, ])
    ]), axis=0)
    parsing_out2 = tf.reduce_mean(tf.stack([
        tf.image.resize_images(parsing_out2_100, tf.shape(image_batch_origin)[1:3, ]),
        tf.image.resize_images(parsing_out2_075, tf.shape(image_batch_origin)[1:3, ]),
        tf.image.resize_images(parsing_out2_125, tf.shape(image_batch_origin)[1:3, ])
    ]), axis=0)
    parsing_out3 = tf.reduce_mean(tf.stack([
        tf.image.resize_images(parsing_out3_100, tf.shape(image_batch_origin)[1:3, ]),
        tf.image.resize_images(parsing_out3_075, tf.shape(image_batch_origin)[1:3, ]),
        tf.image.resize_images(parsing_out3_125, tf.shape(image_batch_origin)[1:3, ])
    ]), axis=0)

    # Average of the three refinement stages.
    raw_output = tf.reduce_mean(tf.stack(
        [parsing_out1, parsing_out2, parsing_out3]), axis=0)
    # Entry 0 belongs to the image, entry 1 to its reversed copy.
    head_output, tail_output = tf.unstack(raw_output, num=2, axis=0)
    tail_list = tf.unstack(tail_output, num=20, axis=2)
    tail_list_rev = [None] * 20
    for xx in range(14):
        tail_list_rev[xx] = tail_list[xx]
    # Channel pairs 14/15, 16/17 and 18/19 are swapped for the reversed copy
    # (presumably left/right body-part classes -- confirm against the
    # label file).
    tail_list_rev[14] = tail_list[15]
    tail_list_rev[15] = tail_list[14]
    tail_list_rev[16] = tail_list[17]
    tail_list_rev[17] = tail_list[16]
    tail_list_rev[18] = tail_list[19]
    tail_list_rev[19] = tail_list[18]
    tail_output_rev = tf.stack(tail_list_rev, axis=2)
    tail_output_rev = tf.reverse(tail_output_rev, tf.stack([1]))

    raw_output_all = tf.reduce_mean(tf.stack([head_output, tail_output_rev]),
                                    axis=0)
    raw_output_all = tf.expand_dims(raw_output_all, dim=0)
    raw_output_all = tf.argmax(raw_output_all, dimension=3)
    pred_all = tf.expand_dims(raw_output_all, dim=3)  # Create 4-d tensor.

    # Which variables to load.
    restore_var = tf.global_variables()

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    masked_images = []
    threads = []
    # The session and the queue-runner threads are released in the
    # ``finally`` clauses even when loading or prediction raises.
    try:
        init = tf.global_variables_initializer()
        sess.run(init)
        sess.run(tf.local_variables_initializer())

        ######################################################
        # Load weights.
        loader = tf.train.Saver(var_list=restore_var)
        if RESTORE_FROM is not None:
            if load(loader, sess, RESTORE_FROM):
                print(" [*] Load SUCCESS")
            else:
                print(" [!] Load failed...")

        # Start queue threads.
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)
        print("Prediciting Now")

        # Iterate over the listed images, one prediction per step.
        for step in range(NUM_STEPS):
            parsing_ = sess.run(pred_all)
            if step % 100 == 0:
                print('step {:d}'.format(step))
                print(image_list[step])

            img_name = Path(image_list[step].split('/')[-1])
            img_path = os.path.join(DATA_DIRECTORY, img_name)
            if args.style_preprocess:
                msk = crop_images(img_path, parsing_, classes=args.classes)
            else:
                msk = segment_images(img_path, parsing_, classes=args.classes)

            if msk.size != 0:
                # Best effort: a failure on one image is reported and the
                # loop moves on to the next image.
                try:
                    for msk_image, label in msk:
                        if label not in ignore_labels and valid_mask(msk_image):
                            masked_images.append(
                                [img_name, LABELS[label], msk_image])
                except Exception as e:
                    print("JPP - ", e, img_name)
    finally:
        try:
            coord.request_stop()
            coord.join(threads)
        finally:
            sess.close()

    if args.save_images:
        print("Saving Images")
        for name, label, image in masked_images:
            cv2.imwrite(str(Path(args.output_dir) / label / name),
                        cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
            if args.pattern:
                cv2.imwrite(str(Path(pattern_output_dir) / label / name),
                            cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
def train(args):
    """Adversarially train the segmentation generator and its discriminator.

    Builds the input pipeline, generator, discriminator, losses, streaming
    metrics, optimizers and summaries, restores weights, then runs
    ``args.num_steps`` iterations. Each iteration performs 10 discriminator
    updates followed by 1 generator update. Weights are saved every
    ``args.save_pred_every`` steps; every 50 steps (and on the last step) a
    progress line is printed, summaries are written and the local
    (metric) variables are re-initialised.

    Args:
        args: parsed options. Attributes read here include ``random_seed``,
            ``g_name``, ``d_name``, ``lambd``, ``learning_rate``,
            ``is_val``, ``data_dir``, ``img_size``, ``random_scale``,
            ``random_mirror``, ``random_crop``, ``ignore_label``,
            ``batch_size``, ``num_classes``, ``num_steps``, ``power``,
            ``momentum``, ``save_num_images``, ``log_dir``,
            ``restore_from``, ``baseweight_from`` and ``save_pred_every``.
    """
    ## set hyparameter
    img_mean = np.array((104.00698793, 116.66876762, 122.67891434),
                        dtype=np.float32)
    tf.set_random_seed(args.random_seed)
    coord = tf.train.Coordinator()
    print("g_name:", args.g_name)
    print("d_name:", args.d_name)
    print("lambda:", args.lambd)
    print("learning_rate:", args.learning_rate)
    print("is_val:", args.is_val)
    print("---------------------------------")

    ## load data
    with tf.name_scope("create_inputs"):
        reader = ImageReader(args.data_dir, args.img_size, args.random_scale,
                             args.random_mirror, args.random_crop,
                             args.ignore_label, args.is_val, img_mean, coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)
        print("Data is ready!")

    ## load model
    # Second view of the reader image: mean added back and divided by 255.
    image_normal_batch = tf.train.batch([
        (reader.image + img_mean) / 255.,
    ], args.batch_size, dynamic_pad=True)
    g_net, g_net_x = choose_generator(args.g_name, image_batch,
                                      image_normal_batch)
    score_map = g_net.get_output()
    fk_batch = tf.nn.softmax(score_map, dim=-1)
    pre_batch = tf.expand_dims(tf.cast(tf.argmax(fk_batch, axis=-1), tf.uint8),
                               axis=-1)
    gt_batch = tf.image.resize_nearest_neighbor(label_batch,
                                                tf.shape(score_map)[1:3])
    # Pixels carrying the ignore label take the generator's own prediction.
    gt_batch = tf.where(tf.equal(gt_batch, args.ignore_label), pre_batch,
                        gt_batch)
    gt_batch = convert_to_scaling(fk_batch, args.num_classes, gt_batch)
    x_batch = g_net_x.get_appointed_layer('generator/image_conv5_3')
    d_fk_net, d_gt_net = choose_discriminator(args.d_name, fk_batch, gt_batch,
                                              x_batch)
    d_fk_pred = d_fk_net.get_output()  # fake segmentation result in d
    d_gt_pred = d_gt_net.get_output()  # ground-truth result in d

    label, logits = convert_to_calculateloss(score_map, args.num_classes,
                                             label_batch)
    predict_label = tf.argmax(logits, axis=1)
    predict_batch = g_net.topredict(score_map, tf.shape(image_batch)[1:3])
    print("The model has been created!")

    ## get all kinds of variables list
    g_restore_var = [
        v for v in tf.global_variables() if 'discriminator' not in v.name
    ]
    g_var = [
        v for v in tf.trainable_variables()
        if 'generator' in v.name and 'deconv' not in v.name
    ]
    d_var = [v for v in tf.trainable_variables() if 'discriminator' in v.name]
    # batch_norm training open
    g_trainable_var = g_var
    d_trainable_var = d_var

    ## set loss
    mce_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label,
                                                       logits=logits))
    # Adversarial term of the generator: the discriminator should label the
    # generated maps as real (ones); weighted by args.lambd.
    g_bce_loss = args.lambd * tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.ones_like(d_fk_pred), logits=d_fk_pred))
    g_loss = mce_loss + g_bce_loss
    # Discriminator: ground truth -> ones, generated -> zeros.
    d_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.ones_like(d_gt_pred), logits=d_gt_pred)
        + tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.zeros_like(d_fk_pred), logits=d_fk_pred))

    fk_score_var = tf.reduce_mean(tf.sigmoid(d_fk_pred))
    gt_score_var = tf.reduce_mean(tf.sigmoid(d_gt_pred))
    # Streaming metrics; their accumulators are local variables.
    mce_loss_var, mce_loss_op = tf.metrics.mean(mce_loss)
    g_bce_loss_var, g_bce_loss_op = tf.metrics.mean(g_bce_loss)
    g_loss_var, g_loss_op = tf.metrics.mean(g_loss)
    d_loss_var, d_loss_op = tf.metrics.mean(d_loss)
    iou_var, iou_op = tf.metrics.mean_iou(label, predict_label,
                                          args.num_classes)
    accuracy_var, acc_op = tf.metrics.accuracy(label, predict_label)
    metrics_op = tf.group(mce_loss_op, g_bce_loss_op, g_loss_op, d_loss_op,
                          iou_op, acc_op)

    ## set optimizer
    iterstep = tf.placeholder(dtype=tf.float32, shape=[],
                              name='iteration_step')
    base_lr = tf.constant(args.learning_rate, dtype=tf.float32, shape=[])
    # learning rate reduce with the time
    lr = tf.scalar_mul(base_lr, tf.pow(
        (1 - iterstep / args.num_steps), args.power))

    # These gradients are only used for the histogram summaries below; the
    # train ops build their own through minimize().
    g_gradients = tf.train.AdamOptimizer(learning_rate=lr).compute_gradients(
        g_loss, var_list=g_trainable_var)
    d_gradients = tf.train.MomentumOptimizer(
        learning_rate=lr * 10,
        momentum=args.momentum).compute_gradients(d_loss,
                                                  var_list=d_trainable_var)
    grad_fk_oi = tf.gradients(d_fk_pred, fk_batch, name='grad_fk_oi')[0]
    grad_gt_oi = tf.gradients(d_gt_pred, gt_batch, name='grad_gt_oi')[0]
    grad_fk_img_oi = tf.gradients(d_fk_pred, image_batch,
                                  name='grad_fk_img_oi')[0]
    grad_gt_img_oi = tf.gradients(d_gt_pred, image_batch,
                                  name='grad_gt_img_oi')[0]

    train_g_op = tf.train.AdamOptimizer(learning_rate=lr).minimize(
        g_loss, var_list=g_trainable_var)
    train_d_op = tf.train.MomentumOptimizer(
        learning_rate=lr * 10,
        momentum=args.momentum).minimize(d_loss, var_list=d_trainable_var)

    ## set summary
    vs_image = tf.py_func(inv_preprocess,
                          [image_batch, args.save_num_images, img_mean],
                          tf.uint8)
    vs_label = tf.py_func(
        decode_labels,
        [label_batch, args.save_num_images, args.num_classes], tf.uint8)
    vs_predict = tf.py_func(
        decode_labels,
        [predict_batch, args.save_num_images, args.num_classes], tf.uint8)
    tf.summary.image(name='image collection_train',
                     tensor=tf.concat(axis=2,
                                      values=[vs_image, vs_label, vs_predict]),
                     max_outputs=args.save_num_images)
    tf.summary.scalar('fk_score', fk_score_var)
    tf.summary.scalar('gt_score', gt_score_var)
    tf.summary.scalar('g_loss_train', g_loss_var)
    tf.summary.scalar('d_loss_train', d_loss_var)
    tf.summary.scalar('mce_loss_train', mce_loss_var)
    tf.summary.scalar('g_bce_loss_train', g_bce_loss_var)
    tf.summary.scalar('iou_train', iou_var)
    tf.summary.scalar('accuracy_train', accuracy_var)
    tf.summary.scalar('grad_fk_oi', tf.reduce_mean(tf.abs(grad_fk_oi)))
    tf.summary.scalar('grad_gt_oi', tf.reduce_mean(tf.abs(grad_gt_oi)))
    tf.summary.scalar('grad_fk_img_oi',
                      tf.reduce_mean(tf.abs(grad_fk_img_oi)))
    tf.summary.scalar('grad_gt_img_oi',
                      tf.reduce_mean(tf.abs(grad_gt_img_oi)))

    for grad, var in g_gradients + d_gradients:
        tf.summary.histogram(var.op.name + "/gradients", grad)

    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name + "/values", var)

    summary_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(args.log_dir,
                                           graph=tf.get_default_graph(),
                                           max_queue=3)

    ## set session
    # .get(): an unset CUDA_VISIBLE_DEVICES must not abort training just
    # because of this log line.
    print("GPU index:" + str(os.environ.get('CUDA_VISIBLE_DEVICES')))
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    global_init = tf.global_variables_initializer()
    local_init = tf.local_variables_initializer()

    threads = []
    # The coordinator, the summary writer and the session are released in
    # the ``finally`` clauses, also when training is interrupted.
    try:
        sess.run(global_init)
        sess.run(local_init)

        ## set saver
        saver_all = tf.train.Saver(var_list=tf.global_variables(),
                                   max_to_keep=2)
        trained_step = 0
        if os.path.exists(args.restore_from + 'checkpoint'):
            # Resume a previous run of the whole model.
            trained_step = load_weight(args.restore_from, saver_all, sess)
        else:
            # Start from the base weights: everything except the
            # discriminator variables.
            saver_g = tf.train.Saver(var_list=g_restore_var, max_to_keep=2)
            load_weight(args.baseweight_from['g'], saver_g,
                        sess)  # the weight is the completely g model

        threads = tf.train.start_queue_runners(sess, coord)
        print("all setting has been done,training start!")

        ## start training
        # Discriminator updates per generator update.
        d_train_steps = 10
        g_train_steps = 1

        for step in range(args.num_steps):
            now_step = int(
                trained_step) + step if trained_step is not None else step
            feed_dict = {iterstep: step}
            for i in range(d_train_steps):
                _, _ = sess.run([train_d_op, metrics_op], feed_dict)

            for i in range(g_train_steps):
                g_loss_, mce_loss_, g_bce_loss_, d_loss_, _, _ = sess.run([
                    g_loss_var, mce_loss_var, g_bce_loss_var, d_loss_var,
                    train_g_op, metrics_op
                ], feed_dict)

            if step > 0 and step % args.save_pred_every == 0:
                save_weight(args.restore_from, saver_all, sess, now_step)

            if step % 50 == 0 or step == args.num_steps - 1:
                print('step={} d_loss={} g_loss={} mce_loss={} g_bce_loss_={}'.
                      format(now_step, d_loss_, g_loss_, mce_loss_,
                             g_bce_loss_))
                summary_str = sess.run(summary_op, feed_dict)
                summary_writer.add_summary(summary_str, now_step)
                # Re-initialising the local variables resets the streaming
                # metric accumulators for the next reporting window.
                sess.run(local_init)
    finally:
        ## end training
        try:
            coord.request_stop()
            coord.join(threads)
        finally:
            # close() flushes the events still held in the writer's queue.
            summary_writer.close()
            sess.close()
    print('end....')
def main():
    """Create the model and start the training."""
    args = get_arguments()

    # Get configurations here. Some command-line arguments initialise the
    # configuration; the training hyperparameters themselves are set in the
    # TrainConfig class.
    # Note: filter scale is 1 for the pruned model and 2 for the non-pruned
    # model. The non-pruned model has twice as many filters as the pruned
    # one, e.g., [h, w, 64] <-> [h, w, 32].
    cfg = TrainConfig(
        dataset=args.dataset,
        is_training=True,
        random_scale=args.random_scale,
        random_mirror=args.random_mirror,
        filter_scale=args.filter_scale,
    )
    cfg.display()

    # Setup training network and training samples
    train_reader = ImageReader(cfg=cfg, mode='train')
    train_net = ICNet_BN(image_reader=train_reader, cfg=cfg, mode='train')
    train_losses = create_losses(train_net, train_net.labels, cfg)
    loss_sub4, loss_sub24, loss_sub124, reduced_loss = train_losses

    # Setup validation network and validation samples; the variables of the
    # training network are reused.
    with tf.variable_scope('', reuse=True):
        val_reader = ImageReader(cfg, mode='eval')
        val_net = ICNet_BN(image_reader=val_reader, cfg=cfg, mode='train')
        val_losses = create_losses(val_net, val_net.labels, cfg)
        val_loss_sub4, val_loss_sub24, val_loss_sub124, val_reduced_loss = val_losses

    # Using Poly learning rate policy
    base_lr = tf.constant(cfg.LEARNING_RATE)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    remaining = 1 - step_ph / cfg.TRAINING_STEPS
    learning_rate = tf.scalar_mul(base_lr, tf.pow(remaining, cfg.POWER))

    # Set restore variable
    restore_var = tf.global_variables()

    # Variables whose name contains 'beta' or 'gamma' are trained only when
    # args.train_beta_gamma is set.
    all_trainable = []
    for candidate in tf.trainable_variables():
        is_beta_or_gamma = 'beta' in candidate.name or 'gamma' in candidate.name
        if args.train_beta_gamma or not is_beta_or_gamma:
            all_trainable.append(candidate)

    # Gets moving_mean and moving_variance update operations from
    # tf.GraphKeys.UPDATE_OPS
    update_ops = (None if args.update_mean_var == False
                  else tf.get_collection(tf.GraphKeys.UPDATE_OPS))

    with tf.control_dependencies(update_ops):
        opt_conv = tf.train.MomentumOptimizer(learning_rate, cfg.MOMENTUM)
        grads = tf.gradients(reduced_loss, all_trainable)
        train_op = opt_conv.apply_gradients(zip(grads, all_trainable))

    # Create session & restore weights (only train_net is needed to create
    # the session since the validation network reuses it)
    train_net.create_session()
    # train_net.restore(cfg.model_weight, restore_var)
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=5)

    # Everything fetched in one training step.
    fetches = [reduced_loss, loss_sub4, loss_sub24, loss_sub124,
               val_reduced_loss, train_op]
    report = ('step {:d} \t total loss = {:.3f}, sub4 = {:.3f}, sub24 = {:.3f}, sub124 = {:.3f}, val_loss: {:.3f} ({:.3f} sec/step)')

    # Iterate over training steps.
    for step in range(cfg.TRAINING_STEPS):
        start_time = time.time()

        results = train_net.sess.run(fetches, feed_dict={step_ph: step})
        loss_value, loss1, loss2, loss3, val_loss_value, _ = results
        # Snapshot after the update, every SAVE_PRED_EVERY steps.
        if step % cfg.SAVE_PRED_EVERY == 0:
            train_net.save(saver, cfg.SNAPSHOT_DIR, step)

        duration = time.time() - start_time
        print(report.format(step, loss_value, loss1, loss2, loss3,
                            val_loss_value, duration))
def main(model_log_dir, check_point, mode,
         test_dir='/home/dls/meng/DLProject/kaggle_dataset/dataset/test/'):
    """Evaluate, time, or run inference with a trained checkpoint.

    Args:
        model_log_dir: Sub-directory of ``LOG_PATH`` holding the checkpoint;
            result files are written next to it.
        check_point: Step number of the checkpoint
            (``model.ckpt-<check_point>``).
        mode: ``'eval'`` computes mIoU and timing and writes ``eval.json``;
            ``'compute_speed'`` only times the forward pass and writes
            ``eval.json``; any other value runs inference over the images in
            ``test_dir`` and writes ``SUBMISSION.csv`` and ``inference.json``.
        test_dir: Directory whose ``*.jpg`` files are predicted in inference
            mode.  Defaults to the path that used to be hard-coded.

    Raises:
        ValueError: In ``'eval'`` mode when no mIoU metric is defined for
            ``cfg.dataset``.
        IOError: In inference mode when ``test_dir`` contains no ``.jpg``
            file.
    """
    tf.reset_default_graph()
    args = get_arguments()
    print('mode:{}'.format(mode))

    # Every mode uses the same configuration and checkpoint path.
    log_dir = os.path.join(LOG_PATH, model_log_dir)
    cfg = Config(dataset=args.dataset,
                 is_training=False,
                 filter_scale=args.filter_scale,
                 eval_path_log=log_dir)
    cfg.model_paths['others'] = os.path.join(
        log_dir, 'model.ckpt-%d' % check_point)
    result_dir = os.path.dirname(cfg.model_paths['others'])

    model = model_config[args.model]
    # The reader is constructed in every mode (as before), although the
    # inference network below is built without it.
    reader = ImageReader(cfg=cfg, mode='eval')

    duration = 0
    if mode in ('eval', 'compute_speed'):
        net = model(image_reader=reader, cfg=cfg, mode='eval')

        # mIoU: flatten predictions/labels and drop ignore_label pixels.
        pred_flatten = tf.reshape(net.output, [-1, ])
        label_flatten = tf.reshape(net.labels, [-1, ])
        mask = tf.not_equal(label_flatten, cfg.param['ignore_label'])
        indices = tf.squeeze(tf.where(mask), 1)
        gt = tf.cast(tf.gather(label_flatten, indices), tf.int32)
        pred = tf.gather(pred_flatten, indices)

        if cfg.dataset == 'ade20k':
            # Predictions are shifted by one and one extra class is counted.
            pred = tf.add(pred, tf.constant(1, dtype=tf.int64))
            mIoU, update_op = tf.metrics.mean_iou(
                predictions=pred,
                labels=gt,
                num_classes=cfg.param['num_classes'] + 1)
        elif cfg.dataset in ('cityscapes', 'others'):
            mIoU, update_op = tf.metrics.mean_iou(
                predictions=pred,
                labels=gt,
                num_classes=cfg.param['num_classes'])
        elif mode == 'eval':
            # Previously this surfaced later as a NameError on update_op.
            raise ValueError(
                'no mIoU metric is defined for dataset {!r}'.format(
                    cfg.dataset))

        net.create_session()
        net.restore(cfg.model_paths[args.model])

        eval_steps = cfg.param['eval_steps']
        if mode == 'eval':
            for _ in trange(eval_steps, desc='evaluation', leave=True):
                start = time.time()
                # The fetch list is unchanged so the measured time stays
                # comparable with earlier runs; only update_op's side
                # effect is needed here.
                net.sess.run(
                    [update_op, pred, net.images, net.labels, net.output])
                duration += time.time() - start

            final_mIou = net.sess.run(mIoU)
            print('total time:{} mean inference time:{} mIoU: {}'.format(
                duration, duration / eval_steps, final_mIou))
            Config.save_to_json(dict={
                'FINAL_MIOU': float(final_mIou),
                "EVAL_STEPS": eval_steps
            },
                                path=result_dir,
                                file_name='eval.json',
                                mode=mode)
        else:
            for _ in trange(eval_steps, desc='evaluation', leave=True):
                start = time.time()
                net.sess.run(pred)
                duration += time.time() - start

            print('total time:{} mean inference time:{}'.format(
                duration, duration / eval_steps))
            Config.save_to_json(dict={
                'Total Inference Time': float(duration),
                "Mean Inference Time": duration / eval_steps
            },
                                path=result_dir,
                                file_name='eval.json',
                                mode=mode)
    else:
        # Inference mode: predict a mask for every test image.
        net = model(cfg=cfg, mode='inference')
        net.create_session()
        net.restore(cfg.model_paths[args.model])

        fig_list = glob.glob(os.path.join(test_dir, '*.jpg'))
        if not fig_list:
            # Without this guard the mean below would fail on an unbound
            # loop index / division by zero.
            raise IOError('no .jpg images found in {}'.format(test_dir))
        total = len(fig_list)

        raw_data = {'img': [], 'rle_mask': []}
        for index, image_path in enumerate(fig_list):
            with Image.open(image_path) as img:
                start = time.time()
                icnet_predict = net.predict(img)
                stop = time.time()
            duration += (stop - start)

            mask_array = np.squeeze(icnet_predict)
            en = run_length_encode(mask_array)
            print('{}/{} cost:{}s'.format(index, total, str(stop - start)))

            if image_path.find('.jpg') != -1:
                print('i is {}'.format(image_path))
                raw_data['img'].append(os.path.basename(image_path))
                raw_data['rle_mask'].append(en)
            else:
                print('i is {}, not .jpg, exit now!'.format(image_path))
                exit()

        mean_inference_time = duration / total
        df = pandas.DataFrame(raw_data, columns=['img', 'rle_mask'])
        with open(os.path.join(log_dir, 'SUBMISSION.csv'), mode='w') as f:
            df.to_csv(f, index=False)
        Config.save_to_json(dict={
            'Total Inference Time': float(duration),
            "Mean Inference Time": mean_inference_time
        },
                            path=result_dir,
                            file_name='inference.json',
                            mode=mode)

    # Release the default session, if one is installed.  The previous
    # `sess._exit__` call was a typo and raised AttributeError.
    sess = tf.get_default_session()
    if sess is not None:
        sess.close()
def train_setup(self):
    """Build the training graph: inputs, network, loss, optimizers, summary.

    Sets ``self.coord``, ``self.image_batch``, ``self.label_batch``,
    ``self.reduced_loss``, ``self.curr_step``, ``self.train_op``,
    ``self.saver``, ``self.loader``, ``self.pred``, ``self.total_summary``
    and ``self.summary_writer``.  Exits the process with status -1 when
    ``self.conf.encoder_name`` is not one of res101 / res50 / deeplab.
    """
    conf = self.conf
    tf.set_random_seed(conf.random_seed)

    # Create queue coordinator.
    self.coord = tf.train.Coordinator()

    # Input size
    input_size = (conf.input_height, conf.input_width)

    # Load reader
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            conf.data_dir,
            conf.data_list,
            input_size,
            conf.random_scale,
            conf.random_mirror,
            conf.ignore_label,
            IMG_MEAN,
            self.coord)
        self.image_batch, self.label_batch = reader.dequeue(conf.batch_size)

    # Create network
    encoder_name = conf.encoder_name
    if encoder_name not in ['res101', 'res50', 'deeplab']:
        print('encoder_name ERROR!')
        print("Please input: res101, res50, or deeplab")
        sys.exit(-1)

    if encoder_name == 'deeplab':
        net = Deeplab_v2(self.image_batch, conf.num_classes, True)

        def is_encoder(name):
            return 'fc' not in name

        def is_decoder(name):
            return 'fc' in name
    else:
        net = ResNet_segmentation(
            self.image_batch, conf.num_classes, True, encoder_name)

        def is_encoder(name):
            return 'resnet_v1' in name

        def is_decoder(name):
            return 'decoder' in name

    # Variables that load from pre-trained model.
    restore_var = [v for v in tf.global_variables() if is_encoder(v.name)]
    # Trainable variables, split into the fine-tuned encoder (lr * 1.0)
    # and the decoder.
    all_trainable = tf.trainable_variables()
    encoder_trainable = [v for v in all_trainable if is_encoder(v.name)]
    decoder_trainable = [v for v in all_trainable if is_decoder(v.name)]

    # Decoder weights/gamma use lr * 10.0, biases/beta use lr * 20.0.
    decoder_w_trainable = [
        v for v in decoder_trainable
        if 'weights' in v.name or 'gamma' in v.name
    ]
    decoder_b_trainable = [
        v for v in decoder_trainable
        if 'biases' in v.name or 'beta' in v.name
    ]

    # Check that the groups partition the trainable variables.
    n_encoder = len(encoder_trainable)
    n_decoder_w = len(decoder_w_trainable)
    assert(len(all_trainable) == len(decoder_trainable) + n_encoder)
    assert(len(decoder_trainable) == n_decoder_w + len(decoder_b_trainable))

    # Network raw output
    raw_output = net.outputs  # [batch_size, h, w, 21]

    # Output size
    output_shape = tf.shape(raw_output)
    output_size = (output_shape[1], output_shape[2])

    # Ground truth: ignoring all labels greater or equal than n_classes
    label_proc = prepare_label(
        self.label_batch, output_size,
        num_classes=conf.num_classes, one_hot=False)
    raw_gt = tf.reshape(label_proc, [-1,])
    valid = tf.less_equal(raw_gt, conf.num_classes - 1)
    indices = tf.squeeze(tf.where(valid), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    raw_prediction = tf.reshape(raw_output, [-1, conf.num_classes])
    prediction = tf.gather(raw_prediction, indices)

    # Pixel-wise softmax_cross_entropy loss
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=prediction, labels=gt)
    # L2 regularization
    l2_losses = [
        conf.weight_decay * tf.nn.l2_loss(v)
        for v in all_trainable if 'weights' in v.name
    ]
    # Loss function
    self.reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

    # 'poly' learning rate
    base_lr = tf.constant(conf.learning_rate)
    self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
    decay = tf.pow((1 - self.curr_step / conf.num_steps), conf.power)
    learning_rate = tf.scalar_mul(base_lr, decay)

    # One optimizer per variable group stands in for Caffe's per-layer
    # lr_mult, which scales the learning rate of each layer.
    opt_encoder = tf.train.MomentumOptimizer(learning_rate, conf.momentum)
    opt_decoder_w = tf.train.MomentumOptimizer(
        learning_rate * 10.0, conf.momentum)
    opt_decoder_b = tf.train.MomentumOptimizer(
        learning_rate * 20.0, conf.momentum)

    # 'minimize' is not used so that every group can be updated with its
    # own learning rate: gradients are computed once and then sliced.
    grads = tf.gradients(
        self.reduced_loss,
        encoder_trainable + decoder_w_trainable + decoder_b_trainable)
    w_end = n_encoder + n_decoder_w
    grads_encoder = grads[:n_encoder]
    grads_decoder_w = grads[n_encoder:w_end]
    grads_decoder_b = grads[w_end:]

    # Update params
    train_op_conv = opt_encoder.apply_gradients(
        zip(grads_encoder, encoder_trainable))
    train_op_fc_w = opt_decoder_w.apply_gradients(
        zip(grads_decoder_w, decoder_w_trainable))
    train_op_fc_b = opt_decoder_b.apply_gradients(
        zip(grads_decoder_b, decoder_b_trainable))

    # The final train_op also triggers the UPDATE_OPS collection
    # (moving_mean and moving_variance).
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        self.train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)

    # Saver for storing checkpoints of the model
    self.saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=0)

    # Loader for loading the pre-trained model
    self.loader = tf.train.Saver(var_list=restore_var)

    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output, input_size)
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    self.pred = tf.expand_dims(raw_output_up, dim=3)

    # Image summary: input, label and prediction side by side.
    images_summary = tf.py_func(
        inv_preprocess, [self.image_batch, 2, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(
        decode_labels, [self.label_batch, 2, conf.num_classes], tf.uint8)
    preds_summary = tf.py_func(
        decode_labels, [self.pred, 2, conf.num_classes], tf.uint8)
    summary_panels = tf.concat(
        axis=2, values=[images_summary, labels_summary, preds_summary])
    self.total_summary = tf.summary.image(
        'images', summary_panels, max_outputs=2)  # Concatenate row-wise.

    if not os.path.exists(conf.logdir):
        os.makedirs(conf.logdir)
    self.summary_writer = tf.summary.FileWriter(
        conf.logdir, graph=tf.get_default_graph())
def train_setup(self):
    """Build the multi-GPU Deeplab_v2 + CRF training graph.

    For every device returned by ``get_available_gpus()`` a reader, a
    network, a loss and a set of gradient-accumulation variables are
    created.  The accumulated gradients are averaged across devices with
    ``average_gradients`` and applied by four momentum optimizers (encoder,
    decoder weights, decoder biases, CRF).

    Sets ``self.coord``, ``self.loaders``, ``self.im_list``,
    ``self.image_batch``, ``self.label_batch``, ``self.sp_batch``,
    ``self.reduced_loss``, ``self.curr_step``, ``self.zero_op``,
    ``self.accum_grads_op``, ``self.train_op``, ``self.saver``,
    ``self.pred``, ``self.total_summary`` and ``self.summary_writer``.
    The per-device attributes keep the values of the last device.

    Raises:
        RuntimeError: If no GPU is available (this used to fail later with
            a NameError on the optimizers).
    """
    conf = self.conf
    tf.set_random_seed(conf.random_seed)

    # Create queue coordinator.
    self.coord = tf.train.Coordinator()

    # Input size
    h, w = (conf.input_height, conf.input_width)
    input_size = (h, w)

    # Devices
    gpu_list = get_available_gpus()
    if not gpu_list:
        raise RuntimeError(
            'train_setup needs at least one GPU, but get_available_gpus() '
            'returned none')

    zip_encoder, zip_decoder_b, zip_decoder_w, zip_crf = [], [], [], []
    previous_crf_names = set()
    restore_vars = []
    self.loaders = []
    self.im_list = []

    for device in gpu_list:
        with tf.device(device):
            # Load reader
            with tf.name_scope("create_inputs"):
                reader = ImageReader(conf.data_dir, conf.data_list,
                                     input_size, conf.random_scale,
                                     conf.random_mirror, conf.ignore_label,
                                     IMG_MEAN, self.coord)
                (self.image_batch, self.label_batch,
                 self.sp_batch) = reader.dequeue(conf.batch_size)
            self.im_list.append(self.image_batch)

            # NOTE(review): the 0.75x / 0.5x inputs belong to the disabled
            # multi-scale branches and are not consumed below.  The ops are
            # still created so the graph is built exactly as before.
            image_batch_075 = tf.image.resize_images(
                self.image_batch, [int(h * 0.75), int(w * 0.75)])
            image_batch_05 = tf.image.resize_images(
                self.image_batch, [int(h * 0.5), int(w * 0.5)])
            sp_batch_075 = tf.image.resize_images(
                self.sp_batch, [int(h * 0.75), int(w * 0.75)])
            sp_batch_05 = tf.image.resize_images(
                self.sp_batch, [int(h * 0.5), int(w * 0.5)])

            # Create network; superpixels are passed for every crf_type
            # except plain 'crf'.
            with tf.variable_scope('', reuse=False):
                if conf.crf_type == 'crf':
                    net = Deeplab_v2(self.image_batch,
                                     conf.num_classes,
                                     True,
                                     rescale075=False,
                                     rescale05=False,
                                     crf_type=conf.crf_type)
                else:
                    net = Deeplab_v2(self.image_batch,
                                     conf.num_classes,
                                     True,
                                     rescale075=False,
                                     rescale05=False,
                                     crf_type=conf.crf_type,
                                     superpixels=self.sp_batch)

            # Variables that load from pre-trained model.
            restore_var = [
                v for v in tf.global_variables()
                if ('fc' not in v.name and 'crfrnn' not in v.name)
            ]
            restore_vars.append(restore_var)

            # Trainable variables, minus the CRF variables already claimed
            # by an earlier device.  Built as a filtered copy instead of
            # removing items from the list while iterating over it.
            all_trainable = [
                v for v in tf.trainable_variables()
                if v.name not in previous_crf_names
            ]
            crf_trainable = [v for v in all_trainable if 'crfrnn' in v.name]
            previous_crf_names.update(v.name for v in crf_trainable)

            # Fine-tune part (lr * 1.0)
            encoder_trainable = [
                v for v in all_trainable
                if 'fc' not in v.name and 'crfrnn' not in v.name
            ]
            # Decoder part
            decoder_trainable = [
                v for v in all_trainable
                if 'fc' in v.name and 'crfrnn' not in v.name
            ]
            decoder_w_trainable = [
                v for v in decoder_trainable
                if ('weights' in v.name or 'gamma' in v.name)
                and 'crfrnn' not in v.name
            ]  # lr * 10.0
            decoder_b_trainable = [
                v for v in decoder_trainable
                if ('biases' in v.name or 'beta' in v.name)
                and 'crfrnn' not in v.name
            ]  # lr * 20.0

            # Check that the groups partition the trainable variables.
            assert (len(all_trainable) == len(encoder_trainable) +
                    len(decoder_trainable) + len(crf_trainable))
            assert (len(decoder_trainable) == len(decoder_w_trainable) +
                    len(decoder_b_trainable))

            # Network raw output.  With the multi-scale branches disabled,
            # raw_output is the same tensor as raw_output100.
            raw_output100 = net.outputs
            raw_output = raw_output100

            # Ground truth: ignoring all labels greater or equal than
            # n_classes
            label_proc = prepare_label(self.label_batch,
                                       tf.stack(raw_output.get_shape()[1:3]),
                                       num_classes=conf.num_classes,
                                       one_hot=True)
            raw_gt = tf.reshape(label_proc, [-1, ])
            indices = tf.squeeze(
                tf.where(tf.less_equal(raw_gt, conf.num_classes - 1)), 1)
            gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
            raw_prediction = tf.reshape(raw_output, [-1, conf.num_classes])
            raw_prediction100 = tf.reshape(raw_output100,
                                           [-1, conf.num_classes])
            # gt / prediction / prediction100 fed the sparse loss that is
            # no longer used; the ops are kept to leave the graph unchanged.
            prediction = tf.gather(raw_prediction, indices)
            prediction100 = tf.gather(raw_prediction100, indices)

            # Pixel-wise softmax_cross_entropy loss on one-hot labels.
            # Only label_proc[0] is used, reshaped to (h * w, num_classes).
            # NOTE(review): presumably assumes batch_size == 1 and a
            # network output at input resolution -- confirm.
            loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=raw_prediction,
                labels=tf.reshape(label_proc[0], (h * w, conf.num_classes)))
            loss100 = tf.nn.softmax_cross_entropy_with_logits(
                logits=raw_prediction100,
                labels=tf.reshape(label_proc[0], (h * w, conf.num_classes)))

            # L2 regularization terms are built but not added to the loss.
            l2_losses = [
                conf.weight_decay * tf.nn.l2_loss(v) for v in all_trainable
                if 'weights' in v.name
            ]

            # Loss function.
            # NOTE(review): loss and loss100 come from the same logits, so
            # this is twice the mean cross-entropy -- confirm intent.
            self.reduced_loss = tf.reduce_mean(loss) + tf.reduce_mean(
                loss100)

            # 'poly' learning rate
            base_lr = tf.constant(conf.learning_rate)
            self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
            learning_rate = tf.scalar_mul(
                base_lr,
                tf.pow((1 - self.curr_step / conf.num_steps), conf.power))

            # One optimizer per variable group stands in for Caffe's
            # per-layer lr_mult.
            opt_encoder = tf.train.MomentumOptimizer(learning_rate,
                                                     conf.momentum)
            opt_decoder_w = tf.train.MomentumOptimizer(
                learning_rate * 10.0, conf.momentum)
            opt_decoder_b = tf.train.MomentumOptimizer(
                learning_rate * 20.0, conf.momentum)
            opt_crf = tf.train.MomentumOptimizer(learning_rate,
                                                 conf.momentum)

            # Gradient accumulation: one non-trainable buffer per variable,
            # ordered encoder, decoder weights, decoder biases, CRF.
            grouped_vars = (encoder_trainable + decoder_w_trainable +
                            decoder_b_trainable + crf_trainable)
            accum_grads = [
                tf.Variable(tf.zeros_like(v.initialized_value()),
                            trainable=False) for v in grouped_vars
            ]
            # Operation to clear the accumulated gradients for next batch.
            self.zero_op = [v.assign(tf.zeros_like(v)) for v in accum_grads]

            # 'minimize' is not used so each group can get its own learning
            # rate: compute the gradients, then accumulate and normalise.
            grads = tf.gradients(self.reduced_loss, grouped_vars)
            self.accum_grads_op = [
                accum.assign_add(grad / conf.grad_update_every)
                for accum, grad in zip(accum_grads, grads)
            ]

            # Slice the accumulators back into their groups.
            w_start = len(encoder_trainable)
            b_start = w_start + len(decoder_w_trainable)
            crf_start = b_start + len(decoder_b_trainable)
            grads_encoder = accum_grads[:w_start]
            grads_decoder_w = accum_grads[w_start:b_start]
            grads_decoder_b = accum_grads[b_start:crf_start]
            grads_crf = accum_grads[crf_start:]

            zip_encoder.append(list(zip(grads_encoder, encoder_trainable)))
            zip_decoder_b.append(
                list(zip(grads_decoder_b, decoder_b_trainable)))
            zip_decoder_w.append(
                list(zip(grads_decoder_w, decoder_w_trainable)))
            zip_crf.append(list(zip(grads_crf, crf_trainable)))

    avg_grads_encoder = average_gradients(zip_encoder)
    avg_grads_decoder_w = average_gradients(zip_decoder_w)
    avg_grads_decoder_b = average_gradients(zip_decoder_b)
    avg_grads_crf = average_gradients(zip_crf)

    # Update params.  Apply ops are created on every device, but only the
    # ones from the last device end up in self.train_op.
    for device in gpu_list:
        with tf.device(device):
            train_op_conv = opt_encoder.apply_gradients(avg_grads_encoder)
            train_op_fc_w = opt_decoder_w.apply_gradients(
                avg_grads_decoder_w)
            train_op_fc_b = opt_decoder_b.apply_gradients(
                avg_grads_decoder_b)
            train_op_crf = opt_crf.apply_gradients(avg_grads_crf)

    # The final train_op also triggers the UPDATE_OPS collection
    # (moving_mean and moving_variance).
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        self.train_op = tf.group(train_op_conv, train_op_fc_w,
                                 train_op_fc_b, train_op_crf)

    # Saver for storing checkpoints of the model
    self.saver = tf.train.Saver(var_list=tf.global_variables(),
                                max_to_keep=0)

    # Loaders for the pre-trained model, one per device.
    for device, device_restore_var in zip(gpu_list, restore_vars):
        with tf.device(device):
            self.loaders.append(tf.train.Saver(var_list=device_restore_var))

    # Processed predictions: for visualisation (last device's output).
    raw_output_up = tf.image.resize_bilinear(raw_output, input_size)
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    self.pred = tf.expand_dims(raw_output_up, axis=3)

    # Image summary: input, label and prediction side by side.
    images_summary = tf.py_func(inv_preprocess,
                                [self.image_batch, 1, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(
        decode_labels, [self.label_batch, 1, conf.num_classes], tf.uint8)
    preds_summary = tf.py_func(decode_labels,
                               [self.pred, 1, conf.num_classes], tf.uint8)
    self.total_summary = tf.summary.image(
        'images',
        tf.concat(axis=2,
                  values=[images_summary, labels_summary, preds_summary]),
        max_outputs=1)  # Concatenate row-wise.

    if not os.path.exists(conf.logdir):
        os.makedirs(conf.logdir)
    self.summary_writer = tf.summary.FileWriter(
        conf.logdir, graph=tf.get_default_graph())
def main(model_log_dir, check_point):
    """Evaluate a checkpoint on the 'others' dataset and save comparison plots.

    Runs ``cfg.param['eval_steps']`` evaluation steps, accumulating mIoU and
    the time spent in ``sess.run``.  Every 100th step a figure showing the
    input image, the ground-truth mask and the prediction is saved to the
    ``eval_compare`` directory next to the checkpoint and then shown.  The
    final mIoU is printed and written to ``eval.json``.

    Args:
        model_log_dir: Sub-directory of ``LOG_PATH`` holding the checkpoint.
        check_point: Step number of the checkpoint
            (``model.ckpt-<check_point>``).
    """
    tf.reset_default_graph()
    args = get_arguments()

    cfg = Config(dataset=args.dataset,
                 is_training=False,
                 filter_scale=args.filter_scale,
                 eval_path_log=os.path.join(LOG_PATH, model_log_dir))
    cfg.model_paths['others'] = os.path.join(LOG_PATH, model_log_dir,
                                             'model.ckpt-%d' % check_point)
    result_dir = os.path.dirname(cfg.model_paths['others'])

    # Was bound to the misspelled name `eval_sizemodel`, leaving `model`
    # undefined on the line that builds the network.
    model = model_config[args.model]
    reader = ImageReader(cfg=cfg, mode='eval')
    net = model(image_reader=reader, cfg=cfg, mode='eval')

    # mIoU: flatten predictions/labels and drop ignore_label pixels.
    pred_flatten = tf.reshape(net.output, [-1, ])
    label_flatten = tf.reshape(net.labels, [-1, ])
    mask = tf.not_equal(label_flatten, cfg.param['ignore_label'])
    indices = tf.squeeze(tf.where(mask), 1)
    gt = tf.cast(tf.gather(label_flatten, indices), tf.int32)
    pred = tf.gather(pred_flatten, indices)

    assert cfg.dataset == 'others'
    mIoU, update_op = tf.metrics.mean_iou(
        predictions=pred, labels=gt, num_classes=cfg.param['num_classes'])

    # Create the session and load the checkpoint before any net.sess use,
    # as the other evaluation entry points in this file do.  These calls
    # were missing here.
    net.create_session()
    net.restore(cfg.model_paths[args.model])

    save_comparation_path = result_dir + '/eval_compare'
    eval_steps = cfg.param['eval_steps']
    duration = 0
    for i in trange(eval_steps, desc='evaluation', leave=True):
        start = time.time()
        icnet_res, images, labels, _ = net.sess.run(
            [pred, net.images, net.labels, update_op])
        duration += time.time() - start

        if i % 100 != 0:
            continue

        # Rebuild a displayable input image (only needed for plotted steps).
        n_input = _extract_mean_revert(np.squeeze(images), IMG_MEAN,
                                       swap_channel=True)
        input_image = Image.fromarray(n_input.astype(np.uint8), 'RGB')

        icnet = np.array(np.reshape(icnet_res, cfg.param['eval_size']),
                         dtype=np.uint8) * 255
        icnet = Image.fromarray(icnet.astype(np.uint8))
        label_image = Image.fromarray(
            (np.squeeze(labels) * 255).astype(np.uint8))

        # Three panels on a 1x4 grid, matching the previous 141-143 layout.
        fig = plt.figure(figsize=(80, 13))
        panels = [
            ("Input Image", input_image, None),
            ("Ground Truth Mask", label_image, 'gray'),
            ("Our Result", icnet, 'gray'),
        ]
        for column, (title, picture, cmap) in enumerate(panels, start=1):
            axis = fig.add_subplot(1, 4, column)
            axis.set_title(title, fontsize=50)
            axis.imshow(picture, cmap=cmap)
            axis.axis('off')

        if not os.path.isdir(save_comparation_path):
            os.mkdir(save_comparation_path)
        # Save before show(): the figure may be gone once show() returns,
        # which would leave an empty image on disk.
        fig.savefig(os.path.join(save_comparation_path,
                                 'eval_%d_img.png' % i))
        plt.show()
        # Close the figure so figures do not accumulate over the run.
        plt.close(fig)

    final_mIou = net.sess.run(mIoU)
    print('total time:{} mean inference time:{} mIoU: {}'.format(
        duration, duration / eval_steps, final_mIou))
    Config.save_to_json(dict={'FINAL_MIOU': float(final_mIou),
                              "EVAL_STEPS": eval_steps},
                        path=result_dir,
                        file_name='eval.json',
                        mode='eval')

    # Release the default session, if one is installed.  The previous
    # `sess._exit__` call was a typo and raised AttributeError.
    sess = tf.get_default_session()
    if sess is not None:
        sess.close()
def main(lr=None,
         log_path_end='',
         bs=None,
         train_epoch=None,
         lambda_list=None,
         random_mirror=False,
         random_scale=False,
         model_weight=None):
    """Create the model, train it epoch by epoch and record the losses.

    Args:
        lr: If truthy, overrides ``cfg.LEARNING_RATE``.
        log_path_end: Forwarded to ``TrainConfig`` as ``log_path_end``.
        bs: If truthy, overrides ``cfg.BATCH_SIZE``.
        train_epoch: If not None, overrides ``cfg.TRAINING_EPOCHS``.
        lambda_list: If truthy, its first three items override
            ``cfg.LAMBDA1`` .. ``cfg.LAMBDA3``.
        random_mirror: Forwarded to ``TrainConfig``.
        random_scale: Forwarded to ``TrainConfig``.
        model_weight: Forwarded to ``TrainConfig``; the weights at
            ``cfg.model_weight`` are restored before training.

    Returns:
        ``cfg.SNAPSHOT_DIR``, where checkpoints and ``loss.json`` are saved.

    A KeyboardInterrupt stops training early; the loss history collected so
    far is still written to ``loss.json``.
    """
    tf.reset_default_graph()
    args = get_arguments()

    # Command-line values seed the configuration; the remaining training
    # hyper-parameters are set in the TrainConfig class.
    # Note: filter scale is 1 for the pruned model and 2 for the non-pruned
    # model, whose layers have twice as many filters,
    # e.g. [h, w, 64] <-> [h, w, 32].
    cfg = TrainConfig(dataset=args.dataset,
                      is_training=True,
                      random_scale=random_scale,
                      random_mirror=random_mirror,
                      filter_scale=args.filter_scale,
                      log_path_end=log_path_end,
                      model_weight=model_weight)
    if lr:
        cfg.LEARNING_RATE = lr
    if bs:
        cfg.BATCH_SIZE = bs
    if lambda_list:
        cfg.LAMBDA1 = lambda_list[0]
        cfg.LAMBDA2 = lambda_list[1]
        cfg.LAMBDA3 = lambda_list[2]
    if train_epoch is not None:
        cfg.TRAINING_EPOCHS = train_epoch

    cfg.display()

    # Setup training network and training samples
    train_reader = ImageReader(cfg=cfg, mode='train')
    train_net = ICNet_BN(image_reader=train_reader, cfg=cfg, mode='train')
    loss_sub4, loss_sub24, loss_sub124, reduced_loss = create_losses(
        train_net, train_net.labels, cfg)

    # Validation network: built under reuse=True so it shares the variables
    # created by the training network.
    with tf.variable_scope('', reuse=True):
        val_reader = ImageReader(cfg, mode='eval')
        val_net = ICNet_BN(image_reader=val_reader, cfg=cfg, mode='train')
        _, _, _, val_reduced_loss = create_losses(val_net, val_net.labels,
                                                  cfg)

    # Poly learning-rate policy, driven by the epoch index.
    base_lr = tf.constant(cfg.LEARNING_RATE)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(
        base_lr, tf.pow((1 - step_ph / cfg.TRAINING_EPOCHS), cfg.POWER))

    # Restore everything except the variables whose name contains
    # 'conv6_cls'.
    restore_var = [
        v for v in tf.global_variables() if 'conv6_cls' not in v.name
    ]
    # Batch-norm beta/gamma are trained only when explicitly requested.
    all_trainable = [
        v for v in tf.trainable_variables()
        if ('beta' not in v.name and 'gamma' not in v.name)
        or args.train_beta_gamma
    ]

    # moving_mean / moving_variance updates live in tf.GraphKeys.UPDATE_OPS.
    if args.update_mean_var == False:
        update_ops = None
    else:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        opt_conv = tf.train.MomentumOptimizer(learning_rate, cfg.MOMENTUM)
        grads = tf.gradients(reduced_loss, all_trainable)
        train_op = opt_conv.apply_gradients(zip(grads, all_trainable))

    # The session is created from train_net only; the validation network
    # reuses its variables.
    train_net.create_session()
    train_net.restore(cfg.model_weight, restore_var)
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=5)

    fetches = [
        reduced_loss, loss_sub4, loss_sub24, loss_sub124, val_reduced_loss,
        train_op
    ]
    report = ('epoch {:d} step {:d} \t total loss = {:.3f}, sub4 = {:.3f}, sub24 = {:.3f}, sub124 = {:.3f}, val_loss: {:.3f} ({:.3f} sec/step)')

    # Iterate over training epochs; each runs enough steps to cover
    # cfg.param['total_train_sample'] samples at the configured batch size.
    train_info = []
    steps_per_epoch = int(
        math.ceil(cfg.param['total_train_sample'] / cfg.BATCH_SIZE))
    try:
        for epoch in range(cfg.TRAINING_EPOCHS):
            feed_dict = {step_ph: epoch}
            for step in range(steps_per_epoch):
                start_time = time.time()
                (loss_value, loss1, loss2, loss3, val_loss_value,
                 _) = train_net.sess.run(fetches, feed_dict=feed_dict)
                duration = time.time() - start_time

                train_info.append({
                    'LOSS_VALUE': float(loss_value),
                    'LOSS_1': float(loss1),
                    'LOSS_2': float(loss2),
                    'LOSS_3': float(loss3),
                    'VALIDATION_LOSS_VALUE': float(val_loss_value),
                    'DURATION': float(duration),
                    'STEP': int(step),
                    'EPOCH': int(epoch),
                })
                print(report.format(epoch, step, loss_value, loss1, loss2,
                                    loss3, val_loss_value, duration))

            if (epoch + 1) % cfg.SAVE_PRED_EVERY == 0:
                train_net.save(saver, cfg.SNAPSHOT_DIR, epoch)
    except KeyboardInterrupt:
        # Fall through to the single save below; the history used to be
        # written (and announced) twice on interrupt.
        print('Training interrupted; saving the loss history so far.')

    Config.save_to_json(dict=train_info,
                        path=cfg.SNAPSHOT_DIR,
                        file_name='loss.json')
    print("loss.json was saved at %s" % cfg.SNAPSHOT_DIR)

    # Release the default session, if one is installed.  The previous
    # `sess._exit__` call was a typo and raised AttributeError.
    sess = tf.get_default_session()
    if sess is not None:
        sess.close()
    return cfg.SNAPSHOT_DIR
def train_setup(self):
    """Build the training graph on every available GPU.

    For each device returned by ``get_available_gpus()`` this creates an
    input reader, three weight-sharing ``Deeplab_v2`` networks (input scales
    1.0, 0.75 and 0.5), the multi-scale cross-entropy + L2 loss, and
    gradient-accumulation ops. The accumulated gradients are then averaged
    with ``average_gradients`` and applied by three momentum optimizers so
    that encoder, decoder weights and decoder biases use learning-rate
    multipliers of 1, 10 and 20 respectively.

    Attributes set on ``self``: ``coord``, ``loaders``, ``im_list``,
    ``image_batch``, ``label_batch``, ``reduced_loss``, ``curr_step``,
    ``zero_op``, ``accum_grads_op``, ``train_op``, ``saver``, ``pred``,
    ``total_summary`` and ``summary_writer``.

    NOTE(review): the per-device attributes (``image_batch``,
    ``label_batch``, ``reduced_loss``, ``curr_step``, ``zero_op``,
    ``accum_grads_op``) are rebound on every loop iteration, so after the
    loop they refer to the last device only.
    """
    tf.set_random_seed(self.conf.random_seed)

    # Create queue coordinator.
    self.coord = tf.train.Coordinator()

    # Input size
    h, w = (self.conf.input_height, self.conf.input_width)
    input_size = (h, w)

    # Devices
    gpu_list = get_available_gpus()

    # One list of (accumulated_grad, variable) pairs per device, per group.
    zip_encoder, zip_decoder_w, zip_decoder_b = [], [], []
    restore_vars = []
    self.loaders = []
    self.im_list = []

    for gpu in gpu_list:
        with tf.device(gpu):
            # Load reader
            with tf.name_scope("create_inputs"):
                reader = ImageReader(
                    self.conf.data_dir,
                    self.conf.data_list,
                    input_size,
                    self.conf.random_scale,
                    self.conf.random_mirror,
                    self.conf.ignore_label,
                    IMG_MEAN,
                    self.coord)
                self.image_batch, self.label_batch, _names = reader.dequeue(
                    self.conf.batch_size)
                self.im_list.append(self.image_batch)
                image_batch_075 = tf.image.resize_images(
                    self.image_batch, [int(h * 0.75), int(w * 0.75)])
                image_batch_05 = tf.image.resize_images(
                    self.image_batch, [int(h * 0.5), int(w * 0.5)])

            # Create network
            # NOTE(review): reuse=False is requested on every device; if
            # Deeplab_v2 creates its variables through tf.get_variable this
            # presumably fails on the second device -- confirm.
            with tf.variable_scope('', reuse=False):
                net = Deeplab_v2(self.image_batch, self.conf.num_classes, True)
            with tf.variable_scope('', reuse=True):
                net075 = Deeplab_v2(image_batch_075, self.conf.num_classes, True)
            with tf.variable_scope('', reuse=True):
                net05 = Deeplab_v2(image_batch_05, self.conf.num_classes, True)

            # Variables that load from pre-trained model.
            restore_var = [
                v for v in tf.global_variables() if 'fc' not in v.name
            ]
            restore_vars.append(restore_var)

            # Trainable Variables
            all_trainable = tf.trainable_variables()
            # Fine-tune part
            encoder_trainable = [
                v for v in all_trainable if 'fc' not in v.name
            ]  # lr * 1.0
            # Decoder part
            decoder_trainable = [v for v in all_trainable if 'fc' in v.name]
            decoder_w_trainable = [
                v for v in decoder_trainable
                if 'weights' in v.name or 'gamma' in v.name
            ]  # lr * 10.0
            decoder_b_trainable = [
                v for v in decoder_trainable
                if 'biases' in v.name or 'beta' in v.name
            ]  # lr * 20.0

            # Check that the three groups partition the trainable variables.
            assert (len(all_trainable) ==
                    len(decoder_trainable) + len(encoder_trainable))
            assert (len(decoder_trainable) ==
                    len(decoder_w_trainable) + len(decoder_b_trainable))

            # Fixed ordering shared by the gradient list and the accumulators.
            grouped_trainable = (encoder_trainable + decoder_w_trainable +
                                 decoder_b_trainable)
            n_encoder = len(encoder_trainable)
            n_decoder_w = len(decoder_w_trainable)

            # Network raw output
            raw_output100 = net.outputs
            raw_output075 = net075.outputs
            raw_output05 = net05.outputs
            # Element-wise maximum over the three scales, after resizing the
            # smaller outputs to the full-scale output size.
            raw_output = tf.reduce_max(tf.stack([
                raw_output100,
                tf.image.resize_images(raw_output075,
                                       tf.shape(raw_output100)[1:3, ]),
                tf.image.resize_images(raw_output05,
                                       tf.shape(raw_output100)[1:3, ])
            ]), axis=0)

            # Ground truth: ignoring all labels greater or equal than n_classes
            label_proc = prepare_label(
                self.label_batch,
                tf.stack(raw_output.get_shape()[1:3]),
                num_classes=self.conf.num_classes,
                one_hot=False)  # [batch_size, h, w]
            label_proc075 = prepare_label(
                self.label_batch,
                tf.stack(raw_output075.get_shape()[1:3]),
                num_classes=self.conf.num_classes,
                one_hot=False)
            label_proc05 = prepare_label(
                self.label_batch,
                tf.stack(raw_output05.get_shape()[1:3]),
                num_classes=self.conf.num_classes,
                one_hot=False)

            raw_gt = tf.reshape(label_proc, [-1, ])
            raw_gt075 = tf.reshape(label_proc075, [-1, ])
            raw_gt05 = tf.reshape(label_proc05, [-1, ])

            # Positions whose label is a valid class index.
            indices = tf.squeeze(
                tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
            indices075 = tf.squeeze(
                tf.where(tf.less_equal(raw_gt075, self.conf.num_classes - 1)),
                1)
            indices05 = tf.squeeze(
                tf.where(tf.less_equal(raw_gt05, self.conf.num_classes - 1)),
                1)

            gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
            gt075 = tf.cast(tf.gather(raw_gt075, indices075), tf.int32)
            gt05 = tf.cast(tf.gather(raw_gt05, indices05), tf.int32)

            raw_prediction = tf.reshape(raw_output,
                                        [-1, self.conf.num_classes])
            raw_prediction100 = tf.reshape(raw_output100,
                                           [-1, self.conf.num_classes])
            raw_prediction075 = tf.reshape(raw_output075,
                                           [-1, self.conf.num_classes])
            raw_prediction05 = tf.reshape(raw_output05,
                                          [-1, self.conf.num_classes])

            prediction = tf.gather(raw_prediction, indices)
            prediction100 = tf.gather(raw_prediction100, indices)
            prediction075 = tf.gather(raw_prediction075, indices075)
            prediction05 = tf.gather(raw_prediction05, indices05)

            # Pixel-wise softmax_cross_entropy loss
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=prediction, labels=gt)
            loss100 = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=prediction100, labels=gt)
            loss075 = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=prediction075, labels=gt075)
            loss05 = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=prediction05, labels=gt05)

            # L2 regularization
            l2_losses = [
                self.conf.weight_decay * tf.nn.l2_loss(v)
                for v in all_trainable if 'weights' in v.name
            ]

            # Loss function
            self.reduced_loss = tf.reduce_mean(loss) + tf.reduce_mean(
                loss100) + tf.reduce_mean(loss075) + tf.reduce_mean(
                    loss05) + tf.add_n(l2_losses)

            # Define optimizers
            # 'poly' learning rate
            base_lr = tf.constant(self.conf.learning_rate)
            self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
            learning_rate = tf.scalar_mul(
                base_lr,
                tf.pow((1 - self.curr_step / self.conf.num_steps),
                       self.conf.power))

            # We have several optimizers here in order to handle the
            # different lr_mult, which is a kind of parameters in Caffe.
            # This controls the actual lr for each layer.
            opt_encoder = tf.train.MomentumOptimizer(
                learning_rate, self.conf.momentum)
            opt_decoder_w = tf.train.MomentumOptimizer(
                learning_rate * 10.0, self.conf.momentum)
            opt_decoder_b = tf.train.MomentumOptimizer(
                learning_rate * 20.0, self.conf.momentum)

            # Gradient accumulation
            # Define a variable to accumulate gradients.
            accum_grads = [
                tf.Variable(tf.zeros_like(v.initialized_value()),
                            trainable=False) for v in grouped_trainable
            ]

            # Define an operation to clear the accumulated gradients for
            # next batch.
            self.zero_op = [v.assign(tf.zeros_like(v)) for v in accum_grads]

            # To make sure each layer gets updated by different lr's, we do
            # not use 'minimize' here. Instead, we separate the steps
            # compute_grads + update_params.
            # Compute grads (once; the result feeds the accumulators only).
            grads = tf.gradients(self.reduced_loss, grouped_trainable)

            # Accumulate and normalise the gradients.
            self.accum_grads_op = [
                accum.assign_add(grad / self.conf.grad_update_every)
                for accum, grad in zip(accum_grads, grads)
            ]

            # Split the accumulators back into the three variable groups.
            grads_encoder = accum_grads[:n_encoder]
            grads_decoder_w = accum_grads[n_encoder:n_encoder + n_decoder_w]
            grads_decoder_b = accum_grads[n_encoder + n_decoder_w:]

            # Each bucket must hold the pairs its optimizer is meant for:
            # weights go to opt_decoder_w (lr * 10), biases to opt_decoder_b
            # (lr * 20). Mixing the two silently swaps the multipliers.
            zip_encoder.append(list(zip(grads_encoder, encoder_trainable)))
            zip_decoder_w.append(
                list(zip(grads_decoder_w, decoder_w_trainable)))
            zip_decoder_b.append(
                list(zip(grads_decoder_b, decoder_b_trainable)))

    avg_grads_encoder = average_gradients(zip_encoder)
    avg_grads_decoder_w = average_gradients(zip_decoder_w)
    avg_grads_decoder_b = average_gradients(zip_decoder_b)

    for gpu in gpu_list:
        with tf.device(gpu):
            # Update params
            train_op_conv = opt_encoder.apply_gradients(avg_grads_encoder)
            train_op_fc_w = opt_decoder_w.apply_gradients(
                avg_grads_decoder_w)
            train_op_fc_b = opt_decoder_b.apply_gradients(
                avg_grads_decoder_b)

    # Finally, get the train_op!
    # UPDATE_OPS collects the moving_mean and moving_variance updates.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        self.train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)

    # Saver for storing checkpoints of the model
    self.saver = tf.train.Saver(var_list=tf.global_variables(),
                                max_to_keep=0)

    # Loader for loading the pre-trained model (one per device)
    for gpu, device_restore_var in zip(gpu_list, restore_vars):
        with tf.device(gpu):
            self.loaders.append(tf.train.Saver(var_list=device_restore_var))

    # Training summary
    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output, input_size)
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    self.pred = tf.expand_dims(raw_output_up, axis=3)
    # Image summary.
    images_summary = tf.py_func(inv_preprocess,
                                [self.image_batch, 1, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(
        decode_labels, [self.label_batch, 1, self.conf.num_classes],
        tf.uint8)
    preds_summary = tf.py_func(decode_labels,
                               [self.pred, 1, self.conf.num_classes],
                               tf.uint8)
    self.total_summary = tf.summary.image(
        'images',
        tf.concat(axis=2,
                  values=[images_summary, labels_summary, preds_summary]),
        max_outputs=1)  # Concatenate row-wise.
    if not os.path.exists(self.conf.logdir):
        os.makedirs(self.conf.logdir)
    self.summary_writer = tf.summary.FileWriter(
        self.conf.logdir, graph=tf.get_default_graph())
def main():
    """Evaluate a segmentation model, or dump its predictions to PNG files.

    Builds the evaluation network selected by the command-line arguments,
    restores its weights, and then either

    * ``args.out`` set: runs the network once per entry of
      ``reader.image_list`` and writes each prediction to
      ``out/eval_<basename>.png``; or
    * otherwise: runs the streaming mean-IoU update op for
      ``len(reader.image_list)`` steps (``args.all``) or
      ``cfg.param['eval_steps']`` steps, then prints the resulting mIoU.

    If the input pipeline raises ``tf.errors.OutOfRangeError`` the run stops
    and the number of images processed so far is printed.
    """
    args = get_arguments()
    cfg = Config(dataset=args.dataset,
                 is_training=False,
                 filter_scale=args.filter_scale)

    model = model_config[args.model]

    reader = ImageReader(cfg=cfg, mode='eval')
    net = model(image_reader=reader, cfg=cfg, mode='eval')

    # mIoU: flatten predictions/labels and drop pixels carrying ignore_label.
    pred_flatten = tf.reshape(net.output, [-1, ])
    label_flatten = tf.reshape(net.labels, [-1, ])

    mask = tf.not_equal(label_flatten, cfg.param['ignore_label'])
    indices = tf.squeeze(tf.where(mask), 1)
    gt = tf.cast(tf.gather(label_flatten, indices), tf.int32)
    pred = tf.gather(pred_flatten, indices)

    num_classes = cfg.param['num_classes']
    if cfg.dataset == 'ade20k':
        # Predictions are shifted by one and one extra class is counted.
        # NOTE(review): presumably because ADE20K labels are 1-based -- confirm.
        pred = tf.add(pred, tf.constant(1, dtype=tf.int64))
        num_classes += 1
    # Every other dataset uses the configured class count unchanged, so an
    # unlisted dataset name no longer leaves mIoU/update_op undefined.
    mIoU, update_op = tf.metrics.mean_iou(predictions=pred,
                                          labels=gt,
                                          num_classes=num_classes)

    net.create_session()
    net.restore(cfg.model_paths[args.model])

    processed = 0  # images/steps completed; reported if the input runs dry
    try:
        if args.out:
            os.makedirs("out", exist_ok=True)
            print("Directing predictions to files")
            for name in reader.image_list:
                name = os.path.basename(name)
                print(name)
                out = net.sess.run(net.output)
                processed += 1
                suffix = '' if name.endswith('.png') else '.png'
                cv2.imwrite(f"out/eval_{name}{suffix}", out[0, :, :, 0])
        else:
            if args.all:
                steps = len(reader.image_list)
            else:
                steps = cfg.param['eval_steps']
            for processed in trange(steps, desc='evaluation', leave=True):
                net.sess.run(update_op)

            # Only meaningful here: the metric is never updated in --out mode.
            print('mIoU: {}'.format(net.sess.run(mIoU)))
    except tf.errors.OutOfRangeError:
        print("Out of images", processed)