def test(args):
    """ Function to test the architecture by saving disparities to the output
    directory. """
    # Post-processing has been better in every run so far, so post-processed
    # results are saved unless explicitly disabled. For the Pilzer
    # architectures the disparities are already post-processed by their own
    # FuseNet, so it is skipped there.
    do_post_processing = args.postprocessing and 'pilzer' not in args.architecture

    input_height = args.input_height
    input_width = args.input_width
    output_directory = args.output_dir

    n_img, test_loader = prepare_dataloader(args, 'test')

    model = create_architecture(args)
    which_model = 'final' if args.load_final else 'best'
    model.load_networks(which_model)
    model.to_test()

    disparities = np.zeros((n_img, input_height, input_width), dtype=np.float32)
    inference_time = 0.0

    # Wrap the forward passes in no_grad() so no intermediate activations are
    # stored for backprop.
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            if i % 100 == 0 and i != 0:
                print('Testing... Now at image: {}'.format(i))

            t_start = time.time()
            # Do a forward pass.
            disps = model.fit(data)

            # Some architectures output a single disparity instead of a tuple
            # of 4 disparity scales.
            disps = disps[0][:, 0, :, :] if isinstance(disps, tuple) \
                else disps.squeeze()

            if do_post_processing:
                disparities[i] = post_process_disparity(disps.cpu().numpy())
            else:
                disp = disps.unsqueeze(1)
                disparities[i] = disp[0].squeeze().cpu().numpy()
            t_end = time.time()
            inference_time += (t_end - t_start)

    if args.test_time:
        test_time_message = ('Inference took {:.4f} seconds. '
                             'That is {:.2f} imgs/s or {:.6f} s/img.')
        print(test_time_message.format(inference_time, n_img / inference_time,
                                       inference_time / n_img))

    disp_file_name = 'disparities_{}_{}.npy'.format(args.dataset, model.name)
    full_disp_path = os.path.join(output_directory, disp_file_name)
    if os.path.exists(full_disp_path):
        print('Overwriting disparities at {}...'.format(full_disp_path))
    np.save(full_disp_path, disparities)

    print('Finished Testing')

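# For reference, a sketch of what the post_process_disparity helper used above
# typically computes in Monodepth-style code (Godard et al.); this repo's
# actual helper may differ in details, so the name below is deliberately
# distinct. The input is a (2, H, W) array holding the disparities of the
# image and of its horizontally flipped copy; the two are blended with ramp
# masks to suppress the occlusion artifacts near the left/right borders.
def post_process_disparity_sketch(disp):
    _, h, w = disp.shape
    l_disp = disp[0, :, :]
    r_disp = np.fliplr(disp[1, :, :])
    m_disp = 0.5 * (l_disp + r_disp)
    l, _ = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h))
    # Ramp masks that favor the non-occluded prediction near each border.
    l_mask = 1.0 - np.clip(20 * (l - 0.05), 0, 1)
    r_mask = np.fliplr(l_mask)
    return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp
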
def reconstruct_right(args):
    """ Function to reconstruct the right views of stereo pairs from the left
    views in a directory. """
    input_height = args.input_height
    input_width = args.input_width
    output_directory = args.output_dir

    file_names = sorted(os.listdir(args.data_dir))
    file_names = [os.path.join(args.data_dir, file_name)
                  for file_name in file_names]

    # Create the model and load the requested weights.
    model = create_architecture(args)
    which_model = 'final' if args.load_final else 'best'
    model.load_networks(which_model)
    model.to_test()

    # Instantiate a MonodepthLoss module purely to reuse its
    # generate_image_right() warping; no loss is computed here.
    fake_loss = MonodepthLoss(args)
    fake_loss = fake_loss.to(args.device)

    for idx, left in enumerate(file_names):
        # Convert the input PIL image to a tensor with the test transforms.
        left_image = Image.open(left)
        input_size = (left_image.width, left_image.height)
        resize = transforms.ResizeImage(train=False,
                                        size=(input_height, input_width))
        totensor = transforms.ToTensor(train=False)
        left_image = totensor(resize(left_image))
        # Stack the image with its horizontal flip, as expected by model.fit().
        left_image = torch.stack((left_image, torch.flip(left_image, [2])))

        # Build the dictionary expected by model.fit().
        left_data = {'left_image': left_image}

        # Wrap the forward pass in no_grad() so no intermediate activations
        # are stored for backprop.
        with torch.no_grad():
            # Estimate the disparities; keep the right-view disparity at the
            # finest scale.
            disps = model.fit(left_data)
            disp_right_est = [d[:, 1, :, :].unsqueeze(1) for d in disps]
            disp_right_est = disp_right_est[0]

        # Warp the left view with the estimated disparity to obtain the
        # right view.
        fake_right = fake_loss.generate_image_right(left_image.to(args.device),
                                                    disp_right_est)

        # Convert the tensor to a PIL image, restore the original size
        # and save it.
        output_name = os.path.splitext(os.path.basename(left))[0]
        save_path = os.path.join(output_directory, '{}.png'.format(output_name))
        save_right = torchvision.transforms.functional.to_pil_image(
            fake_right[0].cpu())
        save_right = save_right.resize(input_size)
        save_right.save(save_path)

        if idx % 200 == 0:
            print('Processed ' + save_path)

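# For reference, a sketch of the bilinear warping that MonodepthLoss's
# generate_image_right() typically performs (monodepth-pytorch style). The
# function and argument names below are illustrative, not this repo's API:
# the right view is sampled from the left image at x + disp via grid_sample,
# with disparities assumed to be normalized by image width.
def apply_disparity_sketch(img, disp):
    batch_size, _, height, width = img.size()
    # Base sampling grid in [0, 1] coordinates.
    x_base = torch.linspace(0, 1, width, device=img.device).repeat(
        batch_size, height, 1)
    y_base = torch.linspace(0, 1, height, device=img.device).repeat(
        batch_size, width, 1).transpose(1, 2)
    # Shift the x coordinates by the width-normalized disparity.
    x_shifts = disp[:, 0, :, :]
    flow_field = torch.stack((x_base + x_shifts, y_base), dim=3)
    # grid_sample expects coordinates in [-1, 1].
    return torch.nn.functional.grid_sample(
        img, 2 * flow_field - 1, mode='bilinear', padding_mode='zeros',
        align_corners=True)
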
def check_reconstruct_right(args):
    """ Function to visually check right-view reconstruction from the left
    view on a few fixed KITTI samples. """
    input_height = args.input_height
    input_width = args.input_width

    # Create the model and load the requested weights.
    model = create_architecture(args)
    which_model = 'final' if args.load_final else 'best'
    model.load_networks(which_model)
    model.to_test()

    # Instantiate a MonodepthLoss module purely to reuse its
    # generate_image_right() warping; no loss is computed here.
    fake_loss = MonodepthLoss(args)
    fake_loss = fake_loss.to(args.device)

    # Mark outputs of the 'best' checkpoint in the file name.
    ext = '' if args.load_final else '_best'

    file_names = ['000025.png', '000031.png', '000036.png', '000049.png']
    file_names = [
        '/home/kishida/depthgan/sample_kitti_obj/' + file_name
        for file_name in file_names
    ]

    for filename in file_names:
        # Convert the input PIL image to a tensor with the test transforms.
        left_image = Image.open(filename)
        input_size = (left_image.width, left_image.height)
        resize = transforms.ResizeImage(train=False,
                                        size=(input_height, input_width))
        totensor = transforms.ToTensor(train=False)
        left_image = totensor(resize(left_image))
        # Stack the image with its horizontal flip, as expected by model.fit().
        left_image = torch.stack((left_image, torch.flip(left_image, [2])))

        # Build the dictionary expected by model.fit().
        left_data = {'left_image': left_image}

        # Wrap the forward pass in no_grad() so no intermediate activations
        # are stored for backprop.
        with torch.no_grad():
            # Estimate the disparities; keep the right-view disparity at the
            # finest scale.
            disps = model.fit(left_data)
            disp_right_est = [d[:, 1, :, :].unsqueeze(1) for d in disps]
            disp_right_est = disp_right_est[0]

        # Warp the left view with the estimated disparity to obtain the
        # right view.
        print('reconstructing right view from left view')
        fake_right = fake_loss.generate_image_right(
            left_image.to(args.device), disp_right_est)

        # Convert the tensor to a PIL image, restore the original size and
        # save it next to the input.
        output_dir = os.path.dirname(filename)
        output_name = os.path.splitext(os.path.basename(filename))[0]
        model_name = os.path.basename(args.model_name)
        save_path = os.path.join(
            output_dir, '{}_rec_{}{}.jpg'.format(output_name, model_name, ext))
        save_right = torchvision.transforms.functional.to_pil_image(
            fake_right[0].cpu())
        save_right = save_right.resize(input_size)
        save_right.save(save_path)
        print('Saved image : ' + save_path)

        # Open the result with ImageMagick's `display` for a quick look.
        subprocess.call(['display', save_path])

def train(args):
    """ Function used for training any of the architectures, given the parsed
    input arguments. """
    tb_dir = os.path.join('saved_models', args.model_name,
                          'tensorboard_' + args.model_name)
    writer = tbx.SummaryWriter(tb_dir)

    def validate(epoch):
        model.to_test()
        model.set_new_loss_item(epoch, train=False)

        # A WGAN architecture still needs access to gradients, so only
        # disable them for the other architectures.
        if 'wgan' not in args.architecture:
            torch.set_grad_enabled(False)

        for i, data in enumerate(val_loader):
            # Accumulate the running validation losses for this epoch.
            model.set_input(data)
            model.forward()
            model.add_running_loss_val(epoch)

        if 'wgan' not in args.architecture:
            torch.set_grad_enabled(True)

        # Average the running loss over the validation images. Dividing by
        # the number of images rather than batches compensates for partial
        # batches being forwarded.
        model.make_running_loss(epoch, val_n_img, train=False)

    n_img, loader = prepare_dataloader(args, 'train')
    val_n_img, val_loader = prepare_dataloader(args, 'val')

    model = create_architecture(args)
    model.set_data_loader(loader)
    print('data loader is set')

    if not args.resume:
        # The aggregated losses per epoch are tracked in a dict. There is no
        # pre-training train loss, but the pre-training validation loss is
        # computed now, under epoch key -1.
        best_val_loss = float('inf')
        validate(-1)
        pre_validation_update(model.losses[-1]['val'])
    else:
        best_val_loss = min(
            model.losses[epoch]['val']['G'] for epoch in model.losses.keys())

    running_val_loss = 0.0
    print('Now, training starts')
    for epoch in range(model.start_epoch, args.epochs):
        print('Epoch {} is beginning...'.format(epoch))
        model.update_learning_rate(epoch, args.learning_rate)
        c_time = time.time()
        model.to_train()
        model.set_new_loss_item(epoch)

        # Run a single training epoch; this generalizes to the WGAN variants
        # as well.
        model.run_epoch(epoch, n_img)

        # Validation stores its losses on the model, keyed by epoch.
        validate(epoch)

        # Print an update with the training and validation losses.
        print_epoch_update(epoch, time.time() - c_time, model.losses,
                           model.rec_losses)

        # Log the training losses to TensorBoard.
        for loss_name in model.loss_names:
            writer.add_scalar('GAN/' + loss_name,
                              model.losses[epoch]['train'][loss_name], epoch)
        for loss_name in model.rec_loss_names:
            writer.add_scalar('Reconstruction/' + loss_name,
                              model.rec_losses[epoch]['train'][loss_name],
                              epoch)

        # Make a checkpoint, so training can be resumed.
        running_val_loss = model.losses[epoch]['val']['G']
        is_best = running_val_loss < best_val_loss
        if is_best:
            best_val_loss = running_val_loss
        model.save_checkpoint(epoch, is_best, best_val_loss)
        print('Epoch {} ended'.format(epoch))

    print('Finished Training. Best validation loss:\t{:.3f}'.format(
        best_val_loss))

    # Save the model of the final epoch. If another epoch was better, also
    # save that one separately as the best model.
    model.save_networks('final')
    if running_val_loss != best_val_loss:
        model.save_best_networks()
    model.save_losses()
    writer.close()

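# How train() might be wired to a CLI entry point. The flag set below is
# illustrative only: it mirrors the attributes train() actually reads
# (model_name, architecture, resume, epochs, learning_rate), but the repo's
# real argument parser likely defines many more options, e.g. for the
# dataloaders and device.
def _example_train_args():
    import argparse
    parser = argparse.ArgumentParser(description='Train a depth architecture.')
    parser.add_argument('--model_name', required=True,
                        help='Names the checkpoint and TensorBoard folders.')
    parser.add_argument('--architecture', default='monodepth',
                        help="Architecture id; 'wgan' variants change how "
                             'gradients are handled during validation.')
    parser.add_argument('--resume', action='store_true',
                        help='Resume training from the last checkpoint.')
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--learning_rate', type=float, default=1e-4)
    return parser.parse_args()
    # Usage: train(_example_train_args())
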
def reconstruct_right_single(args):
    """ Single-image variant of reconstruct_right() above: reconstructs the
    right view from the left view given by args.left_view. """
    input_height = args.input_height
    input_width = args.input_width
    input_left = args.left_view

    # Convert the input PIL image to a tensor with the test transforms.
    left_image = Image.open(input_left)
    resize = transforms.ResizeImage(train=False,
                                    size=(input_height, input_width))
    totensor = transforms.ToTensor(train=False)
    left_image = totensor(resize(left_image))
    # Stack the image with its horizontal flip, as expected by model.fit().
    left_image = torch.stack((left_image, torch.flip(left_image, [2])))

    # Build the dictionary expected by model.fit().
    left_data = {'left_image': left_image}

    # Create the model and load the requested weights.
    model = create_architecture(args)
    which_model = 'final' if args.load_final else 'best'
    model.load_networks(which_model)
    model.to_test()

    # Instantiate a MonodepthLoss module purely to reuse its
    # generate_image_right() warping; no loss is computed here.
    fake_loss = MonodepthLoss(args)
    fake_loss = fake_loss.to(args.device)

    # Wrap the forward pass in no_grad() so no intermediate activations are
    # stored for backprop.
    with torch.no_grad():
        # Estimate the disparities; keep the right-view disparity at the
        # finest scale.
        disps = model.fit(left_data)
        disp_right_est = [d[:, 1, :, :].unsqueeze(1) for d in disps]
        disp_right_est = disp_right_est[0]

    # Warp the left view with the estimated disparity to obtain the right view.
    print('reconstructing right view from left view')
    fake_right = fake_loss.generate_image_right(left_image.to(args.device),
                                                disp_right_est)

    # Debug dump of the right-view disparity map. scipy.misc.imresize has been
    # removed from SciPy, so mimic its float handling: rescale the array to
    # uint8 over its range, then resize with PIL.
    disp_np = disp_right_est[0].squeeze().cpu().numpy()
    disp_norm = (disp_np - disp_np.min()) / max(disp_np.max() - disp_np.min(),
                                                1e-8)
    disp_to_img = np.array(
        Image.fromarray((255.0 * disp_norm).astype(np.uint8)).resize(
            (input_width, input_height)))
    print(type(disp_to_img))
    print(disp_to_img.shape)
    print(disp_to_img[100:110, 100:110])
    Image.fromarray(disp_to_img).save('./output/000031_disp.jpg')

    # Convert the tensor to a PIL image and save it next to the input.
    print('Saving reconstructed right view...')
    output_dir = os.path.dirname(input_left)
    output_name = os.path.splitext(os.path.basename(input_left))[0]
    model_name = os.path.basename(args.model_name)
    save_path = os.path.join(
        output_dir, '{}_rec_{}.jpg'.format(output_name, model_name))
    save_right = torchvision.transforms.functional.to_pil_image(
        fake_right[0].cpu())
    save_right.save(save_path)
    print('Saved image : ' + save_path)

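# If metric depth is ever needed from the saved disparities, the conversion
# used in Godard et al.'s KITTI evaluation is the standard route. A minimal
# sketch, assuming width-normalized disparities (as above) and nominal KITTI
# stereo parameters (baseline 0.54 m, focal length ~721 px); both values are
# assumptions here and must be adjusted for other datasets or calibrations.
def disp_to_depth_kitti_sketch(disp, img_width, baseline=0.54, focal=721.0):
    # Convert the normalized disparity back to pixel units, then invert:
    # depth = baseline * focal / disparity_in_pixels.
    disp_pixels = np.maximum(disp * img_width, 1e-6)
    return baseline * focal / disp_pixels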