def __init__(self, model_path):
    """Load a pretrained encoder / depth-decoder pair from a folder.

    Args:
        model_path: Directory (as a ``str``) containing ``encoder.pth``
            and ``depth.pth``.

    Sets ``self.model_path``, ``self.encoder``, ``self.depth_decoder``,
    ``self.feed_height`` and ``self.feed_width``. The last two are read
    from the ``'height'`` / ``'width'`` entries of the encoder checkpoint.

    Raises:
        AssertionError: If ``model_path`` is not a string.
    """
    assert isinstance(model_path, str), "model_path must be a str"
    self.model_path = model_path

    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # LOADING MODEL
    encoder = networks.ResnetEncoder(18, False)
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))

    # Only used to decide where checkpoint tensors are deserialized; the
    # networks themselves are left on whatever device they were built on.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    loaded_dict_enc = torch.load(encoder_path, map_location=device)
    # The encoder checkpoint carries extra entries ('height', 'width') that
    # are not network parameters, so keep only the keys the network knows.
    encoder_keys = encoder.state_dict()
    filtered_dict_enc = {
        k: v for k, v in loaded_dict_enc.items() if k in encoder_keys
    }
    encoder.load_state_dict(filtered_dict_enc)

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    # Pretrained weights are loaded for inference: switch both networks to
    # evaluation mode so layers that behave differently while training
    # (e.g. normalization / dropout, if present) give deterministic outputs.
    encoder.eval()
    depth_decoder.eval()

    self.encoder = encoder
    self.depth_decoder = depth_decoder

    self.feed_height = loaded_dict_enc['height']
    self.feed_width = loaded_dict_enc['width']
def __init__(self, model_name="mono_1024x320"):
    """Build the encoder / depth decoder and load their weights on the CPU.

    Args:
        model_name: Name of the model folder under ``./monodepth2/models``.
            ``download_model_if_doesnt_exist`` is called with it first
            (presumably fetching the weights when they are missing).

    Sets ``self.model_name``, ``self.encoder``, ``self.depth_decoder``
    (both in evaluation mode), ``self.feed_height`` and ``self.feed_width``.
    """
    self.model_name = model_name
    download_model_if_doesnt_exist(model_name)

    weights_dir = os.path.join("./monodepth2/models", model_name)
    encoder_weights = os.path.join(weights_dir, "encoder.pth")
    decoder_weights = os.path.join(weights_dir, "depth.pth")

    # Instantiate both networks before touching the checkpoints
    self.encoder = networks.ResnetEncoder(18, False)
    self.depth_decoder = networks.DepthDecoder(
        num_ch_enc=self.encoder.num_ch_enc, scales=range(4))

    # Encoder: drop checkpoint entries that are not network parameters
    enc_checkpoint = torch.load(encoder_weights, map_location='cpu')
    wanted = self.encoder.state_dict()
    enc_params = {}
    for key, tensor in enc_checkpoint.items():
        if key in wanted:
            enc_params[key] = tensor
    self.encoder.load_state_dict(enc_params)

    # Decoder: the checkpoint is loaded as-is
    self.depth_decoder.load_state_dict(
        torch.load(decoder_weights, map_location='cpu'))

    for net in (self.encoder, self.depth_decoder):
        net.eval()

    # Input resolution stored alongside the encoder weights
    self.feed_height = enc_checkpoint['height']
    self.feed_width = enc_checkpoint['width']
def __init__(self, model_name, no_cuda):
    """Select a device, load the pretrained networks and create the publisher.

    Args:
        model_name: Name of the model folder under ``monodepth2/models``,
            resolved relative to this file's directory.
        no_cuda: When truthy, run on the CPU even if CUDA is available.

    Sets ``self._device``, ``self._encoder``, ``self._depth_decoder``
    (both moved to the device and put in evaluation mode),
    ``self._feed_height``, ``self._feed_width`` and ``self._img_pub``.
    """
    # Execution environment
    gpu_usable = torch.cuda.is_available() and not no_cuda
    self._device = torch.device("cuda") if gpu_usable else torch.device("cpu")

    # Locate the weights next to this source file
    download_model_if_doesnt_exist(model_name)
    here = os.path.dirname(os.path.abspath(__file__))
    weights_dir = os.path.join(here, "monodepth2", "models", model_name)
    encoder_weights = os.path.join(weights_dir, "encoder.pth")
    decoder_weights = os.path.join(weights_dir, "depth.pth")

    # Encoder
    self._encoder = networks.ResnetEncoder(18, False)
    enc_checkpoint = torch.load(encoder_weights, map_location=self._device)

    # The checkpoint also records the input size the model expects
    self._feed_height = enc_checkpoint['height']
    self._feed_width = enc_checkpoint['width']

    known_keys = self._encoder.state_dict()
    enc_params = {
        name: value
        for name, value in enc_checkpoint.items()
        if name in known_keys
    }
    self._encoder.load_state_dict(enc_params)
    self._encoder.to(self._device)
    self._encoder.eval()

    # Decoder
    self._depth_decoder = networks.DepthDecoder(
        num_ch_enc=self._encoder.num_ch_enc, scales=range(4))
    dec_checkpoint = torch.load(decoder_weights, map_location=self._device)
    self._depth_decoder.load_state_dict(dec_checkpoint)
    self._depth_decoder.to(self._device)
    self._depth_decoder.eval()

    # ROS image publisher
    # NOTE(review): rospy.Publisher normally also takes the message class
    # as its second argument - confirm this call is complete.
    self._img_pub = rospy.Publisher('monodepth2')
x = x[None, ...] return x # ## Setting up Monodepth model # We build our monocular depth estimation model from the Monodepth module # Define which model to use and download if not found model_name = "mono_640x192" download_model_if_doesnt_exist(model_name) # Build paths to coders and instantiate from path encoder_path = os.path.join("models", model_name, "encoder.pth") depth_decoder_path = os.path.join("models", model_name, "depth.pth") encoder = networks.ResnetEncoder(18, False).cuda() depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc, scales=range(4)).cuda() # Encoder and Decoder loading loaded_dict_enc = torch.load(encoder_path, map_location='cpu') filtered_dict_enc = { k: v for k, v in loaded_dict_enc.items() if k in encoder.state_dict() } encoder.load_state_dict(filtered_dict_enc) loaded_dict = torch.load(depth_decoder_path, map_location='cpu') depth_decoder.load_state_dict(loaded_dict) # Put the coders in evaluation mode encoder.eval()
def _normalize_uncertainty(uncert):
    """Min-max rescale an uncertainty map to the [0, 1] range."""
    lowest = np.min(uncert)
    span = np.max(uncert) - lowest
    if span == 0:
        # Constant map: the original 0/0 division produced NaNs. Every
        # value equals the minimum, so the shifted map is already all zeros.
        return uncert - lowest
    return (uncert - lowest) / span


def evaluate(opt):
    """Run a pretrained model on a test split and save 16 bit maps.

    Predicts a disparity map (and, depending on the options, an uncertainty
    map) for every entry of ``<splits_dir>/<opt.eval_split>/test_files.txt``
    and writes them as 16 bit PNGs under ``opt.output_dir/raw``. With
    ``opt.qual`` colour-mapped versions are also written under
    ``opt.output_dir/qual``.

    Args:
        opt: Options object. Attributes read here: ``eval_mono``,
            ``eval_stereo``, ``no_eval`` (exactly one must be set), ``log``,
            ``repr`` (at most one), ``bootstraps``, ``snapshots`` (at most
            one greater than 1), ``dropout``, ``post_process``,
            ``load_weights_folder``, ``eval_split``, ``num_epochs``,
            ``num_layers``, ``png``, ``data_path``, ``num_workers``,
            ``min_depth``, ``max_depth``, ``output_dir``, ``qual`` and
            ``custom_scale``. ``opt.batch_size`` is overwritten with 1 and
            ``opt.load_weights_folder`` is user-expanded in place.

    Raises:
        AssertionError: If the option combination is invalid or the weights
            folder does not exist.
    """
    opt.batch_size = 1

    assert sum(
        (opt.eval_mono, opt.eval_stereo, opt.no_eval)
    ) == 1, "Please choose mono or stereo evaluation by setting either --eval_mono, --eval_stereo, --custom_run"
    assert sum(
        (opt.log, opt.repr)
    ) < 2, "Please select only one between LR and LOG by setting --repr or --log"
    assert opt.bootstraps == 1 or opt.snapshots == 1, "Please set only one of --bootstraps or --snapshots to be major than 1"

    # get the number of networks
    nets = max(opt.bootstraps, opt.snapshots)
    do_uncert = (opt.log or opt.repr or opt.dropout or opt.post_process
                 or opt.bootstraps > 1 or opt.snapshots > 1)
    # Every uncertainty source except plain post-processing gets min-max
    # normalized before being stored.
    normalize_uncert = opt.log or opt.repr or opt.dropout or nets > 1

    print("-> Beginning inference...")

    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)
    assert os.path.isdir(
        opt.load_weights_folder), "Cannot find a folder at {}".format(
            opt.load_weights_folder)

    print("-> Loading weights from {}".format(opt.load_weights_folder))

    filenames = readlines(
        os.path.join(splits_dir, opt.eval_split, "test_files.txt"))

    if opt.bootstraps > 1:
        # prepare multiple checkpoint paths from different trainings
        boot_dirs = [
            os.path.join(opt.load_weights_folder, "boot_%d" % b, "weights_19")
            for b in range(1, opt.bootstraps + 1)
        ]
        encoder_path = [os.path.join(d, "encoder.pth") for d in boot_dirs]
        decoder_path = [os.path.join(d, "depth.pth") for d in boot_dirs]
        encoder_dict = [torch.load(path) for path in encoder_path]
        height = encoder_dict[0]['height']
        width = encoder_dict[0]['width']
    elif opt.snapshots > 1:
        # prepare multiple checkpoint paths from the same training
        snap_dirs = [
            os.path.join(opt.load_weights_folder, "weights_%d" % epoch)
            for epoch in range(opt.num_epochs - opt.snapshots, opt.num_epochs)
        ]
        encoder_path = [os.path.join(d, "encoder.pth") for d in snap_dirs]
        decoder_path = [os.path.join(d, "depth.pth") for d in snap_dirs]
        encoder_dict = [torch.load(path) for path in encoder_path]
        height = encoder_dict[0]['height']
        width = encoder_dict[0]['width']
    else:
        # prepare just a single path
        encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
        decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")
        encoder_dict = torch.load(encoder_path)
        height = encoder_dict['height']
        width = encoder_dict['width']

    img_ext = '.png' if opt.png else '.jpg'
    dataset = datasets.KITTIRAWDataset(opt.data_path, filenames, height,
                                       width, [0], 4, is_train=False,
                                       img_ext=img_ext)
    dataloader = DataLoader(dataset, 1, shuffle=False,
                            num_workers=opt.num_workers, pin_memory=True,
                            drop_last=False)

    if nets > 1:
        # load multiple encoders and decoders
        encoder = []
        depth_decoder = []
        for enc_weights, dec_path in zip(encoder_dict, decoder_path):
            enc = legacy.ResnetEncoder(opt.num_layers, False)
            dec = networks.DepthUncertaintyDecoder(
                enc.num_ch_enc, num_output_channels=1,
                uncert=(opt.log or opt.repr), dropout=opt.dropout)

            # keep only the checkpoint entries the network actually owns
            model_dict = enc.state_dict()
            enc.load_state_dict(
                {k: v for k, v in enc_weights.items() if k in model_dict})
            dec.load_state_dict(torch.load(dec_path))

            enc.cuda()
            enc.eval()
            dec.cuda()
            dec.eval()
            encoder.append(enc)
            depth_decoder.append(dec)
    else:
        # load a single encoder and decoder
        encoder = legacy.ResnetEncoder(opt.num_layers, False)
        depth_decoder = networks.DepthUncertaintyDecoder(
            encoder.num_ch_enc, num_output_channels=1,
            uncert=(opt.log or opt.repr), dropout=opt.dropout)

        model_dict = encoder.state_dict()
        encoder.load_state_dict(
            {k: v for k, v in encoder_dict.items() if k in model_dict})
        depth_decoder.load_state_dict(torch.load(decoder_path))

        encoder.cuda()
        encoder.eval()
        depth_decoder.cuda()
        depth_decoder.eval()

    # accumulators for depth and uncertainties
    pred_disps = []
    pred_uncerts = []

    print("-> Computing predictions with size {}x{}".format(width, height))
    with torch.no_grad():
        bar = progressbar.ProgressBar(max_value=len(dataloader))
        for i, data in enumerate(dataloader):

            input_color = data[("color", 0, 0)].cuda()

            # updating progress bar
            bar.update(i)

            if opt.post_process:
                # post-processed results require each image to have two
                # forward passes (original + horizontally flipped)
                input_color = torch.cat(
                    (input_color, torch.flip(input_color, [3])), 0)

            if nets > 1:
                # infer multiple predictions from multiple networks
                disps_distribution = []
                uncerts_distribution = []
                for enc, dec in zip(encoder, depth_decoder):
                    output = dec(enc(input_color))
                    disps_distribution.append(
                        torch.unsqueeze(output[("disp", 0)], 0))
                    if opt.log:
                        uncerts_distribution.append(
                            torch.unsqueeze(
                                torch.exp(output[("uncert", 0)]), 0))

                disps_distribution = torch.cat(disps_distribution, 0)
                if opt.log:
                    # bayesian uncertainty
                    pred_uncert = torch.var(
                        disps_distribution, dim=0, keepdim=False) + torch.sum(
                            torch.cat(uncerts_distribution, 0), dim=0,
                            keepdim=False)
                else:
                    # uncertainty as variance of the predictions
                    pred_uncert = torch.var(disps_distribution, dim=0,
                                            keepdim=False)
                pred_uncert = pred_uncert.cpu()[0].numpy()
                output = torch.mean(disps_distribution, dim=0, keepdim=False)
                pred_disp, _ = disp_to_depth(output, opt.min_depth,
                                             opt.max_depth)

            elif opt.dropout:
                # infer multiple predictions from the same network with
                # dropout; 8 passes, matching the number of bootstraps and
                # snapshots
                disps_distribution = []
                for _ in range(8):
                    output = depth_decoder(encoder(input_color))
                    disps_distribution.append(
                        torch.unsqueeze(output[("disp", 0)], 0))
                disps_distribution = torch.cat(disps_distribution, 0)

                # uncertainty as variance of the predictions
                pred_uncert = torch.var(disps_distribution, dim=0,
                                        keepdim=False).cpu()[0].numpy()

                # depth as mean of the predictions
                output = torch.mean(disps_distribution, dim=0, keepdim=False)
                pred_disp, _ = disp_to_depth(output, opt.min_depth,
                                             opt.max_depth)

            else:
                output = depth_decoder(encoder(input_color))
                pred_disp, _ = disp_to_depth(output[("disp", 0)],
                                             opt.min_depth, opt.max_depth)
                if opt.log:
                    # log-likelihood maximization
                    pred_uncert = torch.exp(
                        output[("uncert", 0)]).cpu()[:, 0].numpy()
                elif opt.repr:
                    # learned reprojection
                    pred_uncert = (output[("uncert", 0)]).cpu()[:, 0].numpy()

            pred_disp = pred_disp.cpu()[:, 0].numpy()

            if opt.post_process:
                # applying Monodepthv1 post-processing to improve depth and
                # get uncertainty (replaces any uncertainty computed above)
                N = pred_disp.shape[0] // 2
                flipped_back = pred_disp[N:, :, ::-1]
                pred_uncert = np.abs(pred_disp[:N] - flipped_back)
                pred_disp = batch_post_process_disparity(
                    pred_disp[:N], flipped_back)

            pred_disps.append(pred_disp)

            # uncertainty normalization
            if normalize_uncert:
                pred_uncert = _normalize_uncertainty(pred_uncert)

            # Store exactly one uncertainty map per image so that
            # pred_uncerts stays index-aligned with pred_disps (the map used
            # to be appended twice when post-processing was combined with
            # another uncertainty source).
            if do_uncert:
                pred_uncerts.append(pred_uncert)

    pred_disps = np.concatenate(pred_disps)
    if do_uncert:
        pred_uncerts = np.concatenate(pred_uncerts)

    # saving 16 bit depth and uncertainties
    print("-> Saving 16 bit maps")

    # Ground truth is only needed to scale the mono / stereo outputs, so a
    # custom-scale run does not require the gt file to exist.
    if opt.eval_stereo or opt.eval_mono:
        gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz")
        gt_depths = np.load(gt_path, fix_imports=True, encoding='latin1',
                            allow_pickle=True)["data"]

    raw_disp_dir = os.path.join(opt.output_dir, "raw", "disp")
    raw_uncert_dir = os.path.join(opt.output_dir, "raw", "uncert")
    qual_disp_dir = os.path.join(opt.output_dir, "qual", "disp")
    qual_uncert_dir = os.path.join(opt.output_dir, "qual", "uncert")

    os.makedirs(raw_disp_dir, exist_ok=True)
    os.makedirs(raw_uncert_dir, exist_ok=True)
    if opt.qual:
        os.makedirs(qual_disp_dir, exist_ok=True)
        if do_uncert:
            os.makedirs(qual_uncert_dir, exist_ok=True)

    bar = progressbar.ProgressBar(max_value=len(pred_disps))
    for i in range(len(pred_disps)):
        bar.update(i)
        map_name = '%06d_10.png' % i

        if opt.eval_stereo:
            # save images scaling with KITTI baseline
            scaled = (pred_disps[i]
                      * (dataset.K[0][0] * gt_depths[i].shape[1])
                      * 256. / 10)
        elif opt.eval_mono:
            # save images scaling with ground truth median
            ratio = get_mono_ratio(pred_disps[i], gt_depths[i])
            scaled = (pred_disps[i]
                      * (dataset.K[0][0] * gt_depths[i].shape[1])
                      * 256. / ratio / 10.)
        else:
            # save images scaling with custom factor
            scaled = pred_disps[i] * (opt.custom_scale) * 256. / 10
        cv2.imwrite(os.path.join(raw_disp_dir, map_name),
                    scaled.astype(np.uint16))

        if do_uncert:
            # save uncertainties, stretched over the full 16 bit range
            cv2.imwrite(
                os.path.join(raw_uncert_dir, map_name),
                (pred_uncerts[i] * (256 * 256 - 1)).astype(np.uint16))

        if opt.qual:
            # save colored depth maps
            plt.imsave(os.path.join(qual_disp_dir, map_name),
                       pred_disps[i], cmap='magma')
            if do_uncert:
                # save colored uncertainty maps
                plt.imsave(os.path.join(qual_uncert_dir, map_name),
                           pred_uncerts[i], cmap='hot')

    # see you next time!
    print("\n-> Done!")
def test_simple(image_path, image_size, model_name):
    """Function to predict for a single image or folder of images

    Runs on the CPU. For every input image a ``<name>_disp.npy`` file with
    the scaled disparity and a colour-mapped ``<name>_disp.jpeg`` are saved
    next to the input.

    Args:
        image_path: Path to one image file, or to a folder that is searched
            for ``*.jpg`` files.
        image_size: Size passed to PIL ``resize`` right after the image is
            opened; the disparity is resized back to this size for the
            colour-mapped output.
        model_name: Name of the model folder under ``models``.

    Returns:
        The colour-mapped disparity (uint8 array) of the last processed
        image, or ``None`` when no image was processed.

    Raises:
        Exception: If ``image_path`` is neither a file nor a directory.
    """
    device = torch.device("cpu")

    download_model_if_doesnt_exist(model_name)
    model_path = os.path.join("models", model_name)
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # LOADING PRETRAINED MODEL
    print(" Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    encoder_keys = encoder.state_dict()
    filtered_dict_enc = {
        k: v for k, v in loaded_dict_enc.items() if k in encoder_keys
    }
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    print(" Loading pretrained decoder")
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    depth_decoder.to(device)
    depth_decoder.eval()

    # FINDING INPUT IMAGES
    if os.path.isfile(image_path):
        # Only testing on a single image
        paths = [image_path]
        output_directory = os.path.dirname(image_path)
    elif os.path.isdir(image_path):
        # Searching folder for images
        paths = glob.glob(os.path.join(image_path, '*.jpg'))
        output_directory = image_path
    else:
        raise Exception("Can not find image_path: {}".format(image_path))

    print("-> Predicting on {:d} test images".format(len(paths)))

    # Stays None when the folder is empty or every file is skipped, so the
    # final return no longer hits an unbound local.
    colormapped_im = None

    # PREDICTING ON EACH IMAGE IN TURN
    with torch.no_grad():
        # A dedicated loop name avoids clobbering the image_path parameter.
        for idx, current_path in enumerate(paths):

            if current_path.endswith("_disp.jpg"):
                # don't try to predict disparity for a disparity image!
                continue

            # Load image and preprocess
            input_image = pil.open(current_path).resize(image_size).convert(
                'RGB')
            original_width, original_height = input_image.size
            input_image = input_image.resize((feed_width, feed_height),
                                             pil.LANCZOS)
            input_image = transforms.ToTensor()(input_image).unsqueeze(0)

            # PREDICTION
            input_image = input_image.to(device)
            features = encoder(input_image)
            outputs = depth_decoder(features)

            disp = outputs[("disp", 0)]
            disp_resized = torch.nn.functional.interpolate(
                disp, (original_height, original_width), mode="bilinear",
                align_corners=False)

            # Saving numpy file
            output_name = os.path.splitext(
                os.path.basename(current_path))[0]
            name_dest_npy = os.path.join(output_directory,
                                         "{}_disp.npy".format(output_name))
            scaled_disp, _ = disp_to_depth(disp, 0.1, 100)
            np.save(name_dest_npy, scaled_disp.cpu().numpy())

            # Saving colormapped depth image; the colour scale is capped at
            # the 95th percentile so a few large values do not wash it out
            disp_resized_np = disp_resized.squeeze().cpu().numpy()
            vmax = np.percentile(disp_resized_np, 95)
            normalizer = mpl.colors.Normalize(vmin=disp_resized_np.min(),
                                              vmax=vmax)
            mapper = cm.ScalarMappable(norm=normalizer, cmap='magma')
            colormapped_im = (mapper.to_rgba(disp_resized_np)[:, :, :3] *
                              255).astype(np.uint8)
            im = pil.fromarray(colormapped_im)

            name_dest_im = os.path.join(output_directory,
                                        "{}_disp.jpeg".format(output_name))
            im.save(name_dest_im)

            print(" Processed {:d} of {:d} images - saved prediction to {}".
                  format(idx + 1, len(paths), name_dest_im))

    print('-> Done!')
    return colormapped_im
def _colormap_to_image(values, cmap):
    """Turn a 2-D array into an RGB PIL image, capping colours at the 95th percentile."""
    normalizer = mpl.colors.Normalize(vmin=values.min(),
                                      vmax=np.percentile(values, 95))
    mapper = cm.ScalarMappable(norm=normalizer, cmap=cmap)
    rgb = (mapper.to_rgba(values)[:, :, :3] * 255).astype(np.uint8)
    return pil.fromarray(rgb)


def test_simple(args):
    """Function to predict for a single image or folder of images

    For every input image a colour-mapped disparity (``<name>_disp.jpeg``)
    and a colour-mapped uncertainty (``<name>_uncert.jpeg``) are written
    next to the input.

    Args:
        args: Options object. Attributes read here: ``no_cuda``,
            ``load_weights_folder`` (folder holding ``encoder.pth`` and
            ``depth.pth``), ``num_layers``, ``dropout``, ``image_path``
            (a file or a folder) and ``ext`` (extension globbed for when
            ``image_path`` is a folder).

    Raises:
        Exception: If ``args.image_path`` is neither a file nor a directory.
    """
    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    print("-> Loading weights from ", args.load_weights_folder)
    encoder_path = os.path.join(args.load_weights_folder, "encoder.pth")
    depth_decoder_path = os.path.join(args.load_weights_folder, "depth.pth")

    # LOADING PRETRAINED MODEL
    print(" Loading pretrained encoder")
    encoder = legacy.ResnetEncoder(args.num_layers, False)
    # map_location lets checkpoints saved on a GPU load on the CPU-only
    # path selected above (without it torch.load tries to restore the
    # tensors to the device they were saved from).
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    encoder_keys = encoder.state_dict()
    filtered_dict_enc = {
        k: v for k, v in loaded_dict_enc.items() if k in encoder_keys
    }
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    print(" Loading pretrained decoder")
    depth_decoder = networks.DepthUncertaintyDecoder(encoder.num_ch_enc,
                                                     num_output_channels=1,
                                                     uncert=True,
                                                     dropout=args.dropout)
    depth_decoder.load_state_dict(
        torch.load(depth_decoder_path, map_location=device))

    depth_decoder.to(device)
    depth_decoder.eval()

    # FINDING INPUT IMAGES
    if os.path.isfile(args.image_path):
        # Only testing on a single image
        paths = [args.image_path]
        output_directory = os.path.dirname(args.image_path)
    elif os.path.isdir(args.image_path):
        # Searching folder for images
        paths = glob.glob(
            os.path.join(args.image_path, '*.{}'.format(args.ext)))
        output_directory = args.image_path
    else:
        raise Exception(
            "Can not find args.image_path: {}".format(args.image_path))

    print("-> Predicting on {:d} test images".format(len(paths)))

    # Outputs are written as *_disp.jpeg / *_uncert.jpeg, so with
    # ext == "jpeg" a second run over the same folder would otherwise feed
    # its own results back in. "_disp.jpg" is kept from the original check.
    generated_suffixes = ("_disp.jpg", "_disp.jpeg", "_uncert.jpeg")

    # PREDICTING ON EACH IMAGE IN TURN
    with torch.no_grad():
        for idx, image_path in enumerate(paths):

            if image_path.endswith(generated_suffixes):
                # don't try to predict disparity for a disparity image!
                continue

            # Load image and preprocess
            input_image = pil.open(image_path).convert('RGB')
            original_width, original_height = input_image.size
            input_image = input_image.resize((feed_width, feed_height),
                                             pil.LANCZOS)
            input_image = transforms.ToTensor()(input_image).unsqueeze(0)

            # PREDICTION
            input_image = input_image.to(device)
            features = encoder(input_image)
            outputs = depth_decoder(features)

            # Bring both outputs back to the input image resolution
            disp = outputs[("disp", 0)]
            disp_resized = torch.nn.functional.interpolate(
                disp, (original_height, original_width), mode="bilinear",
                align_corners=False)

            uncert = outputs[("uncert", 0)]
            uncert_resized = torch.nn.functional.interpolate(
                uncert, (original_height, original_width), mode="bilinear",
                align_corners=False)

            output_name = os.path.splitext(os.path.basename(image_path))[0]

            # Saving colormapped depth image
            disp_im = _colormap_to_image(
                disp_resized.squeeze().cpu().numpy(), 'magma')
            name_dest_im = os.path.join(output_directory,
                                        "{}_disp.jpeg".format(output_name))
            disp_im.save(name_dest_im)

            # Saving colormapped uncertainty image
            uncert_im = _colormap_to_image(
                uncert_resized.squeeze().cpu().numpy(), 'hot')
            name_uncert_im = os.path.join(
                output_directory, "{}_uncert.jpeg".format(output_name))
            uncert_im.save(name_uncert_im)

    print('-> Done!')