def __test_simple__(self):
    """Predict a disparity map for every image listed in ``self.paths``.

    Each image is read with OpenCV, resized to
    ``(self.feed_width, self.feed_height)`` and passed through
    ``self.encoder`` and ``self.depth_decoder`` on ``self.device``.
    For every image two files are written to ``self.output_directory``:

    * ``<name>_disp.npy``  - disparity scaled by ``disp_to_depth(disp, 0.1, 100)``
    * ``<name>_disp.jpeg`` - magma colour-mapped disparity at 192x640

    Processing is best effort: an image that cannot be read or processed is
    reported on stdout and skipped, and the loop continues with the next one.
    """
    with torch.no_grad():
        for image_path in self.paths:
            try:
                # Load image and preprocess.
                # NOTE(review): cv2.imread yields BGR channel order - confirm
                # the network was trained with the same order (PIL-based
                # loaders feed RGB).
                input_image = cv2.imread(image_path)
                if input_image is None:
                    # cv2.imread reports a missing/unreadable file by
                    # returning None instead of raising.
                    print("File is not found.")
                    continue
                input_image = cv2.resize(
                    input_image, (self.feed_width, self.feed_height))
                input_image = transforms.ToTensor()(input_image).unsqueeze(0)

                # PREDICTION
                input_image = input_image.to(self.device)
                features = self.encoder(input_image)
                disp = self.depth_decoder(features[0], features[1],
                                          features[2], features[3],
                                          features[4])
                # The colour-mapped output is always rendered at 192x640.
                disp_resized = torch.nn.functional.interpolate(
                    disp, (192, 640), mode="bilinear", align_corners=False)

                # Saving numpy file
                output_name = os.path.splitext(
                    os.path.basename(image_path))[0]
                name_dest_npy = os.path.join(
                    self.output_directory, "{}_disp.npy".format(output_name))
                scaled_disp, _ = disp_to_depth(disp, 0.1, 100)
                np.save(name_dest_npy, scaled_disp.cpu().numpy())

                # Saving colormapped depth image; the 95th percentile is used
                # as the upper bound so a few outliers do not flatten the map.
                disp_resized_np = disp_resized.squeeze().cpu().numpy()
                vmax = np.percentile(disp_resized_np, 95)
                normalizer = mpl.colors.Normalize(
                    vmin=disp_resized_np.min(), vmax=vmax)
                mapper = cm.ScalarMappable(norm=normalizer, cmap='magma')
                colormapped_im = (mapper.to_rgba(disp_resized_np)[:, :, :3]
                                  * 255).astype(np.uint8)
                im = pil.fromarray(colormapped_im)
                name_dest_im = os.path.join(
                    self.output_directory, "{}_disp.jpeg".format(output_name))
                im.save(name_dest_im)
            except Exception as err:
                # Keep going with the remaining images, but say what actually
                # went wrong instead of always blaming a missing file.
                # KeyboardInterrupt/SystemExit are deliberately not caught.
                print("Failed to process {}: {}".format(image_path, err))
def prediction(opts):
    """Run a pretrained depth model over a test split and save disparity images.

    Args:
        opts: nested options dict. Keys read here: ``min_depth``,
            ``max_depth``, ``feed_height``, ``feed_width``, ``metric_mode``,
            ``frame_sides``, ``out_dir_base``, ``out_dir``,
            ``dataset`` (``path``, ``batch_size``, ``num_workers``, ``type``,
            ``split.path``, ``split.test_file``) and
            ``model`` (``load_paths``, ``mode``).

    Raises:
        ValueError: if ``opts['dataset']['type']`` is neither ``'mc'`` nor
            ``'kitti'``.

    One magma colour-mapped PNG per test file is written to
    ``out_dir_base / opts['out_dir']`` (or ``out_dir_base / <dataset dir
    stem>`` when ``opts['out_dir']`` is empty).
    """
    min_depth = opts['min_depth']
    max_depth = opts['max_depth']
    batch_size = opts['dataset']['batch_size']
    num_workers = opts['dataset']['num_workers']
    feed_height = opts['feed_height']
    feed_width = opts['feed_width']
    metric_mode = opts['metric_mode']
    framework_mode = opts['model']['mode']

    data_path = Path(opts['dataset']['path'])
    lines = Path(opts['dataset']['split']['path']) / \
        opts['dataset']['split']['test_file']
    model_path = opts['model']['load_paths']
    frame_sides = opts['frame_sides']
    out_dir_base = Path(opts['out_dir_base'])

    encoder, decoder = model_init(model_path, mode=framework_mode)
    file_names = readlines(lines)

    print('-> dataset_path:{}'.format(data_path))
    print('-> model_path')
    for v in opts['model']['load_paths'].values():
        print('\t' + str(v))
    print("-> metrics mode: {}".format(metric_mode))
    print("-> data split:{}".format(lines))
    print('-> total:{}'.format(len(file_names)))

    # Sorted so that the i-th dataset sample corresponds to file_names[i]
    # when the outputs are named below.
    file_names.sort()

    dataset_type = opts['dataset']['type']
    if dataset_type == 'mc':
        dataset_cls = datasets.MCDataset
    elif dataset_type == 'kitti':
        dataset_cls = datasets.KITTIRAWDataset
    else:
        # Previously this fell through and crashed later with an unbound
        # `dataset` name.
        raise ValueError("unsupported dataset type: {}".format(dataset_type))
    dataset = dataset_cls(data_path=data_path,
                          filenames=file_names,
                          height=feed_height,
                          width=feed_width,
                          frame_sides=frame_sides,
                          num_scales=1,
                          mode="prediction")

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=True,
                            drop_last=False)

    if opts['out_dir']:
        out_dir = out_dir_base / opts['out_dir']
    else:
        out_dir = out_dir_base / data_path.stem
    out_dir.mkdir_p()

    out_shows = []
    with torch.no_grad():  # inference only; no autograd graph needed
        for data in tqdm(dataloader):
            input_color = input_frames(data,
                                       mode=framework_mode,
                                       frame_sides=frame_sides)
            features = encoder(input_color)
            disp = decoder(*features)
            pred_disp, _ = disp_to_depth(disp,
                                         min_depth=min_depth,
                                         max_depth=max_depth)
            # Channel 0 of every sample in the batch; extend() adds one 2-D
            # map per sample so batches larger than 1 are not truncated.
            out_shows.extend(pred_disp.cpu()[:, 0].numpy())

    # The loader is not shuffled, so predictions line up with file_names.
    for file_name, item in zip(file_names, out_shows):
        depth_name = file_name.replace('/', '_').replace('.png', 'depth')
        plt.imsave(out_dir / depth_name + '.png', item, cmap='magma')
def evaluate(opts):
    """Run a pretrained model on a test split and write depth/disparity images.

    Args:
        opts: nested options dict. Keys read here: ``min_depth``,
            ``max_depth``, ``feed_height``, ``feed_width``, ``out_dir``,
            ``sub_dirs``, ``frame_sides``,
            ``dataset`` (``path``, ``batch_size``, ``num_workers``,
            ``full_width``, ``full_height``, ``type``, ``split.path``,
            ``split.test_file``) and ``model`` (``load_paths``,
            ``encoder_mode``).

    Raises:
        ValueError: if ``opts['dataset']['type']`` is not one of ``'mc'``,
            ``'kitti'`` or ``'custom_mono'``.

    For every test file, a normalised image resized to
    ``(full_width, full_height)`` is written to ``out_dir/depth`` and/or
    ``out_dir/disp``, depending on which names appear in
    ``opts['sub_dirs']``.
    """
    MIN_DEPTH = opts['min_depth']
    MAX_DEPTH = opts['max_depth']
    batch_size = opts['dataset']['batch_size']
    num_workers = opts['dataset']['num_workers']
    feed_height = opts['feed_height']
    feed_width = opts['feed_width']
    full_width = opts['dataset']['full_width']
    full_height = opts['dataset']['full_height']

    out_dir = Path(opts['out_dir'])
    out_dir.mkdir_p()
    sub_dirs = opts['sub_dirs']
    for item in sub_dirs:
        (out_dir / item).mkdir_p()

    data_path = Path(opts['dataset']['path'])
    lines = Path(opts['dataset']['split']['path']) / \
        opts['dataset']['split']['test_file']
    model_path = opts['model']['load_paths']
    encoder_mode = opts['model']['encoder_mode']
    frame_sides = opts['frame_sides']

    encoder, decoder = model_init(model_path, mode=encoder_mode)
    file_names = readlines(lines)

    print('-> dataset_path:{}'.format(data_path))
    print('-> model_path')
    for v in opts['model']['load_paths'].values():
        print('\t' + str(v))
    print("-> data split:{}".format(lines))
    print('-> total:{}'.format(len(file_names)))

    dataset_type = opts['dataset']['type']
    if dataset_type == 'mc':
        dataset_cls = datasets.MCDataset
    elif dataset_type == 'kitti':
        dataset_cls = datasets.KITTIRAWDataset
    elif dataset_type == 'custom_mono':
        dataset_cls = datasets.CustomMonoDataset
    else:
        # Previously this fell through and crashed later with an unbound
        # `dataset` name.
        raise ValueError("unsupported dataset type: {}".format(dataset_type))
    dataset = dataset_cls(data_path=data_path,
                          filenames=file_names,
                          height=feed_height,
                          width=feed_width,
                          frame_sides=frame_sides,
                          num_scales=1,
                          mode="test")

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=True,
                            drop_last=False)

    save_depth = "depth" in sub_dirs
    save_disp = "disp" in sub_dirs

    # Running sample index; the loader is not shuffled, so sample `idx`
    # corresponds to file_names[idx].
    idx = 0
    with torch.no_grad():  # inference only; no autograd graph needed
        for data in tqdm(dataloader):
            input_color = reframe(encoder_mode,
                                  data,
                                  frame_sides=frame_sides,
                                  key='color')
            input_color = input_color.cuda()

            features = encoder(input_color)
            disp = decoder(*features)

            pred_disp, pred_depth = disp_to_depth(disp,
                                                  min_depth=MIN_DEPTH,
                                                  max_depth=MAX_DEPTH)
            # Channel 0 of every sample, one 2-D map per sample.
            depth_maps = pred_depth.cpu()[:, 0].numpy()
            disp_maps = pred_disp.cpu()[:, 0].numpy()

            # Write every sample of the batch, not just the first one, so
            # batch sizes larger than 1 stay aligned with file_names.
            for depth_map, disp_map in zip(depth_maps, disp_maps):
                out_name = file_names[idx].replace('/', '_')
                if save_depth:
                    depth_img = cv2.resize(depth_map,
                                           (full_width, full_height))
                    depth_img = np_normalize_image(depth_img)
                    cv2.imwrite(out_dir / "depth" / out_name,
                                depth_img * 255)
                if save_disp:
                    disp_img = cv2.resize(disp_map,
                                          (full_width, full_height))
                    disp_img = np_normalize_image(disp_img)
                    cv2.imwrite(out_dir / "disp" / out_name, disp_img * 255)
                idx += 1
def evaluate(opts):
    """Evaluate a pretrained model against ground-truth depth on a test split.

    Args:
        opts: nested options dict. Keys read here: ``min_depth``,
            ``max_depth``, ``feed_height``, ``feed_width``, ``metric_mode``,
            ``frame_sides``,
            ``dataset`` (``path``, ``batch_size``, ``num_workers``, ``type``,
            ``split.path``, ``split.test_file``) and ``model``
            (``load_paths``, ``encoder_mode``).

    Raises:
        ValueError: if ``opts['dataset']['type']`` is neither ``'mc'`` nor
            ``'kitti'``.

    Each prediction is resized to its ground-truth resolution, masked to the
    valid depth range (plus a fixed crop for KITTI), median-scaled to the
    ground truth and clamped to ``[min_depth, max_depth]`` before
    ``compute_errors`` is applied. The mean metrics and the statistics of the
    scaling ratios are printed; nothing is returned.
    """
    MIN_DEPTH = opts['min_depth']
    MAX_DEPTH = opts['max_depth']
    batch_size = opts['dataset']['batch_size']
    num_workers = opts['dataset']['num_workers']
    feed_height = opts['feed_height']
    feed_width = opts['feed_width']
    metric_mode = opts['metric_mode']

    data_path = Path(opts['dataset']['path'])
    lines = Path(opts['dataset']['split']['path']) / \
        opts['dataset']['split']['test_file']
    model_path = opts['model']['load_paths']
    encoder_mode = opts['model']['encoder_mode']
    frame_sides = opts['frame_sides']

    encoder, decoder = model_init(model_path, mode=encoder_mode)
    file_names = readlines(lines)

    print('-> dataset_path:{}'.format(data_path))
    print('-> model_path')
    for v in opts['model']['load_paths'].values():
        print('\t' + str(v))
    print("-> metrics mode: {}".format(metric_mode))
    print("-> data split:{}".format(lines))
    print('-> total:{}'.format(len(file_names)))

    dataset_type = opts['dataset']['type']
    if dataset_type == 'mc':
        dataset_cls = datasets.MCDataset
    elif dataset_type == 'kitti':
        dataset_cls = datasets.KITTIRAWDataset
    else:
        # Previously this fell through and crashed later with an unbound
        # `dataset` name.
        raise ValueError("unsupported dataset type: {}".format(dataset_type))
    dataset = dataset_cls(data_path=data_path,
                          filenames=file_names,
                          height=feed_height,
                          width=feed_width,
                          frame_sides=frame_sides,
                          num_scales=1,
                          mode="test")

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=True,
                            drop_last=False)

    pred_depths = []
    gt_depths = []
    with torch.no_grad():  # inference only; no autograd graph needed
        for data in tqdm(dataloader):
            # Feed the frames of the current batch. A debugging leftover used
            # to feed one hard-coded image from a developer's home directory
            # here, so every prediction ignored the dataset.
            input_color = reframe(encoder_mode,
                                  data,
                                  frame_sides=frame_sides,
                                  key='color')
            input_color = input_color.cuda()

            features = encoder(input_color)
            disp = decoder(*features)
            depth_gt = data['depth_gt']

            _, pred_depth = disp_to_depth(disp,
                                          min_depth=MIN_DEPTH,
                                          max_depth=MAX_DEPTH)
            pred_depths.append(pred_depth.cpu()[:, 0].numpy())
            gt_depths.append(depth_gt.cpu()[:, 0].numpy())

    gt_depths = np.concatenate(gt_depths, axis=0)
    pred_depths = np.concatenate(pred_depths, axis=0)

    metrics = []
    ratios = []
    for gt, pred in zip(gt_depths, pred_depths):
        gt_height, gt_width = gt.shape[:2]
        pred = cv2.resize(pred, (gt_width, gt_height))

        # Only pixels whose ground truth lies strictly inside the valid
        # depth range take part in the evaluation.
        mask = np.logical_and(gt > MIN_DEPTH, gt < MAX_DEPTH)
        if dataset_type == "kitti":
            # Fixed fractional crop applied on top of the range mask
            # (original author marked this as possibly legacy).
            crop = np.array([
                0.40810811 * gt_height, 0.99189189 * gt_height,
                0.03594771 * gt_width, 0.96405229 * gt_width
            ]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)

        # Boolean indexing also flattens both arrays to 1-D.
        pred = pred[mask]
        gt = gt[mask]

        # Median scaling: at evaluation time the prediction is rescaled
        # linearly so that its median matches the ground truth's.
        ratio = np.median(gt) / np.median(pred)
        ratios.append(ratio)
        pred *= ratio

        # Clamp the scaled prediction to the valid depth range.
        pred[pred < MIN_DEPTH] = MIN_DEPTH
        pred[pred > MAX_DEPTH] = MAX_DEPTH

        metrics.append(compute_errors(gt, pred, mode=metric_mode))

    metrics = np.array(metrics)
    mean_metrics = np.mean(metrics, axis=0)
    # Printed as a LaTeX table row; the format expects seven metric values.
    print(("&{: 8.3f} " * 7).format(*mean_metrics.tolist()) + "\\\\")

    ratios = np.array(ratios)
    median = np.median(ratios)
    print("\n Scaling ratios | med: {:0.3f} | std: {:0.3f}\n".format(
        median, np.std(ratios / median)))
def main(args):
    """Predict disparity for every image of a split and save colour maps.

    Args:
        args: parsed command-line namespace. Attributes read here:
            ``dataset_path``, ``no_cuda``, ``model_path``, ``model_name``,
            ``split``, ``txt_files``, ``out_path`` (may be ``None``) and
            ``npy_out``.

    Raises:
        FileNotFoundError: if the model directory does not exist.
        ValueError: if a KITTI-style split line has a side flag other than
            ``'l'`` or ``'r'``.

    For each image a magma colour-mapped PNG is written to the output
    directory (``args.out_path`` or ``./<dataset dir stem>_out``); when
    ``args.npy_out`` is set the scaled disparity is saved as ``.npy`` too.
    """
    print(args.dataset_path)

    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    model_path = Path(args.model_path) / args.model_name
    if not model_path.exists():
        # Fail here with a clear message instead of printing and then
        # crashing inside torch.load.
        raise FileNotFoundError(str(model_path) + " does not exists")

    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # 1 LOADING PRETRAINED MODEL
    # 1.1 encoder
    print(" Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    # The checkpoint also carries non-weight entries (e.g. height/width), so
    # keep only keys the encoder actually has.
    encoder_keys = encoder.state_dict()
    filtered_dict_enc = {
        k: v for k, v in loaded_dict_enc.items() if k in encoder_keys
    }
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    # 1.2 decoder
    print(" Loading pretrained decoder")
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))
    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)
    depth_decoder.to(device)
    depth_decoder.eval()

    # 2. FINDING INPUT IMAGES
    dataset_path = Path(args.dataset_path)
    root = Path(os.path.dirname(__file__))
    txt = root / 'splits' / args.split / args.txt_files
    print('-> inference file: ', txt)
    rel_paths = readlines(txt)

    if args.out_path is not None:
        out_path = Path(args.out_path)
    else:
        out_path = Path('./' + dataset_path.stem + '_out')
    out_path.mkdir_p()

    # Turn the relative entries of the split file into image paths.
    files = []
    if args.split in ['custom', 'custom_lite', 'eigen', 'eigen_zhou']:
        # KITTI-style lines: "<folder> <frame index> <l|r>"
        # NOTE(review): KITTI raw usually stores the right colour camera in
        # image_03 - confirm that 'image_01' is intended for 'r'.
        cameras = {'l': 'image_02', 'r': 'image_01'}
        for item in rel_paths:
            parts = item.split(' ')
            if parts[2] not in cameras:
                # Previously an unknown flag silently reused the camera of
                # the preceding line (or crashed on the first line).
                raise ValueError(
                    "unknown camera side {!r} in line {!r}".format(
                        parts[2], item))
            files.append(dataset_path / parts[0] / cameras[parts[2]] /
                         'data' / "{:010d}.png".format(int(parts[1])))
    elif args.split == 'mc':
        # mc entries stay relative; they are resolved when opened below.
        files.extend(rel_paths)
    else:
        # 'visdrone', 'visdrone_lite' and every other split use
        # "<dir>/<frame>" entries pointing at .jpg files. (The original
        # `== 'visdrone' or 'visdrone_lite'` test was always true, so all
        # remaining splits already took this path.)
        for item in rel_paths:
            parts = item.split('/')
            files.append(dataset_path / parts[0] / parts[1] + '.jpg')

    cnt = 0

    # 3. PREDICTING ON EACH IMAGE IN TURN
    print('\n-> inference ' + args.dataset_path)
    files.sort()
    with torch.no_grad():  # inference only; no autograd graph needed
        for image_path in tqdm(files):
            # Load image and preprocess
            if args.split == 'mc':
                input_image = pil.open(dataset_path / image_path +
                                       '.png').convert('RGB')
            else:
                input_image = pil.open(image_path).convert('RGB')
            original_width, original_height = input_image.size
            input_image = input_image.resize((feed_width, feed_height),
                                             pil.LANCZOS)
            input_image = transforms.ToTensor()(input_image).unsqueeze(0)

            # PREDICTION
            input_image = input_image.to(device)
            features = encoder(input_image)
            outputs = depth_decoder(features)
            cnt += 1

            disp = outputs[("disp", 0)]
            # Back to the resolution of the source image for display.
            disp_resized = torch.nn.functional.interpolate(
                disp, (original_height, original_width),
                mode="bilinear",
                align_corners=False)

            # Derive a flat output name from the image location.
            if args.split == 'eigen' or args.split == 'custom':
                output_name = str(image_path).split('/')[-4] + '_{}'.format(
                    image_path.stem)
            elif args.split == 'mc':
                output_name = str(image_path).replace('/', '_') + '.png'
            else:
                # Drop the file extension. str.strip('.jpg') removed any of
                # the characters '.', 'j', 'p', 'g' from BOTH ends and so
                # mangled names such as "group/img.jpg".
                rel_name = os.path.splitext(
                    str(image_path.relpath(dataset_path)))[0]
                output_name = rel_name.replace('/', '_')

            if args.npy_out:
                name_dest_npy = os.path.join(
                    out_path, "{}_disp.npy".format(output_name))
                scaled_disp, _ = disp_to_depth(disp, 0.1, 100)
                np.save(name_dest_npy, scaled_disp.cpu().numpy())

            # Saving colormapped depth image; the 95th percentile caps the
            # colour range so a few outliers do not flatten the map.
            disp_resized_np = disp_resized.squeeze().cpu().numpy()
            vmax = np.percentile(disp_resized_np, 95)
            name_dest_im = Path(out_path) / "{}.png".format(output_name)
            plt.imsave(name_dest_im, disp_resized_np, cmap='magma', vmax=vmax)

    print(cnt)
    # Report the directory actually used; args.out_path may be None.
    print('\n-> Done,save at ' + str(out_path))
def main_with_masks(args):
    """Predict depth and photometric-error masks for a split and save images.

    Args:
        args: parsed command-line namespace. Attributes read here:
            ``dataset_path``, ``no_cuda``, ``out_path``, ``results``,
            ``split``, ``txt_files``, ``wk_root``, ``as_name_sort``,
            ``frame_ids``, ``ext``, ``model_path`` and ``model_name``.

    Raises:
        ValueError: if ``args.split`` is not a supported split name.
        FileNotFoundError: if the model directory does not exist.

    For every remaining test frame, one PNG per entry of ``args.results``
    (``depth``, ``mean_mask``, ``identical_mask``, ``var_mask``,
    ``final_mask``) is written to ``args.out_path/<entry>/``.
    """
    print(args.dataset_path)

    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    out_path = Path(args.out_path)
    out_path.mkdir_p()
    dirs = {}
    for mask in args.results:
        dirs[mask] = out_path / mask
        dirs[mask].mkdir_p()

    print('-> split:{}'.format(args.split))
    print('-> save to {}'.format(args.out_path))

    kitti_splits = ['custom', 'custom_lite', 'eigen', 'eigen_zhou']
    visdrone_splits = ['visdrone', 'visdrone_lite']
    mc_splits = ['mc', 'mc_lite']

    if args.split in kitti_splits:
        min_depth = 0.1
        max_depth = 80
        dataset = KITTIRAWDataset
    elif args.split in visdrone_splits:
        min_depth = 0.1
        max_depth = 255
        dataset = VSDataset
    elif args.split in mc_splits:
        min_depth = 0.1
        max_depth = 255
        dataset = MCDataset
    else:
        # Previously this fell through and crashed later with an unbound
        # `dataset` name.
        raise ValueError("unsupported split: {}".format(args.split))

    # NOTE(review): the original code set per-split feed sizes and then
    # unconditionally overwrote them with 192x640; that effective behaviour
    # is kept here - confirm it is intended for non-KITTI splits.
    feed_height = 192
    feed_width = 640

    backproject_depth = BackprojectDepth(1, feed_height, feed_width).to(device)
    project_3d = Project3D(1, feed_height, feed_width)
    photometric_error = PhotometricError()

    # data
    test_path = Path(args.wk_root) / "splits" / args.split / args.txt_files
    test_filenames = readlines(test_path)
    if args.as_name_sort:  # sort by name, i.e. in sequence order
        test_filenames.sort()

    # Drop frames whose neighbouring frames (args.frame_ids offsets) would
    # touch or exceed the first/last frame of their sequence directory.
    # The frame range of each directory is cached so a directory is listed
    # only once.
    frame_ranges = {}
    kept_filenames = []
    for name in test_filenames:
        if args.split in kitti_splits:
            dirname, frame, _side = name.split()
            frames_dir = Path(args.dataset_path) / dirname / 'image_02/data'
        elif args.split in mc_splits:
            block, trajectory, color, frame = name.split('/')
            frames_dir = Path(args.dataset_path) / block / trajectory / color
        else:  # visdrone splits
            dirname, frame = name.split('/')
            frames_dir = Path(args.dataset_path) / dirname

        if frames_dir not in frame_ranges:
            frame_files = sorted(frames_dir.files())
            frame_ranges[frames_dir] = (int(frame_files[0].stem),
                                        int(frame_files[-1].stem))
        first_frame, last_frame = frame_ranges[frames_dir]

        if (int(frame) + args.frame_ids[0] <= first_frame
                or int(frame) + args.frame_ids[-1] >= last_frame):
            continue
        kept_filenames.append(name)
    test_filenames = kept_filenames

    test_dataset = dataset(args.dataset_path,
                           test_filenames,
                           feed_height,
                           feed_width,
                           args.frame_ids,
                           1,
                           is_train=False,
                           img_ext=args.ext)

    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=1,
                             shuffle=False,
                             num_workers=1,
                             pin_memory=True,
                             drop_last=False)
    print('->items num: {}'.format(len(test_loader)))

    # layers
    model_path = Path(args.model_path) / args.model_name
    if not model_path.exists():
        # Fail here with a clear message instead of printing and then
        # crashing inside torch.load.
        raise FileNotFoundError(str(model_path) + " does not exists")

    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # 1 LOADING PRETRAINED MODEL
    # 1.1 encoder
    print(" Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)
    # The checkpoint also carries non-weight entries (e.g. height/width), so
    # keep only keys the encoder actually has.
    encoder_keys = encoder.state_dict()
    filtered_dict_enc = {
        k: v for k, v in loaded_dict_enc.items() if k in encoder_keys
    }
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    # 1.2 decoder
    print(" Loading pretrained decoder")
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))
    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)
    depth_decoder.to(device)
    depth_decoder.eval()

    # paths
    pose_encoder_path = Path(model_path) / "pose_encoder.pth"
    pose_decoder_path = Path(model_path) / 'pose.pth'

    # 2.1 pose encoder
    print(" Loading pretrained pose encoder")
    pose_encoder = networks.ResnetEncoder(18, False, 2)
    # map_location keeps CUDA-saved checkpoints loadable on CPU-only hosts.
    pose_encoder.load_state_dict(
        torch.load(pose_encoder_path, map_location=device))
    pose_encoder.to(device)
    pose_encoder.eval()

    # 2.2 pose decoder
    print(" Loading pretrained decoder")
    pose_decoder = networks.PoseDecoder(num_ch_enc=pose_encoder.num_ch_enc,
                                        num_input_features=1,
                                        num_frames_to_predict_for=2)
    pose_loaded_dict = torch.load(pose_decoder_path, map_location=device)
    pose_decoder.load_state_dict(pose_loaded_dict)
    pose_decoder.to(device)
    pose_decoder.eval()

    source_scale = 0
    # Only the first and last offsets of frame_ids are used as source frames.
    source_frame_ids = [args.frame_ids[0], args.frame_ids[-1]]

    with torch.no_grad():  # inference only; no autograd graph needed
        for batch_idx, inputs in enumerate(tqdm(test_loader)):
            for key, ipt in inputs.items():
                inputs[key] = ipt.to(device)

            features = encoder(inputs[("color", 0, 0)])
            outputs = depth_decoder(features)

            disp = outputs[("disp", 0)]
            _, depth = disp_to_depth(disp, min_depth, max_depth)

            for f_i in source_frame_ids:
                # The pose network receives the pair in temporal order.
                if f_i < 0:
                    pose_inputs = [
                        inputs[("color", f_i, 0)], inputs[("color", 0, 0)]
                    ]
                else:
                    pose_inputs = [
                        inputs[("color", 0, 0)], inputs[("color", f_i, 0)]
                    ]
                pose_inputs = torch.cat(pose_inputs, 1)
                features = pose_encoder(pose_inputs)
                axisangle, translation = pose_decoder([features])

                outputs[("cam_T_cam", 0, f_i)] = \
                    transformation_from_parameters(axisangle[:, 0],
                                                   translation[:, 0],
                                                   invert=(f_i < 0))

                T = outputs[("cam_T_cam", 0, f_i)]

                cam_points = backproject_depth(depth, inputs[("inv_K", 0)])
                pix_coords = project_3d(cam_points, inputs[("K", 0)], T)

                outputs[("sample", f_i, 0)] = pix_coords

                # outputs[("color", f_i, 0)] is the source frame warped into
                # the target view.
                outputs[("color", f_i, 0)] = F.grid_sample(
                    inputs[("color", f_i, 0)],
                    outputs[("sample", f_i, 0)],
                    padding_mode="border")

                outputs[("color_identity", f_i, 0)] = \
                    inputs[("color", f_i, 0)]

            target = inputs[("color", 0, 0)]

            # Photometric error of the warped source frames vs. the target.
            reprojection_losses = []
            for frame_id in source_frame_ids:
                pred = outputs[("color", frame_id, 0)]
                reprojection_losses.append(
                    photometric_error.run(pred, target))
            reprojection_losses = torch.cat(reprojection_losses, 1)

            # Photometric error of the un-warped source frames.
            identity_reprojection_losses = []
            for frame_id in source_frame_ids:
                pred = inputs[("color", frame_id, source_scale)]
                identity_reprojection_losses.append(
                    photometric_error.run(pred, target))
            identity_reprojection_losses = torch.cat(
                identity_reprojection_losses, 1)

            erro_maps = torch.cat(
                (identity_reprojection_losses, reprojection_losses), dim=1)

            # batch_size is 1 and the loader is not shuffled, so batch_idx
            # indexes test_filenames directly.
            # NOTE(review): the 'l'/'r' removal is meant to drop the KITTI
            # side flag but also deletes every other 'l' and 'r' in the name;
            # kept as-is so existing output file names do not change.
            save_name = test_filenames[batch_idx].replace('/', '_')
            save_name = save_name.replace('l', '')
            save_name = save_name.replace('r', '')
            save_name = save_name.replace(' ', '')

            if "depth" in args.results:
                # Saving colormapped depth image
                disp_np = disp[0, 0].detach().cpu().numpy()
                vmax = np.percentile(disp_np, 95)
                plt.imsave(dirs['depth'] / "{}.png".format(save_name),
                           disp_np,
                           cmap='magma',
                           vmax=vmax)

            if "mean_mask" in args.results:
                mean_mask = MeanMask(erro_maps)
                mean_mask = mean_mask[0].detach().cpu().numpy()
                plt.imsave(dirs['mean_mask'] / "{}.png".format(save_name),
                           mean_mask,
                           cmap='bone')

            if "identical_mask" in args.results:
                # Written once; an earlier write without a colormap was
                # always overwritten by this one.
                identical_mask = IdenticalMask(erro_maps)
                identical_mask = identical_mask[0].detach().cpu().numpy()
                plt.imsave(dirs['identical_mask'] /
                           "{}.png".format(save_name),
                           identical_mask,
                           cmap='bone')

            if "var_mask" in args.results:
                var_mask = VarMask(erro_maps)
                var_mask = var_mask[0].detach().cpu().numpy()
                plt.imsave(dirs["var_mask"] / "{}.png".format(save_name),
                           var_mask,
                           cmap='bone')

            if "final_mask" in args.results:
                identical_mask = IdenticalMask(erro_maps)
                mean_mask = MeanMask(erro_maps)
                var_mask = VarMask(erro_maps)
                final_mask = float8or(mean_mask * identical_mask, var_mask)
                final_mask = final_mask[0].detach().cpu().numpy()
                plt.imsave(dirs["final_mask"] / "{}.png".format(save_name),
                           final_mask,
                           cmap='bone')
def test_simple(args):
    """Function to predict for a single image or folder of images

    Args:
        args: parsed command-line namespace. Attributes read here:
            ``model_name``, ``no_cuda``, ``image_path`` (a file or a
            directory) and ``ext`` (extension used when globbing a
            directory).

    Raises:
        AssertionError: if ``args.model_name`` is ``None``.
        Exception: if ``args.image_path`` is neither a file nor a directory.

    For every input image ``<name>_disp.npy`` (scaled disparity) and
    ``<name>_disp.jpeg`` (magma colour-mapped disparity at the original
    image resolution) are written next to the input.
    """
    assert args.model_name is not None, \
        "You must specify the --model_name parameter; see README.md for an example"

    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    download_model_if_doesnt_exist(args.model_name)
    model_path = os.path.join("models", args.model_name)
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # LOADING PRETRAINED MODEL
    print(" Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    # The checkpoint also carries non-weight entries (e.g. height/width), so
    # keep only keys the encoder actually has.
    encoder_keys = encoder.state_dict()
    filtered_dict_enc = {
        k: v for k, v in loaded_dict_enc.items() if k in encoder_keys
    }
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    print(" Loading pretrained decoder")
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    depth_decoder.to(device)
    depth_decoder.eval()

    # FINDING INPUT IMAGES
    if os.path.isfile(args.image_path):
        # Only testing on a single image
        paths = [args.image_path]
        output_directory = os.path.dirname(args.image_path)
    elif os.path.isdir(args.image_path):
        # Searching folder for images
        paths = glob.glob(
            os.path.join(args.image_path, '*.{}'.format(args.ext)))
        output_directory = args.image_path
    else:
        raise Exception("Can not find args.image_path: {}".format(
            args.image_path))

    print("-> Predicting on {:d} test images".format(len(paths)))

    # PREDICTING ON EACH IMAGE IN TURN
    with torch.no_grad():
        for idx, image_path in enumerate(paths):

            # Don't try to predict disparity for a disparity image. Outputs
            # are written as "_disp.jpeg", so that suffix must be skipped as
            # well as "_disp.jpg"; otherwise a second run with --ext jpeg
            # re-processes its own earlier outputs.
            if image_path.endswith(("_disp.jpg", "_disp.jpeg")):
                continue

            # Load image and preprocess
            input_image = pil.open(image_path).convert('RGB')
            original_width, original_height = input_image.size
            input_image = input_image.resize((feed_width, feed_height),
                                             pil.LANCZOS)
            input_image = transforms.ToTensor()(input_image).unsqueeze(0)

            # PREDICTION
            input_image = input_image.to(device)
            features = encoder(input_image)
            outputs = depth_decoder(features)

            disp = outputs[("disp", 0)]
            # Back to the resolution of the source image for display.
            disp_resized = torch.nn.functional.interpolate(
                disp, (original_height, original_width),
                mode="bilinear",
                align_corners=False)

            # Saving numpy file
            output_name = os.path.splitext(os.path.basename(image_path))[0]
            name_dest_npy = os.path.join(output_directory,
                                         "{}_disp.npy".format(output_name))
            scaled_disp, _ = disp_to_depth(disp, 0.1, 100)
            np.save(name_dest_npy, scaled_disp.cpu().numpy())

            # Saving colormapped depth image; the 95th percentile caps the
            # colour range so a few outliers do not flatten the map.
            disp_resized_np = disp_resized.squeeze().cpu().numpy()
            vmax = np.percentile(disp_resized_np, 95)
            normalizer = mpl.colors.Normalize(vmin=disp_resized_np.min(),
                                              vmax=vmax)
            mapper = cm.ScalarMappable(norm=normalizer, cmap='magma')
            colormapped_im = (mapper.to_rgba(disp_resized_np)[:, :, :3] *
                              255).astype(np.uint8)
            im = pil.fromarray(colormapped_im)

            name_dest_im = os.path.join(output_directory,
                                        "{}_disp.jpeg".format(output_name))
            im.save(name_dest_im)

            print(" Processed {:d} of {:d} images - saved prediction to {}".
                  format(idx + 1, len(paths), name_dest_im))

    print('-> Done!')
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set

    Args:
        opt: parsed options namespace. Attributes read here include ``gpu``,
            ``eval_mono``, ``eval_stereo``, ``ext_disp_to_eval``,
            ``load_weights_folder``, ``eval_split``, ``png``, ``data_path``,
            ``num_workers``, ``num_layers``, ``post_process``, ``min_depth``,
            ``max_depth``, ``eval_eigen_to_benchmark``, ``save_pred_disps``,
            ``no_eval``, ``pred_depth_scale_factor`` and
            ``disable_median_scaling``.

    Raises:
        AssertionError: unless exactly one of ``eval_mono`` / ``eval_stereo``
            is set, or when the weights folder does not exist.

    Disparities are either predicted with the stored weights or loaded from
    ``opt.ext_disp_to_eval``. Unless evaluation is disabled (or the split is
    ``benchmark``, for which only depth PNGs are written) the predictions are
    compared with ``gt_depths.npz`` of the split and the mean errors are
    printed. The function calls ``quit()`` on the no-eval and benchmark
    paths.
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80
    device = torch.device("cuda" if opt.gpu else "cpu")

    assert sum((opt.eval_mono, opt.eval_stereo)) == 1, \
        "Please choose mono or stereo evaluation by setting either --eval_mono or --eval_stereo"

    if opt.ext_disp_to_eval is None:

        opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

        assert os.path.isdir(opt.load_weights_folder), \
            "Cannot find a folder at {}".format(opt.load_weights_folder)

        print("-> Loading weights from {}".format(opt.load_weights_folder))

        filenames = readlines(
            os.path.join(splits_dir, opt.eval_split, "test_files.txt"))
        img_ext = '.png' if opt.png else '.jpg'
        encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
        decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

        # map_location makes CUDA-saved checkpoints loadable when evaluating
        # on the CPU (opt.gpu unset).
        encoder_dict = torch.load(encoder_path, map_location=device)

        dataset = datasets.KITTIRAWDataset(opt.data_path,
                                           filenames,
                                           encoder_dict['height'],
                                           encoder_dict['width'], [0],
                                           4,
                                           is_train=False,
                                           img_ext=img_ext)
        dataloader = DataLoader(dataset,
                                16,
                                shuffle=False,
                                num_workers=opt.num_workers,
                                pin_memory=True,
                                drop_last=False)

        encoder = networks.ResnetEncoder(opt.num_layers, False)
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

        # Strip a "module." prefix from checkpoint keys and keep only keys
        # the encoder actually has (the checkpoint also stores height/width).
        model_dict = encoder.state_dict()
        encoder.load_state_dict({
            k.replace("module.", ""): v
            for k, v in encoder_dict.items()
            if k.replace("module.", "") in model_dict
        })
        decoder_dict = torch.load(decoder_path, map_location=device)
        depth_decoder.load_state_dict(
            {k.replace("module.", ""): v for k, v in decoder_dict.items()})

        encoder.to(device)
        encoder.eval()
        depth_decoder.to(device)
        depth_decoder.eval()

        pred_disps = []

        print("-> Computing predictions with size {}x{}".format(
            encoder_dict['width'], encoder_dict['height']))

        with torch.no_grad():
            for data in dataloader:
                input_color = data[("color", 0, 0)].to(device)

                if opt.post_process:
                    # Post-processed results require each image to have two
                    # forward passes (original plus horizontally flipped).
                    input_color = torch.cat(
                        (input_color, torch.flip(input_color, [3])), 0)

                output = depth_decoder(encoder(input_color))

                pred_disp, _ = disp_to_depth(output[("disp", 0)],
                                             opt.min_depth, opt.max_depth)
                pred_disp = pred_disp.cpu()[:, 0].numpy()

                if opt.post_process:
                    # First half: normal pass; second half: flipped pass,
                    # flipped back before merging.
                    N = pred_disp.shape[0] // 2
                    pred_disp = batch_post_process_disparity(
                        pred_disp[:N], pred_disp[N:, :, ::-1])

                pred_disps.append(pred_disp)

        pred_disps = np.concatenate(pred_disps)

    else:
        # Load predictions from file
        print("-> Loading predictions from {}".format(opt.ext_disp_to_eval))
        pred_disps = np.load(opt.ext_disp_to_eval)

        if opt.eval_eigen_to_benchmark:
            eigen_to_benchmark_ids = np.load(
                os.path.join(splits_dir, "benchmark",
                             "eigen_to_benchmark_ids.npy"))

            pred_disps = pred_disps[eigen_to_benchmark_ids]

    if opt.save_pred_disps:
        output_path = os.path.join(
            opt.load_weights_folder,
            "disps_{}_split.npy".format(opt.eval_split))
        print("-> Saving predicted disparities to ", output_path)
        np.save(output_path, pred_disps)

    if opt.no_eval:
        print("-> Evaluation disabled. Done.")
        quit()

    elif opt.eval_split == 'benchmark':
        save_dir = os.path.join(opt.load_weights_folder,
                                "benchmark_predictions")
        print("-> Saving out benchmark predictions to {}".format(save_dir))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        for idx in range(len(pred_disps)):
            disp_resized = cv2.resize(pred_disps[idx], (1216, 352))
            depth = STEREO_SCALE_FACTOR / disp_resized
            depth = np.clip(depth, 0, 80)
            # Depth is stored as 16-bit integers scaled by 256.
            depth = np.uint16(depth * 256)
            save_path = os.path.join(save_dir, "{:010d}.png".format(idx))
            cv2.imwrite(save_path, depth)

        print("-> No ground truth is available for the KITTI benchmark, so not evaluating. Done.")
        quit()

    gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz")
    gt_depths = np.load(gt_path,
                        fix_imports=True,
                        encoding='latin1',
                        allow_pickle=True)["data"]

    print("-> Evaluating")

    if opt.eval_stereo:
        print(" Stereo evaluation - "
              "disabling median scaling, scaling by {}".format(
                  STEREO_SCALE_FACTOR))
        opt.disable_median_scaling = True
        opt.pred_depth_scale_factor = STEREO_SCALE_FACTOR
    else:
        print(" Mono evaluation - using median scaling")

    errors = []
    ratios = []

    for i in range(pred_disps.shape[0]):

        gt_depth = gt_depths[i]
        gt_height, gt_width = gt_depth.shape[:2]

        # Compare at ground-truth resolution.
        pred_disp = pred_disps[i]
        pred_disp = cv2.resize(pred_disp, (gt_width, gt_height))
        pred_depth = 1 / pred_disp

        if opt.eval_split == "eigen":
            # Valid depth range combined with a fixed fractional crop.
            mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)

            crop = np.array([
                0.40810811 * gt_height, 0.99189189 * gt_height,
                0.03594771 * gt_width, 0.96405229 * gt_width
            ]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)

        else:
            mask = gt_depth > 0

        pred_depth = pred_depth[mask]
        gt_depth = gt_depth[mask]

        pred_depth *= opt.pred_depth_scale_factor
        if not opt.disable_median_scaling:
            # Median scaling: align the prediction's median with the ground
            # truth's before computing errors.
            ratio = np.nanmedian(gt_depth) / np.nanmedian(pred_depth)
            ratios.append(ratio)
            pred_depth *= ratio

        pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH
        pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH

        errors.append(compute_errors(gt_depth, pred_depth))

    if not opt.disable_median_scaling:
        ratios = np.array(ratios)
        med = np.median(ratios)
        print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(
            med, np.std(ratios / med)))

    mean_errors = np.array(errors).mean(0)

    # Header and values are printed as a LaTeX-style row; the format expects
    # nine values from compute_errors.
    print("\n " + ("{:>8} | " * 9).format("abs_rel", "sq_rel", "rmse",
                                          "rmse_log", "a1", "a2", "a3",
                                          "abs", "rmse"))
    print(("&{: 8.3f} " * 9).format(*mean_errors.tolist()) + "\\\\")
    print("\n-> Done!")
def evaluate(opt):
    """Evaluate a pretrained depth model against stored ground-truth depths.

    The function loads ``gt_depths.npz`` from ``opt.test_dir``, restores the
    encoder/decoder weights found in ``opt.depth_eval_path``, predicts a
    disparity map for every file listed in ``opt.test_files``, converts the
    predictions to depth, and prints the mean of the per-image metrics
    returned by ``compute_errors`` as a LaTeX-style table row.

    Args:
        opt: Options object. The attributes read here are ``eval_mono``,
            ``eval_stereo``, ``test_dir``, ``test_files``, ``depth_eval_path``,
            ``num_layers``, ``data_path``, ``eval_batch_size``,
            ``num_workers``, ``min_depth``, ``max_depth``,
            ``pred_depth_scale_factor`` and ``median_scaling``.

    Returns:
        None. All results are written to stdout.
    """
    # Bounds used both for the ground-truth validity mask and for clamping
    # the predicted depth before the metrics are computed.
    # (Translated original note, which is cut off in the source: "the metric
    # range here forces gt values into the same range as the scanner so that
    # values stay as close as possible to metric values".)
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    # NOTE(review): this parses as `(not eval_mono) or eval_stereo`, so the
    # hint is printed for every flag combination except mono-only. It only
    # prints; evaluation continues regardless. Confirm the intended check.
    if not opt.eval_mono or opt.eval_stereo:
        print(
            "Please choose mono or stereo evaluation by setting either --eval_mono or --eval_stereo"
        )

    test_dir = Path(opt.test_dir)

    # 1. Load ground truth.
    print('\n-> load gt:{}\n'.format(opt.test_dir))
    gt_path = test_dir / "gt_depths.npz"
    gt_depths = np.load(gt_path, allow_pickle=True)
    gt_depths = gt_depths["data"]

    # 2. Load the model and the images, then predict disparities.
    depth_eval_path = Path(opt.depth_eval_path)
    if not depth_eval_path.exists():
        # Only reported here; the torch.load call below is what actually
        # fails when the folder is missing.
        print("Cannot find a folder at {}".format(depth_eval_path))

    print("-> Loading weights from {}".format(depth_eval_path))

    # Model loading.
    filenames = readlines(test_dir / opt.test_files)
    encoder_path = depth_eval_path / "encoder.pth"
    decoder_path = depth_eval_path / "depth.pth"

    encoder_dict = torch.load(encoder_path)
    encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

    # The encoder checkpoint also carries non-weight entries (its 'height'
    # and 'width' keys are read below), so keep only keys the model knows.
    model_dict = encoder.state_dict()
    encoder.load_state_dict(
        {k: v for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()

    # Data loader; the input resolution comes from the encoder checkpoint.
    dataset = datasets.KITTIRAWDatasetv1(opt.data_path,
                                         filenames,
                                         encoder_dict['height'],
                                         encoder_dict['width'],
                                         [0],
                                         4,
                                         is_train=False)
    dataloader = DataLoader(dataset,
                            batch_size=opt.eval_batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)

    pred_disps = []

    print("\n-> Computing predictions with size {}x{}\n".format(
        encoder_dict['width'], encoder_dict['height']))

    # Prediction. Autograd must be disabled here: the network outputs would
    # otherwise require grad, and Tensor.numpy() refuses such tensors (and a
    # graph would be kept alive for every batch).
    # NOTE: flip-based post-processing, saving of predicted disparities and
    # the KITTI benchmark export are not part of this function.
    with torch.no_grad():
        for data in tqdm(dataloader):
            input_color = data[("color", 0, 0)].cuda()

            output = depth_decoder(encoder(input_color))

            # Only the scaled disparity is used; the depth returned alongside
            # it is discarded and recomputed after resizing below.
            pred_disp, _ = disp_to_depth(output[("disp", 0)],
                                         opt.min_depth, opt.max_depth)
            # Keep channel 0 of every sample in the batch.
            pred_disp = pred_disp.cpu()[:, 0].numpy()

            pred_disps.append(pred_disp)

    pred_disps = np.concatenate(pred_disps)

    # 3. Evaluation.
    print("-> Evaluating")

    metrics = []
    ratios = []
    nums_evaluate = pred_disps.shape[0]

    for i in tqdm(range(nums_evaluate)):
        gt_depth = gt_depths[i]
        gt_height, gt_width = gt_depth.shape[:2]

        pred_disp = pred_disps[i]
        # Resize the prediction to the ground-truth resolution.
        pred_disp = cv2.resize(pred_disp, (gt_width, gt_height))
        pred_depth = 1 / pred_disp

        # Crop: for the "eigen" and "custom" test folders only pixels inside
        # a fixed fractional window with gt in (MIN_DEPTH, MAX_DEPTH) count.
        # (Original author's note: "???, probably something very old".)
        if test_dir.stem == "eigen" or test_dir.stem == 'custom':
            mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)
            crop = np.array([
                0.40810811 * gt_height, 0.99189189 * gt_height,
                0.03594771 * gt_width, 0.96405229 * gt_width
            ]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)
        else:
            mask = gt_depth > 0

        # Boolean indexing also flattens both arrays to 1-D.
        pred_depth = pred_depth[mask]
        gt_depth = gt_depth[mask]

        pred_depth *= opt.pred_depth_scale_factor

        # Median scaling: linearly rescale the prediction so that its median
        # matches the ground-truth median.
        if opt.median_scaling:
            ratio = np.median(gt_depth) / np.median(pred_depth)
            ratios.append(ratio)
            pred_depth *= ratio

        # Clamp the prediction into the evaluated depth range.
        pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH
        pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH

        metric = compute_errors(gt_depth, pred_depth)
        metrics.append(metric)

    metrics = np.array(metrics)

    # 4. Process results, LaTeX-style output.
    if opt.median_scaling:
        ratios = np.array(ratios)
        med = np.median(ratios)
        print("\n Scaling ratios | med: {:0.3f} | std: {:0.3f}\n".format(
            med, np.std(ratios / med)))

    mean_metrics = metrics.mean(0)

    print("\n " + ("{:>8} | " * 7).format("abs_rel", "sq_rel", "rmse",
                                          "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f} " * 7).format(*mean_metrics.tolist()) + "\\\\")
    print("\n-> Done!")