def set_dataset(self):
    """properly handle multiple dataset situation"""
    fpath = os.path.join(os.path.dirname(os.path.dirname(__file__)), "splits", self.opt.split, "{}_files.txt")
    train_filenames = readlines(fpath.format("train"))
    val_filenames = readlines(fpath.format("val"))

    train_dataset = datasets.KITTIRAWDataset(
        self.opt.data_path, train_filenames, self.opt.height, self.opt.width,
        self.opt.frame_ids, 4, is_train=not self.opt.no_aug, load_seman=True,
        load_hints=self.opt.load_hints, hints_path=self.opt.hints_path,
        PreSIL_root=self.opt.PreSIL_path, kitti_gt_path=self.opt.kitti_gt_path,
        theta_gt_path=self.opt.theta_gt_path, surfnorm_gt_path=self.opt.surfnorm_gt_path)
    val_dataset = datasets.KITTIRAWDataset(
        self.opt.data_path, val_filenames, self.opt.height, self.opt.width,
        self.opt.frame_ids, 4, is_train=False, load_seman=True,
        load_hints=self.opt.load_hints, hints_path=self.opt.hints_path,
        PreSIL_root=self.opt.PreSIL_path, kitti_gt_path=self.opt.kitti_gt_path,
        theta_gt_path=self.opt.theta_gt_path, surfnorm_gt_path=self.opt.surfnorm_gt_path)

    self.train_loader = DataLoader(
        train_dataset, self.opt.batch_size, shuffle=not self.opt.no_shuffle,
        num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
    self.val_loader = DataLoader(
        val_dataset, self.opt.batch_size, shuffle=True,
        num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
    self.val_iter = iter(self.val_loader)

    self.train_num = len(train_dataset)
    self.val_num = len(val_dataset)
    self.num_total_steps = self.train_num // self.opt.batch_size * self.opt.num_epochs
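# `readlines` is the small text-file helper used by every loader in this section. A minimal
# sketch, assuming the usual monodepth2-style utility (the local version may differ):
def readlines(filename):
    """Read all lines of a text file and return them as a list of stripped strings."""
    with open(filename, 'r') as f:
        lines = f.read().splitlines()
    return lines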
def set_dataset(self):
    fpath = os.path.join(os.path.dirname(__file__), "splits", self.opt.split, "{}_files.txt")
    train_filenames = readlines(fpath.format("train"))
    val_filenames = readlines(fpath.format("val"))

    train_dataset = datasets.KITTIRAWDataset(
        self.opt.data_path, train_filenames, self.opt.height, self.opt.width,
        self.opt.frame_ids, 4, is_train=True, load_meta=self.opt.load_meta,
        is_load_semantics=True, is_predicted_semantics=self.opt.is_predicted_semantics,
        load_morphed_depth=self.opt.load_morphed_depth, read_stereo=self.opt.read_stereo,
        stereo_meta=self.opt.SGMStereo_prediction_folder,
        morphFolder=self.opt.read_processed_results_path)
    val_dataset = datasets.KITTIRAWDataset(
        self.opt.data_path, val_filenames, self.opt.height, self.opt.width,
        self.opt.frame_ids, 4, is_train=False, load_meta=self.opt.load_meta,
        is_load_semantics=True, read_stereo=self.opt.read_stereo,
        stereo_meta=self.opt.SGMStereo_prediction_folder,
        is_predicted_semantics=self.opt.is_predicted_semantics)

    self.train_loader = DataLoader(
        train_dataset, self.opt.batch_size, shuffle=True,
        num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
    self.val_loader = DataLoader(
        val_dataset, self.opt.batch_size, shuffle=True,
        num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
    self.val_iter = iter(self.val_loader)

    self.train_num = len(train_dataset)
    self.val_num = len(val_dataset)
    self.num_total_steps = self.train_num // self.opt.batch_size * self.opt.num_epochs
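# `self.val_iter` created above is normally advanced by hand during training and re-created once
# the validation loader is exhausted. A minimal sketch of that pattern, assuming monodepth2-style
# trainer methods `set_eval`, `set_train` and `process_batch` (names assumed, not shown here):
def val(self):
    """Fetch one validation batch, restarting the validation loader when it runs out."""
    self.set_eval()
    try:
        inputs = next(self.val_iter)
    except StopIteration:
        # loader exhausted: rebuild the iterator and fetch again
        self.val_iter = iter(self.val_loader)
        inputs = next(self.val_iter)
    with torch.no_grad():
        outputs, losses = self.process_batch(inputs)
    self.set_train()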
def __init__(self, options):
    self.opt = options
    fpath = os.path.join(os.path.dirname(__file__), "../splits", self.opt.split, "{}_files.txt")
    self.train_filenames = readlines(fpath.format("train"))
    num_train_samples = len(self.train_filenames)
    self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

    train_dataset = datasets.KITTIRAWDataset(
        self.opt.data_path, self.train_filenames, self.opt.height, self.opt.width,
        self.opt.frame_ids, 4, is_train=not self.opt.noAug,
        load_detect=self.opt.predins, detect_path=self.opt.detect_path,
        load_seman=self.opt.loadSeman, load_pose=self.opt.loadPose,
        loadPredDepth=self.opt.loadPredDepth, predDepthPath=self.opt.predDepthPath)
    self.train_loader = DataLoader(
        train_dataset, self.opt.batch_size, shuffle=not self.opt.noShuffle,
        num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
def network_define(opt, data_path, height, width):
    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)
    print("-> Loading weights from {}".format(opt.load_weights_folder))

    filenames = readlines(os.path.join(splits_dir, opt.eval_split, split_file))
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")
    encoder_dict = torch.load(encoder_path, map_location=torch.device("cuda:1"))

    if opt.dataset_val[0] == "kitti":
        dataset = datasets.KITTIRAWDataset(data_path, filenames, height, width, [0], 4, is_train=False)
    elif opt.dataset_val[0] == "vkitti":
        dataset = datasets.VKITTIDataset(data_path, filenames, height, width, [0], 4, is_train=False)

    # The default collate_fn would fail here because samples can have variable length,
    # so a custom collate function is used together with batch size 1.
    # dataloader = DataLoader(dataset, 16, shuffle=False, num_workers=opt.num_workers,
    #                         pin_memory=True, drop_last=False)
    dataloader = DataLoader(
        dataset, 1, shuffle=False, num_workers=opt.num_workers,
        pin_memory=True, drop_last=False, collate_fn=my_collate_fn)

    encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

    model_dict = encoder.state_dict()
    encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path, map_location=torch.device("cuda:1")))

    encoder.cuda(1)
    encoder.eval()
    depth_decoder.cuda(1)
    depth_decoder.eval()

    return encoder, depth_decoder, dataloader, filenames
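# `my_collate_fn` is referenced above but not defined in this section. Given the comment that the
# default collate fails on variable-length samples, a plausible sketch (hypothetical; the real
# function may instead pad or stack only the fixed-size fields) simply keeps the samples in a list:
def my_collate_fn(batch):
    """Collate variable-length samples by returning them as-is instead of stacking tensors."""
    return batch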
def main():
    models = build_model(device=torch.device("cuda"))
    models, parameters = models
    print("models was loaded")
    print(f"Total params: {sum([get_params_num(m) for m in models.values()]) / 10 ** 6}")
    print(f"Train params: {sum([p.numel() for p in parameters]) / 10 ** 6}")
    # NOTE: debug early exit -- everything below is unreachable until this assert is removed.
    assert False

    fpath = os.path.join(os.getcwd(), "splits", "eigen_zhou_small", "{}_files.txt")
    val_filenames = readlines(fpath.format("val"))
    img_ext = '.jpg'
    val_dataset = datasets.KITTIRAWDataset(
        "/home/ankarpov/Datasets/kitti_data", val_filenames,
        192, 640, [0, 1, -1], 4, is_train=False, img_ext=img_ext)

    transform = pth_transforms.Compose([
        # pth_transforms.Resize(target_size),
        pth_transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    frame_ids = [0, 1, -1]
    input_tensor = torch.stack(
        [val_dataset[21][("color_aug", i, 0)] for i in frame_ids], dim=0)
    # input_tensor = transform(input_tensor)
    input_tensor = input_tensor[None]
    input_tensor = input_tensor.to(torch.device("cuda"))
    print("input shape: ", input_tensor.shape)
    print("tensor on cuda")

    all_features = models["encoder"](input_tensor)
    all_features["act"] = [torch.split(f, 1) for f in all_features["act"]]
    all_features["attn"] = [torch.split(f, 1) for f in all_features["attn"]]
    acts = {}
    attns = {}
    for i, k in enumerate(frame_ids):
        acts[k] = [act[i] for act in all_features["act"]]
        attns[k] = [attn[i] for attn in all_features["attn"]]
    print("Encoder was processed")

    depth = models["depth"](acts[0], attns[0])
    print("Depth was processed")
    print("Depth shape: ", depth.shape)

    # NOTE: `acts`/`attns` are rebound here to the per-layer lists of frame -1, so the indexing in
    # the loop below selects layer entries rather than frame ids; this looks like a leftover and
    # may be unintended.
    acts = acts[-1]
    attns = attns[-1]
    for f_i in frame_ids[1:]:
        if f_i < 0:
            act_inputs = (acts[f_i], acts[0])
            attn_inputs = (attns[f_i], attns[0])
        else:
            act_inputs = (acts[0], acts[f_i])
            attn_inputs = (attns[0], attns[f_i])
        act_inputs = torch.stack(act_inputs, dim=1)
        attn_inputs = torch.stack(attn_inputs, dim=1)
        # print(act_inputs.shape, attn_inputs.shape)
        axisangle, translation = models["pose"](act_inputs, attn_inputs)
        print(f"Pose frame {f_i} was processed")
        print(axisangle.shape, translation.shape)

    # attn = [t.cpu() for t in vit_out["attn"]]
    # act = [t.cpu() for t in vit_out["act"]]
    # torch.save(attn, "/home/ankarpov/tmp/attn_21.pt")
    # torch.save(act, "/home/ankarpov/tmp/act_21.pt")
    # torch.save(input_tensor.cpu(), "/home/ankarpov/tmp/input_21.pt")
    print("was forward")
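# `get_params_num` is not shown in this section; a minimal sketch consistent with how it is used
# above (per-module parameter counting). This is an assumption, not the project's own definition:
def get_params_num(module):
    """Total number of parameters in a torch module."""
    return sum(p.numel() for p in module.parameters())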
def evaluate(opt): """Evaluates a pretrained model using a specified test set """ MIN_DEPTH = 1e-3 MAX_DEPTH = 80 if opt.isCudaMorphing and opt.borderMorphLoss: bnmorph = BNMorph(height=opt.height, width=opt.width, sparsityRad=2).cuda() assert sum((opt.eval_mono, opt.eval_stereo)) == 1, \ "Please choose mono or stereo evaluation by setting either --eval_mono or --eval_stereo" if opt.ext_disp_to_eval is None: opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder) assert os.path.isdir(opt.load_weights_folder), \ "Cannot find a folder at {}".format(opt.load_weights_folder) # print("-> Loading weights from {}".format(opt.load_weights_folder)) if not opt.UseCustTest: filenames = readlines(os.path.join(splits_dir, opt.eval_split, "test_files.txt")) else: filenames = readlines(os.path.join(splits_dir, "eigen_test_toy", "val_files.txt")) encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth") decoder_path = os.path.join(opt.load_weights_folder, "depth.pth") encoder_dict = torch.load(encoder_path) dataset = datasets.KITTIRAWDataset(opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'], [0], 4, is_train=False, tag=opt.dataset, img_ext = 'png', load_meta=opt.load_meta, is_load_semantics=opt.use_kitti_gt_semantics, is_predicted_semantics = opt.is_predicted_semantics) dataloader = DataLoader(dataset, opt.batch_size, shuffle=False, num_workers=opt.num_workers, drop_last=True) encoder = networks.ResnetEncoder(opt.num_layers, False) depth_decoder = networks.DepthDecoder(encoder.num_ch_enc, isSwitch=(opt.switchMode == 'on'), isMulChannel=opt.isMulChannel) if opt.borderMorphLoss: tool = grad_computation_tools(batch_size=opt.batch_size, height=opt.height, width=opt.width).cuda() auto_morph = AutoMorph(height=opt.height, width=opt.width) foregroundType = [5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18] # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle MorphitNum = 5 model_dict = encoder.state_dict() encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict}) depth_decoder.load_state_dict(torch.load(decoder_path)) encoder.cuda() encoder.eval() depth_decoder.cuda() depth_decoder.eval() if opt.set_eval_train: encoder.train() depth_decoder.train() # encoder.train() # depth_decoder.train() pred_disps = [] mergeDisp = Merge_MultDisp(opt.scales, batchSize = opt.batch_size) # print("-> Computing predictions with size {}x{}".format( # encoder_dict['width'], encoder_dict['height'])) count = 0 with torch.no_grad(): for data in dataloader: input_color = data[("color", 0, 0)].cuda() if opt.post_process: # Post-processed results require each image to have two forward passes input_color = torch.cat((input_color, torch.flip(input_color, [3])), 0) features = encoder(input_color) outputs = dict() # outputs.update(depth_decoder(features, computeSemantic=True, computeDepth=False)) outputs.update(depth_decoder(features, computeSemantic=False, computeDepth=True)) mergeDisp(data, outputs, eval=True) # outputs['disp', 0] = F.interpolate(outputs['disp', 0], [opt.height, opt.width], mode='bilinear', align_corners=True) # pickle.dump(outputs, open("eval_outputs.p", "wb")) if opt.borderMorphLoss: for key, ipt in data.items(): if not (key == 'height' or key == 'width' or key == 'tag' or key == 'cts_meta' or key == 'file_add'): data[key] = ipt.to(torch.device("cuda")) foregroundMapGt = torch.ones([opt.batch_size, 1, opt.height, opt.width], dtype=torch.uint8, device=torch.device("cuda")) for m in foregroundType: foregroundMapGt = 
foregroundMapGt * (data['seman_gt'] != m) foregroundMapGt = (1 - foregroundMapGt).float() disparity_grad = torch.abs(tool.convDispx(outputs['disp', 0])) + torch.abs( tool.convDispy(outputs['disp', 0])) semantics_grad = torch.abs(tool.convDispx(foregroundMapGt)) + torch.abs( tool.convDispy(foregroundMapGt)) disparity_grad = disparity_grad * tool.zero_mask semantics_grad = semantics_grad * tool.zero_mask disparity_grad_bin = disparity_grad > tool.disparityTh semantics_grad_bin = semantics_grad > tool.semanticsTh if opt.isCudaMorphing: morphedx, morphedy, coeff = bnmorph.find_corresponding_pts(disparity_grad_bin, semantics_grad_bin) morphedx = (morphedx / (opt.width - 1) - 0.5) * 2 morphedy = (morphedy / (opt.height - 1) - 0.5) * 2 grid = torch.cat([morphedx, morphedy], dim=1).permute(0, 2, 3, 1) dispMaps_morphed = F.grid_sample(outputs['disp', 0], grid, padding_mode="border") else: disparity_grad_bin = disparity_grad_bin.detach().cpu().numpy() semantics_grad_bin = semantics_grad_bin.detach().cpu().numpy() disparityMap_to_processed = outputs['disp', 0].detach().cpu().numpy() dispMaps_morphed = list() changeingRecs = list() for mm in range(opt.batch_size): dispMap_morphed, changeingRec = auto_morph.automorph( disparity_grad_bin[mm, 0, :, :], semantics_grad_bin[mm, 0, :, :], disparityMap_to_processed[mm, 0, :, :]) dispMaps_morphed.append(dispMap_morphed) changeingRecs.append(changeingRec) dispMaps_morphed = torch.from_numpy(np.stack(dispMaps_morphed, axis=0)).unsqueeze(1).cuda() outputs[("disp", 0)] = dispMaps_morphed # tensor2disp(dispMaps_morphed, ind=0, vmax=0.09).show() # print(count) count = count + 1 pred_disp, _ = disp_to_depth(outputs[("disp", 0)], opt.min_depth, opt.max_depth) pred_disp = pred_disp.cpu()[:, 0].numpy() # Some check: # with open('train_outputs.p', 'rb') as handle: # train_outputs = pickle.load(handle) # pred_disp, pdepth = disp_to_depth(outputs[("disp", 0)], opt.min_depth, opt.max_depth) # torch.mean(torch.abs(train_outputs[('disp', 0)] - outputs[("disp", 0)])) # torch.mean(torch.abs(train_outputs[('depth', 0, 0)] - pdepth)) if opt.post_process: N = pred_disp.shape[0] // 2 pred_disp = batch_post_process_disparity(pred_disp[:N], pred_disp[N:, :, ::-1]) pred_disps.append(pred_disp) pred_disps = np.concatenate(pred_disps) else: # Load predictions from file print("-> Loading predictions from {}".format(opt.ext_disp_to_eval)) pred_disps = np.load(opt.ext_disp_to_eval) if opt.eval_eigen_to_benchmark: eigen_to_benchmark_ids = np.load( os.path.join(splits_dir, "benchmark", "eigen_to_benchmark_ids.npy")) pred_disps = pred_disps[eigen_to_benchmark_ids] if opt.save_pred_disps: output_path = os.path.join( opt.load_weights_folder, "disps_{}_split.npy".format(opt.eval_split)) print("-> Saving predicted disparities to ", output_path) np.save(output_path, pred_disps) if opt.no_eval: print("-> Evaluation disabled. Done.") quit() elif opt.eval_split == 'benchmark': save_dir = os.path.join(opt.load_weights_folder, "benchmark_predictions") print("-> Saving out benchmark predictions to {}".format(save_dir)) if not os.path.exists(save_dir): os.makedirs(save_dir) for idx in range(len(pred_disps)): disp_resized = cv2.resize(pred_disps[idx], (1216, 352)) depth = STEREO_SCALE_FACTOR / disp_resized depth = np.clip(depth, 0, 80) depth = np.uint16(depth * 256) save_path = os.path.join(save_dir, "{:010d}.png".format(idx)) cv2.imwrite(save_path, depth) print("-> No ground truth is available for the KITTI benchmark, so not evaluating. 
Done.") quit() if not opt.UseCustTest: gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz") gt_depths = np.load(gt_path, fix_imports=True, encoding='latin1', allow_pickle = True)["data"] else: gt_depths = np.load("/media/shengjie/other/sceneUnderstanding/SDNET/splits/eigen_test_toy/gt_depths.npz", fix_imports=True, encoding='latin1', allow_pickle=True)["data"] print("-> Evaluating") if opt.eval_stereo: print(" Stereo evaluation - " "disabling median scaling, scaling by {}".format(STEREO_SCALE_FACTOR)) opt.disable_median_scaling = True opt.pred_depth_scale_factor = STEREO_SCALE_FACTOR if opt.EnableMedianScaleInEval: opt.disable_median_scaling = False else: print(" Mono evaluation - using median scaling") errors = [] ratios = [] for i in range(pred_disps.shape[0]): gt_depth = gt_depths[i] gt_height, gt_width = gt_depth.shape[:2] pred_disp = pred_disps[i] pred_disp = cv2.resize(pred_disp, (gt_width, gt_height)) pred_depth = 1 / pred_disp # Some check: # with open('recompare.p', 'rb') as handle: # train_outputs = pickle.load(handle) # calib_dir = '/media/shengjie/other/sceneUnderstanding/monodepth2/kitti_data/kitti_raw/2011_09_26' # velo_filename = '/media/shengjie/other/sceneUnderstanding/monodepth2/kitti_data/kitti_raw/2011_09_26/2011_09_26_drive_0002_sync/velodyne_points/data/0000000069.bin' # gt_depth2 = kitti_utils.generate_depth_map(calib_dir, velo_filename, 2, True) # # np.mean(np.abs(train_outputs['depth_gt'][0,0,:,:].cpu().numpy() - gt_depth)) # np.mean(np.abs(train_outputs['depth_pred'][0, 0, :, :].cpu().numpy() - pred_depth)) # pred_depth = pred_depth * train_outputs['scaleRation'].cpu().numpy() # # train_depth = F.interpolate(train_outputs[('depth', 0, 1)], [gt_height, gt_width], mode='bilinear', align_corners=True) # np.mean(np.abs(train_depth[0,0,:,:].cpu().numpy() - pred_depth)) if opt.eval_split == "eigen" or opt.UseCustTest: mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH) crop = np.array([0.40810811 * gt_height, 0.99189189 * gt_height, 0.03594771 * gt_width, 0.96405229 * gt_width]).astype(np.int32) crop_mask = np.zeros(mask.shape) crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1 mask = np.logical_and(mask, crop_mask) else: mask = gt_depth > 0 # Some check: # with open('recompare.p', 'rb') as handle: # eval_outputs = pickle.load(handle) # np.mean(np.abs(eval_outputs['depth_gt'][0,0,:,:].cpu().numpy() -gt_depth )) pred_depth = pred_depth[mask] gt_depth = gt_depth[mask] pred_depth *= opt.pred_depth_scale_factor if not opt.disable_median_scaling: ratio = np.median(gt_depth) / np.median(pred_depth) ratios.append(ratio) pred_depth *= ratio pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH errors.append(compute_errors(gt_depth, pred_depth, UseGtMedianScaling = (opt.UseGtMedianScaling == True))) if not opt.disable_median_scaling: ratios = np.array(ratios) med = np.median(ratios) print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(med, np.std(ratios / med))) mean_errors = np.array(errors).mean(0) print("\n " + ("{:>8} | " * 8).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3", "abs_shift")) print(("&{: 8.3f} " * 8).format(*mean_errors.tolist()) + "\\\\") print("\n-> Done!") if opt.isCudaMorphing and opt.borderMorphLoss: bnmorph.print_params()
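# The metrics printed above follow the standard KITTI depth-evaluation protocol. The local
# `compute_errors` additionally takes `UseGtMedianScaling` and reports an `abs_shift` term that is
# not reproduced here; the sketch below is only the common monodepth2-style seven-metric core:
import numpy as np

def compute_errors_core(gt, pred):
    """abs_rel, sq_rel, rmse, rmse_log and the delta < 1.25^k accuracies."""
    thresh = np.maximum((gt / pred), (pred / gt))
    a1 = (thresh < 1.25).mean()
    a2 = (thresh < 1.25 ** 2).mean()
    a3 = (thresh < 1.25 ** 3).mean()
    rmse = np.sqrt(((gt - pred) ** 2).mean())
    rmse_log = np.sqrt(((np.log(gt) - np.log(pred)) ** 2).mean())
    abs_rel = np.mean(np.abs(gt - pred) / gt)
    sq_rel = np.mean(((gt - pred) ** 2) / gt)
    return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3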
def evaluate(opt): """Evaluates a pretrained model using a specified test set """ MIN_DEPTH = 1e-3 MAX_DEPTH = 80 opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder) assert os.path.isdir(opt.load_weights_folder), \ "Cannot find a folder at {}".format(opt.load_weights_folder) print("-> Loading weights from {}".format(opt.load_weights_folder)) filenames = readlines(os.path.join(splits_dir, opt.split, "val_files.txt")) encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth") decoder_path = os.path.join(opt.load_weights_folder, "depth.pth") encoder_dict = torch.load(encoder_path) if opt.use_stereo: opt.frame_ids.append("s") if opt.dataset == 'cityscape': dataset = datasets.CITYSCAPERawDataset(opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'], opt.frame_ids, 4, is_train=False, tag=opt.dataset, load_meta=True, is_sep_train_seman=False) elif opt.dataset == 'kitti': dataset = datasets.KITTIRAWDataset(opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'], opt.frame_ids, 4, is_train=False, tag=opt.dataset) else: raise ValueError("No predefined dataset") dataloader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers, pin_memory=True, drop_last=True) encoder = networks.ResnetEncoder(opt.num_layers, False) if opt.switchMode == 'on': depth_decoder = networks.DepthDecoder(encoder.num_ch_enc, isSwitch=True, isMulChannel=opt.isMulChannel) else: depth_decoder = networks.DepthDecoder(encoder.num_ch_enc) model_dict = encoder.state_dict() encoder.load_state_dict( {k: v for k, v in encoder_dict.items() if k in model_dict}) depth_decoder.load_state_dict(torch.load(decoder_path)) encoder.cuda() encoder.eval() depth_decoder.cuda() depth_decoder.eval() # x = torch.ones(2, 2, requires_grad=True) # print(x) # y = x + 2 + x # y = y.detach() # print(y) # z = y * y * 3 # out = z.mean() # print(z, out) # out.backward() # print(x.grad) ##--------------------Visualization parameter here----------------------------## sfx = torch.nn.Softmax(dim=1) mergeDisp = Merge_MultDisp(opt.scales, batchSize=opt.batch_size, isMulChannel=opt.isMulChannel) svRoot = '/media/shengjie/other/sceneUnderstanding/monodepth2/internalRe/figure_visual' index = 0 isvisualize = True viewEdgeMerge = False isHist = False useGtSeman = True viewSurfaceNormal = True viewSelfOcclu = True viewDispUp = True viewSmooth = True viewMulReg = True viewBorderRegress = False viewBorderSimilarity = False viewRandomSample = True viewSemanReg = False viewDepthGuess = False height = 256 width = 512 tensor23dPts = Tensor23dPts() if isHist: rec = np.zeros((19, 100)) if opt.isMulChannel: app = os.path.join('mulDispOn', opt.model_name) else: app = os.path.join('mulDispOff', opt.model_name) dirpath = os.path.join(svRoot, app) if not os.path.exists(dirpath): os.makedirs(dirpath) if viewEdgeMerge: comp1dgrad = Comp1dgrad().cuda() if viewSurfaceNormal: compsn = ComputeSurfaceNormal(height=height, width=width, batch_size=opt.batch_size).cuda() if viewSelfOcclu: selfclu = SelfOccluMask().cuda() with torch.no_grad(): for idx, inputs in enumerate(dataloader): # if idx != 12: # continue for key, ipt in inputs.items(): if not (key == 'height' or key == 'width' or key == 'tag' or key == 'cts_meta'): inputs[key] = ipt.to(torch.device("cuda")) input_color = inputs[("color", 0, 0)].cuda() # input_color = torch.flip(input_color, dims=[3]) features = encoder(input_color) outputs = dict() outputs.update( depth_decoder(features, computeSemantic=True, computeDepth=False)) 
outputs.update( depth_decoder(features, computeSemantic=False, computeDepth=True)) # view the processed semantic seperate training data # for viewInd in range(opt.batch_size): # label = inputs['semanTrain_label'] # visualize_semantic(label[viewInd, 0, :, :].cpu().numpy()).show() # fig_rgb = inputs['semanTrain_rgb'][viewInd, :, :, :].permute(1, 2, 0).cpu().numpy() # fig_rgb = (fig_rgb * 255).astype(np.uint8) # fig_rgb = pil.fromarray(fig_rgb) # fig_rgb.show() if isHist: mulDisp = outputs[('mul_disp', 0)] scaled_disp, mulDepth = disp_to_depth(mulDisp, 0.1, 100) mulDepth = mulDepth.cpu() for i in range(mulDisp.shape[1]): rec[i, :] += torch.histc(mulDepth[:, i, :, :], bins=100, min=0, max=100).numpy() if isvisualize: if useGtSeman: # outputs[('mul_disp', 0)][:,2,:,:] = outputs[('mul_disp', 0)][:,2,:,:] * 0 # outputs[('mul_disp', 0)][:, 12, :, :] = outputs[('mul_disp', 0)][:, 12, :, :] * 0 mergeDisp(inputs, outputs, eval=False) else: mergeDisp(inputs, outputs, eval=True) dispMap = outputs[('disp', 0)] scaled_disp, depthMap = disp_to_depth(dispMap, 0.1, 100) depthMap = depthMap * STEREO_SCALE_FACTOR # _, mul_depthMap = disp_to_depth(outputs[('mul_disp', 0)], 0.1, 100) # mul_depthMap = mul_depthMap * STEREO_SCALE_FACTOR if viewDispUp: fig_dispup = compDispUp.visualize(scaled_disp, viewindex=index) if viewSmooth: rgb = inputs[('color_aug', 0, 0)] smoothfig = comSmooth.visualize(rgb=rgb, disp=scaled_disp, viewindex=index) if useGtSeman: fig_seman = tensor2semantic(inputs['seman_gt'], ind=index, isGt=True) else: fig_seman = tensor2semantic(outputs[('seman', 0)], ind=index) if viewSemanReg: foregroundType = [ 11, 12, 13, 14, 15, 16, 17, 18 ] # person, rider, car, truck, bus, train, motorcycle, bicycle softmaxedSeman = F.softmax(outputs[('seman', 0)], dim=1) forePredMask = torch.sum( softmaxedSeman[:, foregroundType, :, :], dim=1, keepdim=True) foreGtMask = torch.ones(dispMap.shape).cuda().byte() for m in foregroundType: foreGtMask = foreGtMask * (inputs['seman_gt'] != m) foreGtMask = 1 - foreGtMask foreGtMask = foreGtMask.float() forePredMask[forePredMask > 0.5] = 1 forePredMask[forePredMask <= 0.5] = 0 forePredMask = foreGtMask rdSampleSeman.visualizeBorderSample(dispMap, forePredMask, gtMask=foreGtMask, viewIndex=index) cm = plt.get_cmap('magma') viewForePred = forePredMask[index, :, :, :].squeeze( 0).detach().cpu().numpy() viewForePred = (cm(viewForePred) * 255).astype(np.uint8) # pil.fromarray(viewForePred).show() viewForeGt = foreGtMask[index, :, :, :].squeeze( 0).detach().cpu().numpy() viewForeGt = (cm(viewForeGt) * 255).astype(np.uint8) # pil.fromarray(viewForeGt).show() forePredictCombined = np.concatenate( [viewForePred, viewForeGt], axis=0) # pil.fromarray(forePredictCombined).show() pil.fromarray(forePredictCombined).save( os.path.join(dirpath, str(idx) + '_fg.png')) if viewDepthGuess: wallType = [2, 3, 4] # Building, wall, fence roadType = [0, 1, 9] # road, sidewalk, terrain foregroundType = [ 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18 ] # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle wallTypeMask = torch.ones(dispMap.shape).cuda().byte() roadTypeMask = torch.ones(dispMap.shape).cuda().byte() foreGroundMask = torch.ones(dispMap.shape).cuda().byte() with torch.no_grad(): for m in wallType: wallTypeMask = wallTypeMask * (inputs['seman_gt'] != m) wallTypeMask = (1 - wallTypeMask).float() for m in roadType: roadTypeMask = roadTypeMask * (inputs['seman_gt'] != m) roadTypeMask = (1 - roadTypeMask).float() for m in foregroundType: foreGroundMask = 
foreGroundMask * ( inputs['seman_gt'] != m) foreGroundMask = (1 - foreGroundMask).float() originalSieze = [2048, 1024] # currentSize = np.array([dispMap.shape[3], dispMap.shape[2]]) # scaleFac = np.eye(4) # scaleFac[0,0] = currentSize[0] / originalSieze[0] # scaleFac[1,1] = currentSize[1] / originalSieze[1] # scaleFac = torch.Tensor(scaleFac).view(1,4,4).repeat(opt.batch_size, 1, 1).cuda() # scaledIntrinsic = scaleFac @ inputs['realIn'] scaledIntrinsic = inputs['realIn'] depthGuess.visualizeDepthGuess( realDepth=depthMap, dispAct=dispMap, foredgroundMask=foreGroundMask, wallTypeMask=wallTypeMask, groundTypeMask=roadTypeMask, intrinsic=scaledIntrinsic, extrinsic=inputs['realEx'], semantic=inputs['seman_gt_eval'], cts_meta=inputs['cts_meta'], viewInd=index) # realDepth, foredgroundMask, wallTypeMask, groundTypeMask, intrinsic, extrinsic fig_rgb = tensor2rgb(inputs[('color', 0, 0)], ind=index) fig_disp = tensor2disp(outputs[('disp', 0)], ind=index) fig_3d, veh_coord, veh_coord_gt = tensor23dPts.visualize3d( depthMap, ind=index, intrinsic=inputs['cts_meta']['intrinsic'][index, :, :], extrinsic=inputs['cts_meta']['extrinsic'][index, :, :], gtmask=inputs['cts_meta']['mask'][index, :, :], gtdepth=inputs['cts_meta']['depthMap'][index, :, :], semanticMap=inputs['seman_gt_eval'][index, :, :]) # check: # torch.inverse(inputs['invcamK'][index, :, :] @ inputs['realIn'][index, :, :]) - inputs['cts_meta']['extrinsic'][index, :, :] fig_grad = None if viewSurfaceNormal: # surnorm = compsn.visualize(depthMap = depthMap, invcamK = inputs['invcamK'].cuda(), orgEstPts = veh_coord, gtEstPts = veh_coord_gt, viewindex = index) surnorm = compsn.visualize( depthMap=depthMap, invcamK=inputs['invcamK'].cuda(), orgEstPts=veh_coord, gtEstPts=veh_coord_gt, viewindex=index) surnormMap = compsn(depthMap=depthMap, invcamK=inputs['invcamK'].cuda()) if viewMulReg: depthMapLoc = depthMap / STEREO_SCALE_FACTOR skyId = 10 skyMask = inputs['seman_gt'] == skyId skyerr = objReg.visualize_regularizeSky(depthMapLoc, skyMask, viewInd=index) wallType = [2, 3, 4] # Building, wall, fence roadType = [0, 1, 9] # road, sidewalk, terrain permuType = [5, 7] # Pole, traffic sign chanWinSize = 5 wallMask = torch.ones_like(skyMask) roadMask = torch.ones_like(skyMask) permuMask = torch.ones_like(skyMask) with torch.no_grad(): for m in wallType: wallMask = wallMask * (inputs['seman_gt'] != m) wallMask = 1 - wallMask wallMask = wallMask[:, :, 1:-1, 1:-1] for m in roadType: roadMask = roadMask * (inputs['seman_gt'] != m) roadMask = 1 - roadMask roadMask = roadMask[:, :, 1:-1, 1:-1] for m in permuType: permuMask = permuMask * (inputs['seman_gt'] != m) permuMask = 1 - permuMask permuMask = permuMask[:, :, 1:-1, 1:-1] BdErrFig, viewRdErrFig = objReg.visualize_regularizeBuildingRoad( surnormMap, wallMask, roadMask, dispMap, viewInd=index) padSize = int((chanWinSize - 1) / 2) permuMask = permuMask[:, :, padSize:-padSize, padSize:-padSize] surVarFig = objReg.visualize_regularizePoleSign( surnormMap, permuMask, dispMap, viewInd=index) if viewBorderRegress: foregroundType = [ 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18 ] # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle backgroundType = [ 0, 1, 2, 3, 4, 8, 9, 10 ] # road, sidewalk, building, wall, fence, vegetation, terrain, sky suppressType = [255] # Suppress no label lines # foreGroundMask = torch.sum(inputs['seman_gt'][:, foregroundType, :, :], dim=1, keepdim=True) # backGroundMask = torch.sum(inputs['seman_gt'][:, backgroundType, :, :], dim=1, keepdim=True) 
foreGroundMask = torch.ones(dispMap.shape).cuda().byte() backGroundMask = torch.ones(dispMap.shape).cuda().byte() suppresMask = torch.ones(dispMap.shape).cuda().byte() with torch.no_grad(): for m in foregroundType: foreGroundMask = foreGroundMask * ( inputs['seman_gt'] != m) foreGroundMask = 1 - foreGroundMask for m in backgroundType: backGroundMask = backGroundMask * ( inputs['seman_gt'] != m) backGroundMask = 1 - backGroundMask for m in suppressType: suppresMask = suppresMask * (inputs['seman_gt'] != m) suppresMask = 1 - suppresMask suppresMask = suppresMask.float() combinedMask = torch.cat( [foreGroundMask, backGroundMask], dim=1).float() # borderRegFig = borderRegress.visualize_computeBorder(dispMap, combinedMask, suppresMask = suppresMask, viewIndex=index) borderRegFig = None else: borderRegFig = None # if viewBorderSimilarity: # foregroundType = [5, 6, 7, 11, 12, 13, 14, 15, 16, 17, # 18] # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle # backgroundType = [0, 1, 2, 3, 4, 8, 9, # 10] # road, sidewalk, building, wall, fence, vegetation, terrain, sky # suppressType = [255] # Suppress no label lines # foreGroundMask = torch.ones(dispMap.shape).cuda().byte() # backGroundMask = torch.ones(dispMap.shape).cuda().byte() # suppresMask = torch.ones(dispMap.shape).cuda().byte() # # with torch.no_grad(): # for m in foregroundType: # foreGroundMask = foreGroundMask * (inputs['seman_gt'] != m) # foreGroundMask = 1 - foreGroundMask # for m in backgroundType: # backGroundMask = backGroundMask * (inputs['seman_gt'] != m) # backGroundMask = 1 - backGroundMask # for m in suppressType: # suppresMask = suppresMask * (inputs['seman_gt'] != m) # suppresMask = 1 - suppresMask # suppresMask = suppresMask.float() # combinedMask = torch.cat([foreGroundMask, backGroundMask], dim=1).float() # # borderSimFig = borderSim.visualize_borderSimilarity(dispMap, foreGroundMask.float(), suppresMask = suppresMask, viewIndex=index) if viewRandomSample: foregroundType = [ 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18 ] # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle backgroundType = [ 0, 1, 2, 3, 4, 8, 9, 10 ] # road, sidewalk, building, wall, fence, vegetation, terrain, sky suppressType = [255] # Suppress no label lines foreGroundMask = torch.ones(dispMap.shape).cuda().byte() backGroundMask = torch.ones(dispMap.shape).cuda().byte() suppresMask = torch.ones(dispMap.shape).cuda().byte() with torch.no_grad(): for m in foregroundType: foreGroundMask = foreGroundMask * ( inputs['seman_gt'] != m) foreGroundMask = 1 - foreGroundMask for m in suppressType: suppresMask = suppresMask * (inputs['seman_gt'] != m) suppresMask = 1 - suppresMask suppresMask = suppresMask.float() foreGroundMask = foreGroundMask.float() rdSampleOnBorder.visualize_randomSample(dispMap, foreGroundMask, suppresMask, viewIndex=index) # rdSampleOnBorder.randomSampleReg(dispMap, foreGroundMask) if viewEdgeMerge: grad_disp = comp1dgrad(outputs[('mul_disp', 0)]) fig_grad = tensor2disp(grad_disp, ind=index, vmax=1) fig_grad = fig_grad.resize([512, 256]) if viewSelfOcclu: fl = inputs[("K", 0)][:, 0, 0] bs = torch.abs(inputs["stereo_T"][:, 0, 3]) clufig, suppressedDisp = selfclu.visualize(dispMap, viewind=index) if fig_grad is not None: grad_seman = ( np.array(fig_grad)[:, :, 0:3].astype(np.float) * 0.7 + np.array(fig_seman).astype(np.float) * 0.3).astype( np.uint8) # combined = [np.array(fig_disp)[:, :, 0:3], np.array(fig_grad)[:, :, 0:3], np.array(fig_seman), np.array(fig_rgb)] 
combined = [ grad_seman, np.array(fig_disp)[:, :, 0:3], np.array(fig_rgb) ] combined = np.concatenate(combined, axis=1) else: if viewSurfaceNormal and viewSelfOcclu: surnorm = surnorm.resize([512, 256]) surnorm_mixed = pil.fromarray( (np.array(surnorm) * 0.2 + np.array(fig_disp)[:, :, 0:3] * 0.8).astype( np.uint8)) disp_seman = ( np.array(fig_disp)[:, :, 0:3].astype(np.float) * 0.8 + np.array(fig_seman).astype(np.float) * 0.2).astype( np.uint8) supprressed_disp_seman = ( np.array(suppressedDisp)[:, :, 0:3].astype( np.float) * 0.8 + np.array(fig_seman).astype(np.float) * 0.2).astype( np.uint8) rgb_seman = ( np.array(fig_seman).astype(np.float) * 0.5 + np.array(fig_rgb).astype(np.float) * 0.5).astype( np.uint8) # clud_disp = (np.array(clufig)[:, :, 0:3].astype(np.float) * 0.3 + np.array(fig_disp)[:, :, 0:3].astype( # np.float) * 0.7).astype(np.uint8) comb1 = np.concatenate([ np.array(supprressed_disp_seman)[:, :, 0:3], np.array(suppressedDisp)[:, :, 0:3] ], axis=1) comb2 = np.concatenate([ np.array(disp_seman)[:, :, 0:3], np.array(fig_disp)[:, :, 0:3] ], axis=1) comb3 = np.concatenate([ np.array(surnorm_mixed)[:, :, 0:3], np.array(surnorm)[:, :, 0:3] ], axis=1) comb4 = np.concatenate([ np.array(fig_seman)[:, :, 0:3], np.array(rgb_seman)[:, :, 0:3] ], axis=1) comb6 = np.concatenate([ np.array(clufig)[:, :, 0:3], np.array(fig_dispup)[:, :, 0:3] ], axis=1) fig3dsize = np.ceil( np.array([ comb4.shape[1], comb4.shape[1] / fig_3d.size[0] * fig_3d.size[1] ])).astype(np.int) comb5 = np.array(fig_3d.resize(fig3dsize)) # combined = np.concatenate([comb1, comb6, comb2, comb3, comb4, comb5], axis=0) combined = np.concatenate([comb1, comb2, comb4, comb3], axis=0) else: disp_seman = ( np.array(fig_disp)[:, :, 0:3].astype(np.float) * 0.8 + np.array(fig_seman).astype(np.float) * 0.2).astype( np.uint8) rgb_seman = ( np.array(fig_seman).astype(np.float) * 0.5 + np.array(fig_rgb).astype(np.float) * 0.5).astype( np.uint8) # combined = [np.array(disp_seman)[:,:,0:3], np.array(fig_disp)[:, :, 0:3], np.array(fig_seman), np.array(fig_rgb)] combined = [ np.array(disp_seman)[:, :, 0:3], np.array(fig_disp)[:, :, 0:3], np.array(fig_seman), np.array(rgb_seman) ] combined = np.concatenate(combined, axis=1) fig = pil.fromarray(combined) # fig.show() fig.save(os.path.join(dirpath, str(idx) + '.png')) if borderRegFig is not None: borderRegFig.save( os.path.join(dirpath, str(idx) + '_borderRegress.png')) # fig_3d.save(os.path.join(dirpath, str(idx) + '_fig3d.png')) # for k in range(10): # fig_disp = tensor2disp(outputs[('disp', 0)], ind=k) # fig_rgb = tensor2rgb(inputs[('color', 0, 0)], ind=k) # combined = [np.array(fig_disp)[:, :, 0:3], np.array(fig_rgb)] # combined = np.concatenate(combined, axis=1) # fig = pil.fromarray(combined) # fig.save( # os.path.join('/media/shengjie/other/sceneUnderstanding/monodepth2/internalRe/MoredispOrg' + str(k) + '.png')) # fig_rgb.save(os.path.join(svRoot, app, 'rgb' + str(idx) + '.png')) # fig_seman.save(os.path.join(svRoot, app, 'semantic'+ str(idx) + '.png')) # fig_disp.save(os.path.join(svRoot, app, 'disp'+ str(idx) + '.png')) # a = inputs['seman_gt_eval'] # scaled_disp, _ = disp_to_depth(outputs[('disp', 0)], 0.1, 100) print("%dth saved" % idx) # If compute the histogram if isHist: svPath = '/media/shengjie/other/sceneUnderstanding/monodepth2/internalRe/mul_channel_depth' carId = 13 prob = copy.deepcopy(rec) ind = np.arange(prob.shape[1] * 2) for i in range(prob.shape[0]): prob[i, :] = prob[i, :] / np.sum(prob[i, :]) for i in range(prob.shape[0]): trainStr = trainId2label[i][0] fig, ax 
= plt.subplots() rects1 = ax.bar(ind[0::2], prob[carId, :], label='obj:car') rects2 = ax.bar(ind[1::2], prob[i, :], label='obj:' + trainStr) ax.set_ylabel('Meter in percentile') ax.set_xlabel('Meters') ax.set_title('Scale Changes between scale car and scale %s' % trainStr) ax.legend() plt.savefig(os.path.join(svPath, str(i)), dpi=200) plt.close(fig)
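# `disp_to_depth`, called throughout the visualisation code above as disp_to_depth(disp, 0.1, 100),
# converts the network's sigmoid disparity output into depth within [min_depth, max_depth].
# A sketch matching the monodepth2 convention this code appears to follow (the local
# implementation may differ slightly):
def disp_to_depth_sketch(disp, min_depth, max_depth):
    """Scale sigmoid disparity to [1/max_depth, 1/min_depth] and invert to obtain depth."""
    min_disp = 1.0 / max_depth
    max_disp = 1.0 / min_depth
    scaled_disp = min_disp + (max_disp - min_disp) * disp
    depth = 1.0 / scaled_disp
    return scaled_disp, depth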
def evaluate(opts):
    """Evaluates a pretrained model using a specified test set"""
    MIN_DEPTH = opts['min_depth']
    MAX_DEPTH = opts['max_depth']

    data_path = opts['dataset']['path']
    batch_size = opts['dataset']['batch_size']
    num_workers = opts['dataset']['num_workers']
    feed_height = opts['feed_height']
    feed_width = opts['feed_width']
    full_width = opts['dataset']['full_width']
    full_height = opts['dataset']['full_height']

    out_dir = Path(opts['out_dir'])
    out_dir.mkdir_p()
    sub_dirs = opts['sub_dirs']
    for item in sub_dirs:
        (out_dir / item).mkdir_p()

    # metric_mode = opts['metric_mode']
    # The metric mode here forcibly compresses the gt values into the same range as the scanner,
    # which pushes them as close as possible to metric values.
    # But for ...

    data_path = Path(opts['dataset']['path'])
    lines = Path(opts['dataset']['split']['path']) / opts['dataset']['split']['test_file']
    model_path = opts['model']['load_paths']
    encoder_mode = opts['model']['encoder_mode']
    frame_sides = opts['frame_sides']
    # frame_prior, frame_now, frame_next = opts['frame_sides']

    encoder, decoder = model_init(model_path, mode=encoder_mode)
    file_names = readlines(lines)

    print('-> dataset_path:{}'.format(data_path))
    print('-> model_path')
    for k, v in opts['model']['load_paths'].items():
        print('\t' + str(v))
    print("-> data split:{}".format(lines))
    print('-> total:{}'.format(len(file_names)))

    if opts['dataset']['type'] == 'mc':
        dataset = datasets.MCDataset(
            data_path=data_path, filenames=file_names, height=feed_height,
            width=feed_width, frame_sides=frame_sides, num_scales=1, mode="test")
    elif opts['dataset']['type'] == 'kitti':
        dataset = datasets.KITTIRAWDataset(  # KITTIRAWData
            data_path=data_path, filenames=file_names, height=feed_height,
            width=feed_width, frame_sides=frame_sides, num_scales=1, mode="test")
    elif opts['dataset']['type'] == 'custom_mono':
        dataset = datasets.CustomMonoDataset(
            data_path=data_path, filenames=file_names, height=feed_height,
            width=feed_width, frame_sides=frame_sides, num_scales=1, mode='test')

    dataloader = DataLoader(
        dataset, batch_size=batch_size, shuffle=False,
        num_workers=num_workers, pin_memory=True, drop_last=False)

    pred_depths = []
    gt_depths = []
    disps = []
    idx = 0
    for data in tqdm(dataloader):
        input_color = reframe(encoder_mode, data, frame_sides=frame_sides, key='color')
        input_color = input_color.cuda()

        features = encoder(input_color)
        disp = decoder(*features)
        # depth_gt = data['depth_gt']

        pred_disp, pred_depth = disp_to_depth(disp, min_depth=MIN_DEPTH, max_depth=MAX_DEPTH)
        # pred_depth = disp2depth(disp)

        if "depth" in sub_dirs:
            pred_depth = pred_depth.cpu()[:, 0].numpy()[0]
            depth = cv2.resize(pred_depth, (full_width, full_height))
            depth = np_normalize_image(depth)
            cv2.imwrite(out_dir / "depth" / file_names[idx].replace('/', '_'), depth * 255)
        if "disp" in sub_dirs:
            pred_disp = pred_disp.cpu()[:, 0].numpy()[0]
            disp = cv2.resize(pred_disp, (full_width, full_height))
            disp = np_normalize_image(disp)
            cv2.imwrite(out_dir / "disp" / file_names[idx].replace('/', '_'), disp * 255)
        idx += 1
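# `np_normalize_image` is applied above before the maps are written as 8-bit images. A minimal
# sketch of a min-max normalisation (an assumption -- the project's helper may clip or use
# percentiles instead):
import numpy as np

def np_normalize_image(img):
    """Rescale an array to [0, 1] by min-max normalisation."""
    img = img.astype(np.float64)
    return (img - img.min()) / (img.max() - img.min() + 1e-12)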
def evaluate(opt): """Evaluates a pretrained model using a specified test set """ is_use_disparity = True is_eval_morph = True is_cts_bst = True MIN_DEPTH = 1e-3 MAX_DEPTH = 80 if is_use_disparity: getDisp = get_disparity_predict() opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder) assert os.path.isdir(opt.load_weights_folder), \ "Cannot find a folder at {}".format(opt.load_weights_folder) print("-> Loading weights from {}".format(opt.load_weights_folder)) filenames = readlines(os.path.join(splits_dir, opt.split, "val_files.txt")) encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth") decoder_path = os.path.join(opt.load_weights_folder, "depth.pth") encoder_dict = torch.load(encoder_path) if opt.dataset == 'cityscape': dataset = datasets.CITYSCAPERawDataset(opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'], [0], 4, is_train=False, tag=opt.dataset) elif opt.dataset == 'kitti': dataset = datasets.KITTISemanticDataset(opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'], [0], 4, is_train=False, tag=opt.dataset) train_dataset_predict = datasets.KITTIRAWDataset( opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'], [0,'s'], 4, tag='kitti', is_train=False, img_ext='png', load_meta=False, is_load_semantics=True, is_predicted_semantics=True, load_morphed_depth=False) train_dataset_gt = datasets.KITTIRAWDataset( opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'], [0,'s'], 4, tag='kitti', is_train=False, img_ext='png', load_meta=False, is_load_semantics=True, is_predicted_semantics=False, load_morphed_depth=False) else: raise ValueError("No predefined dataset") dataloader_predict = DataLoader(train_dataset_predict, 1, shuffle=False, num_workers=opt.num_workers, pin_memory=True, drop_last=False) dataloader_gt = DataLoader(train_dataset_gt, 1, shuffle=False, num_workers=opt.num_workers, pin_memory=True, drop_last=False) dataloader_predict_iter = iter(dataloader_predict) dataloader_gt_iter = iter(dataloader_gt) encoder = networks.ResnetEncoder(opt.num_layers, False) if opt.switchMode == 'on': depth_decoder = networks.DepthDecoder(encoder.num_ch_enc, isSwitch=True, isMulChannel=opt.isMulChannel) else: depth_decoder = networks.DepthDecoder(encoder.num_ch_enc) model_dict = encoder.state_dict() encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict}) depth_decoder.load_state_dict(torch.load(decoder_path)) encoder.cuda() encoder.eval() depth_decoder.cuda() depth_decoder.eval() sfx = torch.nn.Softmax(dim=1) depth_pos = '/media/shengjie/other/sceneUnderstanding/bts/result_bts_eigen/raw' print("Evaluation starts") width = 1216 height = 352 height_s = int(0.40810811 * height) height_e = int(0.99189189 * height) width_s = int(0.03594771 * width) width_e = int(0.96405229 * width) if not is_use_disparity: ms = Morph_semantics(height=206, width=1129) else: ms = Morph_semantics(height=218, width=1153) with torch.no_grad(): for idx in range(dataloader_gt.__len__()): inputs_predict = dataloader_predict_iter.__next__() inputs_gt = dataloader_gt_iter.__next__() if not is_cts_bst: inputs_predict['seman_gt_eval'] = inputs_predict['seman_gt_eval'] else: tcomp = filenames[idx].split(' ') path = os.path.join('/media/shengjie/other/sceneUnderstanding/SDNET/cts_best_seman', tcomp[0].split('/')[0] +'_' + tcomp[0].split('/')[1] + '_' + tcomp[1].zfill(10) + '.png') cts_pred = Image.open(path) cts_pred = np.array(cts_pred) for k in np.unique(cts_pred): cts_pred[cts_pred == k] = 
labels[k].trainId inputs_predict['seman_gt_eval'] = torch.from_numpy(cts_pred).unsqueeze(0) # tensor2semantic(inputs_predict['seman_gt_eval'].unsqueeze(1), ind=0).show() # tensor2semantic(inputs_gt['seman_gt_eval'].unsqueeze(1), ind=0).show() # tensor2semantic(inputs_predict['seman_gt_eval'].unsqueeze(1), ind=0).show() # input_color = inputs[("color", 0, 0)].cuda() # outputs = depth_decoder(encoder(input_color),computeSemantic = True, computeDepth = False) resized_gt = inputs_gt['seman_gt_eval'].unsqueeze(1) # resized_gt = F.interpolate(inputs_gt['seman_gt_eval'].unsqueeze(1).float(), [height, width], mode='nearest') # resized_gt = resized_gt.squeeze(1).byte() resized_pred = F.interpolate(inputs_predict['seman_gt_eval'].unsqueeze(1).float(), [inputs_gt['seman_gt_eval'].shape[1], inputs_gt['seman_gt_eval'].shape[2]], mode='nearest') resized_pred = resized_pred.byte() resized_rgb = F.interpolate(inputs_gt[('color', 0, 0)], [inputs_gt['seman_gt_eval'].shape[1], inputs_gt['seman_gt_eval'].shape[2]], mode='bilinear', align_corners=True) resized_pred_list = list() resized_morph_list = list() groundTruthNp_list = list() if not is_use_disparity: t_height = resized_gt.shape[2] t_width = resized_gt.shape[3] top_margin = int(t_height - 352) left_margin = int((t_width - 1216) / 2) resized_gt = resized_gt[:,:,top_margin:top_margin + 352, left_margin:left_margin + 1216] resized_pred = resized_pred[:,:,top_margin:top_margin + 352, left_margin:left_margin + 1216] # tensor2semantic(resized_gt, ind=0).show() # tensor2semantic(resized_pred, ind=0).show() resized_rgb = F.interpolate(inputs_gt[('color', 0, 0)], [inputs_gt['seman_gt_eval'].shape[1], inputs_gt['seman_gt_eval'].shape[2]], mode='bilinear', align_corners=True) resized_rgb = resized_rgb[:,:,top_margin:top_margin + 352, left_margin:left_margin + 1216] pred_depth = get_depth_predict(filenames[idx]) resized_depth = pred_depth # resized_gt = resized_gt.cpu().numpy().astype(np.uint8) # resized_pred = resized_pred.cpu().numpy().astype(np.uint8) # resized_depth = pred_depth # visualize_semantic(gt[0,:,:]).show() # visualize_semantic(pred[0,:,:]).show() # pred_depth = get_depth_predict(filenames[idx]) # pred_depth = F.interpolate(pred_depth.float(), [height, width], mode='bilinear', align_corners=True) # resized_pred = resized_pred.unsqueeze(1) # resized_gt = resized_gt.unsqueeze(1) # tensor2semantic(resized_pred, ind=0).show() # tensor2semantic(resized_gt, ind=0).show() # tensor2disp(1 / pred_depth, vmax=0.15, ind=0).show() # disp_map = tensor2disp(1 / pred_depth, vmax=0.15, ind=0) # disp_map_combined = combined_2_img(disp_map, tensor2rgb(resized_rgb, ind=0), 0.5) pred_depth_cropped = resized_depth[:,:,height_s : height_e, width_s : width_e] resized_pred_cropped = resized_pred[:,:,height_s : height_e, width_s : width_e] resized_gt_cropped = resized_gt[:,:,height_s : height_e, width_s : width_e] resized_rgb_cropped = resized_rgb[:,:,height_s : height_e, width_s : width_e] # tensor2semantic(resized_pred_cropped, ind=0).show() # tensor2semantic(resized_gt_cropped, ind=0).show() # tensor2disp(1 / pred_depth_cropped, vmax=0.15, ind=0).show() figseman_gt = tensor2semantic(resized_gt_cropped, ind=0) figseman_pred = tensor2semantic(resized_pred_cropped, ind=0) figdisp = tensor2disp(1 / pred_depth_cropped, vmax=0.15, ind=0) combined_2_img(figseman_pred, figdisp, 0.7).show() combined_2_img(figseman_gt, figdisp, 0.7).show() seman_morphed = ms.morh_semantics(pred_depth_cropped, resized_pred_cropped) else: pred_depth = getDisp.read_disparity_predict(filenames[idx]) 
pred_depth = torch.from_numpy(pred_depth).unsqueeze(0).unsqueeze(0) pred_depth = F.interpolate(pred_depth, [inputs_gt['seman_gt_eval'].shape[1], inputs_gt['seman_gt_eval'].shape[2]], mode='bilinear', align_corners=True) # tensor2disp(pred_depth, ind=0, percentile=95).show() if pred_depth.shape[2] < 371 or pred_depth.shape[3] < 1197: print("Error") pred_depth_cropped = pred_depth[:, :, 153:371, 44:1197] resized_pred_cropped = resized_pred[:, :, 153:371, 44:1197] resized_gt_cropped = resized_gt[:, :, 153:371, 44:1197] resized_rgb_cropped = resized_rgb[:, :, 153:371, 44:1197] # figdisp = tensor2disp(pred_depth_cropped, percentile=95, ind=0) # figseman = tensor2semantic(resized_gt_cropped, ind=0) # figcombined = combined_2_img(figdisp, figseman, 0.7) # figcombined.show() # # figdisp = tensor2disp(pred_depth_cropped, percentile=95, ind=0) # figseman = tensor2semantic(resized_pred_cropped, ind=0) # figcombined = combined_2_img(figdisp, figseman, 0.7) # figcombined.show() seman_morphed = ms.morh_semantics(pred_depth_cropped, resized_pred_cropped) ms.compute_edge_distance(pred_depth_cropped, resized_pred_cropped, resized_gt_cropped) resized_pred_list.append(resized_pred_cropped.squeeze(1).detach().cpu().numpy()) resized_morph_list.append(seman_morphed.squeeze(1).detach().cpu().numpy().astype(np.uint8)) groundTruthNp_list.append(resized_gt_cropped.squeeze(1).detach().cpu().numpy()) sv_path = '/media/shengjie/other/sceneUnderstanding/SDNET/visualization/semantic_morph' gt_blended = combined_2_img(tensor2semantic(resized_gt_cropped, ind=0), tensor2rgb(resized_rgb_cropped, ind=0), 0.2) pred_blended = combined_2_img(tensor2semantic(resized_pred_cropped, ind=0), tensor2rgb(resized_rgb_cropped, ind=0), 0.2) morph_blended = combined_2_img(tensor2semantic(seman_morphed, ind=0), tensor2rgb(resized_rgb_cropped, ind=0), 0.2) improved_region = (seman_morphed.cuda().byte() == resized_gt_cropped.cuda().byte()) > (resized_pred_cropped.cuda().byte() == resized_gt_cropped.cuda().byte()) deterized_region = (seman_morphed.cuda().byte() == resized_gt_cropped.cuda().byte()) < ( resized_pred_cropped.cuda().byte() == resized_gt_cropped.cuda().byte()) improve_blend = combined_2_img(tensor2disp(improved_region, vmax = 1, ind=0), tensor2rgb(resized_rgb_cropped, ind=0), 0.6) deterized_blend = combined_2_img(tensor2disp(deterized_region, vmax = 1, ind=0), tensor2rgb(resized_rgb_cropped, ind=0), 0.6) cat_img = concat_imgs([gt_blended, pred_blended, morph_blended, improve_blend, deterized_blend]) cat_img.save(os.path.join('/media/shengjie/other/sceneUnderstanding/SDNET/visualization/semantic_morph', str(idx) + '.png')) # groundTruthNp = resized_gt_cropped.squeeze(1).detach().cpu().numpy() # if is_eval_morph: # predictionNp = seman_morphed.byte().squeeze(1).detach().cpu().numpy() # else: # predictionNp = resized_pred_cropped.squeeze(1).detach().cpu().numpy() print("Finish %dth batch" % idx) ms.show_dis_comp() for pp in range(2): nbPixels = 0 count255 = 0 confMatrix = generateMatrix(args) for k in range(len(resized_pred_list)): groundTruthNp = groundTruthNp_list[k] if pp == 0: predictionNp = resized_pred_list[k] else: predictionNp = resized_morph_list[k] nbPixels = nbPixels + groundTruthNp.shape[0] * groundTruthNp.shape[1] * groundTruthNp.shape[2] encoding_value = 256 # precomputed encoded = (groundTruthNp.astype(np.int32) * encoding_value) + predictionNp values, cnt = np.unique(encoded, return_counts=True) for value, c in zip(values, cnt): pred_id = value % encoding_value gt_id = int((value - pred_id) / encoding_value) if 
pred_id == 255 or gt_id == 255: count255 = count255 + c continue if not gt_id in args.evalLabels: printError("Unknown label with id {:}".format(gt_id)) confMatrix[gt_id][pred_id] += c if confMatrix.sum() + count255!= nbPixels: printError( 'Number of analyzed pixels and entries in confusion matrix disagree: contMatrix {}, pixels {}'.format( confMatrix.sum(), nbPixels)) classScoreList = {} for label in args.evalLabels: labelName = trainId2label[label].name classScoreList[labelName] = getIouScoreForLabel(label, confMatrix, args) vals = np.array(list(classScoreList.values())) print(vals) mIOU = np.mean(vals[np.logical_not(np.isnan(vals))]) if pp == 0: print("Original mIOU is %f" % mIOU) else: print("Morphed mIOU is %f" % mIOU)
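# `getIouScoreForLabel` above follows the Cityscapes evaluation convention: per-class IoU computed
# from the confusion matrix as TP / (TP + FP + FN). A simplified sketch (the cityscapesScripts
# version additionally excludes ignored-in-evaluation labels from the false-positive count):
import numpy as np

def iou_for_label(label, conf_matrix):
    """Intersection-over-union for one class id from a [num_classes x num_classes] matrix."""
    tp = np.float64(conf_matrix[label, label])
    fn = conf_matrix[label, :].sum() - tp
    fp = conf_matrix[:, label].sum() - tp
    denom = tp + fp + fn
    return float('nan') if denom == 0 else tp / denom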
def evaluate(opt): """Evaluates a pretrained model using a specified test set """ MIN_DEPTH = 1e-3 MAX_DEPTH = 80 K = np.array([[0.58, 0, 0.5, 0], [0, 1.92, 0.5, 0], [0, 0, 1, 0], [0, 0, 0, 1]], dtype=np.float32) assert sum((opt.eval_mono, opt.eval_stereo)) == 1, \ "Please choose mono or stereo evaluation by setting either --eval_mono or --eval_stereo" if opt.ext_disp_to_eval is None: opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder) assert os.path.isdir(opt.load_weights_folder), \ "Cannot find a folder at {}".format(opt.load_weights_folder) print("-> Loading weights from {}".format(opt.load_weights_folder)) filenames = readlines(os.path.join(splits_dir, opt.eval_split, "test_files.txt")) encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth") decoder_path = os.path.join(opt.load_weights_folder, "depth.pth") encoder_dict = torch.load(encoder_path) dataset = datasets.KITTIRAWDataset( opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'], [0], 4, is_train=False) dataloader = DataLoader( dataset, 16, shuffle=False, num_workers=opt.num_workers, pin_memory=True, drop_last=False) encoder = networks.ResnetEncoder(opt.num_layers, False) depth_decoder = networks.DepthDecoder(encoder.num_ch_enc) model_dict = encoder.state_dict() encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict}) depth_decoder.load_state_dict(torch.load(decoder_path)) encoder.cuda() encoder.eval() depth_decoder.cuda() depth_decoder.eval() pred_disps = [] print("-> Computing predictions with size {}x{}".format( encoder_dict['width'], encoder_dict['height'])) with torch.no_grad(): for data in dataloader: input_color = data[("color", 0, 0)].cuda() if opt.post_process: # Post-processed results require each image to have two forward passes input_color = torch.cat((input_color, torch.flip(input_color, [3])), 0) output = depth_decoder(encoder(input_color)) pred_disp, _ = disp_to_depth(output[("disp", 0)], opt.min_depth, opt.max_depth) pred_disp = pred_disp.cpu()[:, 0].numpy() if opt.post_process: N = pred_disp.shape[0] // 2 pred_disp = batch_post_process_disparity(pred_disp[:N], pred_disp[N:, :, ::-1]) pred_disps.append(pred_disp) pred_disps = np.concatenate(pred_disps) else: # Load predictions from file print("-> Loading predictions from {}".format(opt.ext_disp_to_eval)) pred_disps = np.load(opt.ext_disp_to_eval) if opt.eval_eigen_to_benchmark: eigen_to_benchmark_ids = np.load( os.path.join(splits_dir, "benchmark", "eigen_to_benchmark_ids.npy")) pred_disps = pred_disps[eigen_to_benchmark_ids] if opt.eval_object: object_masks = [] for line in filenames: line = line.split() folder, frame_index = line[0], int(line[1]) object_mask_filename = os.path.join( os.path.dirname(__file__), "object_masks", folder, "{:010d}.npy".format(int(frame_index))) object_mask = np.load(object_mask_filename) object_masks.append(object_mask) if opt.save_pred_disps: output_path = os.path.join( opt.load_weights_folder, "disps_{}_split.npy".format(opt.eval_split)) print("-> Saving predicted disparities to ", output_path) np.save(output_path, pred_disps) if opt.no_eval: print("-> Evaluation disabled. 
Done.") quit() elif opt.eval_split == 'benchmark': save_dir = os.path.join(opt.load_weights_folder, "benchmark_predictions") print("-> Saving out benchmark predictions to {}".format(save_dir)) if not os.path.exists(save_dir): os.makedirs(save_dir) for idx in range(len(pred_disps)): disp_resized = cv2.resize(pred_disps[idx], (1216, 352)) depth = STEREO_SCALE_FACTOR / disp_resized depth = np.clip(depth, 0, 80) depth = np.uint16(depth * 256) save_path = os.path.join(save_dir, "{:010d}.png".format(idx)) cv2.imwrite(save_path, depth) print("-> No ground truth is available for the KITTI benchmark, so not evaluating. Done.") quit() gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz") gt_depths = np.load(gt_path, fix_imports=True, encoding='latin1', allow_pickle=True)["data"] print("-> Evaluating") if opt.eval_stereo: print(" Stereo evaluation - " "disabling median scaling, scaling by {}".format(STEREO_SCALE_FACTOR)) opt.scaling = "disable" opt.pred_depth_scale_factor = STEREO_SCALE_FACTOR else: print(" Mono evaluation - using median scaling") errors = [] ratios = [] ex_logs = [] mean_scale = [] side_map = {"2": 2, "3": 3, "l": 2, "r": 3} #resize_ori = transforms.Resize((pred_disps.shape[1],pred_disps.shape[2]),interpolation=Image.ANTIALIAS) for i in range(pred_disps.shape[0]): gt_depth = gt_depths[i] gt_height, gt_width = gt_depth.shape[:2] line = filenames[i].split() folder = line[0] frame_index = line[1] side = side_map[line[2]] color = pil_loader(get_image_path(folder,int(frame_index),side)) #color = pil_loader('/mnt/sdb/xuefeng_data/dkit_dataset/20200629_mechanical_fast/images/{:006d}.png'.format(i)) #color = color.crop((0,191,640,383)) pred_disp = pred_disps[i] pred_disp = cv2.resize(pred_disp, (gt_width, gt_height)) pred_depth = 1 / pred_disp if opt.eval_split == "eigen": mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH) crop = np.array( [0.40810811 * gt_height, 0.99189189 * gt_height, 0.03594771 * gt_width, 0.96405229 * gt_width]).astype(np.int32) crop_mask = np.zeros(mask.shape) crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1 mask = np.logical_and(mask, crop_mask) if opt.eval_object: object_mask = object_masks[i].astype(np.bool) else: mask = gt_depth > 0 if opt.scaling == "gt": ratio = np.median(gt_depth[mask]) / np.median(pred_depth[mask]) if opt.eval_object: mask = np.logical_and(mask, object_mask) elif opt.scaling == "dgc": scale_recovery = ScaleRecovery(1, gt_height, gt_width, K).cuda() #scale_recovery = ScaleRecovery(1, 192, 640, K).cuda() pred_depth = torch.from_numpy(pred_depth).unsqueeze(0).cuda() ratio1,surface_normal1,ground_mask1,cam_points1 = scale_recovery(pred_depth) #ratio = ratio1.cpu().item() surface_normal = surface_normal1.cpu()[0,0,:,:].numpy() ground_mask = ground_mask1.cpu()[0,0,:,:].numpy() pred_depth = pred_depth[0].cpu().numpy() cam_points=cam_points1.cpu().numpy() cam_points2=cam_points.transpose(1,2,0) cam_points_masked = cam_points2[np.where(ground_mask==1)] np.random.shuffle(cam_points_masked) cam_points4 = np.array(cam_points_masked) print(cam_points4.shape) cam_points4 = cam_points4[:2000,:] cam_points3 = np.concatenate((cam_points4, np.ones((cam_points4.shape[0], 1))), axis=1) print(cam_points3.shape) plane,inliers = fit_plane_LSE_RANSAC(cam_points3) #print(plane) ratio_rans = abs(1.65 / plane[-1]) else: ratio = 1 #print(ratio) #print(max(pred_depth)) #print(min(pred_depth)) pred_depth_ori = pred_depth*mask gt_depth_ori = gt_depth*mask pred_depth_ori = np.where(mask==1,pred_depth_ori,1) pred_depth = pred_depth[mask] gt_depth = 
gt_depth[mask] #mean_scale.append(np.mean(gt_depth/pred_depth)) ''' error_try = 100 scale_abs = 0 for ratio_try in np.arange(0.1,50,step=0.1): pred_depth1=pred_depth * ratio_try error_tmp = compute_errors(gt_depth, pred_depth1)[0] #print(error_tmp) if error_tmp < error_try: error_try = error_tmp scale_abs = ratio_try div_scale = gt_depth_ori / pred_depth_ori #print(div_scale.shape) div_values1 = div_scale[mask] div_scale = (div_scale-scale_abs)/scale_abs div_values = div_scale[mask] div_rmse = sqrt(sum((div_values1-scale_abs)*(div_values1-scale_abs))/len(div_values1)) print(min(div_values),max(div_values)) ex_logs.append([i,min(div_values), max(div_values), div_rmse,scale_abs]) #print(div_scale.shape) #div_scale = div_scale/np.max(div_scale) mu = np.mean(div_values1) sigma = np.std(div_values1) print(min(div_values1),max(div_values1)) fig,ax=plt.subplots() n, bins, patches = ax.hist(div_values1,150,range=(3,130),density = True) y = norm.pdf(bins, mu, 0.8*sigma) ax.plot(bins, y, 'r') plt.xlabel('Scale') plt.ylabel('Density') plt.savefig(os.path.join(os.path.dirname(__file__), "hist_imgs2","{:010d}.jpg".format(i))) plt.close() #blend_img = blending_imgs(div_scale, color,i) #blend_img.save(os.path.join(os.path.dirname(__file__), "blend_imgs","{:010d}.jpg".format(i))) blending_imgs(surface_normal,color,i,'surface_normals') blending_imgs(ground_mask,color,i,'ground_masks') ''' blending_imgs(ground_mask,color,i,ground_mask) pred_depth *= ratio_rans ratios.append(ratio_rans) pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH #blending_imgs(div_scale, color,i,mask) if len(gt_depth) != 0: errors.append(compute_errors(gt_depth, pred_depth)) ''' fl = open('ex.txt','w') fl.writelines(str(ex_logs)) fl.close() ''' #np.save('mean_scale.npy', mean_scale) ratios = np.array(ratios) med = np.median(ratios) print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(med, np.std(ratios / med))) mean_errors = np.array(errors).mean(0) print("\n " + ("{:>8} | " * 7).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3")) print(("&{: 8.3f} " * 7).format(*mean_errors.tolist()) + "\\\\") print("\n-> Done!")
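# --------------------------------------------------------------------------
# Hedged sketch (not the repo's fit_plane_LSE_RANSAC): the "dgc" branch above
# recovers metric scale by fitting a ground plane to back-projected ground
# points and comparing the estimated camera height with the known KITTI
# mounting height of ~1.65 m. A minimal RANSAC + least-squares plane fit over
# homogeneous points of shape (N, 4); iteration count and inlier threshold
# are assumptions.
import numpy as np

def fit_plane_ransac_sketch(pts_h, iters=100, inlier_thresh=0.05, seed=0):
    """pts_h: (N, 4) homogeneous 3D points. Returns plane coeffs p with p @ x_h ~= 0."""
    rng = np.random.default_rng(seed)
    best_plane, best_inliers = None, np.zeros(len(pts_h), dtype=bool)
    for _ in range(iters):
        sample = pts_h[rng.choice(len(pts_h), 3, replace=False)]
        # Plane through 3 points: null-space vector of the 3x4 sample matrix
        _, _, vt = np.linalg.svd(sample)
        plane = vt[-1]
        plane = plane / (np.linalg.norm(plane[:3]) + 1e-12)
        dist = np.abs(pts_h @ plane)          # point-to-plane distance (unit normal)
        inliers = dist < inlier_thresh
        if inliers.sum() >= best_inliers.sum():
            best_plane, best_inliers = plane, inliers
    # Least-squares refit on the inliers
    _, _, vt = np.linalg.svd(pts_h[best_inliers])
    plane = vt[-1]
    plane = plane / (np.linalg.norm(plane[:3]) + 1e-12)
    return plane, best_inliers

# With the normal normalized to unit length, plane[-1] is the signed distance
# from the camera origin to the plane, so abs(1.65 / plane[-1]) gives the
# scale ratio applied as ratio_rans above.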
def evaluate(opt): """Evaluates a pretrained model using a specified test set """ MIN_DEPTH = 1e-3 MAX_DEPTH = 80 opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder) print("-> Loading weights from {}".format(opt.load_weights_folder)) # Load Encoder and Decoder encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth") decoder_path = os.path.join(opt.load_weights_folder, "depth.pth") encoder_dict = torch.load(encoder_path) encoder = networks.ResnetEncoder(opt.num_layers, False) depth_decoder = networks.DepthDecoder(encoder.num_ch_enc, num_output_channels=3) model_dict = encoder.state_dict() encoder.load_state_dict( {k: v for k, v in encoder_dict.items() if k in model_dict}) depth_decoder.load_state_dict(torch.load(decoder_path)) encoder.cuda() encoder.eval() depth_decoder.cuda() depth_decoder.eval() encoder_path = os.path.join( '/home/shengjie/Documents/Project_SemanticDepth/tmp/patchmatch_bs/weights_13', "encoder.pth") decoder_path = os.path.join( '/home/shengjie/Documents/Project_SemanticDepth/tmp/patchmatch_bs/weights_13', "depth.pth") encoder_dict = torch.load(encoder_path) encoder_bs = networks.ResnetEncoder(opt.num_layers, False) depth_decoder_bs = networks.DepthDecoder(encoder.num_ch_enc, num_output_channels=3) model_dict = encoder.state_dict() encoder_bs.load_state_dict( {k: v for k, v in encoder_dict.items() if k in model_dict}) depth_decoder_bs.load_state_dict(torch.load(decoder_path)) encoder_bs.cuda() encoder_bs.eval() depth_decoder_bs.cuda() depth_decoder_bs.eval() filenames = readlines( '/home/shengjie/Documents/Project_SemanticDepth/splits/eigen/test_files.txt' ) opt.frame_ids.append("s") dataset = datasets.KITTIRAWDataset(opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'], [0], 4, is_train=False) dataloader = DataLoader(dataset, 16, shuffle=False, num_workers=opt.num_workers, pin_memory=True, drop_last=False) count = 0 with torch.no_grad(): for idx, inputs in enumerate(dataloader): for key, ipt in inputs.items(): if not (key == 'entry_tag' or key == 'syn_tag'): inputs[key] = ipt.to(torch.device("cuda")) input_color = inputs[("color", 0, 0)].cuda() outputs = depth_decoder(encoder(input_color)) outputs_bs = depth_decoder_bs(encoder_bs(input_color)) for i in range(input_color.shape[0]): figbs = tensor2disp(outputs_bs[('disp', 0)][:, 2:3, :, :], vmax=0.1, ind=i) fig2 = tensor2disp(outputs[('disp', 0)][:, 2:3, :, :], vmax=0.1, ind=i) figrgb = tensor2rgb(inputs[("color", 0, 0)], ind=i) combined = np.concatenate( [np.array(figrgb), np.array(figbs), np.array(fig2)]) pil.fromarray(combined).save( os.path.join( '/media/shengjie/c9c81c9f-511c-41c6-bfe0-2fc19666fb32/Visualizations/Project_SemanDepth/vls_patchmatch_test_visualization', str(count) + '.png')) count = count + 1
# Load depth decoder network with weights
loaded_dict = torch.load(depth_decoder_path)
depth_decoder.load_state_dict(loaded_dict)

# Set to eval mode on GPU
encoder.cuda()
depth_decoder.cuda()
encoder.eval()
depth_decoder.eval()

# Load validation data
print('Loading data...')
data_path = join(dpath_root, dset_type)
filenames = readlines(join('splits', split, 'val_files.txt'))
dataset = datasets.KITTIRAWDataset(data_path, filenames,
                                   loaded_dict_enc['height'], loaded_dict_enc['width'],
                                   [0], num_scales, is_train=False, img_ext='.png')
dataloader = DataLoader(dataset, 1, shuffle=False, num_workers=1,
                        pin_memory=True, drop_last=False)
print('Loaded {} validation images from SPLIT: {} DATASET: {}'.format(
    len(dataloader), split, dset_type))

# Create dirs for model outputs
dest_path = join(abspath('./outputs'), model_name)
if not os.path.isdir(dest_path):
    os.makedirs(dest_path)
if write_depths:
    os.makedirs(join(dest_path, 'dense_depth'))
    os.makedirs(join(dest_path, 'registered_depth'))
if visualize:
    os.makedirs(join(dest_path, 'viz'))

# Get predictions. Time the duration
def evaluate(opt): """Evaluates a pretrained model using a specified test set """ MIN_DEPTH = 1e-3 MAX_DEPTH = 80 opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder) assert os.path.isdir(opt.load_weights_folder), \ "Cannot find a folder at {}".format(opt.load_weights_folder) print("-> Loading weights from {}".format(opt.load_weights_folder)) if viewStereoMask: stereoMaskComputer = StereoMask() stereoMaskComputer.cuda() if viewSurfaceNormal: compsurfnorm = ComputeSurfaceNormal(height=opt.height, width=opt.width, batch_size=opt.batch_size) compsurfnorm.cuda() if viewTypeWiseRegularization: typeWReg = TypeWiseRegularization() typeWReg.cuda() if viewBorderWiseRegularization: borderWiseReg = BorderWiseRegularization(batchNum=opt.batch_size, width=opt.width, height=opt.height).cuda() if viewMonoMsak: monoMask = MonocularMask() monoMask.cuda() filenames = readlines(os.path.join(splits_dir, opt.split, "val_files.txt")) encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth") decoder_path = os.path.join(opt.load_weights_folder, "depth.pth") encoder_dict = torch.load(encoder_path) tensor23dPts = Tensor23dPts(height=opt.height, width=opt.width) if opt.use_stereo: opt.frame_ids.append("s") dataset = datasets.KITTIRAWDataset(opt.data_path, filenames,opt.height, opt.width, opt.frame_ids, 4, is_train=False, load_gt_semantics=opt.load_gt_semantics, load_gt_velodine=opt.load_gt_velodine) dataloader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers, pin_memory=True, drop_last=True) encoder = networks.ResnetEncoder(opt.num_layers, False) depth_decoder = networks.DepthDecoder(encoder.num_ch_enc) model_dict = encoder.state_dict() encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict}) depth_decoder.load_state_dict(torch.load(decoder_path)) encoder.cuda() encoder.eval() depth_decoder.cuda() depth_decoder.eval() dirpath = '/media/shengjie/other/sceneUnderstanding/semantic_regularized_unsupervised_depth_estimation/visualization' sv_path = os.path.join(dirpath, opt.model_name) index = 0 if viewMonoMsak: num_pose_frames = 2 posenet_encoder_path = os.path.join(opt.load_weights_folder, "pose_encoder.pth") posenet_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth") posenet_encoder_dict = torch.load(posenet_encoder_path) posenet_decoder_dict = torch.load(posenet_decoder_path) posenet_encoder = networks.ResnetEncoder( opt.num_layers, opt.weights_init == "pretrained", num_input_images=num_pose_frames) posenet_decoder = networks.PoseDecoder( encoder.num_ch_enc, num_input_features=1, num_frames_to_predict_for=2) posenet_encoder.load_state_dict({k: v for k, v in posenet_encoder_dict.items() if k in posenet_encoder_dict}) posenet_decoder.load_state_dict({k: v for k, v in posenet_decoder_dict.items() if k in posenet_decoder_dict}) posenet_encoder = posenet_encoder.cuda() posenet_decoder = posenet_decoder.cuda() if not os.path.exists(sv_path): os.makedirs(sv_path) with torch.no_grad(): for idx, inputs in enumerate(dataloader): for key, ipt in inputs.items(): if not(key == 'height' or key == 'width' or key == 'tag' or key == 'cts_meta'): inputs[key] = ipt.to(torch.device("cuda")) input_color = inputs[("color", 0, 0)] features = encoder(input_color) outputs = dict() outputs.update(depth_decoder(features)) dispMap = outputs[('disp', 0)] scaledDisp, depthMap = disp_to_depth(dispMap, opt.min_depth, opt.max_depth) foreGroundMask = torch.ones(scaledDisp.shape, device=torch.device("cuda")).byte() scaled_smeantic_label = 
F.interpolate(inputs[('semantic_label', 0)].cpu().float(), size=(scaledDisp.shape[2], scaledDisp.shape[3]), mode='nearest').cuda().byte() for m in foregroundType: foreGroundMask = foreGroundMask * (scaled_smeantic_label != m) foreGroundMask = (1 - foreGroundMask) foreGroundMask = foreGroundMask.float() if viewStereoMask: scale = 0 T = inputs["stereo_T"] real_scale_disp = scaledDisp * (torch.abs(inputs[("K", scale)][:, 0, 0] * T[:, 0, 3]).view(opt.batch_size, 1, 1, 1).expand_as(scaledDisp)) stereoMask = stereoMaskComputer.computeMask(real_scale_disp, T[:, 0, 3]) stereoSemanticalMask = stereoMaskComputer.computeSemanticalMask(stereoMask, foreGroundMask, T[:, 0, 3]) # stereoMask_fig = tensor2disp(stereoMask, ind=index, vmax=1) # stereoSemanticalMask_fig = tensor2disp(stereoSemanticalMask, ind=index, vmax=1) # foreGroundMask_fig = tensor2disp(foreGroundMask, ind=index, vmax=1) if viewSurfaceNormal: surnormMap_fig = compsurfnorm.visualize(depthMap=depthMap, invcamK=inputs['invcamK'], viewindex = index) surnormMap = compsurfnorm(depthMap=depthMap, invcamK=inputs['invcamK']) if viewTypeWiseRegularization: wallType = [2, 3, 4] # Building, wall, fence roadType = [0, 1, 9] # road, sidewalk, terrain permuType = [5, 7] # Pole, traffic sign chanWinSize = 5 wallMask = torch.ones((opt.batch_size, 1, opt.height, opt.width), device=torch.device("cuda"), dtype=torch.uint8) roadMask = torch.ones((opt.batch_size, 1, opt.height, opt.width), device=torch.device("cuda"), dtype=torch.uint8) permuMask = torch.ones((opt.batch_size, 1, opt.height, opt.width), device=torch.device("cuda"), dtype=torch.uint8) for m in wallType: wallMask = wallMask * (scaled_smeantic_label != m) wallMask = 1 - wallMask wallMask = wallMask[:, :, 1:-1, 1:-1] for m in roadType: roadMask = roadMask * (scaled_smeantic_label != m) roadMask = 1 - roadMask roadMask = roadMask[:, :, 1:-1, 1:-1] for m in permuType: permuMask = permuMask * (scaled_smeantic_label != m) permuMask = 1 - permuMask permuMask = permuMask[:, :, 1:-1, 1:-1] BdErrFig, viewRdErrFig = typeWReg.visualize_regularizeBuildingRoad(surnormMap, wallMask, roadMask, dispMap, viewInd=index) padSize = int((chanWinSize - 1) / 2) permuMask = permuMask[:, :, padSize: -padSize, padSize: -padSize] surVarFig = typeWReg.visualize_regularizePoleSign(surnormMap, permuMask, dispMap, viewInd=index) if viewBorderWiseRegularization: wallType = [2, 3, 4] # Building, wall, fence roadType = [0, 1, 9] # road, sidewalk, terrain wallTypeMask = torch.ones((opt.batch_size, 1, opt.height, opt.width), device=torch.device("cuda"), dtype=torch.uint8) roadTypeMask = torch.ones((opt.batch_size, 1, opt.height, opt.width), device=torch.device("cuda"), dtype=torch.uint8) foreGroundMask = torch.ones((opt.batch_size, 1, opt.height, opt.width), device=torch.device("cuda"), dtype=torch.uint8) for m in wallType: wallTypeMask = wallTypeMask * (scaled_smeantic_label != m) wallTypeMask = (1 - wallTypeMask).float() for m in roadType: roadTypeMask = roadTypeMask * (scaled_smeantic_label != m) roadTypeMask = (1 - roadTypeMask).float() for m in foregroundType: foreGroundMask = foreGroundMask * (scaled_smeantic_label != m) foreGroundMask = (1 - foreGroundMask).float() borderWiseReg.visualize( realDepth=depthMap, dispAct=depthMap, foredgroundMask=foreGroundMask, wallTypeMask=wallTypeMask, groundTypeMask=roadTypeMask, intrinsic=inputs['realIn'], extrinsic=inputs['realEx'], semantic=scaled_smeantic_label, viewInd=0) if viewMonoMsak: extrinsics = computePose(inputs, opt, depthMap, posenet_encoder, posenet_decoder) depthMap_cur = 
depthMap depthMap_prev = computeDepthMap(inputs['color', -1, 0], encoder, depth_decoder, opt.min_depth, opt.max_depth) depthMap_next = computeDepthMap(inputs['color', 1, 0], encoder, depth_decoder, opt.min_depth, opt.max_depth) pts_cur = depth23dpts(depthMap_cur, inputs['intrinsic']) pts_next = depth23dpts(depthMap_prev, inputs['intrinsic'], extrinsics) pts_prev = depth23dpts(depthMap_next, inputs['intrinsic'], extrinsics) if opt.eval_stereo: real_scale_depth = depthMap * STEREO_SCALE_FACTOR elif opt.eval_mono: ratio = torch.mean(inputs['depth_gt'][inputs['depth_gt'] > 0.1]) / torch.mean(depthMap) real_scale_depth = depthMap * ratio gtmask = (inputs['depth_gt'] > 0).float() gtdepth = inputs['depth_gt'] velo = inputs['velo'] tensor23dPts.visualize3d( real_scale_depth, ind=index, intrinsic_in=inputs['realIn'], extrinsic_in=inputs['realEx'], gtmask_in=gtmask, gtdepth_in=gtdepth, semanticMap=scaled_smeantic_label, velo_in=velo, rgb_in=inputs[('color', 's', 0)], disp_in=outputs[('disp', 0)] ) suppressed_disp_Map = dispMap * (1 - stereoSemanticalMask) semantic_fig = tensor2semantic(inputs[('semantic_label', 0)], ind=index, isGt=True).resize([opt.width, opt.height], pil.NEAREST) disp_fig = tensor2disp(dispMap, ind = index) suppressed_disp_Map_fig = tensor2disp(suppressed_disp_Map, ind = index) rgb_fig = tensor2rgb(inputs[("color", 0, 0)], ind = index) combined_fig1 = pil.fromarray((np.array(semantic_fig) * 0.15 + np.array(disp_fig)[:,:,0:3] * 0.85).astype(np.uint8)) combined_fig2 = pil.fromarray( (np.array(rgb_fig) * 0.2 + np.array(disp_fig)[:, :, 0:3] * 0.8).astype(np.uint8)) combined_fig = pil.fromarray(np.concatenate([np.array(combined_fig1), np.array(combined_fig2), np.array(suppressed_disp_Map_fig)[:,:,0:3], np.array(surnormMap_fig)], axis=0)) combined_fig.save(os.path.join(sv_path, str(idx) + ".png")) print("save %s" % (str(idx) + ".png"))
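# --------------------------------------------------------------------------
# Hedged sketch (depth23dpts itself is not shown in this file): back-projecting
# a depth map into camera-frame 3D points with pinhole intrinsics, optionally
# mapping them into another frame with a 4x4 extrinsic. Shapes and the
# homogeneous convention here are assumptions.
import torch

def backproject_depth_sketch(depth, K, extrinsic=None):
    """depth: (B, 1, H, W); K: (B, 4, 4) intrinsics; returns (B, 4, H*W) points."""
    b, _, h, w = depth.shape
    yy, xx = torch.meshgrid(torch.arange(h, dtype=depth.dtype, device=depth.device),
                            torch.arange(w, dtype=depth.dtype, device=depth.device),
                            indexing='ij')
    ones = torch.ones_like(xx)
    pix = torch.stack([xx, yy, ones], dim=0).view(3, -1).unsqueeze(0).expand(b, -1, -1)
    cam = torch.inverse(K[:, :3, :3]) @ pix            # rays through each pixel
    cam = cam * depth.view(b, 1, -1)                   # scale each ray by its depth
    cam = torch.cat([cam, torch.ones(b, 1, h * w, dtype=depth.dtype, device=depth.device)], dim=1)
    if extrinsic is not None:                          # move points into the target frame
        cam = extrinsic @ cam
    return cam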
options = MonodepthOptions()
opt = options.parse()

splits_dir = os.path.join(os.path.dirname(__file__), "splits")
opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)
filenames = readlines(os.path.join(splits_dir, opt.split, "train_files.txt"))

height = 288
width = 960
dataset = datasets.KITTIRAWDataset(
    opt.data_path, filenames, height, width, [0], 4, is_train=False,
    tag=opt.dataset, img_ext='png', load_meta=opt.load_meta,
    is_load_semantics=opt.use_kitti_gt_semantics,
    is_predicted_semantics=opt.is_predicted_semantics,
    load_morphed_depth=True)
dataloader = DataLoader(dataset, 1, shuffle=True, num_workers=opt.num_workers,
                        pin_memory=True, drop_last=False)

pred_disps = []
def evaluate(opt): """Evaluates a pretrained model using a specified test set """ MIN_DEPTH = 1e-3 MAX_DEPTH = 80 ts = time.time() if opt.ext_disp_to_eval is None: opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder) assert os.path.isdir(opt.load_weights_folder), \ "Cannot find a folder at {}".format(opt.load_weights_folder) print("-> Loading weights from {}".format(opt.load_weights_folder)) # filenames = readlines(os.path.join(splits_dir, opt.split, "train_files.txt")) filenames = collect_all_entries(opt.data_path) encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth") decoder_path = os.path.join(opt.load_weights_folder, "depth.pth") encoder_dict = torch.load(encoder_path) dataset = datasets.KITTIRAWDataset(opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'], [0], 4, is_train=False) dataloader = DataLoader(dataset, opt.batch_size, shuffle=False, num_workers=opt.num_workers, pin_memory=True, drop_last=False) encoder = networks.ResnetEncoder(opt.num_layers, False) depth_decoder = networks.DepthDecoder(encoder.num_ch_enc) model_dict = encoder.state_dict() encoder.load_state_dict( {k: v for k, v in encoder_dict.items() if k in model_dict}) depth_decoder.load_state_dict(torch.load(decoder_path)) encoder.cuda() encoder.eval() depth_decoder.cuda() depth_decoder.eval() mapping = {'l': 'image_02', 'r': 'image_03'} print("-> Computing predictions with size {}x{}".format( encoder_dict['width'], encoder_dict['height'])) save_dir = opt.save_dir print("-> Saving out predictions to {}".format(save_dir)) if not os.path.exists(save_dir): os.makedirs(save_dir) imgCount = 0 with torch.no_grad(): for idx, data in enumerate(dataloader): input_color = data[("color", 0, 0)].cuda() if opt.post_process: # Post-processed results require each image to have two forward passes input_color = torch.cat( (input_color, torch.flip(input_color, [3])), 0) output = depth_decoder(encoder(input_color)) pred_disp, _ = disp_to_depth(output[("disp", 0)], opt.min_depth, opt.max_depth) pred_disp = pred_disp.cpu()[:, 0].numpy() if opt.post_process: N = pred_disp.shape[0] // 2 pred_disp = batch_post_process_disparity( pred_disp[:N], pred_disp[N:, :, ::-1]) depth = STEREO_SCALE_FACTOR / pred_disp depth = np.clip(depth, 0, 80) depth = np.uint16(depth * 256) for k in range(depth.shape[0]): comps = filenames[imgCount].split(" ") save_folder = os.path.join(save_dir, comps[0], mapping[comps[2][0]]) os.makedirs(save_folder, exist_ok=True) save_path = os.path.join(save_folder, comps[1] + '.png') cv2.imwrite(save_path, depth[k, :, :]) te = time.time() imgCount = imgCount + 1 print("%d finished, %f hours left" % (idx, (te - ts) / imgCount * (len(filenames) - imgCount) / 60 / 60))
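# --------------------------------------------------------------------------
# Note on the uint16 export above: depth is clipped to [0, 80] m and stored as
# depth * 256 in a 16-bit PNG, which is the usual KITTI depth-map convention.
# A hedged round-trip sketch (the file name is illustrative only):
import cv2
import numpy as np

depth_m = np.random.uniform(0.5, 80.0, size=(352, 1216)).astype(np.float32)
encoded = np.uint16(np.clip(depth_m, 0, 80) * 256)
cv2.imwrite("example_depth.png", encoded)

decoded = cv2.imread("example_depth.png", cv2.IMREAD_UNCHANGED).astype(np.float32) / 256.0
# 16-bit PNG is lossless, so the only error is the 1/256 m quantization step
assert np.abs(decoded - np.clip(depth_m, 0, 80)).max() < 1.0 / 256.0 + 1e-4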
def evaluate(opt): """Evaluates a pretrained model using a specified test set """ MIN_DEPTH = 1e-3 MAX_DEPTH = 80 viewPythonVer = False viewCudaVer = True if viewCudaVer: bnmorph = BNMorph(height=opt.height, width=opt.width).cuda() opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder) assert os.path.isdir(opt.load_weights_folder), \ "Cannot find a folder at {}".format(opt.load_weights_folder) print("-> Loading weights from {}".format(opt.load_weights_folder)) filenames = readlines(os.path.join(splits_dir, opt.split, "val_files.txt")) encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth") decoder_path = os.path.join(opt.load_weights_folder, "depth.pth") encoder_dict = torch.load(encoder_path) if opt.use_stereo: opt.frame_ids.append("s") if opt.dataset == 'cityscape': dataset = datasets.CITYSCAPERawDataset( opt.data_path, filenames, opt.height, opt.width, opt.frame_ids, 4, is_train=False, tag=opt.dataset, load_meta=True, direction_left=opt.direction_left) elif opt.dataset == 'kitti': dataset = datasets.KITTIRAWDataset( opt.data_path, filenames, opt.height, opt.width, opt.frame_ids, 4, is_train=False, tag=opt.dataset, is_load_semantics=opt.use_kitti_gt_semantics, is_predicted_semantics=opt.is_predicted_semantics, direction_left=opt.direction_left) else: raise ValueError("No predefined dataset") dataloader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers, pin_memory=True, drop_last=True) encoder = networks.ResnetEncoder(opt.num_layers, False, num_input_images=2) if opt.switchMode == 'on': depth_decoder = networks.DepthDecoder( encoder.num_ch_enc, isSwitch=True, isMulChannel=opt.isMulChannel, outputtwoimage=(opt.outputtwoimage == True)) else: depth_decoder = networks.DepthDecoder(encoder.num_ch_enc) model_dict = encoder.state_dict() encoder.load_state_dict( {k: v for k, v in encoder_dict.items() if k in model_dict}) depth_decoder.load_state_dict(torch.load(decoder_path)) encoder.cuda() encoder.eval() depth_decoder.cuda() depth_decoder.eval() viewIndex = 0 tool = grad_computation_tools(batch_size=opt.batch_size, height=opt.height, width=opt.width).cuda() auto_morph = AutoMorph(height=opt.height, width=opt.width) with torch.no_grad(): for idx, inputs in enumerate(dataloader): for key, ipt in inputs.items(): if not (key == 'height' or key == 'width' or key == 'tag' or key == 'cts_meta' or key == 'file_add'): inputs[key] = ipt.to(torch.device("cuda")) input_color = torch.cat( [inputs[("color_aug", 0, 0)], inputs[("color_aug", 's', 0)]], dim=1).cuda() # input_color = inputs[("color", 0, 0)].cuda() # tensor2rgb(inputs[("color_aug", 0, 0)], ind=0).show() # tensor2rgb(inputs[("color_aug", 's', 0)], ind=0).show() features = encoder(input_color) outputs = dict() outputs.update( depth_decoder(features, computeSemantic=True, computeDepth=False)) outputs.update( depth_decoder(features, computeSemantic=False, computeDepth=True)) if not opt.view_right: disparityMap = outputs[('mul_disp', 0)][:, 0:1, :, :] else: disparityMap = outputs[('mul_disp', 0)][:, 1:2, :, :] depthMap = torch.clamp(disparityMap, max=80) fig_seman = tensor2semantic(inputs['seman_gt'], ind=viewIndex, isGt=True) fig_rgb = tensor2rgb(inputs[('color', 0, 0)], ind=viewIndex) fig_disp = tensor2disp(disparityMap, ind=viewIndex, vmax=0.1) segmentationMapGt = inputs['seman_gt'] foregroundType = [ 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18 ] # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle foregroundMapGt = 
torch.ones(disparityMap.shape).cuda().byte() for m in foregroundType: foregroundMapGt = foregroundMapGt * (segmentationMapGt != m) foregroundMapGt = (1 - foregroundMapGt).float() disparity_grad = torch.abs( tool.convDispx(disparityMap)) + torch.abs( tool.convDispy(disparityMap)) semantics_grad = torch.abs( tool.convDispx(foregroundMapGt)) + torch.abs( tool.convDispy(foregroundMapGt)) disparity_grad = disparity_grad * tool.zero_mask semantics_grad = semantics_grad * tool.zero_mask disparity_grad_bin = disparity_grad > tool.disparityTh semantics_grad_bin = semantics_grad > tool.semanticsTh # tensor2disp(disparity_grad_bin, ind=viewIndex, vmax=1).show() # tensor2disp(semantics_grad_bin, ind=viewIndex, vmax=1).show() if viewPythonVer: disparity_grad_bin = disparity_grad_bin.detach().cpu().numpy() semantics_grad_bin = semantics_grad_bin.detach().cpu().numpy() disparityMap_to_processed = disparityMap.detach().cpu().numpy( )[viewIndex, 0, :, :] dispMap_morphed, dispMap_morphRec = auto_morph.automorph( disparity_grad_bin[viewIndex, 0, :, :], semantics_grad_bin[viewIndex, 0, :, :], disparityMap_to_processed) fig_disp_processed = visualizeNpDisp(dispMap_morphed, vmax=0.1) overlay_processed = pil.fromarray( (np.array(fig_disp_processed) * 0.7 + np.array(fig_seman) * 0.3).astype(np.uint8)) overlay_org = pil.fromarray( (np.array(fig_disp) * 0.7 + np.array(fig_seman) * 0.3).astype(np.uint8)) combined_fig = pil.fromarray( np.concatenate([ np.array(overlay_org), np.array(overlay_processed), np.array(fig_disp), np.array(fig_disp_processed) ], axis=0)) combined_fig.save( "/media/shengjie/other/sceneUnderstanding/Stereo_SDNET/visualization/border_morph_l2_3/" + str(idx) + ".png") if viewCudaVer: # morphedx, morphedy = bnmorph.find_corresponding_pts(disparity_grad_bin, semantics_grad_bin, disparityMap, fig_seman, 10) # morphedx = (morphedx / (opt.width - 1) - 0.5) * 2 # morphedy = (morphedy / (opt.height - 1) - 0.5) * 2 # grid = torch.cat([morphedx, morphedy], dim = 1).permute(0,2,3,1) # disparityMap_morphed = F.grid_sample(disparityMap, grid, padding_mode="border") # fig_morphed = tensor2disp(disparityMap_morphed, vmax=0.08, ind=0) # fig_disp = tensor2disp(disparityMap, vmax=0.08, ind=0) # fig_combined = pil.fromarray(np.concatenate([np.array(fig_morphed), np.array(fig_disp)], axis=0)) # fig_combined.show() svpath = os.path.join(opt.load_weights_folder).split('/') try: svpath = os.path.join( "/media/shengjie/other/sceneUnderstanding/Stereo_SDNET/visualization", svpath[-3]) os.mkdir(svpath) except FileExistsError: a = 1 morphedx, morphedy, coeff = bnmorph.find_corresponding_pts( disparity_grad_bin, semantics_grad_bin) morphedx = (morphedx / (opt.width - 1) - 0.5) * 2 morphedy = (morphedy / (opt.height - 1) - 0.5) * 2 grid = torch.cat([morphedx, morphedy], dim=1).permute(0, 2, 3, 1) disparityMap_morphed = F.grid_sample(disparityMap, grid, padding_mode="border") fig_morphed = tensor2disp(disparityMap_morphed, vmax=0.08, ind=0) fig_disp = tensor2disp(disparityMap, vmax=0.08, ind=0) fig_morphed_overlayed = pil.fromarray( (np.array(fig_seman) * 0.5 + np.array(fig_morphed) * 0.5).astype(np.uint8)) fig_disp_overlayed = pil.fromarray( (np.array(fig_seman) * 0.5 + np.array(fig_disp) * 0.5).astype(np.uint8)) # fig_rgb = tensor2rgb(inputs[("color", 0, 0)], ind=0) # fig_combined = pil.fromarray(np.concatenate([np.array(fig_disp_overlayed), np.array(fig_morphed_overlayed), np.array(fig_disp), np.array(fig_morphed), np.array(fig_rgb)], axis=0)) fig_combined = pil.fromarray( np.concatenate([ np.array(fig_disp_overlayed), 
np.array(fig_morphed_overlayed), np.array(fig_disp), np.array(fig_morphed) ], axis=0)) fig_combined.save(os.path.join(svpath, str(idx) + ".png"))
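# --------------------------------------------------------------------------
# Hedged sketch of the warp used above: bnmorph returns per-pixel target
# coordinates in pixel units; they are normalized to [-1, 1] and fed to
# F.grid_sample, which resamples the disparity map at those locations. The
# identity grid below is only a stand-in for the morph field.
import torch
import torch.nn.functional as F

def warp_with_pixel_coords_sketch(disp, coord_x, coord_y):
    """disp: (B, 1, H, W); coord_x/coord_y: (B, 1, H, W) in pixel units."""
    _, _, h, w = disp.shape
    gx = (coord_x / (w - 1) - 0.5) * 2          # map [0, W-1] -> [-1, 1]
    gy = (coord_y / (h - 1) - 0.5) * 2          # map [0, H-1] -> [-1, 1]
    grid = torch.cat([gx, gy], dim=1).permute(0, 2, 3, 1)
    return F.grid_sample(disp, grid, padding_mode="border", align_corners=True)

# Identity check: sampling each pixel at its own coordinates reproduces the input.
disp = torch.rand(1, 1, 8, 16)
ys, xs = torch.meshgrid(torch.arange(8.0), torch.arange(16.0), indexing='ij')
same = warp_with_pixel_coords_sketch(disp, xs[None, None], ys[None, None])
assert torch.allclose(same, disp, atol=1e-5)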
def evaluate(opts):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = opts['min_depth']
    MAX_DEPTH = opts['max_depth']

    data_path = opts['dataset']['path']
    batch_size = opts['dataset']['batch_size']
    num_workers = opts['dataset']['num_workers']

    feed_height = opts['feed_height']
    feed_width = opts['feed_width']
    full_width = opts['dataset']['full_width']
    full_height = opts['dataset']['full_height']

    # The metric mode forcibly compresses the gt values into the same range as the
    # scanner, which keeps them as close to metric values as possible.
    # But for
    metric_mode = opts['metric_mode']

    data_path = Path(opts['dataset']['path'])
    lines = Path(opts['dataset']['split']['path']) / opts['dataset']['split']['test_file']
    model_path = opts['model']['load_paths']
    encoder_mode = opts['model']['encoder_mode']
    frame_sides = opts['frame_sides']
    # frame_prior, frame_now, frame_next = opts['frame_sides']

    encoder, decoder = model_init(model_path, mode=encoder_mode)
    file_names = readlines(lines)

    print('-> dataset_path:{}'.format(data_path))
    print('-> model_path')
    for k, v in opts['model']['load_paths'].items():
        print('\t' + str(v))
    print("-> metrics mode: {}".format(metric_mode))
    print("-> data split:{}".format(lines))
    print('-> total:{}'.format(len(file_names)))

    if opts['dataset']['type'] == 'mc':
        dataset = datasets.MCDataset(data_path=data_path,
                                     filenames=file_names,
                                     height=feed_height,
                                     width=feed_width,
                                     frame_sides=frame_sides,
                                     num_scales=1,
                                     mode="test")
    elif opts['dataset']['type'] == 'kitti':
        dataset = datasets.KITTIRAWDataset(  # KITTIRAWData
            data_path=data_path,
            filenames=file_names,
            height=feed_height,
            width=feed_width,
            frame_sides=frame_sides,
            num_scales=1,
            mode="test")

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=True,
                            drop_last=False)

    pred_depths = []
    gt_depths = []
    disps = []
    for data in tqdm(dataloader):
        image = cv2.imread('/home/roit/datasets/nyudepthv2/img/0001.jpg')
        image = cv2.resize(image, (384, 288))
        image = np.transpose(image, [2, 0, 1])
        image = torch.tensor(image).cuda() / 255.
        image = image.unsqueeze(0)

        # input_color = reframe(encoder_mode, data, frame_sides=frame_sides, key='color')
        # input_color = input_color.cuda()

        features = encoder(image)
        disp = decoder(*features)

        depth_gt = data['depth_gt']

        pred_disp, pred_depth = disp_to_depth(disp, min_depth=MIN_DEPTH, max_depth=MAX_DEPTH)
        # pred_depth = disp2depth(disp)

        pred_depth = pred_depth.cpu()[:, 0].numpy()
        depth_gt = depth_gt.cpu()[:, 0].numpy()

        pred_depths.append(pred_depth)
        gt_depths.append(depth_gt)

    gt_depths = np.concatenate(gt_depths, axis=0)
    pred_depths = np.concatenate(pred_depths, axis=0)

    metrics = []
    ratios = []
    for gt, pred in zip(gt_depths, pred_depths):
        gt_height, gt_width = gt.shape[:2]
        pred = cv2.resize(pred, (gt_width, gt_height))

        # crop
        # if test_dir.stem == "eigen" or test_dir.stem == 'custom':  # ???, probably a legacy check
        if opts['dataset']['type'] == "kitti":  # ???, probably a legacy check
            mask = np.logical_and(gt > MIN_DEPTH, gt < MAX_DEPTH)
            crop = np.array([
                0.40810811 * gt_height, 0.99189189 * gt_height,
                0.03594771 * gt_width, 0.96405229 * gt_width
            ]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)
        else:
            mask = np.logical_and(gt > MIN_DEPTH, gt < MAX_DEPTH)

        pred = pred[mask]  # also reshapes to 1d
        gt = gt[mask]

        # Median scaling: at eval time, linearly rescale pred so it matches gt as closely as possible
        ratio = np.median(gt) / np.median(pred)
        ratios.append(ratio)
        pred *= ratio

        pred[pred < MIN_DEPTH] = MIN_DEPTH  # clamp predictions to the valid depth range
        pred[pred > MAX_DEPTH] = MAX_DEPTH

        metric = compute_errors(gt, pred, mode=metric_mode)
        metrics.append(metric)

    metrics = np.array(metrics)
    mean_metrics = np.mean(metrics, axis=0)

    # print("\n " + ("{:>8} | " * 7).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f} " * 7).format(*mean_metrics.tolist()) + "\\\\")

    ratios = np.array(ratios)
    median = np.median(ratios)
    print("\n Scaling ratios | med: {:0.3f} | std: {:0.3f}\n".format(
        median, np.std(ratios / median)))
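# --------------------------------------------------------------------------
# Hedged sketch of the seven metrics printed above (abs_rel, sq_rel, rmse,
# rmse_log, a1, a2, a3); the repo's compute_errors also takes a mode argument
# that is not reproduced here.
import numpy as np

def compute_errors_sketch(gt, pred):
    thresh = np.maximum(gt / pred, pred / gt)
    a1 = (thresh < 1.25).mean()
    a2 = (thresh < 1.25 ** 2).mean()
    a3 = (thresh < 1.25 ** 3).mean()
    rmse = np.sqrt(((gt - pred) ** 2).mean())
    rmse_log = np.sqrt(((np.log(gt) - np.log(pred)) ** 2).mean())
    abs_rel = np.mean(np.abs(gt - pred) / gt)
    sq_rel = np.mean(((gt - pred) ** 2) / gt)
    return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3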
def export_gt_depths_kitti(): parser = argparse.ArgumentParser(description='export_pred_theta') parser.add_argument('--data_path', type=str, help='path to the root of the KITTI data', required=True) parser.add_argument('--save_dir', type=str, help='path to the root of save folder', required=True) parser.add_argument('--load_weights_folder', type=str, help='path to the root of save folder', required=True) parser.add_argument('--num_layers', type=int, default=18) parser.add_argument('--num_workers', type=int, default=16) parser.add_argument('--banvls', action='store_true') opt = parser.parse_args() os.makedirs(opt.save_dir, exist_ok=True) encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth") decoder_path = os.path.join(opt.load_weights_folder, "depth.pth") encoder_dict = torch.load(encoder_path) encoder = networks.ResnetEncoder(opt.num_layers, False) depth_decoder = networks.DepthDecoder(encoder.num_ch_enc, num_output_channels=3) model_dict = encoder.state_dict() encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict}) depth_decoder.load_state_dict(torch.load(decoder_path)) encoder.cuda() encoder.eval() depth_decoder.cuda() depth_decoder.eval() lines = collect_all_entries(opt.data_path) lines_valid = list() for line in lines: folder, frame_id, direction = line.split() frame_id = int(frame_id) velo_filename = os.path.join(opt.data_path, folder, "velodyne_points/data", "{:010d}.bin".format(frame_id)) if os.path.isfile(velo_filename): lines_valid.append(line) mapping = {'l': 'image_02', 'r': 'image_03'} mapping_cam = {'l': 2, 'r': 3} ts = time.time() imgCount = 0 dataset = datasets.KITTIRAWDataset(opt.data_path, lines_valid, encoder_dict['height'], encoder_dict['width'], [0], 4, is_train=False) dataloader = DataLoader(dataset, 16, shuffle=False, num_workers=opt.num_workers, pin_memory=True, drop_last=False) with torch.no_grad(): for data in dataloader: allexist = True for i in range(data[('color', 0, 0)].shape[0]): folder, frame_id, direction, _, _ = data['entry_tag'][i].split() direction = direction[0] frame_id = int(frame_id) cklist = list() cklist.append(os.path.join(opt.save_dir, folder, 'htheta_flipped', mapping[direction], str(frame_id).zfill(10) + '.png')) cklist.append(os.path.join(opt.save_dir, folder, 'vtheta_flipped', mapping[direction], str(frame_id).zfill(10) + '.png')) cklist.append(os.path.join(opt.save_dir, folder, 'htheta', mapping[direction], str(frame_id).zfill(10) + '.png')) cklist.append(os.path.join(opt.save_dir, folder, 'vtheta', mapping[direction], str(frame_id).zfill(10) + '.png')) for cke in cklist: if not os.path.isfile(cke): allexist = False if allexist: continue outputs = dict() outputs_flipped = dict() input_color = data[("color", 0, 0)].cuda() input_color_flipped = torch.flip(input_color, dims=[3]) # tensor2rgb(input_color, ind=0).show() # tensor2rgb(input_color_flipped, ind=0).show() outputs.update(depth_decoder(encoder(input_color))) outputs_flipped.update(depth_decoder(encoder(input_color_flipped))) for i in range(outputs[('disp', 0)].shape[0]): folder, frame_id, direction, _, _ = data['entry_tag'][i].split() direction = direction[0] frame_id = int(frame_id) print("Exporting: Folder: %s, direction: %s, frame_id: %d" % (folder, direction, frame_id)) output_folder_h = os.path.join(opt.save_dir, folder, 'htheta', mapping[direction]) output_folder_v = os.path.join(opt.save_dir, folder, 'vtheta', mapping[direction]) os.makedirs(output_folder_h, exist_ok=True) os.makedirs(output_folder_v, exist_ok=True) save_path_h = 
os.path.join(output_folder_h, str(frame_id).zfill(10) + '.png') save_path_v = os.path.join(output_folder_v, str(frame_id).zfill(10) + '.png') thetah = outputs[('disp', 0)][i:i+1,0:1,:,:] * 2 * np.pi thetav = outputs[('disp', 0)][i:i + 1, 1:2, :, :] * 2 * np.pi thetahnp = thetah.squeeze(0).squeeze(0).cpu().numpy() thetavnp = thetav.squeeze(0).squeeze(0).cpu().numpy() thetahnp_towrite = (thetahnp * 10 * 256).astype(np.uint16) thetavnp_towrite = (thetavnp * 10 * 256).astype(np.uint16) cv2.imwrite(save_path_h, thetahnp_towrite) cv2.imwrite(save_path_v, thetavnp_towrite) # reopen_h = np.array(pil.open(save_path_h)).astype(np.float32) / 10 / 256 # reopen_v = np.array(pil.open(save_path_v)).astype(np.float32) / 10 / 256 # print(np.abs(reopen_h - thetahnp).max()) # print(np.abs(reopen_v - thetavnp).max()) if not opt.banvls: output_folder_hvls = os.path.join(opt.save_dir, folder, 'htheta_vls', mapping[direction]) output_folder_vvls = os.path.join(opt.save_dir, folder, 'vtheta_vls', mapping[direction]) os.makedirs(output_folder_hvls, exist_ok=True) os.makedirs(output_folder_vvls, exist_ok=True) figh = tensor2disp(thetah - 1, vmax=4, ind=0) figv = tensor2disp(thetav - 1, vmax=4, ind=0) save_path_hvls = os.path.join(output_folder_hvls, str(frame_id).zfill(10) + '.png') save_path_vvls = os.path.join(output_folder_vvls, str(frame_id).zfill(10) + '.png') figh.save(save_path_hvls) figv.save(save_path_vvls) output_folder_h = os.path.join(opt.save_dir, folder, 'htheta_flipped', mapping[direction]) output_folder_v = os.path.join(opt.save_dir, folder, 'vtheta_flipped', mapping[direction]) os.makedirs(output_folder_h, exist_ok=True) os.makedirs(output_folder_v, exist_ok=True) save_path_h = os.path.join(output_folder_h, str(frame_id).zfill(10) + '.png') save_path_v = os.path.join(output_folder_v, str(frame_id).zfill(10) + '.png') thetah = outputs_flipped[('disp', 0)][i:i+1,0:1,:,:] * 2 * np.pi thetav = outputs_flipped[('disp', 0)][i:i + 1, 1:2, :, :] * 2 * np.pi thetahnp = thetah.squeeze(0).squeeze(0).cpu().numpy() thetavnp = thetav.squeeze(0).squeeze(0).cpu().numpy() thetahnp_towrite = (thetahnp * 10 * 256).astype(np.uint16) thetavnp_towrite = (thetavnp * 10 * 256).astype(np.uint16) cv2.imwrite(save_path_h, thetahnp_towrite) cv2.imwrite(save_path_v, thetavnp_towrite) if not opt.banvls: output_folder_hvls = os.path.join(opt.save_dir, folder, 'htheta_vls_flipped', mapping[direction]) output_folder_vvls = os.path.join(opt.save_dir, folder, 'vtheta_vls_flipped', mapping[direction]) os.makedirs(output_folder_hvls, exist_ok=True) os.makedirs(output_folder_vvls, exist_ok=True) figh = tensor2disp(thetah - 1, vmax=4, ind=0) figv = tensor2disp(thetav - 1, vmax=4, ind=0) save_path_hvls = os.path.join(output_folder_hvls, str(frame_id).zfill(10) + '.png') save_path_vvls = os.path.join(output_folder_vvls, str(frame_id).zfill(10) + '.png') figh.save(save_path_hvls) figv.save(save_path_vvls) te = time.time() imgCount = imgCount + 1 print("%d finished, %f hours left" % (imgCount, (te - ts) / imgCount * (len(lines) - imgCount) / 60 / 60))
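# --------------------------------------------------------------------------
# Note on the theta encoding above: angles in [0, 2*pi] are stored as
# uint16(theta * 10 * 256), so the maximum stored value is about 16085 (well
# inside the uint16 range) and the quantization error after reloading stays
# below 1/2560 rad. A hedged round-trip check with synthetic values:
import numpy as np

theta = np.random.uniform(0.0, 2.0 * np.pi, size=(192, 640)).astype(np.float32)
stored = (theta * 10 * 256).astype(np.uint16)
reloaded = stored.astype(np.float32) / 10 / 256
assert np.abs(reloaded - theta).max() < 1.0 / 2560.0 + 1e-5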
def evaluate(opt): """Evaluates a pretrained model using a specified test set """ MIN_DEPTH = 1e-3 MAX_DEPTH = 80 assert sum((opt.eval_mono, opt.eval_stereo)) == 1, \ "Please choose mono or stereo evaluation by setting either --eval_mono or --eval_stereo" if opt.ext_disp_to_eval is None: opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder) assert os.path.isdir(opt.load_weights_folder), \ "Cannot find a folder at {}".format(opt.load_weights_folder) print("-> Loading weights from {}".format(opt.load_weights_folder)) filenames = readlines( os.path.join(splits_dir, opt.eval_split, "test_files.txt")) encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth") decoder_path = os.path.join(opt.load_weights_folder, "depth.pth") encoder_dict = torch.load(encoder_path) dataset = datasets.KITTIRAWDataset(opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'], [0], 4, is_train=False) dataloader = DataLoader(dataset, 16, shuffle=False, num_workers=opt.num_workers, pin_memory=True, drop_last=False) encoder = networks.ResnetEncoder(opt.num_layers, False) depth_decoder = networks.DepthDecoder(encoder.num_ch_enc) model_dict = encoder.state_dict() encoder.load_state_dict( {k: v for k, v in encoder_dict.items() if k in model_dict}) depth_decoder.load_state_dict(torch.load(decoder_path)) encoder.cuda() encoder.eval() depth_decoder.cuda() depth_decoder.eval() pred_disps = [] print("-> Computing predictions with size {}x{}".format( encoder_dict['width'], encoder_dict['height'])) with torch.no_grad(): for data in dataloader: input_color = data[("color", 0, 0)].cuda() if opt.post_process: # Post-processed results require each image to have two forward passes input_color = torch.cat( (input_color, torch.flip(input_color, [3])), 0) output = depth_decoder(encoder(input_color)) pred_disp, _ = disp_to_depth(output[("disp", 0)], opt.min_depth, opt.max_depth) pred_disp = pred_disp.cpu()[:, 0].numpy() if opt.post_process: N = pred_disp.shape[0] // 2 pred_disp = batch_post_process_disparity( pred_disp[:N], pred_disp[N:, :, ::-1]) pred_disps.append(pred_disp) pred_disps = np.concatenate(pred_disps) else: # Load predictions from file print("-> Loading predictions from {}".format(opt.ext_disp_to_eval)) pred_disps = np.load(opt.ext_disp_to_eval) if opt.eval_eigen_to_benchmark: eigen_to_benchmark_ids = np.load( os.path.join(splits_dir, "benchmark", "eigen_to_benchmark_ids.npy")) pred_disps = pred_disps[eigen_to_benchmark_ids] if opt.save_pred_disps: output_path = os.path.join(opt.eval_out_dir, "disps_{}_split.npy".format(opt.eval_split)) print("-> Saving predicted disparities to ", output_path) np.save(output_path, pred_disps) if opt.no_eval: print("-> Evaluation disabled. Done.") quit() elif opt.eval_split == 'benchmark': save_dir = os.path.join(opt.load_weights_folder, "benchmark_predictions") print("-> Saving out benchmark predictions to {}".format(save_dir)) if not os.path.exists(save_dir): os.makedirs(save_dir) for idx in range(len(pred_disps)): disp_resized = cv2.resize(pred_disps[idx], (1216, 352)) depth = STEREO_SCALE_FACTOR / disp_resized depth = np.clip(depth, 0, 80) depth = np.uint16(depth * 256) save_path = os.path.join(save_dir, "{:010d}.png".format(idx)) cv2.imwrite(save_path, depth) print( "-> No ground truth is available for the KITTI benchmark, so not evaluating. Done." 
) quit() elif opt.eval_split == 'mine_0319': save_dir = os.path.join(opt.eval_out_dir, "mine_0319_predictions") print("-> Saving out mine_0319 predictions to {}".format(save_dir)) if not os.path.exists(save_dir): os.makedirs(save_dir) for idx in range(len(pred_disps)): disp_resized = cv2.resize(pred_disps[idx], (1216, 352)) depth = STEREO_SCALE_FACTOR / disp_resized depth = np.clip(depth, 0, 80) depth = np.uint16(depth * 256) save_path = os.path.join(save_dir, "{:010d}.png".format(idx)) cv2.imwrite(save_path, depth) # print("-> No ground truth is available for the KITTI benchmark, so not evaluating. Done.") # quit() gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz") gt_depths = np.load(gt_path, fix_imports=True, encoding='latin1')["data"] print("-> Evaluating") if opt.eval_stereo: print(" Stereo evaluation - " "disabling median scaling, scaling by {}".format( STEREO_SCALE_FACTOR)) opt.disable_median_scaling = True opt.pred_depth_scale_factor = STEREO_SCALE_FACTOR else: print(" Mono evaluation - using median scaling") errors = [] ratios = [] for i in range(pred_disps.shape[0]): gt_depth = gt_depths[i] gt_height, gt_width = gt_depth.shape[:2] pred_disp = pred_disps[i] pred_disp = cv2.resize(pred_disp, (gt_width, gt_height)) pred_depth = 1 / pred_disp if opt.eval_split == "eigen": mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH) crop = np.array([ 0.40810811 * gt_height, 0.99189189 * gt_height, 0.03594771 * gt_width, 0.96405229 * gt_width ]).astype(np.int32) crop_mask = np.zeros(mask.shape) crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1 mask = np.logical_and(mask, crop_mask) else: mask = gt_depth > 0 pred_depth = pred_depth[mask] gt_depth = gt_depth[mask] pred_depth *= opt.pred_depth_scale_factor if not opt.disable_median_scaling: ratio = np.median(gt_depth) / np.median(pred_depth) ratios.append(ratio) pred_depth *= ratio pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH errors.append(compute_errors(gt_depth, pred_depth)) if not opt.disable_median_scaling: ratios = np.array(ratios) med = np.median(ratios) print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format( med, np.std(ratios / med))) mean_errors = np.array(errors).mean(0) print("\n " + ("{:>8} | " * 7 ).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3")) print(("&{: 8.3f} " * 7).format(*mean_errors.tolist()) + "\\\\") print("\n-> Done!")
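# --------------------------------------------------------------------------
# Hedged sketch of the flip post-processing used when opt.post_process is set:
# each image gets a second, horizontally flipped forward pass, and the two
# disparities are blended with ramp masks near the left/right borders,
# following the usual Monodepth post-processing recipe.
import numpy as np

def batch_post_process_disparity_sketch(l_disp, r_disp):
    """l_disp: (N, H, W) from the original image; r_disp: (N, H, W) from the flipped image, re-flipped."""
    _, h, w = l_disp.shape
    m_disp = 0.5 * (l_disp + r_disp)
    l, _ = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h))
    l_mask = (1.0 - np.clip(20 * (l - 0.05), 0, 1))[None, ...]
    r_mask = l_mask[:, :, ::-1]
    return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp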
def evaluate(opt): """Evaluates a pretrained model using a specified test set """ MIN_DEPTH = 1e-3 MAX_DEPTH = 80 opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder) assert os.path.isdir(opt.load_weights_folder), \ "Cannot find a folder at {}".format(opt.load_weights_folder) print("-> Loading weights from {}".format(opt.load_weights_folder)) filenames = readlines(os.path.join(splits_dir, opt.split, "val_files.txt")) encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth") decoder_path = os.path.join(opt.load_weights_folder, "depth.pth") encoder_dict = torch.load(encoder_path) # encoder's record of height and weight are of less important now if opt.use_stereo: opt.frame_ids.append("s") if opt.dataset == 'cityscape': dataset = datasets.CITYSCAPERawDataset(opt.data_path, filenames, opt.height, opt.width, opt.frame_ids, 4, is_train=False, tag=opt.dataset, load_meta=True) elif opt.dataset == 'kitti': dataset = datasets.KITTIRAWDataset(opt.data_path, filenames, opt.height, opt.width, opt.frame_ids, 4, is_train=False, tag=opt.dataset, is_load_semantics=True) else: raise ValueError("No predefined dataset") dataloader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers, pin_memory=True, drop_last=True) encoder = networks.ResnetEncoder(opt.num_layers, False) if opt.switchMode == 'on': depth_decoder = networks.DepthDecoder(encoder.num_ch_enc, isSwitch=True, isMulChannel=opt.isMulChannel) else: depth_decoder = networks.DepthDecoder(encoder.num_ch_enc) model_dict = encoder.state_dict() encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict}) depth_decoder.load_state_dict(torch.load(decoder_path)) encoder.cuda() encoder.eval() depth_decoder.cuda() depth_decoder.eval() ##--------------------Visualization parameter here----------------------------## sfx = torch.nn.Softmax(dim=1) mergeDisp = Merge_MultDisp(opt.scales, batchSize = opt.batch_size, isMulChannel = opt.isMulChannel) svRoot = '/media/shengjie/other/sceneUnderstanding/monodepth2/internalRe/figure_visual' index = 0 isvisualize = True useGtSeman = True useSeman = False viewSurfaceNormal = False viewSelfOcclu = False viewMutuallyRegularizedBorder= False viewLiuSemanCompare = False viewSecondOrder = False viewBorderConverge = True expBin = True height = 288 width = 960 tensor23dPts = Tensor23dPts(height=height, width=width) dirpath = os.path.join(svRoot, opt.model_name) if not os.path.exists(dirpath): os.makedirs(dirpath) if viewSurfaceNormal: compsn = ComputeSurfaceNormal(height = height, width = width, batch_size = opt.batch_size).cuda() if viewSelfOcclu: selfclu = SelfOccluMask().cuda() if viewMutuallyRegularizedBorder: mrb = MutuallyRegularizedBorders(height=height, width=width, batchsize=opt.batch_size) iouFore_gtdepth2gtseman = list() iouBack_gtdepth2gtseman = list() iouValid_gtdepth2gtseman = list() iouFore_estdepth2gtseman = list() iouBack_estdepth2gtseman = list() iouValid_estdepth2gtseman = list() iouFore_estdepth2estseman = list() iouBack_estdepth2estseman = list() iouValid_estdepth2estseman = list() if viewLiuSemanCompare: cmpBCons = computeBorderDistance() compGrad = computeGradient() semanest2semangt = np.zeros(31) depth2disp = np.zeros(31) depth2semangt = np.zeros(31) disp2semanest = np.zeros(31) sfx = torch.nn.Softmax(dim=1) cmpBCons.cuda() compGrad.cuda() if viewSecondOrder: compSecGrad = SecondOrderGrad().cuda() if viewBorderConverge: borderConverge = BorderConverge(height, width, opt.batch_size).cuda() if expBin: expbinmap = 
expBinaryMap(height, width, opt.batch_size).cuda() computedNum = 0 # with torch.no_grad(): for idx, inputs in enumerate(dataloader): for key, ipt in inputs.items(): if not(key == 'height' or key == 'width' or key == 'tag' or key == 'cts_meta'): inputs[key] = ipt.to(torch.device("cuda")) input_color = inputs[("color", 0, 0)].cuda() features = encoder(input_color) outputs = dict() outputs.update(depth_decoder(features, computeSemantic=True, computeDepth=False)) outputs.update(depth_decoder(features, computeSemantic=False, computeDepth=True)) if isvisualize: if useGtSeman: mergeDisp(inputs, outputs, eval=False) else: mergeDisp(inputs, outputs, eval=True) dispMap = outputs[('disp', 0)] scaled_disp, depthMap = disp_to_depth(dispMap, 0.1, 100) depthMap = depthMap * STEREO_SCALE_FACTOR depthMap = torch.clamp(depthMap, max=80) if useGtSeman: fig_seman = tensor2semantic(inputs['seman_gt'], ind=index, isGt=True) else: if useSeman: fig_seman = tensor2semantic(outputs[('seman', 0)], ind=index) else: fig_seman = inputs[('color', 0, 0)][index, :, :, :].permute(1,2,0).cpu().numpy() fig_seman = (fig_seman * 255).astype(np.uint8) fig_seman = pil.fromarray(fig_seman) fig_rgb = tensor2rgb(inputs[('color', 0, 0)], ind=index) fig_disp = tensor2disp(outputs[('disp', 0)], ind=index, vmax=0.1) gtmask = (inputs['depth_gt'] > 0).float() gtdepth = inputs['depth_gt'] velo = inputs['velo'] fig_3d, veh_coord, veh_coord_gt = tensor23dPts.visualize3d(depthMap.detach(), ind=index, intrinsic_in=inputs['realIn'], extrinsic_in=inputs['realEx'], gtmask_in=gtmask, gtdepth_in=gtdepth, semanticMap=None, velo_in=velo, rgb_in = inputs[('color', 's', 0)], disp_in = outputs[('disp', 0)].detach() ) if viewMutuallyRegularizedBorder: foregroundType = [5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18] # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle backgroundType = [2, 3, 4, 8, 9, 10] #building, wall, fence, vegetation, terrain, sky foreGroundMask = torch.ones(dispMap.shape).cuda().byte() backGroundMask = torch.ones(dispMap.shape).cuda().byte() with torch.no_grad(): for m in foregroundType: foreGroundMask = foreGroundMask * (inputs['seman_gt'] != m) foreGroundMask = 1 - foreGroundMask for m in backgroundType: backGroundMask = backGroundMask * (inputs['seman_gt'] != m) backGroundMask = 1 - backGroundMask # tensor2disp(foreGroundMask, ind=0, vmax=1).show() # tensor2disp(backGroundMask, ind=0, vmax=1).show() # tensor2rgb(inputs[('color', 0, 0)], ind=0).show() # tensor2semantic(inputs['seman_gt'],ind=0,isGt=True).show() iouForeMean, iouBackMean, isvalid = mrb.visualization(gtdepth, foreGroundMask, backGroundMask, viewind= index, rgb=inputs[('color', 0, 0)]) iouFore_gtdepth2gtseman.append(iouForeMean) iouBack_gtdepth2gtseman.append(iouBackMean) iouValid_gtdepth2gtseman.append(isvalid) iouForeMean, iouBackMean, isvalid = mrb.visualization(1 - dispMap, foreGroundMask, backGroundMask, viewind=index, rgb=inputs[('color', 0, 0)]) iouFore_estdepth2gtseman.append(iouForeMean) iouBack_estdepth2gtseman.append(iouBackMean) iouValid_estdepth2gtseman.append(isvalid) semanMapEst = outputs[('seman', 0)] semanMapEst_sfxed = sfx(semanMapEst) foreGroundMask_est = torch.sum(semanMapEst_sfxed[:, foregroundType, :, :], dim=1).unsqueeze(1) backGroundMask_est = torch.sum(semanMapEst_sfxed[:, backgroundType, :, :], dim=1).unsqueeze(1) other_est = 1 - (foreGroundMask_est + backGroundMask_est) tot_est = torch.cat([foreGroundMask_est, backGroundMask_est, other_est], dim=1) foreGroundMask_est_bin = (torch.argmax(tot_est, dim=1) 
== 0).unsqueeze(1) backGroundMask_est_bin = (torch.argmax(tot_est, dim=1) == 1).unsqueeze(1) iouForeMean, iouBackMean, isvalid = mrb.visualization(1 - dispMap, foreGroundMask_est_bin, backGroundMask_est_bin, viewind=index, rgb=inputs[('color', 0, 0)]) iouFore_estdepth2estseman.append(iouForeMean) iouBack_estdepth2estseman.append(iouBackMean) iouValid_estdepth2estseman.append(isvalid) # tensor2disp(foreGroundMask_est_bin, vmax=1, ind=0).show() # tensor2disp(backGroundMask_est_bin, vmax=1, ind=0).show() if viewLiuSemanCompare: foregroundType = [5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18] # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle backgroundType = [2, 3, 4, 8, 9, 10] #building, wall, fence, vegetation, terrain, sky foreGroundMask = torch.ones(dispMap.shape).cuda().byte() backGroundMask = torch.ones(dispMap.shape).cuda().byte() with torch.no_grad(): for m in foregroundType: foreGroundMask = foreGroundMask * (inputs['seman_gt'] != m) foreGroundMask = 1 - foreGroundMask for m in backgroundType: backGroundMask = backGroundMask * (inputs['seman_gt'] != m) backGroundMask = 1 - backGroundMask dispMapEst = outputs[('disp', 0)] semanMapEst = outputs[('seman', 0)] semanMapGt = inputs['seman_gt'] depthMapGt = inputs['depth_gt'] sparseDepthmapGrad = compGrad.computegrad11_sparse(depthMapGt) sparseDepthmapGrad_bin = sparseDepthmapGrad > 0 sparseDepthmapGrad = F.interpolate(sparseDepthmapGrad, [height, width], mode='bilinear', align_corners=True) sparseDepthmapGrad_bin = F.interpolate(sparseDepthmapGrad_bin.float(), [height, width], mode='nearest') sparseDepthmapGrad = sparseDepthmapGrad * sparseDepthmapGrad_bin # depthMapGt_bin = depthMapGt > 1e-1 # depthMapGt = F.interpolate(sparseDepthmapGrad, (height, width), mode='bilinear', align_corners=False) # depthMapGt_bin = F.interpolate(depthMapGt_bin.float(), (height, width), mode='nearest') # depthMapGt = depthMapGt * depthMapGt_bin # compGrad.computegrad11_sparse(depthMapGt) # tensor2disp(depthMapGt>0, ind=0, vmax=1).show() semanMapEst_sfxed = sfx(semanMapEst) semanMapEst_inds = torch.argmax(semanMapEst_sfxed, dim=1).unsqueeze(1) seman_est_fig = tensor2semantic(semanMapEst_inds, ind=0) seman_gt_fig = tensor2semantic(semanMapGt, ind=0) depthMapGt_fig = tensor2disp(depthMapGt, ind=0, vmax=20) depthMapGt_fig = depthMapGt_fig.resize((width, height), resample=pil.BILINEAR) foreGroundMask_est = torch.sum(semanMapEst_sfxed[:,foregroundType,:,:], dim=1).unsqueeze(1) dispMapGrad = compGrad.computegrad11(dispMapEst) foreGroundMaskGrad = compGrad.computegrad11(foreGroundMask.float()) foreGroundMask_estGrad = compGrad.computegrad11(foreGroundMask_est) sparseDepthmapGrad_fig = tensor2disp(sparseDepthmapGrad, ind=0, vmax=20) dispMapGrad_fig = tensor2disp(dispMapGrad, ind=0, vmax=0.08) foreGroundMaskGrad_fig = tensor2disp(foreGroundMaskGrad, ind=0, vmax=1) foreGroundMask_estGrad_fig = tensor2disp(foreGroundMask_estGrad, ind=0, vmax=1.5) dispMapGrad_bin = dispMapGrad > 0.011 foreGroundMaskGrad_bin = foreGroundMaskGrad > 0.5 foreGroundMask_estGrad_bin = foreGroundMask_estGrad > 0.6 sparseDepthmapGrad_bin = sparseDepthmapGrad > 9 dispMapGrad_bin_fig = tensor2disp(dispMapGrad_bin, ind=0, vmax=1) foreGroundMaskGrad_bin_fig = tensor2disp(foreGroundMaskGrad_bin, ind=0, vmax=1) foreGroundMask_estGrad_bin_fig = tensor2disp(foreGroundMask_estGrad_bin, ind=0, vmax=1) sparseDepthmapGrad_bin_fig = tensor2disp(sparseDepthmapGrad_bin, ind=0, vmax=1) visualizeImage = np.concatenate([np.array(fig_rgb), np.array(fig_disp)[:,:,0:3], 
np.array(seman_est_fig), np.array(seman_gt_fig), np.array(depthMapGt_fig)[:,:,0:3]], axis=0) visualizeImage_grad = np.concatenate([np.array(fig_rgb), np.array(dispMapGrad_fig)[:,:,0:3], np.array(foreGroundMask_estGrad_fig)[:,:,0:3], np.array(foreGroundMaskGrad_fig)[:,:,0:3], np.array(sparseDepthmapGrad_fig)[:,:,0:3]], axis=0) visualizeimage_grad_bin = np.concatenate([np.array(fig_rgb), np.array(dispMapGrad_bin_fig)[:,:,0:3], np.array(foreGroundMask_estGrad_bin_fig)[:,:,0:3], np.array(foreGroundMaskGrad_bin_fig)[:,:,0:3], np.array(sparseDepthmapGrad_bin_fig)[:,:,0:3]], axis=0) tot = np.concatenate([np.array(visualizeImage), np.array(visualizeImage_grad), np.array(visualizeimage_grad_bin)], axis=1) pil.fromarray(tot).save('/media/shengjie/other/sceneUnderstanding/SDNET/visualization/borderConsistAnalysis/%d.png' % idx) # pil.fromarray(tot).show() # pil.fromarray(visualizeImage).show() # pil.fromarray(visualizeImage_grad).show() # pil.fromarray(visualizeimage_grad_bin).show() semanest2semangt = semanest2semangt + cmpBCons.computeDistance(foreGroundMask_estGrad_bin, foreGroundMaskGrad_bin) depth2disp = depth2disp + cmpBCons.computeDistance(sparseDepthmapGrad_bin, dispMapGrad_bin) depth2semangt = depth2semangt + cmpBCons.computeDistance(sparseDepthmapGrad_bin, foreGroundMaskGrad_bin) disp2semanest = disp2semanest + cmpBCons.computeDistance(dispMapGrad_bin, foreGroundMask_estGrad_bin) # tensor2disp(dispMapEst, ind=index, percentile=90).show() if viewBorderConverge: semanMapEst = outputs[('seman', 0)] semanMapEst_sfxed = sfx(semanMapEst) foregroundType = [5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18] # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle foreGroundMask_est = torch.sum(semanMapEst_sfxed[:, foregroundType, :, :], dim=1).unsqueeze(1) dispMapEst = outputs[('disp', 0)] # borderConverge.visualization(dispMapEst, foreGroundMask_est) if expBin: expbinmap.visualization3(disparity=dispMapEst, semantics=foreGroundMask_est) a = 1 if viewSecondOrder: disp2order = compSecGrad.computegrad11(outputs[('disp', 0)]) tensor2disp(disp2order, ind=0, percentile=95).show() if viewSurfaceNormal: surnorm = compsn.visualize(depthMap=depthMap, invcamK=inputs['invcamK'].cuda().float(), orgEstPts=veh_coord, gtEstPts=veh_coord_gt, viewindex=index) surnormMap = compsn(depthMap=depthMap, invcamK=inputs['invcamK'].cuda().float()) if viewSelfOcclu: fl = inputs[("K", 0)][:, 0, 0] bs = torch.abs(inputs["stereo_T"][:, 0, 3]) clufig, suppressedDisp = selfclu.visualize(dispMap, viewind=index) if viewSurfaceNormal and viewSelfOcclu: surnorm = surnorm.resize([width, height]) surnorm_mixed = pil.fromarray( (np.array(surnorm) * 0.2 + np.array(fig_disp)[:, :, 0:3] * 0.8).astype(np.uint8)) disp_seman = (np.array(fig_disp)[:, :, 0:3].astype(np.float) * 0.8 + np.array(fig_seman).astype( np.float) * 0.2).astype(np.uint8) supprressed_disp_seman = (np.array(suppressedDisp)[:, :, 0:3].astype(np.float) * 0.8 + np.array(fig_seman).astype( np.float) * 0.2).astype(np.uint8) rgb_seman = (np.array(fig_seman).astype(np.float) * 0.5 + np.array(fig_rgb).astype( np.float) * 0.5).astype(np.uint8) # clud_disp = (np.array(clufig)[:, :, 0:3].astype(np.float) * 0.3 + np.array(fig_disp)[:, :, 0:3].astype( # np.float) * 0.7).astype(np.uint8) comb1 = np.concatenate([np.array(supprressed_disp_seman)[:, :, 0:3], np.array(suppressedDisp)[:, :, 0:3]], axis=1) comb2 = np.concatenate([np.array(disp_seman)[:, :, 0:3], np.array(fig_disp)[:, :, 0:3]], axis=1) # comb3 = np.concatenate([np.array(errFig)[:, :, 0:3], 
np.array(surnorm)[:, :, 0:3]], axis=1) comb4 = np.concatenate([np.array(fig_seman)[:, :, 0:3], np.array(rgb_seman)[:, :, 0:3]], axis=1) comb6 = np.concatenate([np.array(clufig)[:, :, 0:3], np.array(fig_disp)[:, :, 0:3]], axis=1) fig3dsize = np.ceil(np.array([comb4.shape[1] , comb4.shape[1] / fig_3d.size[0] * fig_3d.size[1]])).astype(np.int) comb5 = np.array(fig_3d.resize(fig3dsize)) # fig = pil.fromarray(combined) # fig.save(os.path.join(dirpath, str(idx) + '.png')) print("%dth img finished" % idx) # if idx >=4: # break if viewLiuSemanCompare: semanest2semangt_p = semanest2semangt / np.sum(semanest2semangt) semanest2semangt_p_ = semanest2semangt_p[0:-1] mean = np.sum(np.arange(len(semanest2semangt_p_)) * semanest2semangt_p_) std = np.sqrt(np.sum((np.arange(len(semanest2semangt_p_)) - mean) ** 2 * semanest2semangt_p_)) fig, ax = plt.subplots() ax.bar(np.arange(len(semanest2semangt_p)), semanest2semangt_p) ax.set_ylabel('Percentile') ax.set_xlabel('Distance in pixel, mean %f, std %f' % (mean, std)) ax.set_title("Pixel distance of semantic, est to gt") fig.savefig("/media/shengjie/other/sceneUnderstanding/SDNET/visualization/borderConsistAnalysis/seman_est2gt.png") plt.close(fig) depth2disp_p = depth2disp / np.sum(depth2disp) depth2disp_p_ = depth2disp_p[0:-1] mean = np.sum(np.arange(len(depth2disp_p_)) * depth2disp_p_) std = np.sqrt(np.sum((np.arange(len(depth2disp_p_)) - mean) ** 2 * depth2disp_p_)) fig, ax = plt.subplots() ax.bar(np.arange(len(depth2disp_p)), depth2disp_p) ax.set_ylabel('Percentile') ax.set_xlabel('Distance in pixel, mean %f, std %f' % (mean, std)) ax.set_title("Pixel distance of depth, gt to est") fig.savefig("/media/shengjie/other/sceneUnderstanding/SDNET/visualization/borderConsistAnalysis/depth_gt2est.png") plt.close(fig) depth2semangt_p = depth2semangt / np.sum(depth2semangt) depth2semangt_p_ = depth2semangt_p[0:-1] mean = np.sum(np.arange(len(depth2semangt_p_)) * depth2semangt_p_) std = np.sqrt(np.sum((np.arange(len(depth2semangt_p_)) - mean) ** 2 * depth2semangt_p_)) fig, ax = plt.subplots() ax.bar(np.arange(len(depth2semangt_p)), depth2semangt_p) ax.set_ylabel('Percentile') ax.set_xlabel('Distance in pixel, mean %f, std %f' % (mean, std)) ax.set_title("Pixel distance of depth and semantic, gt") fig.savefig("/media/shengjie/other/sceneUnderstanding/SDNET/visualization/borderConsistAnalysis/depth2seman_gt.png") plt.close(fig) disp2semanest_p = disp2semanest / np.sum(disp2semanest) disp2semanest_p_ = disp2semanest_p[0:-1] mean = np.sum(np.arange(len(disp2semanest_p_)) * disp2semanest_p_) std = np.sqrt(np.sum((np.arange(len(disp2semanest_p_)) - mean) ** 2 * disp2semanest_p_)) fig, ax = plt.subplots() ax.bar(np.arange(len(disp2semanest_p)), disp2semanest_p) ax.set_ylabel('Percentile') ax.set_xlabel('Distance in pixel, mean %f, std %f' % (mean, std)) ax.set_title("Pixel distance of depth and semantic, est") fig.savefig("/media/shengjie/other/sceneUnderstanding/SDNET/visualization/borderConsistAnalysis/depth2seman_est.png") plt.close(fig) if viewMutuallyRegularizedBorder: iouFore_gtdepth2gtseman = np.array(iouFore_gtdepth2gtseman) iouBack_gtdepth2gtseman = np.array(iouBack_gtdepth2gtseman) iouValid_gtdepth2gtseman = np.array(iouValid_gtdepth2gtseman) iouFore_gtdepth2gtsemanMean = np.sum(iouFore_gtdepth2gtseman * iouValid_gtdepth2gtseman) / np.sum(iouValid_gtdepth2gtseman) iouBack_gtdepth2gtsemanMean = np.sum(iouBack_gtdepth2gtseman * iouValid_gtdepth2gtseman) / np.sum(iouValid_gtdepth2gtseman) iouFore_estdepth2gtseman = np.array(iouFore_estdepth2gtseman) 
        iouBack_estdepth2gtseman = np.array(iouBack_estdepth2gtseman)
        iouValid_estdepth2gtseman = np.array(iouValid_estdepth2gtseman)
        iouFore_estdepth2gtsemanMean = np.sum(iouFore_estdepth2gtseman * iouValid_estdepth2gtseman) / np.sum(iouValid_estdepth2gtseman)
        iouBack_estdepth2gtsemanMean = np.sum(iouBack_estdepth2gtseman * iouValid_estdepth2gtseman) / np.sum(iouValid_estdepth2gtseman)

        iouFore_estdepth2estseman = np.array(iouFore_estdepth2estseman)
        iouBack_estdepth2estseman = np.array(iouBack_estdepth2estseman)
        iouValid_estdepth2estseman = np.array(iouValid_estdepth2estseman)
        iouFore_estdepth2estsemanMean = np.sum(iouFore_estdepth2estseman * iouValid_estdepth2estseman) / np.sum(iouValid_estdepth2estseman)
        iouBack_estdepth2estsemanMean = np.sum(iouBack_estdepth2estseman * iouValid_estdepth2estseman) / np.sum(iouValid_estdepth2estseman)

        print("iouFore_gtdepth2gtsemanMean is %f" % iouFore_gtdepth2gtsemanMean)
        print("iouBack_gtdepth2gtsemanMean is %f" % iouBack_gtdepth2gtsemanMean)
        print("iouFore_estdepth2gtsemanMean is %f" % iouFore_estdepth2gtsemanMean)
        print("iouBack_estdepth2gtsemanMean is %f" % iouBack_estdepth2gtsemanMean)
        print("iouFore_estdepth2estsemanMean is %f" % iouFore_estdepth2estsemanMean)
        print("iouBack_estdepth2estsemanMean is %f" % iouBack_estdepth2estsemanMean)
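# The three summaries above repeat the same masked weighted mean over per-image IoU
# scores. A small helper capturing that pattern (hypothetical, not part of the original
# script; assumes numpy is imported as np, as elsewhere in these files):
def masked_mean_iou(iou_per_image, valid_per_image):
    """Average per-image IoU scores, counting only images flagged as valid."""
    iou_per_image = np.array(iou_per_image)
    valid_per_image = np.array(valid_per_image)
    return np.sum(iou_per_image * valid_per_image) / np.sum(valid_per_image)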
print("-> Loading weights from {}".format(opt.load_weights_folder)) filenames = readlines( os.path.join(splits_dir, opt.split, "train_files.txt")) # filenames = readlines(os.path.join(splits_dir, opt.eval_split, "test_files.txt")) encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth") decoder_path = os.path.join(opt.load_weights_folder, "depth.pth") encoder_dict = torch.load(encoder_path) dataset = datasets.KITTIRAWDataset( opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'], [0], 4, is_train=False, tag=opt.dataset, img_ext='png', load_meta=opt.load_meta, is_load_semantics=opt.use_kitti_gt_semantics, is_predicted_semantics=opt.is_predicted_semantics) # dataloader = DataLoader(dataset, opt.batch_size, shuffle=False, num_workers=opt.num_workers, # pin_memory=True, drop_last=False) dataloader = DataLoader(dataset, 1, shuffle=False, num_workers=opt.num_workers, pin_memory=True, drop_last=False)
def evaluate(opt): """Evaluates a pretrained model using a specified test set """ MIN_DEPTH = 1e-3 MAX_DEPTH = 80 opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder) assert os.path.isdir(opt.load_weights_folder), \ "Cannot find a folder at {}".format(opt.load_weights_folder) print("-> Loading weights from {}".format(opt.load_weights_folder)) filenames = readlines( os.path.join(splits_dir, opt.eval_split, "test_files.txt")) encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth") decoder_path = os.path.join(opt.load_weights_folder, "depth.pth") encoder_dict = torch.load(encoder_path) dataset = datasets.KITTIRAWDataset(opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'], [0], 4, is_train=False) dataloader = DataLoader(dataset, 16, shuffle=False, num_workers=opt.num_workers, pin_memory=True, drop_last=False) if opt.Lite_HR_Depth: encoder = networks.MobileEncoder(pretrained=None) elif opt.HR_Depth: encoder = networks.ResnetEncoder(18, False) else: assert False, " Please choose HR-Depth or Lite-HR-Depth " depth_decoder = networks.HRDepthDecoder(encoder.num_ch_enc, mobile_encoder=opt.Lite_HR_Depth) model_dict = encoder.state_dict() encoder.load_state_dict( {k: v for k, v in encoder_dict.items() if k in model_dict}) depth_decoder.load_state_dict(torch.load(decoder_path)) encoder.cuda() encoder.eval() depth_decoder.cuda() depth_decoder.eval() pred_disps = [] print("-> Computing predictions with size {}x{}".format( encoder_dict['width'], encoder_dict['height'])) with torch.no_grad(): for data in dataloader: input_color = data[("color", 0, 0)].cuda() output = depth_decoder(encoder(input_color)) pred_disp, _ = disp_to_depth(output[("disparity", "Scale0")], 0.1, 100.0) pred_disp = pred_disp.cpu()[:, 0].numpy() pred_disps.append(pred_disp) pred_disps = np.concatenate(pred_disps) gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz") gt_depths = np.load(gt_path, fix_imports=True, encoding='latin1', allow_pickle=True)["data"] print("-> Evaluating") print(" Using median scaling") errors = [] ratios = [] for i in range(pred_disps.shape[0]): gt_depth = gt_depths[i] gt_height, gt_width = gt_depth.shape[:2] pred_disp = pred_disps[i] pred_disp = cv2.resize(pred_disp, (gt_width, gt_height)) pred_depth = 1 / pred_disp # Apply the mask proposed by Eigen mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH) crop = np.array([ 0.40810811 * gt_height, 0.99189189 * gt_height, 0.03594771 * gt_width, 0.96405229 * gt_width ]).astype(np.int32) crop_mask = np.zeros(mask.shape) crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1 mask = np.logical_and(mask, crop_mask) pred_depth = pred_depth[mask] gt_depth = gt_depth[mask] ratio = np.median(gt_depth) / np.median(pred_depth) ratios.append(ratio) pred_depth *= ratio pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH errors.append(compute_errors(gt_depth, pred_depth)) ratios = np.array(ratios) med = np.median(ratios) print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format( med, np.std(ratios / med))) mean_errors = np.array(errors).mean(0) print("\n " + ("{:>8} | " * 7 ).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3")) print(("&{: 8.3f} " * 7).format(*mean_errors.tolist()) + "\\\\") print("\n-> Done!")
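# compute_errors used above is the standard KITTI depth-metric set matching the printed
# header (abs_rel, sq_rel, rmse, rmse_log, a1-a3). A sketch of that computation,
# assuming gt and pred are already-masked 1-D numpy arrays in metres:
def compute_errors(gt, pred):
    thresh = np.maximum((gt / pred), (pred / gt))
    a1 = (thresh < 1.25).mean()
    a2 = (thresh < 1.25 ** 2).mean()
    a3 = (thresh < 1.25 ** 3).mean()

    rmse = np.sqrt(((gt - pred) ** 2).mean())
    rmse_log = np.sqrt(((np.log(gt) - np.log(pred)) ** 2).mean())

    abs_rel = np.mean(np.abs(gt - pred) / gt)
    sq_rel = np.mean(((gt - pred) ** 2) / gt)

    return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3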
def prediction(opts):
    """Runs a pretrained model on a specified test set and saves disparity visualizations
    """
    MIN_DEPTH = opts['min_depth']
    MAX_DEPTH = opts['max_depth']

    data_path = opts['dataset']['path']
    batch_size = opts['dataset']['batch_size']
    num_workers = opts['dataset']['num_workers']
    feed_height = opts['feed_height']
    feed_width = opts['feed_width']
    full_width = opts['dataset']['full_width']
    full_height = opts['dataset']['full_height']
    metric_mode = opts['metric_mode']
    framework_mode = opts['model']['mode']

    # The metric values here are obtained by forcibly compressing the gt values into the
    # scanner's range, which keeps them as close to metric scale as possible,
    # but for ...

    data_path = Path(opts['dataset']['path'])
    lines = Path(opts['dataset']['split']['path']) / opts['dataset']['split']['test_file']
    model_path = opts['model']['load_paths']
    components = opts['model']['mode']
    frame_sides = opts['frame_sides']
    out_dir_base = Path(opts['out_dir_base'])
    # frame_prior, frame_now, frame_next = opts['frame_sides']

    encoder, decoder = model_init(model_path, mode=components)
    file_names = readlines(lines)

    print('-> dataset_path:{}'.format(data_path))
    print('-> model_path')
    for k, v in opts['model']['load_paths'].items():
        print('\t' + str(v))
    print("-> metrics mode: {}".format(metric_mode))
    print("-> data split:{}".format(lines))
    print('-> total:{}'.format(len(file_names)))

    file_names.sort()
    # prediction loader
    # test_files = []
    # for base in file_names:
    #     test_files.append(data_path / base)
    # test_files.sort()

    if opts['dataset']['type'] == 'mc':
        dataset = datasets.MCDataset(data_path=data_path,
                                     filenames=file_names,
                                     height=feed_height,
                                     width=feed_width,
                                     frame_sides=frame_sides,
                                     num_scales=1,
                                     mode="prediction")
    elif opts['dataset']['type'] == 'kitti':
        dataset = datasets.KITTIRAWDataset(  # KITTIRAWData
            data_path=data_path,
            filenames=file_names,
            height=feed_height,
            width=feed_width,
            frame_sides=frame_sides,
            num_scales=1,
            mode="prediction")

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=True,
                            drop_last=False)

    out_shows = []
    if opts['out_dir']:
        out_dir = out_dir_base / opts['out_dir']
    else:
        out_dir = out_dir_base / data_path.stem
    out_dir.mkdir_p()

    for data in tqdm(dataloader):
        input_color = input_frames(data, mode=framework_mode, frame_sides=frame_sides)

        features = encoder(input_color)
        disp = decoder(*features)
        pred_disp, pred_depth = disp_to_depth(disp, min_depth=MIN_DEPTH, max_depth=MAX_DEPTH)

        out_show = pred_disp
        out_show = out_show.cpu()[:, 0].numpy()
        out_shows.append(out_show)

    for idx, item in enumerate(out_shows):
        depth_name = file_names[idx].replace('/', '_').replace('.png', 'depth')
        idx += 1
        plt.imsave(out_dir / depth_name + '{}'.format('.png'), item[0], cmap='magma')
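# disp_to_depth as called above appears to follow the monodepth2 convention: the
# network's sigmoid output is mapped linearly between 1/max_depth and 1/min_depth and
# depth is the reciprocal. A sketch of that conversion:
def disp_to_depth(disp, min_depth, max_depth):
    min_disp = 1.0 / max_depth
    max_disp = 1.0 / min_depth
    scaled_disp = min_disp + (max_disp - min_disp) * disp
    depth = 1.0 / scaled_disp
    return scaled_disp, depth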
def evaluate(opt): """Evaluates a pretrained model using a specified test set """ MIN_DEPTH = 1e-3 MAX_DEPTH = 80 opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder) assert os.path.isdir(opt.load_weights_folder), \ "Cannot find a folder at {}".format(opt.load_weights_folder) print("-> Loading weights from {}".format(opt.load_weights_folder)) filenames = readlines(os.path.join(splits_dir, opt.split, "val_files.txt")) encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth") decoder_path = os.path.join(opt.load_weights_folder, "depth.pth") encoder_dict = torch.load(encoder_path) if opt.dataset == 'cityscape': dataset = datasets.CITYSCAPERawDataset(opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'], [0], 4, is_train=False, tag=opt.dataset) elif opt.dataset == 'kitti': dataset = datasets.KITTIRAWDataset( opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'], [0,'s'], 4, tag='kitti', is_train=False, img_ext='png', load_meta=False, is_load_semantics=True, is_predicted_semantics=True, load_morphed_depth=False) else: raise ValueError("No predefined dataset") dataloader = DataLoader(dataset, 16, shuffle=False, num_workers=opt.num_workers, pin_memory=True, drop_last=False) encoder = networks.ResnetEncoder(opt.num_layers, False) if opt.switchMode == 'on': depth_decoder = networks.DepthDecoder(encoder.num_ch_enc, isSwitch=True, isMulChannel=opt.isMulChannel) else: depth_decoder = networks.DepthDecoder(encoder.num_ch_enc) model_dict = encoder.state_dict() encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict}) depth_decoder.load_state_dict(torch.load(decoder_path)) encoder.cuda() encoder.eval() depth_decoder.cuda() depth_decoder.eval() sfx = torch.nn.Softmax(dim=1) print("Evaluation starts") confMatrix = generateMatrix(args) nbPixels = 0 count255 = 0 with torch.no_grad(): for idx, inputs in enumerate(dataloader): input_color = inputs[("color", 0, 0)].cuda() outputs = depth_decoder(encoder(input_color),computeSemantic = True, computeDepth = False) gt = inputs['seman_gt_eval'].cpu().numpy().astype(np.uint8) pred = sfx(outputs[('seman', 0)]).detach() pred = torch.argmax(pred, dim=1).type(torch.float).unsqueeze(1) pred = F.interpolate(pred, [gt.shape[1], gt.shape[2]], mode='nearest') pred = pred.squeeze(1).cpu().numpy().astype(np.uint8) # visualize_semantic(gt[0,:,:]).show() # visualize_semantic(pred[0,:,:]).show() groundTruthNp = gt predictionNp = pred nbPixels = nbPixels + groundTruthNp.shape[0] * groundTruthNp.shape[1] * groundTruthNp.shape[2] # encoding_value = max(groundTruthNp.max(), predictionNp.max()).astype(np.int32) + 1 encoding_value = 256 # precomputed encoded = (groundTruthNp.astype(np.int32) * encoding_value) + predictionNp values, cnt = np.unique(encoded, return_counts=True) for value, c in zip(values, cnt): pred_id = value % encoding_value gt_id = int((value - pred_id) / encoding_value) if pred_id == 255 or gt_id == 255: count255 = count255 + c continue if not gt_id in args.evalLabels: printError("Unknown label with id {:}".format(gt_id)) confMatrix[gt_id][pred_id] += c print("Finish %dth batch" % idx) if confMatrix.sum() + count255 != nbPixels: printError( 'Number of analyzed pixels and entries in confusion matrix disagree: contMatrix {}, pixels {}'.format( confMatrix.sum(), nbPixels)) classScoreList = {} for label in args.evalLabels: labelName = trainId2label[label].name classScoreList[labelName] = getIouScoreForLabel(label, confMatrix, args) vals = np.array(list(classScoreList.values())) mIOU = 
np.mean(vals[np.logical_not(np.isnan(vals))])

    # if opt.save_pred_disps:
    #     output_path = os.path.join(
    #         opt.load_weights_folder, "disps_{}_split.npy".format(opt.eval_split))
    #     print("-> Saving predicted disparities to ", output_path)
    #     np.save(output_path, pred_disps)

    print("mIOU is %f" % mIOU)
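# The mIoU above averages per-class IoU values taken from the confusion matrix,
# skipping NaN entries for classes that never occur. A hypothetical equivalent of
# getIouScoreForLabel (Cityscapes-style, without the ignore-in-eval refinements of
# the original implementation):
def iou_for_label(label, conf_matrix):
    tp = np.float64(conf_matrix[label, label])
    fn = np.float64(conf_matrix[label, :].sum()) - tp   # gt pixels of this class that were missed
    fp = np.float64(conf_matrix[:, label].sum()) - tp   # pixels wrongly predicted as this class
    denom = tp + fp + fn
    return float('nan') if denom == 0 else tp / denom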
def evaluate(opt): """Evaluates a pretrained model using a specified test set """ MIN_DEPTH = 1e-3 MAX_DEPTH = 80 assert sum((opt.eval_mono, opt.eval_stereo)) == 1, \ "Please choose mono or stereo evaluation by setting either --eval_mono or --eval_stereo" img_ext = '.png' if opt.png else '.jpg' if opt.ext_disp_to_eval is None: opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder) assert os.path.isdir(opt.load_weights_folder), \ "Cannot find a folder at {}".format(opt.load_weights_folder) print("-> Loading weights from {}".format(opt.load_weights_folder)) filenames = readlines(os.path.join(splits_dir, opt.eval_split, "test_files.txt")) encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth") decoder_path = os.path.join(opt.load_weights_folder, "depth.pth") encoder_dict = torch.load(encoder_path) # Check if superpixel dataset is used and create superpixel image if "superpixel" in opt.dataset or opt.superpixel_mask_loss_binary or opt.normal_loss or \ opt.superpixel_mask_loss_continuous or opt.input_channels is 4 or opt.input_channels is 6: # get number of channels to use for superpixel # 4 channel will use numpy array with superpixel indices # 3 channel will use rgb and put superpixel in dictionary. Can be used eg. in loss # 6 channel will use normal image + image averaged over superpixel area num_sup_channels = opt.input_channels print("Using {} channel input.".format(num_sup_channels)) if opt.no_superpixel_check: # dont check if superpixel information is correct print("Warning: Skip checking superpixel information.") else: print("Start converting test images to superpixel.") convert_rgb_to_superpixel(opt.data_path, filenames, opt.superpixel_method, opt.superpixel_arguments, img_ext=img_ext, num_channel=num_sup_channels) dataset = datasets.SuperpixelDataset(opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'], [0], 4, opt, is_train=False) else: dataset = datasets.KITTIRAWDataset(opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'], [0], 4, opt, is_train=False) dataloader = DataLoader(dataset, 16, shuffle=False, num_workers=opt.num_workers, pin_memory=True, drop_last=False) # will use encoder according to number of input channels encoder = networks.ResnetEncoder(opt.num_layers, False, num_input_channels=opt.input_channels) # if the surface normal are used we have to select the NormalDecoder instead of the Depth Decoder. 
if opt.decoder == "normal_vector": depth_decoder = networks.NormalDecoder(encoder.num_ch_enc) else: depth_decoder = networks.DepthDecoder(encoder.num_ch_enc) model_dict = encoder.state_dict() encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict}) depth_decoder.load_state_dict(torch.load(decoder_path)) encoder.cuda() encoder.eval() depth_decoder.cuda() depth_decoder.eval() pred_disps = [] print("-> Computing predictions with size {}x{}".format( encoder_dict['width'], encoder_dict['height'])) with torch.no_grad(): for data in dataloader: if opt.dataset == "kitti_superpixel": if opt.input_channels is 3: input_color = data[("color", 0, 0)].cuda() elif opt.input_channels is 4: color = data[("color", 0, 0)].cuda() superpixel = data[("super_label", 0, 0)].cuda() input_color = torch.cat((color, superpixel), dim=1) elif opt.input_channels is 6: color = data[("color", 0, 0)].cuda() superpixel = data[("super_img", 0, 0)].cuda() input_color = torch.cat((color, superpixel), dim=1) else: raise NotImplementedError("given input channel size is not implemented.") else: input_color = data[("color", 0, 0)].cuda() K = data[("K", 0)].cuda() K_inv = data[("inv_K", 0)] if opt.post_process: # Post-processed results require each image to have two forward passes input_color = torch.cat((input_color, torch.flip(input_color, [3])), 0) output = depth_decoder(encoder(input_color)) if opt.decoder == "normal_vector": normal_vec = output[("normal_vec", 0)] #depth = nd.normal_to_depth(K_inv, normal_vec, opt.min_depth, opt.max_depth) disp = nd.normals_to_disp3(K_inv, normal_vec) # print("new depth tensor shape", depth.shape) output[("disp", 0)] = disp # scaling of disp to min_depth to max_depth pred_disp, _ = disp_to_depth(output[("disp", 0)], opt.min_depth, opt.max_depth) pred_disp = pred_disp.cpu()[:, 0].numpy() else: pred_disp, _ = disp_to_depth(output[("disp", 0)], opt.min_depth, opt.max_depth) pred_disp = pred_disp.cpu()[:, 0].numpy() if opt.post_process: N = pred_disp.shape[0] // 2 pred_disp = batch_post_process_disparity(pred_disp[:N], pred_disp[N:, :, ::-1]) pred_disps.append(pred_disp) pred_disps = np.concatenate(pred_disps) else: # Load predictions from file print("-> Loading predictions from {}".format(opt.ext_disp_to_eval)) pred_disps = np.load(opt.ext_disp_to_eval) if opt.eval_eigen_to_benchmark: eigen_to_benchmark_ids = np.load( os.path.join(splits_dir, "benchmark", "eigen_to_benchmark_ids.npy")) pred_disps = pred_disps[eigen_to_benchmark_ids] if opt.save_pred_disps: output_path = os.path.join( opt.load_weights_folder, "disps_{}_split.npy".format(opt.eval_split)) print("-> Saving predicted disparities to ", output_path) np.save(output_path, pred_disps) if opt.no_eval: print("-> Evaluation disabled. Done.") quit() elif opt.eval_split == 'benchmark': save_dir = os.path.join(opt.load_weights_folder, "benchmark_predictions") print("-> Saving out benchmark predictions to {}".format(save_dir)) if not os.path.exists(save_dir): os.makedirs(save_dir) for idx in range(len(pred_disps)): disp_resized = cv2.resize(pred_disps[idx], (1216, 352)) depth = STEREO_SCALE_FACTOR / disp_resized depth = np.clip(depth, 0, 80) depth = np.uint16(depth * 256) save_path = os.path.join(save_dir, "{:010d}.png".format(idx)) cv2.imwrite(save_path, depth) print("-> No ground truth is available for the KITTI benchmark, so not evaluating. 
Done.") quit() gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz") gt_depths = np.load(gt_path, fix_imports=True, encoding='latin1')["data"] print("-> Evaluating") if opt.eval_stereo: print(" Stereo evaluation - " "disabling median scaling, scaling by {}".format(STEREO_SCALE_FACTOR)) opt.disable_median_scaling = True opt.pred_depth_scale_factor = STEREO_SCALE_FACTOR else: print(" Mono evaluation - using median scaling") errors = [] ratios = [] for i in range(pred_disps.shape[0]): gt_depth = gt_depths[i] gt_height, gt_width = gt_depth.shape[:2] pred_disp = pred_disps[i] pred_disp = cv2.resize(pred_disp, (gt_width, gt_height)) pred_depth = 1 / pred_disp if opt.eval_split == "eigen": mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH) crop = np.array([0.40810811 * gt_height, 0.99189189 * gt_height, 0.03594771 * gt_width, 0.96405229 * gt_width]).astype(np.int32) crop_mask = np.zeros(mask.shape) crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1 mask = np.logical_and(mask, crop_mask) else: mask = gt_depth > 0 pred_depth = pred_depth[mask] gt_depth = gt_depth[mask] pred_depth *= opt.pred_depth_scale_factor if not opt.disable_median_scaling: ratio = np.median(gt_depth) / np.median(pred_depth) ratios.append(ratio) pred_depth *= ratio pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH errors.append(compute_errors(gt_depth, pred_depth)) if not opt.disable_median_scaling: ratios = np.array(ratios) med = np.median(ratios) print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(med, np.std(ratios / med))) mean_errors = np.array(errors).mean(0) print("\n " + ("{:>8} | " * 7).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3")) print(("&{: 8.3f} " * 7).format(*mean_errors.tolist()) + "\\\\") print("\n-> Done!")
def evaluate(opt): """Evaluates a pretrained model using a specified test set """ MIN_DEPTH = 1e-3 MAX_DEPTH = 80 assert sum((opt.eval_mono, opt.eval_stereo)) == 1, \ "Please choose mono or stereo evaluation by setting either --eval_mono or --eval_stereo" if opt.ext_disp_to_eval is None: opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder) assert os.path.isdir(opt.load_weights_folder), \ "Cannot find a folder at {}".format(opt.load_weights_folder) print("-> Loading weights from {}".format(opt.load_weights_folder)) filenames = readlines( os.path.join(splits_dir, opt.eval_split, "test_files.txt")) encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth") decoder_path = os.path.join(opt.load_weights_folder, "depth.pth") encoder_dict = torch.load(encoder_path) dataset = datasets.KITTIRAWDataset(opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'], [0], 4, is_train=False, load_semantics=opt.load_semantics, seman_path=opt.seman_path) dataloader = DataLoader(dataset, opt.batch_size, shuffle=False, num_workers=opt.num_workers, drop_last=False) encoder = networks.ResnetEncoder(opt.num_layers, False) depth_decoder = networks.DepthDecoder(encoder.num_ch_enc) if opt.bnMorphLoss: from bnmorph.bnmorph import BNMorph bnmorph = BNMorph(height=encoder_dict['height'], width=encoder_dict['width']).cuda() if opt.post_process: tool = grad_computation_tools( batch_size=opt.batch_size * 2, height=encoder_dict['height'], width=encoder_dict['width']).cuda() else: tool = grad_computation_tools( batch_size=opt.batch_size, height=encoder_dict['height'], width=encoder_dict['width']).cuda() model_dict = encoder.state_dict() encoder.load_state_dict( {k: v for k, v in encoder_dict.items() if k in model_dict}) depth_decoder.load_state_dict(torch.load(decoder_path)) encoder.cuda() encoder.eval() depth_decoder.cuda() depth_decoder.eval() pred_disps = [] count = 0 with torch.no_grad(): for data in dataloader: input_color = data[("color", 0, 0)].cuda() if opt.post_process: input_color = torch.cat( (input_color, torch.flip(input_color, [3])), 0) if 'seman_gt' in data: data['seman_gt'] = torch.cat( (data['seman_gt'], torch.flip( data['seman_gt'], [3])), 0) features = encoder(input_color) outputs = dict() outputs.update(depth_decoder(features)) if opt.bnMorphLoss: for key, ipt in data.items(): if not (key == 'height' or key == 'width' or key == 'tag' or key == 'cts_meta' or key == 'file_add'): data[key] = ipt.to(torch.device("cuda")) disparity_grad_bin = tool.get_disparityEdge(outputs['disp', 0]) semantics_grad_bin = tool.get_semanticsEdge( data['seman_gt']) morphedx, morphedy, coeff = bnmorph.find_corresponding_pts( disparity_grad_bin, semantics_grad_bin) morphedx = (morphedx / (encoder_dict['width'] - 1) - 0.5) * 2 morphedy = (morphedy / (encoder_dict['height'] - 1) - 0.5) * 2 grid = torch.cat([morphedx, morphedy], dim=1).permute(0, 2, 3, 1) dispMaps_morphed = F.grid_sample(outputs['disp', 0], grid, padding_mode="border") outputs[("disp", 0)] = dispMaps_morphed count = count + 1 pred_disp, _ = disp_to_depth(outputs[("disp", 0)], opt.min_depth, opt.max_depth) pred_disp = pred_disp.cpu()[:, 0].numpy() if opt.post_process: N = pred_disp.shape[0] // 2 pred_disp = batch_post_process_disparity( pred_disp[:N], pred_disp[N:, :, ::-1]) pred_disps.append(pred_disp) pred_disps = np.concatenate(pred_disps) else: # Load predictions from file print("-> Loading predictions from {}".format(opt.ext_disp_to_eval)) pred_disps = np.load(opt.ext_disp_to_eval) if opt.eval_eigen_to_benchmark: 
eigen_to_benchmark_ids = np.load( os.path.join(splits_dir, "benchmark", "eigen_to_benchmark_ids.npy")) pred_disps = pred_disps[eigen_to_benchmark_ids] if opt.save_pred_disps: output_path = os.path.join(opt.load_weights_folder, "disps_{}_split.npy".format(opt.eval_split)) print("-> Saving predicted disparities to ", output_path) np.save(output_path, pred_disps) if opt.no_eval: print("-> Evaluation disabled. Done.") quit() elif opt.eval_split == 'benchmark': save_dir = os.path.join(opt.load_weights_folder, "benchmark_predictions") print("-> Saving out benchmark predictions to {}".format(save_dir)) if not os.path.exists(save_dir): os.makedirs(save_dir) for idx in range(len(pred_disps)): disp_resized = cv2.resize(pred_disps[idx], (1216, 352)) depth = STEREO_SCALE_FACTOR / disp_resized depth = np.clip(depth, 0, 80) depth = np.uint16(depth * 256) save_path = os.path.join(save_dir, "{:010d}.png".format(idx)) cv2.imwrite(save_path, depth) print( "-> No ground truth is available for the KITTI benchmark, so not evaluating. Done." ) quit() gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz") gt_depths = np.load(gt_path, fix_imports=True, encoding='latin1', allow_pickle=True)["data"] print("-> Evaluating") if opt.eval_stereo: print(" Stereo evaluation - " "disabling median scaling, scaling by {}".format( STEREO_SCALE_FACTOR)) opt.disable_median_scaling = True opt.pred_depth_scale_factor = STEREO_SCALE_FACTOR else: print(" Mono evaluation - using median scaling") errors = [] ratios = [] for i in range(pred_disps.shape[0]): gt_depth = gt_depths[i] gt_height, gt_width = gt_depth.shape[:2] pred_disp = pred_disps[i] pred_disp = cv2.resize(pred_disp, (gt_width, gt_height)) pred_depth = 1 / pred_disp if opt.eval_split == "eigen" or opt.UseCustTest: mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH) crop = np.array([ 0.40810811 * gt_height, 0.99189189 * gt_height, 0.03594771 * gt_width, 0.96405229 * gt_width ]).astype(np.int32) crop_mask = np.zeros(mask.shape) crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1 mask = np.logical_and(mask, crop_mask) else: mask = gt_depth > 0 pred_depth = pred_depth[mask] gt_depth = gt_depth[mask] pred_depth *= opt.pred_depth_scale_factor if not opt.disable_median_scaling: ratio = np.median(gt_depth) / np.median(pred_depth) ratios.append(ratio) pred_depth *= ratio pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH errors.append( compute_errors( gt_depth, pred_depth, UseGtMedianScaling=(opt.UseGtMedianScaling == True))) if not opt.disable_median_scaling: ratios = np.array(ratios) med = np.median(ratios) print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format( med, np.std(ratios / med))) mean_errors = np.array(errors).mean(0) print("\n " + ("{:>8} | " * 7 ).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3")) print(("&{: 8.3f} " * 7).format(*mean_errors.tolist()) + "\\\\") print("\n-> Done!")
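# The bnMorph branch above turns absolute pixel coordinates (morphedx, morphedy) into
# the [-1, 1] range that F.grid_sample expects before resampling the disparity map.
# A hypothetical standalone helper doing the same normalization and warp (assumes torch
# and torch.nn.functional as F are imported, as in the script):
def warp_by_pixel_coords(src, px, py):
    """src: [B, C, H, W]; px, py: [B, 1, H, W] absolute pixel coordinates."""
    _, _, h, w = src.shape
    gx = (px / (w - 1) - 0.5) * 2
    gy = (py / (h - 1) - 0.5) * 2
    grid = torch.cat([gx, gy], dim=1).permute(0, 2, 3, 1)
    return F.grid_sample(src, grid, padding_mode="border")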
def evaluate(opt): """Evaluates a pretrained model using a specified test set """ MIN_DEPTH = 1e-3 MAX_DEPTH = 80 selfOccluMask = SelfOccluMask().cuda() selfOccluMask.th = 0 if opt.isCudaMorphing and opt.borderMorphLoss: bnmorph = BNMorph(height=opt.height, width=opt.width, sparsityRad=2).cuda() assert sum((opt.eval_mono, opt.eval_stereo)) == 1, \ "Please choose mono or stereo evaluation by setting either --eval_mono or --eval_stereo" if opt.ext_disp_to_eval is None: opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder) assert os.path.isdir(opt.load_weights_folder), \ "Cannot find a folder at {}".format(opt.load_weights_folder) filenames = readlines( os.path.join(splits_dir, opt.split_name, opt.appendix_name + ".txt")) encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth") decoder_path = os.path.join(opt.load_weights_folder, "depth.pth") encoder_dict = torch.load(encoder_path) dataset = datasets.KITTIRAWDataset( opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'], [0, 's'], 4, is_train=False, tag=opt.dataset, img_ext='png', load_meta=opt.load_meta, is_load_semantics=opt.use_kitti_gt_semantics, is_predicted_semantics=opt.is_predicted_semantics) dataloader = DataLoader(dataset, 2, shuffle=False, num_workers=opt.num_workers, drop_last=False) encoder = networks.ResnetEncoder(opt.num_layers, False, num_input_images=2) depth_decoder = networks.DepthDecoder( encoder.num_ch_enc, isSwitch=(opt.switchMode == 'on'), isMulChannel=opt.isMulChannel, outputtwoimage=(opt.outputtwoimage == True)) model_dict = encoder.state_dict() encoder.load_state_dict( {k: v for k, v in encoder_dict.items() if k in model_dict}) depth_decoder.load_state_dict(torch.load(decoder_path)) encoder.cuda() encoder.eval() depth_decoder.cuda() depth_decoder.eval() pred_disps = [] mergeDisp = Merge_MultDisp(opt.scales, batchSize=opt.batch_size) count = 0 tottime = 0 if not os.path.isdir(opt.output_dir): os.mkdir(opt.output_dir) with torch.no_grad(): for data in dataloader: # input_colorl = torch.cat([data[("color", 0, 0)], data[("color", 's', 0)]], dim=1).cuda() # input_colorr = torch.cat([data[("color", 's', 0)], data[("color", 0, 0)]], dim=1).cuda() # input_color = torch.cat([input_colorl, input_colorr], dim=0) start = time.time() input_color = torch.cat( [data[("color", 0, 0)], data[("color", 's', 0)]], dim=1).cuda() # tensor2rgb(input_color[:,0:3,:,:], ind=0).show() # tensor2rgb(input_color[:, 3:6, :, :], ind=0).show() # tensor2rgb(input_color[:, 0:3, :, :], ind=1).show() features = encoder(input_color) outputs = dict() outputs.update( depth_decoder(features, computeSemantic=False, computeDepth=True)) mergeDisp(data, outputs, eval=True) count = count + 1 scaled_disp, _ = disp_to_depth(outputs[("disp", 0)], opt.min_depth, opt.max_depth) pred_disp = scaled_disp pred_disp = pred_disp.cpu()[:, 0].numpy() real_scale_disp = scaled_disp * (torch.abs( data[("K", 0)][:, 0, 0] * data["stereo_T"][:, 0, 3]).view( opt.batch_size, 1, 1, 1).expand_as(scaled_disp)).cuda() SSIMMask = selfOccluMask(real_scale_disp, data["stereo_T"][:, 0, 3].cuda()) store_path = filenames[data['idx'][0].numpy()].split(' ') folder1 = os.path.join(opt.output_dir, store_path[0].split('/')[0]) folder2 = os.path.join(opt.output_dir, store_path[0]) folder3 = os.path.join(folder2, 'image_02') folder4 = os.path.join(folder2, 'image_03') if not os.path.isdir(folder1): os.mkdir(folder1) if not os.path.isdir(folder2): os.mkdir(folder2) if not os.path.isdir(folder3): os.mkdir(folder3) if not os.path.isdir(folder4): 
os.mkdir(folder4) if opt.outputvisualizaiton: folder5 = os.path.join(folder2, 'image_02_compose') folder6 = os.path.join(folder2, 'image_03_compose') if not os.path.isdir(folder5): os.mkdir(folder5) if not os.path.isdir(folder6): os.mkdir(folder6) a = outputs[("disp", 0)] * (1 - SSIMMask) fig1 = tensor2disp(a, ind=0, vmax=0.15) fig2 = tensor2disp(a, ind=1, vmax=0.15) fig1.save( os.path.join(folder5, store_path[1].zfill(10) + '.png')) fig2.save( os.path.join(folder6, store_path[1].zfill(10) + '.png')) pathl = os.path.join(folder3, store_path[1].zfill(10) + '.png') pathr = os.path.join(folder4, store_path[1].zfill(10) + '.png') # fig1 = tensor2disp(outputs[("disp", 0)], ind=1, vmax=0.1) # fig2 = tensor2disp(outputs[("disp", 0)] * (1 - SSIMMask), ind=1, vmax=0.1) # fig_combined = np.concatenate([np.array(fig1), np.array(fig2)], axis=0) # pil.fromarray(fig_combined).show() real_scale_disp = real_scale_disp * (1 - SSIMMask) stored_disp = real_scale_disp / 960 save_loss(stored_disp[0, 0, :, :].cpu().numpy(), pathl) save_loss(stored_disp[1, 0, :, :].cpu().numpy(), pathr) duration = time.time() - start tottime = tottime + duration print("left time %f hours" % (tottime / count * (len(filenames) - count) / 60 / 60))
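# save_loss is defined elsewhere; the values written above are stereo pixel disparities
# (scaled_disp * |fx * baseline|) divided by 960. A hypothetical stand-in, purely for
# illustration, that stores such a normalized float map as a 16-bit PNG (assumes cv2
# and numpy are available; the real on-disk format may differ):
def save_float_map_png16(arr, path, scale=65535.0):
    arr16 = np.clip(np.round(arr * scale), 0, 65535).astype(np.uint16)
    cv2.imwrite(path, arr16)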
def evaluate(opt): """Evaluates a pretrained model using a specified test set """ opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder) filenames = readlines(os.path.join(splits_dir, opt.eval_split, "test_files.txt")) encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth") decoder_path = os.path.join(opt.load_weights_folder, "depth.pth") encoder_dict = torch.load(encoder_path) dataset = datasets.KITTIRAWDataset(opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'],[0], 4, is_train=False) encoder = networks.ResnetEncoder(opt.num_layers, False) depth_decoder = networks.DepthDecoder(encoder.num_ch_enc, num_output_channels=3) model_dict = encoder.state_dict() encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict}) depth_decoder.load_state_dict(torch.load(decoder_path)) encoder.cuda() encoder.eval() depth_decoder.cuda() depth_decoder.eval() dirmapping = {'l':'image_02', 'r':'image_03'} localgeomDict = dict() print("-> Computing predictions with size {}x{}".format(encoder_dict['width'], encoder_dict['height'])) totloss = 0 with torch.no_grad(): for count in range(len(filenames)): data = dataset.__getitem__(count) input_color = data[("color", 0, 0)].unsqueeze(0).cuda() output = depth_decoder(encoder(input_color)) _, preddepth = disp_to_depth(output[("disp", 0)][:,2:3,:,:], opt.min_depth, opt.max_depth) preddepth = preddepth * STEREO_SCALE_FACTOR htheta = output[("disp", 0)][:, 0:1, :, :] * 2 * np.pi vtheta = output[("disp", 0)][:, 1:2, :, :] * 2 * np.pi seq, frame, dir = filenames[count].split(' ') depthgt = pil.open(os.path.join(opt.kitti_gt_path, seq, dirmapping[dir], frame + '.png')) depthgt = np.array(depthgt).astype(np.float32) / 256.0 depthgt = torch.from_numpy(depthgt).unsqueeze(0).unsqueeze(0).cuda() _, _, ch, cw = depthgt.shape acckey = str(ch) + '_' + str(cw) if acckey not in localgeomDict: kittiw = cw kittih = ch intrinsicKitti = np.array([ [0.58 * kittiw, 0, 0.5 * kittiw], [0, 1.92 * kittih, 0.5 * kittih], [0, 0, 1]], dtype=np.float32) localthetadesp = LocalThetaDesp(height=kittih, width=kittiw, batch_size=1, intrinsic=intrinsicKitti).cuda() localgeomDict[acckey] = localthetadesp rgbi = F.interpolate(input_color, [ch, cw], mode='bilinear', align_corners=True) hthetai = F.interpolate(htheta, [ch, cw], mode='bilinear', align_corners=True) vthetai = F.interpolate(vtheta, [ch, cw], mode='bilinear', align_corners=True) preddepthi = F.interpolate(preddepth, [ch, cw], mode='bilinear', align_corners=True) # hthetai, vthetai = localgeomDict[acckey].get_theta(preddepthi) ratioh, ratiohl, ratiov, ratiovl = localgeomDict[acckey].get_ratio(htheta=hthetai, vtheta=vthetai) # ratiohl = torch.zeros_like(ratiohl) # ratiovl = torch.zeros_like(ratiovl) logdepthd = torch.log(depthgt) valindic = depthgt > 0 lossrec = torch.zeros_like(logdepthd) countsrec = torch.zeros_like(logdepthd) rndseeds = torch.rand_like(logdepthd) inplaceShapeLoss_cuda.inplaceShapeLoss_forward(logdepthd, ratiohl, ratiovl, valindic.int(), lossrec, countsrec, rndseeds, 30, 30) totloss = totloss + torch.sum(lossrec[lossrec > 0]) / torch.sum(lossrec > 0) # cm = plt.get_cmap('bwr') # xx, yy = np.meshgrid(range(cw), range(ch), indexing='xy') # lossrecnp = lossrec[0, 0, :, :].cpu().numpy() # valmask = np.abs(lossrecnp) > 0 # z = lossrecnp[valmask] # # selector_pos = z > 0 # selector_neg = z < 0 # # bar = 0.005 # # if np.sum(selector_pos) > 1: # pos_bar = bar # z[selector_pos] = z[selector_pos] / pos_bar / 2 # # if np.sum(selector_neg) > 1: # neg_bar = -bar # 
z[selector_neg] = -z[selector_neg] / neg_bar / 2 # # znormed = z + 0.5 # colorMap = cm(znormed)[:, 0:3] # # plt.figure(figsize=(12, 9), dpi=120, facecolor='w', edgecolor='k') # plt.imshow(tensor2rgb(rgbi, ind=0)) # plt.scatter(xx[valmask], yy[valmask], c=colorMap, s=8) # plt.savefig(os.path.join('/media/shengjie/c9c81c9f-511c-41c6-bfe0-2fc19666fb32/Visualizations/Project_SemanDepth/vls_shapeErrType', str(count) + '.png')) # plt.close() # hthetad, vthetad = localgeomDict[acckey].get_theta(depthmap=preddepthi) # ratiohd, ratiohld, ratiovd, ratiovld = localgeomDict[acckey].get_ratio(htheta=hthetad, vtheta=vthetad) # logdepthd = torch.log(preddepthi) # valindic = preddepthi > 0 # lossrec = torch.zeros_like(logdepthd) # inplaceShapeLoss_cuda.inplaceShapeLoss_integration(logdepthd, ratiohld, ratiovld, valindic.int(), lossrec, 1, 1) totloss = totloss / len(filenames) print(totloss)
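# The intrinsics built per ground-truth resolution above use the usual monodepth2-style
# KITTI approximation (fx ~ 0.58*w, fy ~ 1.92*h, principal point at the image centre).
# Factored out for clarity (hypothetical helper, numpy assumed imported as np):
def approx_kitti_intrinsics(height, width):
    return np.array([[0.58 * width, 0.0, 0.5 * width],
                     [0.0, 1.92 * height, 0.5 * height],
                     [0.0, 0.0, 1.0]], dtype=np.float32)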