def __getitem__(self, index):
    """Load one sample: RGB image, raw depth, and surface normal.

    Returns:
        image     -- float tensor, 3 x H x W, scaled to roughly [-0.5, 0.5]
        raw_depth -- float tensor, 1 x H x W, raw depth values / 10000
        normal    -- float tensor, 3 x H x W, mapped from [0, 255] to [-1, 1]
    """
    sample_name = self.files[self.split][index]

    # --- raw depth (scaled so that 1.0 ~ 10 m, assuming depth is in 0.1 mm units -- TODO confirm) ---
    depth_path = pjoin(self.root, str(sample_name))
    depth_np = png_reader_32bit(depth_path, self.img_size).astype(float) / 10000
    # Validity mask (non-zero depth). Computed for parity with the masked
    # variant of this loader but intentionally not returned below.
    depth_mask = torch.from_numpy((depth_np > 0.0001).astype(float)).float()
    raw_depth = torch.from_numpy(depth_np[np.newaxis, :, :]).float()

    # --- RGB image (sibling file: 'depth' -> 'colors' in the path) ---
    image_np = png_reader_uint8(depth_path.replace('depth', 'colors'),
                                self.img_size).astype(float)
    image_np = (image_np - 128) / 255  # roughly [-0.5, 0.5]
    image = torch.from_numpy(image_np.transpose(2, 0, 1)).float()

    # --- surface normal (sibling file: 'depth' -> 'normal' in the path) ---
    normal_np = png_reader_uint8(depth_path.replace('depth', 'normal'),
                                 self.img_size).astype(float) / 255
    normal_np = 2 * normal_np.transpose(2, 0, 1) - 1  # [0, 1] -> [-1, 1]
    normal = torch.from_numpy(normal_np).float()

    # Masks are intentionally not part of the return value here.
    return image, raw_depth, normal
def __getitem__(self, index):
    """Load one sample: RGB image, raw depth, and surface normal.

    Returns:
        img       -- float tensor, 3 x H x W, scaled to roughly [-0.5, 0.5]
        raw_depth -- float tensor, 1 x H x W, raw depth / 10000
        normal    -- float tensor, 3 x H x W, mapped from [0, 255] to [-1, 1]
    """
    # Unlike the sibling loaders, self.files here is a flat list (no split dict).
    im_name_base = self.files[index]

    # --- raw depth ---
    raw_depth_path = pjoin(self.root, str(im_name_base))
    raw_depth = png_reader_32bit(raw_depth_path, self.img_size)
    # BUGFIX: cast to float BEFORE overwriting the 65535 "missing" sentinel.
    # png_reader_32bit presumably returns an integer array (the 65535
    # comparison suggests uint16 -- TODO confirm); assigning 0.001 into an
    # integer array truncates it to 0, silently losing the intended small
    # non-zero placeholder the original code wrote.
    raw_depth = raw_depth.astype(float)
    raw_depth[raw_depth == 65535] = 0.001
    raw_depth = raw_depth / 10000
    raw_depth = raw_depth[np.newaxis, :, :]
    raw_depth = torch.from_numpy(raw_depth).float()

    # --- RGB image (sibling file: 'depth_zbuffer' -> 'rgb' in the path) ---
    rgb_path = raw_depth_path.replace('depth_zbuffer', 'rgb')
    img = png_reader_uint8(rgb_path, self.img_size)
    img = img.astype(float)
    img = (img - 128) / 255  # roughly [-0.5, 0.5]
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).float()

    # --- surface normal (sibling file: 'depth_zbuffer' -> 'normal') ---
    normal_path = raw_depth_path.replace('depth_zbuffer', 'normal')
    normal = png_reader_uint8(normal_path, self.img_size)
    normal = normal.astype(float)
    normal = normal / 255
    normal = normal.transpose(2, 0, 1)
    normal = 2 * normal - 1  # [0, 1] -> [-1, 1]
    normal = torch.from_numpy(normal).float()

    return img, raw_depth, normal
def __getitem__(self, index):
    """Load one RGB-D sample together with rendered ground truth.

    Returns:
        mode == 'seg':  image, normal, normal_mask, raw_depth_mask, raw_depth, seg_img
        otherwise:      image, normal, normal_mask, raw_depth_mask, raw_depth, render_depth

    Shapes / scaling:
        image           -- 3 x h x w, roughly [-0.5, 0.5]
        raw_depth       -- 1 x h x w, raw depth / 10000
        raw_depth_mask  -- h x w, 0 or 1 (non-zero raw depth)
        render_depth    -- 1 x h x w, mesh depth / 40000
        normal          -- h x w x 3, in [-1, 1]
        normal_mask     -- h x w, 0 or 1
        seg_img         -- h x w, raw label values (uint16)
    """
    im_name_base = self.files[self.split][index]

    # --- raw depth + validity mask ---
    raw_depth_path = pjoin(self.root, str(im_name_base))
    raw_depth = png_reader_32bit(raw_depth_path, self.img_size)
    raw_depth = raw_depth.astype(float)
    raw_depth = raw_depth / 10000
    raw_depth_mask = (raw_depth > 0.0001).astype(float)
    raw_depth = raw_depth[np.newaxis, :, :]
    raw_depth = torch.from_numpy(raw_depth).float()
    raw_depth_mask = torch.from_numpy(raw_depth_mask).float()

    # --- segmentation label, loaded only when requested ---
    # BUGFIX: this load was commented out while the mode == 'seg' return
    # path below still referenced seg_img, which raised NameError.
    if self.mode == 'seg':
        seg_path = raw_depth_path.replace('/depth/', '/label/')
        seg_img = png_reader_32bit(seg_path, self.img_size)
        seg_img = torch.from_numpy(seg_img)

    # --- RGB image (sibling file: 'depth' -> 'colors' in the path) ---
    rgb_path = raw_depth_path.replace('depth', 'colors')
    image = png_reader_uint8(rgb_path, self.img_size)
    image = image.astype(float)
    image = (image - 128) / 255  # roughly [-0.5, 0.5]
    image = image.transpose(2, 0, 1)
    image = torch.from_numpy(image).float()

    # Build <scene>/render_depth/<frame>_mesh_depth.png from the sample name.
    # The slice offsets assume a fixed name layout (12-char scene id, frame
    # name starting at char 19) -- TODO confirm against the file lists.
    # Renamed from 'index' to avoid shadowing the method parameter.
    frame_id = im_name_base[19:].zfill(13)
    scene_name = im_name_base[:12]
    render_depth_name = frame_id.replace('.png', '_mesh_depth.png')
    render_depth_path = pjoin(self.root, scene_name, 'render_depth',
                              render_depth_name)
    render_depth = png_reader_32bit(render_depth_path, self.img_size)
    render_depth = render_depth.astype(float)
    render_depth = render_depth / 40000
    render_depth = render_depth[np.newaxis, :, :]
    render_depth = torch.from_numpy(render_depth).float()

    # --- rendered normals, stored as three 16-bit pngs (nx/ny/nz) ---
    normal_x_path = render_depth_path.replace('_depth.png', '_nx.png')
    normal_y_path = render_depth_path.replace('_depth.png', '_ny.png')
    normal_z_path = render_depth_path.replace('_depth.png', '_nz.png')
    normal_x_path = normal_x_path.replace('/render_depth/', '/render_normal/')
    normal_y_path = normal_y_path.replace('/render_depth/', '/render_normal/')
    normal_z_path = normal_z_path.replace('/render_depth/', '/render_normal/')
    normal_x = png_reader_32bit(normal_x_path, self.img_size).astype(float) / 65535
    normal_y = png_reader_32bit(normal_y_path, self.img_size).astype(float) / 65535
    normal_z = png_reader_32bit(normal_z_path, self.img_size).astype(float) / 65535

    # Normal validity mask: holes render as (near-)zero vectors.
    normal_mask = np.power(normal_x, 2) + np.power(normal_y, 2) + np.power(
        normal_z, 2)
    normal_mask = (normal_mask > 0.001).astype(float)
    # Fill holes with 0.5, which maps to 0 after the *2-1 rescale below.
    normal_x[normal_mask == 0] = 0.5
    normal_y[normal_mask == 0] = 0.5
    normal_z[normal_mask == 0] = 0.5
    # Channel order (x, 1-z, y) -- presumably converts the renderer's axis
    # convention to the camera convention of the labels; verify if changed.
    normal = np.concatenate(
        (normal_x[:, :, np.newaxis], 1 - normal_z[:, :, np.newaxis],
         normal_y[:, :, np.newaxis]),
        axis=2)
    normal = 2 * normal - 1
    normal = torch.from_numpy(normal).float()

    if self.mode == 'seg':
        # For segmentation mask
        return image, normal, normal_mask, raw_depth_mask, raw_depth, seg_img
    else:
        # Ordinary RGBD -> normal
        return image, normal, normal_mask, raw_depth_mask, raw_depth, render_depth
def __getitem__(self, index):
    """Load one Matterport-style sample.

    Returns:
        im        -- input RGB, 3 x h x w (roughly [-0.5, 0.5] when img_norm)
        lb        -- ground-truth normal, h x w x 3, in [-1, 1]
        mask      -- normal validity (gt != 0), h x w
        valid     -- raw-depth validity (rawdepth != 0), h x w
        rawdepth  -- sensor depth with holes, 1 x h x w
        meshdepth -- rendered mesh depth, 1 x h x w

    NOTE(review): all scaling plus the lb/mask/valid computation happens
    inside `if self.img_norm:` below, yet the tail of the method uses those
    names unconditionally -- with img_norm=False this raises NameError.
    Presumably img_norm is always True for this loader; confirm.
    """
    im_name_base = self.files[self.split][index]
    im_path = pjoin(self.root, im_name_base)

    # Derive the sibling file paths from the color-image name. The first
    # replace('_i', '_d') also turns 'undistorted_color_images' into
    # 'undistorted_color_dmages', which is why the directory replaces below
    # intentionally match that mangled spelling.
    im_name = im_name_base.replace('_i', '_d')
    im_name = im_name.replace('undistorted_color_dmages',
                              'undistorted_depth_images')
    im_name = im_name.replace('.jpg', '.png')
    depth_path = pjoin(self.root, im_name)

    # Rendered normals, one 16-bit png per component.
    im_name = im_name_base.replace('_i', '_d')
    im_name = im_name.replace('undistorted_color_dmages', 'render_normal')
    lb_path_nx = pjoin(self.root, im_name.replace('.jpg', '_mesh_nx.png'))
    lb_path_ny = pjoin(self.root, im_name.replace('.jpg', '_mesh_ny.png'))
    lb_path_nz = pjoin(self.root, im_name.replace('.jpg', '_mesh_nz.png'))

    # Rendered mesh depth.
    im_name = im_name_base.replace('_i', '_d')
    im_name = im_name.replace('undistorted_color_dmages', 'render_depth')
    meshdepth_path = pjoin(self.root,
                           im_name.replace('.jpg', '_mesh_depth.png'))

    im = png_reader_uint8(im_path, self.img_size)  # uint8
    rawdepth = png_reader_32bit(depth_path, self.img_size)  # 32bit uint
    lbx = png_reader_32bit(lb_path_nx, self.img_size)
    lby = png_reader_32bit(lb_path_ny, self.img_size)
    lbz = png_reader_32bit(lb_path_nz, self.img_size)
    meshdepth = png_reader_32bit(meshdepth_path, self.img_size)

    im = im.astype(float)
    rawdepth = rawdepth.astype(float)
    lbx = lbx.astype(float)
    lby = lby.astype(float)
    lbz = lbz.astype(float)
    meshdepth = meshdepth.astype(float)

    if self.img_norm:
        # Rescale image from [0, 255] to roughly [-0.5, 0.5].
        im = (im - 128) / 255
        # Rescale 16-bit labels to [0, 1] (mapped to [-1, 1] below).
        lbx = lbx / 65535
        lby = lby / 65535
        lbz = lbz / 65535
        # Normal validity mask: holes render as (near-)zero vectors.
        mask = np.power(lbx, 2) + np.power(lby, 2) + np.power(lbz, 2)
        mask = (mask > 0.001).astype(float)
        # Fill holes with 0.5, which maps to 0 after the *2-1 rescale.
        lbx[mask == 0] = 0.5
        lby[mask == 0] = 0.5
        lbz[mask == 0] = 0.5
        # Channel order (x, 1-z, y) -- presumably converts the renderer's
        # axis convention to the camera convention; verify if changed.
        lb = np.concatenate(
            (lbx[:, :, np.newaxis], 1 - lbz[:, :, np.newaxis],
             lby[:, :, np.newaxis]),
            axis=2)
        lb = 2 * lb - 1
        # Scale depths; divide by mean value (1.0 ~ 40000 raw units).
        rawdepth = rawdepth / 40000
        meshdepth = meshdepth / 40000
        # Depth validity from the raw (sensor) depth.
        valid = (rawdepth > 0.0001).astype(float)

    # NHWC -> NCHW
    im = im.transpose(2, 0, 1)
    im = torch.from_numpy(im).float()
    lb = torch.from_numpy(lb).float()
    mask = torch.from_numpy(mask).float()
    valid = torch.from_numpy(valid).float()
    rawdepth = rawdepth[np.newaxis, :, :]
    rawdepth = torch.from_numpy(rawdepth).float()
    meshdepth = meshdepth[np.newaxis, :, :]
    meshdepth = torch.from_numpy(meshdepth).float()

    # input: im, 3*h*w
    # gt: lb, h*w*3
    # mask: gt!=0, h*w
    # valid: rawdepth!=0, h*w
    # rawdepth: depth with hole, 1*h*w
    # meshdepth: depth with hole, 1*h*w
    return im, lb, mask, valid, rawdepth, meshdepth
def test(args):
    """Evaluate the RGB-D -> surface-normal fusion model.

    Two modes:
      * args.imgset truthy -- iterate a dataset split, save predictions and
        accumulate normal-error statistics (mean/median/11.25/22.5/30 deg).
      * otherwise          -- iterate the .jpg files in args.img_path, pair
        each with a 16-bit depth .png from args.depth_path, and write the
        predicted normal maps into args.out_path.
    """
    # Setup the fusion model (RGB + depth).
    model_F = get_model(args.arch_F, True)  # concat and output
    model_F = torch.nn.DataParallel(
        model_F, device_ids=range(torch.cuda.device_count()))

    # Setup the optional validity-map refinement model.
    model_map = None
    if args.arch_map == 'map_conv':
        model_map = get_model(args.arch_map, True)  # concat and output
        model_map = torch.nn.DataParallel(
            model_map, device_ids=range(torch.cuda.device_count()))

    if args.model_full_name != '':
        # Use the full checkpoint name to load.
        print("Load training model: " + args.model_full_name)
        checkpoint = torch.load(
            pjoin(args.model_savepath, args.model_full_name))
        model_F.load_state_dict(checkpoint['model_F_state'])
        # BUGFIX: only load map weights when the map model exists; the
        # original loaded unconditionally and raised NameError whenever
        # arch_map != 'map_conv'.
        if model_map is not None:
            model_map.load_state_dict(checkpoint["model_map_state"])

    if args.imgset:
        _test_on_dataset(args, model_F, model_map)
    else:
        _test_on_folder(args, model_F, model_map)


def _test_on_dataset(args, model_F, model_map):
    """Run evaluation over args.dataset / args.test_split, saving outputs
    to args.testset_out_path and printing/saving the error statistics."""
    print("Test on dataset: {}".format(args.dataset))
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    v_loader = data_loader(data_path, split=args.test_split,
                           img_size=(args.img_rows, args.img_cols),
                           img_norm=args.img_norm)
    evalloader = data.DataLoader(v_loader, batch_size=1)
    print("Finish Loader Setup")

    model_F.cuda()
    model_F.eval()
    if args.arch_map == 'map_conv':
        model_map.cuda()
        model_map.eval()

    sum_mean, sum_median, sum_small, sum_mid, sum_large, sum_num = [], [], [], [], [], []
    evalcount = 0
    with torch.no_grad():
        for i_val, (images_val, labels_val, masks_val, valids_val,
                    depthes_val, meshdepthes_val) in tqdm(enumerate(evalloader)):
            images_val = Variable(images_val.contiguous().cuda())
            labels_val = Variable(labels_val.contiguous().cuda())
            masks_val = Variable(masks_val.contiguous().cuda())
            valids_val = Variable(valids_val.contiguous().cuda())
            depthes_val = Variable(depthes_val.contiguous().cuda())

            if args.arch_map == 'map_conv':
                # Refine the validity map, then fuse.
                outputs_valid = model_map(
                    torch.cat((depthes_val, valids_val[:, np.newaxis, :, :]),
                              dim=1))
                outputs, outputs1, outputs2, outputs3, output_d = model_F(
                    images_val, depthes_val, outputs_valid.squeeze(1))
            else:
                outputs, outputs1, outputs2, outputs3, output_d = model_F(
                    images_val, depthes_val, valids_val)

            outputs_n, pixelnum, mean_i, median_i, small_i, mid_i, large_i = \
                eval_normal_pixel(outputs, labels_val, masks_val)

            # Back to HWC numpy for saving.
            outputs_norm = np.squeeze(outputs_n.data.cpu().numpy(), axis=0)
            labels_val_norm = np.squeeze(labels_val.data.cpu().numpy(), axis=0)
            images_val = np.squeeze(images_val.data.cpu().numpy(), axis=0)
            images_val = (images_val + 0.5).transpose(1, 2, 0)
            depthes_val = np.squeeze(depthes_val.data.cpu().numpy(), axis=0)
            depthes_val = np.repeat(
                np.transpose(depthes_val, [1, 2, 0]), 3, axis=2)

            outputs_norm = change_channel(outputs_norm)
            labels_val_norm = change_channel((labels_val_norm + 1) / 2)

            misc.imsave(
                pjoin(args.testset_out_path,
                      "{}_MS_hyb.png".format(i_val + 1)), outputs_norm)
            misc.imsave(
                pjoin(args.testset_out_path,
                      "{}_gt.png".format(i_val + 1)), labels_val_norm)
            misc.imsave(
                pjoin(args.testset_out_path,
                      "{}_in.jpg".format(i_val + 1)), images_val)
            misc.imsave(
                pjoin(args.testset_out_path,
                      "{}_depth.png".format(i_val + 1)), depthes_val)

            # Accumulate per-image metrics, skipping NaN/Inf images.
            # BUGFIX: the original condition
            #   (np.isnan(mean_i)) | (np.isinf(mean_i)) == False
            # parses as isnan(mean_i) | (isinf(mean_i) == False) because ==
            # binds tighter than |, so NaN images slipped into the averages.
            if not (np.isnan(mean_i) or np.isinf(mean_i)):
                sum_mean.append(mean_i)
                sum_median.append(median_i)
                sum_small.append(small_i)
                sum_mid.append(mid_i)
                sum_large.append(large_i)
                sum_num.append(pixelnum)
                evalcount += 1
                if (i_val + 1) % 10 == 0:
                    print(
                        "Iteration %d Evaluation Loss: mean %.4f, median %.4f, 11.25 %.4f, 22.5 %.4f, 30 %.4f"
                        % (i_val + 1, mean_i, median_i, small_i, mid_i, large_i))

        # Summarize the result (image-level averages appended to each list).
        eval_print(sum_mean, sum_median, sum_small, sum_mid, sum_large,
                   sum_num, item='Pixel-Level')
        avg_mean = sum(sum_mean) / evalcount
        sum_mean.append(avg_mean)
        avg_median = sum(sum_median) / evalcount
        sum_median.append(avg_median)
        avg_small = sum(sum_small) / evalcount
        sum_small.append(avg_small)
        avg_mid = sum(sum_mid) / evalcount
        sum_mid.append(avg_mid)
        avg_large = sum(sum_large) / evalcount
        sum_large.append(avg_large)
        print(
            "evalnum is %d, Evaluation Image-Level Mean Loss: mean %.4f, median %.4f, 11.25 %.4f, 22.5 %.4f, 30 %.4f"
            % (evalcount, avg_mean, avg_median, avg_small, avg_mid, avg_large))

        sum_matrix = np.transpose(
            [sum_mean, sum_median, sum_small, sum_mid, sum_large])
        if args.model_full_name != '':
            sum_file = args.model_full_name[:-4] + '.csv'
            np.savetxt(pjoin(args.model_savepath, sum_file), sum_matrix,
                       fmt='%.6f', delimiter=',')
            print("Saving to %s" % (sum_file))
    # end of dataset test


def _test_on_folder(args, model_F, model_map):
    """Predict a normal map for every .jpg in args.img_path, reading the
    matching depth .png from args.depth_path, writing to args.out_path."""
    if not os.path.isdir(args.out_path):
        os.mkdir(args.out_path)
    print("Read Input Image from : {}".format(args.img_path))
    for i in os.listdir(args.img_path):
        if not i.endswith('.jpg'):
            continue
        # BUGFIX: was the Python 2 statement `print i` (SyntaxError on py3).
        print(i)
        input_f = args.img_path + i
        depth_f = args.depth_path + i[:-4] + '.png'
        output_f = args.out_path + i[:-4] + '_rgbd.png'

        img = misc.imread(input_f)
        orig_size = img.shape[:-1]
        if args.img_rot:
            img = np.transpose(img, (1, 0, 2))
            img = np.flipud(img)
            # Resize the (rotated) image to the model input size.
            img = misc.imresize(img, (args.img_cols, args.img_rows))
        else:
            # Resize the image to the model input size.
            img = misc.imresize(img, (args.img_rows, args.img_cols))
        img = img.astype(float)  # np.float is deprecated; plain float is identical
        if args.img_norm:
            img = (img - 128) / 255
        # NHWC -> NCHW, plus batch dimension.
        img = img.transpose(2, 0, 1)
        img = np.expand_dims(img, 0)
        img = torch.from_numpy(img).float()

        depth = png_reader_32bit(depth_f, (args.img_rows, args.img_cols))
        if args.img_rot:
            depth = np.transpose(depth, (1, 0))
            depth = np.flipud(depth)
        depth = depth.astype(float)
        # Scale so that scaled_depth = 1 corresponds to 10 m real depth
        # (matterport: depth/40000, scannet: depth/10000).
        depth = depth / (args.d_scale)
        if depth.ndim == 3:  # collapse to 2-D
            depth = depth[:, :, 0]
        valid = (depth > 0.0001).astype(float)
        depth = depth[np.newaxis, :, :]
        depth = np.expand_dims(depth, 0)
        valid = np.expand_dims(valid, 0)
        depth = torch.from_numpy(depth).float()
        valid = torch.from_numpy(valid).float()

        if torch.cuda.is_available():
            model_F.cuda()
            model_F.eval()
            if args.arch_map == 'map_conv':
                model_map.cuda()
                model_map.eval()
            images = Variable(img.contiguous().cuda())
            depth = Variable(depth.contiguous().cuda())
            valid = Variable(valid.contiguous().cuda())
        else:
            images = Variable(img)
            depth = Variable(depth)
            valid = Variable(valid)

        with torch.no_grad():
            if args.arch_map == 'map_conv':
                outputs_valid = model_map(
                    torch.cat((depth, valid[:, np.newaxis, :, :]), dim=1))
                outputs, outputs1, outputs2, outputs3, output_d = model_F(
                    images, depth, outputs_valid.squeeze(1))
            else:
                # BUGFIX: the original passed the undefined name
                # 'outputs_valid' here (NameError); the raw validity map is
                # the intended input, matching the dataset branch above.
                outputs, outputs1, outputs2, outputs3, output_d = model_F(
                    images, depth, valid)

        outputs_norm = norm_imsave(outputs)
        outputs_norm = np.squeeze(outputs_norm.data.cpu().numpy(), axis=0)
        # outputs_norm = misc.imresize(outputs_norm, orig_size)
        outputs_norm = change_channel(outputs_norm)
        misc.imsave(output_f, outputs_norm)
        print("Complete")