def read_img(path, img_size=None, no_process=False, only_resize=False):
    '''
    Load an image from `path`, optionally resizing and transforming it.

    inputs:
    path        - location handed to PIL.Image.open
    img_size    - target size passed to PIL's resize (BICUBIC); ignored
                  when no_process is set
    no_process  - return the image exactly as opened (no RGB conversion,
                  no resize, no transform)
    only_resize - skip the m_preprocess transform, keep the PIL image

    outputs:
    img, raw_sz - the image (PIL image, or whatever the transform returns)
                  and its (width, height). Note the size is read after
                  any resize has been applied.
    '''
    # The transform is built up front, before the file is opened.
    transform = m_preprocess.get_transform()
    img = PIL.Image.open(path)

    if no_process:
        width, height = img.size
        return img, (width, height)

    img = img.convert('RGB')
    if img_size is not None:
        img = img.resize(img_size, PIL.Image.BICUBIC)

    # Size must be captured while img is still a PIL image.
    width, height = img.size
    if not only_resize:
        img = transform(img)

    return img, (width, height)
def _read_img(p_data, indx, img_size=None, no_process=False, only_resize=False):
    '''
    Fetch frame `indx` through p_data.get_cam2 and optionally process it.

    inputs:
    p_data      - object exposing get_cam2(indx); the returned image must
                  offer .size and .resize (presumably a PIL image - confirm
                  against the loader that builds p_data)
    indx        - frame index forwarded to get_cam2
    img_size    - target size for a BICUBIC resize; ignored when
                  no_process is set
    no_process  - return the frame untouched
    only_resize - skip the m_preprocess transform

    outputs:
    img, raw_sz - the (possibly transformed) image and its (width, height),
                  read after any resize has been applied.
    '''
    transform = m_preprocess.get_transform()
    img = p_data.get_cam2(indx)

    if not no_process and img_size is not None:
        img = img.resize(img_size, PIL.Image.BICUBIC)

    # Size must be captured before the transform replaces the image object.
    width, height = img.size

    if not (no_process or only_resize):
        img = transform(img)

    return img, (width, height)
def __getitem__(self, indx):
    r'''
    Load the image at position `indx` of self.img_seq_paths.

    outputs (dict):
    'img'        - the resized image after either the m_preprocess
                   transform (self.if_preprocess) or a plain ToTensor,
                   with a leading dimension added
    'img_gray'   - self.to_gray(resized image), with a leading dimension
    'scene_path' - directory part of the image path
    'img_path'   - the image path itself
    '''
    img_path = self.img_seq_paths[indx]
    normalize = m_preprocess.get_transform()
    to_tensor = m_preprocess.to_tensor()

    frame = read_img(img_path, no_process=True)[0]
    frame = frame.resize(self.img_size, PIL.Image.NEAREST)
    # Gray version is derived from the resized image before any transform.
    gray = self.to_gray(frame)

    if self.if_preprocess:
        frame = normalize(frame)
    else:
        to_tensor = tfv_transform.ToTensor()
        frame = to_tensor(frame)

    # image path #
    scene_path = os.path.dirname(img_path)

    sample = {
        'img': frame.unsqueeze_(0),
        'img_gray': gray.unsqueeze_(0),
        'scene_path': scene_path,
        'img_path': img_path,
    }
    return sample
def __getitem__(self, indx):
    '''
    Load frame `indx`: RGB image, optional ground-truth depth and pose.

    outputs (dict):
    'img', 'img_dw'          - image at self.img_size and at
                               self.img_size * self.resize_dmap
    'img_gray'               - self.to_gray(image)
    'dmap'                   - depth at the down-scaled size (index map
                               when self.digitize, metric otherwise)
    'dmap_raw'               - masked depth, NEAREST down-scaled
    'dmap_raw_bilinear_dw'   - masked depth, BILINEAR down-scaled
    'dmap_rawsize'           - depth at the size it was read
    'dmap_imgsize'           - masked depth at self.img_size
    'dmap_imgsize_digit'     - dmap_imgsize indexed with self.d_candi
    'dmap_up4_imgsize_digit' - dmap_imgsize indexed with self.dup4_candi
    'dmap_mask', 'dmap_mask_imgsize' - 1 where raw depth >= 0.01
    'extM'                   - T_cam2_imu @ inv(self.poses[indx])
    'scene_path', 'img_path' - taken from self.p_data
    Every depth entry is the int -1 when _read_dimg returned -1.

    NOTE(review): the self.resize_dmap is None path looks unusable as
    written - dmap_imgsize / dmap_raw_bilinear_dw / dmap_mask are only
    bound inside the resize branch, and the crop and return code use
    img_dw and resize_dmap unconditionally. Confirm callers always set it.
    NOTE(review): with digitize off and no crop, several returned keys
    alias one tensor, so the in-place unsqueeze_ calls in the return dict
    stack on it. Left unchanged; confirm whether downstream relies on it.
    '''
    dmap_path = self.dmap_seq_paths[indx]
    proc_normalize = m_preprocess.get_transform()
    proc_totensor = m_preprocess.to_tensor()

    # read rgb image #
    img = _read_img(self.p_data, indx, no_process=True)[0]
    img = img.resize(self.img_size, PIL.Image.NEAREST)

    if self.resize_dmap is not None:
        img_dw = img.resize(
            [int(self.img_size[0] * self.resize_dmap),
             int(self.img_size[1] * self.resize_dmap)],
            PIL.Image.NEAREST)
    else:
        img_dw = None

    img_gray = self.to_gray(img)

    # read GT depth map (if available) #
    dmap_raw = _read_dimg(dmap_path, no_process=True)[0]

    # -1 is the "no depth" sentinel. Decide once, by value, while dmap_raw
    # is still the loader's return value: identity tests against an int
    # literal are implementation-dependent, and dmap_raw is rebound to a
    # tensor further down.
    has_dmap = not (isinstance(dmap_raw, int) and dmap_raw == -1)

    if has_dmap:
        # Invalid-pixel mask (raw depth < 0.01), kept as a 0/255 image so
        # it can be resized with PIL alongside the depth map.
        dmap_mask_imgsize = np.array(dmap_raw, dtype=int).astype(np.float32) < 0.01
        dmap_mask_imgsize = PIL.Image.fromarray(
            dmap_mask_imgsize.astype(np.uint8) * 255).resize(
                [self.img_size[0], self.img_size[1]], PIL.Image.NEAREST)

        if self.resize_dmap is not None:
            dmap_imgsize = dmap_raw.resize(
                [self.img_size[0], self.img_size[1]], PIL.Image.NEAREST)
            dmap_imgsize = proc_totensor(dmap_imgsize)[0, :, :].float() / 256.
            dmap_rawsize = proc_totensor(dmap_raw)[0, :, :].float() / 256.

            # resize the depth map #
            dmap_size = [int(float(self.img_size[0]) * self.resize_dmap),
                         int(float(self.img_size[1]) * self.resize_dmap)]
            dmap_raw_bilinear_dw = dmap_raw.resize(dmap_size, PIL.Image.BILINEAR)
            dmap_raw = dmap_raw.resize(dmap_size, PIL.Image.NEAREST)
            dmap_mask = dmap_mask_imgsize.resize(dmap_size, PIL.Image.NEAREST)

        dmap_raw = proc_totensor(dmap_raw)[0, :, :]  # single-channel for depth map
        dmap_raw = dmap_raw.float() / 256.  # scale to meter
        dmap_raw_bilinear_dw = proc_totensor(dmap_raw_bilinear_dw)[0, :, :]
        dmap_raw_bilinear_dw = dmap_raw_bilinear_dw.float() / 256.

        if self.resize_dmap is None:
            dmap_rawsize = dmap_raw

        # Flip "invalid" masks into "valid" masks. Written as `== 0`
        # rather than `1 - mask` because subtracting from a bool tensor is
        # rejected by recent PyTorch; values are the same either way.
        dmap_mask = (proc_totensor(dmap_mask) > 0) == 0
        dmap_mask_imgsize = (proc_totensor(dmap_mask_imgsize) > 0) == 0

        # Zero out invalid pixels.
        dmap_raw = dmap_raw * dmap_mask.squeeze().type_as(dmap_raw)
        dmap_raw_bilinear_dw = dmap_raw_bilinear_dw * dmap_mask.squeeze().type_as(dmap_raw)
        dmap_imgsize = dmap_imgsize * dmap_mask_imgsize.squeeze().type_as(dmap_imgsize)

        if self.digitize:
            # digitize the depth map #
            dmap = mloader_misc.dMap_to_indxMap(dmap_raw, self.d_candi)
            dmap[dmap >= self.label_max] = self.label_max
            dmap[dmap <= self.label_min] = self.label_min
            dmap = torch.from_numpy(dmap)

            dmap_imgsize_digit = mloader_misc.dMap_to_indxMap(dmap_imgsize, self.d_candi)
            dmap_imgsize_digit[dmap_imgsize_digit >= self.label_max] = self.label_max
            dmap_imgsize_digit[dmap_imgsize_digit <= self.label_min] = self.label_min
            dmap_imgsize_digit = torch.from_numpy(dmap_imgsize_digit)

            dmap_up4_imgsize_digit = mloader_misc.dMap_to_indxMap(dmap_imgsize, self.dup4_candi)
            dmap_up4_imgsize_digit[dmap_up4_imgsize_digit >= self.dup4_label_max] = self.dup4_label_max
            dmap_up4_imgsize_digit[dmap_up4_imgsize_digit <= self.dup4_label_min] = self.dup4_label_min
            dmap_up4_imgsize_digit = torch.from_numpy(dmap_up4_imgsize_digit)
        else:
            dmap = dmap_raw
            dmap_imgsize_digit = dmap_imgsize
            dmap_up4_imgsize_digit = dmap_imgsize

    if self.if_preprocess:
        img = proc_normalize(img)
        if self.resize_dmap is not None:
            img_dw = proc_normalize(img_dw)
    else:
        proc_totensor = tfv_transform.ToTensor()
        img = proc_totensor(img)
        if self.resize_dmap is not None:
            img_dw = proc_totensor(img_dw)

    if self.crop_w is not None:
        # Symmetric crop along the last (width) axis; the *_dw margin is
        # the same crop expressed at the down-scaled resolution.
        side_crop = int((self.img_size[0] - self.crop_w) / 2)
        side_crop_dw = int(side_crop * self.resize_dmap)
        img_size = self.img_size

        img = img[:, :, side_crop: img_size[0] - side_crop]
        img_dw = img_dw[:, :, side_crop_dw: img_dw.shape[-1] - side_crop_dw]
        img_gray = img_gray[:, :, side_crop: img_size[0] - side_crop]

        if has_dmap:
            dmap = dmap[:, side_crop_dw: (dmap.shape[1] - side_crop_dw)]
            dmap_raw = dmap_raw[:, side_crop_dw: (dmap_raw.shape[1] - side_crop_dw)]
            dmap_raw_bilinear_dw = dmap_raw_bilinear_dw[
                :, side_crop_dw: (dmap_raw_bilinear_dw.shape[1] - side_crop_dw)]
            dmap_rawsize = dmap_rawsize[:, side_crop: (dmap_rawsize.shape[1] - side_crop)]
            dmap_imgsize = dmap_imgsize[:, side_crop: (dmap_imgsize.shape[1] - side_crop)]
            dmap_imgsize_digit = dmap_imgsize_digit[
                :, side_crop: (dmap_imgsize_digit.shape[1] - side_crop)]
            dmap_up4_imgsize_digit = dmap_up4_imgsize_digit[
                :, side_crop: (dmap_up4_imgsize_digit.shape[1] - side_crop)]
            dmap_mask = dmap_mask[:, :, side_crop_dw: (dmap_mask.shape[-1] - side_crop_dw)]
            dmap_mask_imgsize = dmap_mask_imgsize[
                :, :, side_crop: (dmap_mask_imgsize.shape[-1] - side_crop)]

    # read extrinsics #
    # IMU to camera #
    M_imu2cam = self.p_data.calib.T_cam2_imu
    extM = np.matmul(M_imu2cam, np.linalg.inv(self.poses[indx]))

    # image path #
    scene_path = self.p_data.calib_path
    img_path = self.p_data.cam2_files[indx]

    return {
        'img': img.unsqueeze_(0),
        'img_dw': img_dw.unsqueeze_(0),
        'dmap': dmap.unsqueeze_(0) if has_dmap else -1,
        'dmap_raw': dmap_raw.unsqueeze_(0) if has_dmap else -1,
        'dmap_raw_bilinear_dw': dmap_raw_bilinear_dw.unsqueeze_(0) if has_dmap else -1,
        'dmap_rawsize': dmap_rawsize.unsqueeze_(0) if has_dmap else -1,
        'dmap_imgsize': dmap_imgsize.unsqueeze_(0) if has_dmap else -1,
        'dmap_imgsize_digit': dmap_imgsize_digit.unsqueeze_(0) if has_dmap else -1,
        'dmap_up4_imgsize_digit': dmap_up4_imgsize_digit.unsqueeze_(0) if has_dmap else -1,
        'dmap_mask': dmap_mask.unsqueeze_(0).type_as(dmap) if has_dmap else -1,
        'dmap_mask_imgsize': dmap_mask_imgsize.unsqueeze_(0).type_as(dmap) if has_dmap else -1,
        'img_gray': img_gray.unsqueeze_(0),
        'extM': extM,
        'scene_path': scene_path,
        'img_path': img_path,
    }
def __getitem__(self, indx):
    '''
    Load frame `indx`: RGB image, depth map and camera pose from files.

    outputs (dict):
    'img'                    - image at self.img_size, normalized by the
                               m_preprocess transform or plain ToTensor
    'img_gray'               - self.to_gray(image)
    'dmap'                   - depth at the down-scaled size (index map
                               when self.digitize, metric otherwise)
    'dmap_raw'               - masked depth, NEAREST down-scaled
    'dmap_raw_bilinear_dw'   - masked depth, BILINEAR down-scaled
    'dmap_rawsize'           - depth at the size it was read
    'dmap_imgsize'           - masked depth at self.img_size
    'dmap_imgsize_digit'     - dmap_imgsize indexed with self.d_candi
    'dmap_up4_imgsize_digit' - dmap_imgsize indexed with self.dup4_candi
    'dmap_mask', 'dmap_mask_imgsize' - 1 where raw depth >= 0.01
    'extM'                   - result of read_ExtM_from_txt on the pose file
    'scene_path', 'img_path' - directory of the image path, and the path

    NOTE(review): the self.resize_dmap is None path looks unusable as
    written - dmap_imgsize / dmap_raw_bilinear_dw / dmap_mask are only
    bound inside the resize branch. Confirm callers always set it.
    NOTE(review): with digitize off, several returned keys alias one
    tensor, so the in-place unsqueeze_ calls in the return dict stack on
    it. Left unchanged; confirm whether downstream relies on it.
    '''
    img_path = self.img_seq_paths[indx]
    dmap_path = self.dmap_seq_paths[indx]
    cam_pose_path = self.cam_pose_seq_paths[indx]

    proc_normalize = m_preprocess.get_transform()
    proc_totensor = m_preprocess.to_tensor()

    img = read_img(img_path, no_process=True)[0]
    img = img.resize(self.img_size, PIL.Image.NEAREST)
    img_gray = self.to_gray(img)

    dmap_raw = read_img(dmap_path, no_process=True)[0]

    # Invalid-pixel mask (raw depth < 0.01), kept as a 0/255 image so it
    # can be resized with PIL alongside the depth map.
    dmap_mask_imgsize = np.asarray(dmap_raw) < 0.01
    dmap_mask_imgsize = PIL.Image.fromarray(
        dmap_mask_imgsize.astype(np.uint8) * 255).resize(
            [self.img_size[0], self.img_size[1]], PIL.Image.NEAREST)

    if self.resize_dmap is not None:
        dmap_imgsize = dmap_raw.resize(
            [self.img_size[0], self.img_size[1]], PIL.Image.NEAREST)
        dmap_imgsize = proc_totensor(dmap_imgsize)[0, :, :].float() * .001
        dmap_rawsize = proc_totensor(dmap_raw)[0, :, :].float() * .001

        # resize the depth map #
        dmap_size = [
            int(float(self.img_size[0]) * self.resize_dmap),
            int(float(self.img_size[1]) * self.resize_dmap)
        ]
        dmap_raw_bilinear_dw = dmap_raw.resize(dmap_size, PIL.Image.BILINEAR)
        dmap_raw = dmap_raw.resize(dmap_size, PIL.Image.NEAREST)
        dmap_mask = dmap_mask_imgsize.resize(dmap_size, PIL.Image.NEAREST)

    dmap_raw = proc_totensor(dmap_raw)[0, :, :]  # single-channel for depth map
    dmap_raw = dmap_raw.float() * .001  # scale to meter
    dmap_raw_bilinear_dw = proc_totensor(dmap_raw_bilinear_dw)[0, :, :]
    dmap_raw_bilinear_dw = dmap_raw_bilinear_dw.float() * .001

    if self.resize_dmap is None:
        dmap_rawsize = dmap_raw

    # Flip "invalid" masks into "valid" masks. Written as `== 0` rather
    # than `1 - mask` because subtracting from a bool tensor is rejected
    # by recent PyTorch; values are the same either way.
    dmap_mask = (proc_totensor(dmap_mask) > 0) == 0
    dmap_mask_imgsize = (proc_totensor(dmap_mask_imgsize) > 0) == 0

    # Zero out invalid pixels.
    dmap_raw = dmap_raw * dmap_mask.squeeze().type_as(dmap_raw)
    dmap_raw_bilinear_dw = dmap_raw_bilinear_dw * dmap_mask.squeeze().type_as(dmap_raw)
    dmap_imgsize = dmap_imgsize * dmap_mask_imgsize.squeeze().type_as(dmap_imgsize)

    if self.digitize:
        # digitize the depth map #
        dmap = mloader_misc.dMap_to_indxMap(dmap_raw, self.d_candi)
        dmap[dmap >= self.label_max] = self.label_max
        dmap[dmap <= self.label_min] = self.label_min
        dmap = torch.from_numpy(dmap)

        dmap_imgsize_digit = mloader_misc.dMap_to_indxMap(dmap_imgsize, self.d_candi)
        dmap_imgsize_digit[dmap_imgsize_digit >= self.label_max] = self.label_max
        dmap_imgsize_digit[dmap_imgsize_digit <= self.label_min] = self.label_min
        dmap_imgsize_digit = torch.from_numpy(dmap_imgsize_digit)

        dmap_up4_imgsize_digit = mloader_misc.dMap_to_indxMap(dmap_imgsize, self.dup4_candi)
        dmap_up4_imgsize_digit[dmap_up4_imgsize_digit >= self.dup4_label_max] = self.dup4_label_max
        dmap_up4_imgsize_digit[dmap_up4_imgsize_digit <= self.dup4_label_min] = self.dup4_label_min
        dmap_up4_imgsize_digit = torch.from_numpy(dmap_up4_imgsize_digit)
    else:
        dmap = dmap_raw
        dmap_imgsize_digit = dmap_imgsize
        dmap_up4_imgsize_digit = dmap_imgsize

    if self.if_preprocess:
        img = proc_normalize(img)
    else:
        proc_totensor = tfv_transform.ToTensor()
        img = proc_totensor(img)

    # extrinsics #
    extM = read_ExtM_from_txt(cam_pose_path)

    # image path #
    scene_path = os.path.split(img_path)[0]

    return {
        'img': img.unsqueeze_(0),
        'dmap': dmap.unsqueeze_(0),
        'dmap_raw': dmap_raw.unsqueeze_(0),
        'dmap_raw_bilinear_dw': dmap_raw_bilinear_dw.unsqueeze_(0),
        'dmap_rawsize': dmap_rawsize.unsqueeze_(0),
        'dmap_imgsize': dmap_imgsize.unsqueeze_(0),
        'dmap_imgsize_digit': dmap_imgsize_digit.unsqueeze_(0),
        'dmap_up4_imgsize_digit': dmap_up4_imgsize_digit.unsqueeze_(0),
        'img_gray': img_gray.unsqueeze_(0),
        'dmap_mask': dmap_mask.unsqueeze_(0).type_as(dmap),
        'dmap_mask_imgsize': dmap_mask_imgsize.unsqueeze_(0).type_as(dmap),
        'extM': extM,
        'scene_path': scene_path,
        'img_path': img_path,
    }