def _preprocess_depth_dataset(ds_name):
    """Back-project every depth image of dataset *ds_name* into a point cloud
    with normals and cache the result as ``<i>.npy`` next to the dataset.

    The cache exists to prevent repeated calculations while batch-loading
    during training, as the back-projection + normal estimation takes a
    considerable amount of time per batch.

    Parameters
    ----------
    ds_name : str
        Dataset folder / intrinsic-matrix key (e.g. 'openDR', 'CrankSlider').
        Assumes layout ``./datasets/<ds_name>/<ds_name>_dataset/depth/<i>.png``
        with 16-bit depth images named 0.png .. N-1.png.
    """
    ds_root = './datasets/{0}/{0}_dataset'.format(ds_name)
    depth_dir = os.path.join(ds_root, 'depth')
    # enumerate() is used only to count the files; images are assumed to be
    # consecutively named 0.png .. N-1.png — TODO confirm against the dataset.
    for i, _ in enumerate(listdir(depth_dir)):
        with Image.open(os.path.join(depth_dir, '{}.png'.format(i))) as dpt_im:
            dpt = np.array(dpt_im)
        # Scale 16-bit depth (0..65535) down to metres in [0, 3].
        dpt = dpt / 65535 * 3.0
        # Back-projection util function (depth map -> point cloud + pixel filter).
        cld, choose = bs_utils.dpt_2_cld(dpt, 1, cfg.intrinsic_matrix[ds_name])
        normals = get_normal(cld)
        all_arr = np.concatenate(
            (cld, choose.reshape(choose.shape[0], 1), normals[:, :3]), axis=1)
        # Write cloud, choose-filter and normals in npy files for retrieval
        # in the dataloader.
        with open(os.path.join(ds_root, '{}.npy'.format(i)), 'wb') as f:
            print('Writing file ' + str(i))
            np.save(f, all_arr)


_preprocess_depth_dataset('openDR')
_preprocess_depth_dataset('CrankSlider')
class LM_Dataset():
    """LineMOD dataset for PVN3D-style training and evaluation.

    Loads RGB-D frames (real, rendered and fused synthetic data for training;
    real test frames or a preprocessed pickle for evaluation), back-projects
    depth into a point cloud with estimated normals, and builds per-point
    keypoint / center offset targets.

    Parameters
    ----------
    dataset_name : str
        'train' for the augmented training split, anything else for test.
    cls_type : str
        LineMOD object class name (e.g. "duck").
    """

    def __init__(self, dataset_name, cls_type="duck"):
        self.config = Config(dataset_name='linemod', cls_type=cls_type)
        self.bs_utils = Basic_Utils(self.config)
        self.dataset_name = dataset_name
        # Per-pixel row / column index maps for a 640x480 image.
        self.xmap = np.array([[j for i in range(640)] for j in range(480)])
        self.ymap = np.array([[i for i in range(640)] for j in range(480)])
        self.trancolor = transforms.ColorJitter(0.2, 0.2, 0.2, 0.05)
        # ImageNet normalization constants.
        # FIX: std[2] was 0.224 (typo); the standard value is 0.225.
        self.norm = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        self.obj_dict = self.config.lm_obj_dict
        self.cls_type = cls_type
        self.cls_id = self.obj_dict[cls_type]
        print("cls_id in lm_dataset.py", self.cls_id)
        self.root = os.path.join(self.config.lm_root, 'Linemod_preprocessed')
        self.cls_root = os.path.join(self.root, "data/%02d/" % self.cls_id)
        self.rng = np.random
        # FIX: close the yaml file (was leaked) and use safe_load instead of
        # the Loader-less, deprecated/unsafe yaml.load.
        with open(os.path.join(self.cls_root, 'gt.yml'), "r") as meta_file:
            self.meta_lst = yaml.safe_load(meta_file)
        if dataset_name == 'train':
            self.add_noise = True
            real_img_pth = os.path.join(self.cls_root, "train.txt")
            self.real_lst = self.bs_utils.read_lines(real_img_pth)
            rnd_img_pth = os.path.join(
                self.root, "renders/{}/file_list.txt".format(cls_type))
            self.rnd_lst = self.bs_utils.read_lines(rnd_img_pth)
            fuse_img_pth = os.path.join(
                self.root, "fuse/{}/file_list.txt".format(cls_type))
            try:
                self.fuse_lst = self.bs_utils.read_lines(fuse_img_pth)
            except OSError:  # no fuse dataset: fall back to render list
                self.fuse_lst = self.rnd_lst
            self.all_lst = self.real_lst + self.rnd_lst + self.fuse_lst
        else:
            self.add_noise = False
            self.pp_data = None
            if os.path.exists(self.config.preprocessed_testset_pth
                              ) and self.config.use_preprocess:
                print('Loading valtestset.')
                with open(self.config.preprocessed_testset_pth, 'rb') as f:
                    self.pp_data = pkl.load(f)
                self.all_lst = [i for i in range(len(self.pp_data))]
                print('Finish loading valtestset.')
            else:
                tst_img_pth = os.path.join(self.cls_root, "test.txt")
                self.tst_lst = self.bs_utils.read_lines(tst_img_pth)
                self.all_lst = self.tst_lst
        print("{}_dataset_size: ".format(dataset_name), len(self.all_lst))

    def real_syn_gen(self, real_ratio=0.3):
        """Randomly pick a training item: real with probability *real_ratio*,
        otherwise a fused (40%) or rendered (60%) synthetic item."""
        if self.rng.rand() < real_ratio:  # real
            n_imgs = len(self.real_lst)
            idx = self.rng.randint(0, n_imgs)
            pth = self.real_lst[idx]
            return pth
        else:
            fuse_ratio = 0.4
            if self.rng.rand() < fuse_ratio:
                idx = self.rng.randint(0, len(self.fuse_lst))
                pth = self.fuse_lst[idx]
            else:
                idx = self.rng.randint(0, len(self.rnd_lst))
                pth = self.rnd_lst[idx]
            return pth

    def real_gen(self):
        """Return a uniformly random item name from the real-image list."""
        idx = self.rng.randint(0, len(self.real_lst))
        item = self.real_lst[idx]
        return item

    def rand_range(self, rng, lo, hi):
        """Uniform random float in [lo, hi)."""
        return rng.rand() * (hi - lo) + lo

    def gaussian_noise(self, rng, img, sigma):
        """Add gaussian noise of given sigma to image."""
        img = img + rng.randn(*img.shape) * sigma
        img = np.clip(img, 0, 255).astype('uint8')
        return img

    def linear_motion_blur(self, img, angle, length):
        """Apply a linear motion-blur kernel.

        :param angle: blur direction in degrees
        :param length: blur length in pixels
        """
        rad = np.deg2rad(angle)
        dx = np.cos(rad)
        dy = np.sin(rad)
        a = int(max(list(map(abs, (dx, dy)))) * length * 2)
        if a <= 0:
            return img
        kern = np.zeros((a, a))
        cx, cy = a // 2, a // 2
        dx, dy = list(map(int, (dx * length + cx, dy * length + cy)))
        cv2.line(kern, (cx, cy), (dx, dy), 1.0)
        s = kern.sum()
        if s == 0:
            kern[cx, cy] = 1.0
        else:
            kern /= s
        return cv2.filter2D(img, -1, kern)

    def rgb_add_noise(self, img):
        """Augment an RGB image: HSV jitter (always), plus occasional motion
        blur and gaussian blur."""
        rng = self.rng
        # apply HSV augmentor (rand() > 0 is effectively always true)
        if rng.rand() > 0:
            hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.uint16)
            hsv_img[:, :, 1] = hsv_img[:, :, 1] * self.rand_range(
                rng, 1 - 0.25, 1 + .25)
            hsv_img[:, :, 2] = hsv_img[:, :, 2] * self.rand_range(
                rng, 1 - .15, 1 + .15)
            hsv_img[:, :, 1] = np.clip(hsv_img[:, :, 1], 0, 255)
            hsv_img[:, :, 2] = np.clip(hsv_img[:, :, 2], 0, 255)
            img = cv2.cvtColor(hsv_img.astype(np.uint8), cv2.COLOR_HSV2BGR)
        if rng.rand() > 0.8:  # motion blur
            r_angle = int(rng.rand() * 360)
            r_len = int(rng.rand() * 15) + 1
            img = self.linear_motion_blur(img, r_angle, r_len)
        if rng.rand() > 0.8:
            if rng.rand() > 0.2:
                img = cv2.GaussianBlur(img, (3, 3), rng.rand())
            else:
                img = cv2.GaussianBlur(img, (5, 5), rng.rand())
        return np.clip(img, 0, 255).astype(np.uint8)

    def get_normal(self, cld):
        """Estimate per-point normals for a point cloud via PCL k-NN
        (k=50) normal estimation."""
        cloud = pcl.PointCloud()
        cld = cld.astype(np.float32)
        cloud.from_array(cld)
        ne = cloud.make_NormalEstimation()
        kdtree = cloud.make_kdtree()
        ne.set_SearchMethod(kdtree)
        ne.set_KSearch(50)
        n = ne.compute()
        n = n.to_array()
        return n

    def add_real_back(self, rgb, labels, dpt, dpt_msk):
        """Paste a random real background behind the rendered object.

        Pixels where *labels* <= 0 (background) are replaced by RGB/depth from
        a randomly drawn real frame, masked to that frame's own background.
        Returns the composited (rgb, dpt).
        """
        real_item = self.real_gen()
        with Image.open(
                os.path.join(self.cls_root,
                             "depth/{}.png".format(real_item))) as di:
            real_dpt = np.array(di)
        with Image.open(
                os.path.join(self.cls_root,
                             "mask/{}.png".format(real_item))) as li:
            bk_label = np.array(li)
        bk_label = (bk_label <= 0).astype(rgb.dtype)
        if len(bk_label.shape) < 3:
            bk_label_3c = np.repeat(bk_label[:, :, None], 3, 2)
        else:
            bk_label_3c = bk_label
            bk_label = bk_label[:, :, 0]
        with Image.open(
                os.path.join(self.cls_root,
                             "rgb/{}.png".format(real_item))) as ri:
            back = np.array(ri)[:, :, :3] * bk_label_3c
            back = back[:, :, ::-1].copy()
        dpt_back = real_dpt.astype(np.float32) * bk_label.astype(np.float32)
        msk_back = (labels <= 0).astype(rgb.dtype)
        msk_back = np.repeat(msk_back[:, :, None], 3, 2)
        # FIX: debug visualization was unconditional; only show when debugging.
        if DEBUG:
            imshow("msk_back", msk_back)
        rgb = rgb * (msk_back == 0).astype(rgb.dtype) + back * msk_back
        dpt = dpt * (dpt_msk > 0).astype(dpt.dtype) + \
            dpt_back * (dpt_msk <= 0).astype(dpt.dtype)
        return rgb, dpt

    def get_item(self, item_name):
        """Load one sample and build all network inputs/targets.

        Returns a tuple of torch tensors
        (rgb, pcld, cld_rgb_nrm, choose, kp_targ_ofst, ctr_targ_ofst,
        cls_ids, RTs, labels[, kp_3ds, ctr_3ds, ...]) or None when the sample
        is unusable (too few valid depth points, or loading failed).
        """
        try:
            if "pkl" in item_name:
                # Preprocessed synthetic sample stored as a pickle.
                data = pkl.load(open(item_name, "rb"))
                dpt = data['depth']
                rgb = data['rgb']
                labels = data['mask']
                K = data['K']
                RT = data['RT']
                rnd_typ = data['rnd_typ']
                if rnd_typ == "fuse":
                    labels = (labels == self.cls_id).astype("uint8")
                else:
                    labels = (labels > 0).astype("uint8")
                cam_scale = 1.0
            else:
                # Real LineMOD sample loaded from depth/mask/rgb PNGs.
                with Image.open(
                        os.path.join(self.cls_root,
                                     "depth/{}.png".format(item_name))) as di:
                    dpt = np.array(di)
                with Image.open(
                        os.path.join(self.cls_root,
                                     "mask/{}.png".format(item_name))) as li:
                    labels = np.array(li)
                    labels = (labels > 0).astype("uint8")
                with Image.open(
                        os.path.join(self.cls_root,
                                     "rgb/{}.png".format(item_name))) as ri:
                    if self.add_noise:
                        ri = self.trancolor(ri)
                    rgb = np.array(ri)[:, :, :3]
                meta = self.meta_lst[int(item_name)]
                if self.cls_id == 2:
                    # Scene 02 contains several objects; pick our own entry.
                    for i in range(0, len(meta)):
                        if meta[i]['obj_id'] == 2:
                            meta = meta[i]
                            break
                else:
                    meta = meta[0]
                R = np.resize(np.array(meta['cam_R_m2c']), (3, 3))
                T = np.array(meta['cam_t_m2c']) / 1000.0  # mm -> m
                RT = np.concatenate((R, T[:, None]), axis=1)
                rnd_typ = 'real'
                K = self.config.intrinsic_matrix["linemod"]
                cam_scale = 1000.0
            rgb = rgb[:, :, ::-1].copy()  # BGR <-> RGB channel flip
            msk_dp = dpt > 1e-6
            if len(labels.shape) > 2:
                labels = labels[:, :, 0]
            rgb_labels = labels.copy()
            if self.add_noise and rnd_typ == 'render':
                rgb = self.rgb_add_noise(rgb)
                rgb_labels = labels.copy()
                rgb, dpt = self.add_real_back(rgb, rgb_labels, dpt, msk_dp)
                if self.rng.rand() > 0.8:
                    rgb = self.rgb_add_noise(rgb)
            rgb = np.transpose(rgb, (2, 0, 1))  # hwc2chw
            cld, choose = self.bs_utils.dpt_2_cld(dpt, cam_scale, K)
            labels = labels.flatten()[choose]
            rgb_lst = []
            for ic in range(rgb.shape[0]):
                rgb_lst.append(rgb[ic].flatten()[choose].astype(np.float32))
            rgb_pt = np.transpose(np.array(rgb_lst), (1, 0)).copy()
            choose = np.array([choose])
            choose_2 = np.array([i for i in range(len(choose[0, :]))])
            if len(choose_2) < 400:
                # Too few valid depth points to form a useful sample.
                return None
            if len(choose_2) > self.config.n_sample_points:
                # Randomly subsample to n_sample_points.
                c_mask = np.zeros(len(choose_2), dtype=int)
                c_mask[:self.config.n_sample_points] = 1
                np.random.shuffle(c_mask)
                choose_2 = choose_2[c_mask.nonzero()]
            else:
                # Pad by wrapping so every sample has n_sample_points points.
                choose_2 = np.pad(
                    choose_2,
                    (0, self.config.n_sample_points - len(choose_2)), 'wrap')
            cld_rgb = np.concatenate((cld, rgb_pt), axis=1)
            cld_rgb = cld_rgb[choose_2, :]
            cld = cld[choose_2, :]
            normal = self.get_normal(cld)[:, :3]
            normal[np.isnan(normal)] = 0.0
            cld_rgb_nrm = np.concatenate((cld_rgb, normal), axis=1)
            choose = choose[:, choose_2]
            labels = labels[choose_2].astype(np.int32)
            # Per-object pose / keypoint / center target buffers.
            RTs = np.zeros((self.config.n_objects, 3, 4))
            kp3ds = np.zeros(
                (self.config.n_objects, self.config.n_keypoints, 3))
            ctr3ds = np.zeros((self.config.n_objects, 3))
            cls_ids = np.zeros((self.config.n_objects, 1))
            kp_targ_ofst = np.zeros(
                (self.config.n_sample_points, self.config.n_keypoints, 3))
            ctr_targ_ofst = np.zeros((self.config.n_sample_points, 3))
            # LineMOD is single-object per sample, hence the fixed [1].
            for i, cls_id in enumerate([1]):
                RTs[i] = RT
                r = RT[:, :3]
                t = RT[:, 3]
                ctr = self.bs_utils.get_ctr(self.cls_type,
                                            ds_type="linemod")[:, None]
                ctr = np.dot(ctr.T, r.T) + t
                ctr3ds[i, :] = ctr[0]
                msk_idx = np.where(labels == cls_id)[0]
                # Per-point offsets to the object center (only on object px).
                target_offset = np.array(np.add(cld, -1.0 * ctr3ds[i, :]))
                ctr_targ_ofst[msk_idx, :] = target_offset[msk_idx, :]
                cls_ids[i, :] = np.array([1])
                if self.config.n_keypoints == 8:
                    kp_type = 'farthest'
                else:
                    kp_type = 'farthest{}'.format(self.config.n_keypoints)
                kps = self.bs_utils.get_kps(self.cls_type,
                                            kp_type=kp_type,
                                            ds_type='linemod')
                kps = np.dot(kps, r.T) + t
                kp3ds[i] = kps
                # Per-point offsets to every keypoint.
                target = []
                for kp in kps:
                    target.append(np.add(cld, -1.0 * kp))
                target_offset = np.array(target).transpose(
                    1, 0, 2)  # [npts, nkps, c]
                kp_targ_ofst[msk_idx, :, :] = target_offset[msk_idx, :, :]
            # rgb, pcld, cld_rgb_nrm, choose, kp_targ_ofst, ctr_targ_ofst,
            # cls_ids, RTs, labels, kp_3ds, ctr_3ds
            if DEBUG:
                return torch.from_numpy(rgb.astype(np.float32)), \
                    torch.from_numpy(cld.astype(np.float32)), \
                    torch.from_numpy(cld_rgb_nrm.astype(np.float32)), \
                    torch.LongTensor(choose.astype(np.int32)), \
                    torch.from_numpy(kp_targ_ofst.astype(np.float32)), \
                    torch.from_numpy(ctr_targ_ofst.astype(np.float32)), \
                    torch.LongTensor(cls_ids.astype(np.int32)), \
                    torch.from_numpy(RTs.astype(np.float32)), \
                    torch.LongTensor(labels.astype(np.int32)), \
                    torch.from_numpy(kp3ds.astype(np.float32)), \
                    torch.from_numpy(ctr3ds.astype(np.float32)), \
                    torch.from_numpy(K.astype(np.float32)), \
                    torch.from_numpy(np.array(cam_scale).astype(np.float32))
            return torch.from_numpy(rgb.astype(np.float32)), \
                torch.from_numpy(cld.astype(np.float32)), \
                torch.from_numpy(cld_rgb_nrm.astype(np.float32)), \
                torch.LongTensor(choose.astype(np.int32)), \
                torch.from_numpy(kp_targ_ofst.astype(np.float32)), \
                torch.from_numpy(ctr_targ_ofst.astype(np.float32)), \
                torch.LongTensor(cls_ids.astype(np.int32)), \
                torch.from_numpy(RTs.astype(np.float32)), \
                torch.LongTensor(labels.astype(np.int32)), \
                torch.from_numpy(kp3ds.astype(np.float32)), \
                torch.from_numpy(ctr3ds.astype(np.float32)),
        except Exception:
            # FIX: was a bare except. The None return is intentional — the
            # training __getitem__ retries with a different sample.
            return None

    def __len__(self):
        return len(self.all_lst)

    def __getitem__(self, idx):
        if self.dataset_name == 'train':
            # Training items are drawn randomly; retry until one loads.
            item_name = self.real_syn_gen()
            data = self.get_item(item_name)
            while data is None:
                item_name = self.real_syn_gen()
                data = self.get_item(item_name)
            return data
        else:
            if self.pp_data is None or not self.config.use_preprocess:
                item_name = self.all_lst[idx]
                return self.get_item(item_name)
            else:
                data = self.pp_data[idx]
                return data
class RGBDPoseAPI():
    r"""Interface of PVN3D network for inference on a single RGBD image.

    Parameters
    ----------
    weights_path : str
        Path to weights file of the network (``*.pth.tar``).
    """

    def __init__(self, weights_path):
        # initialize configs and model object
        self.config = Config(dataset_name='ycb')
        self.bs_utils = Basic_Utils(self.config)
        self.model = self.define_network(weights_path)
        # Preprocessed inputs cached between preprocess_rgbd() and get_poses().
        self.rgb = None
        self.cld = None
        self.cld_rgb_nrm = None
        self.choose = None
        self.cls_id_lst = None

    def load_checkpoint(self, model=None, optimizer=None,
                        filename="checkpoint"):
        """Load network/optimizer state from ``<filename>.pth.tar``.

        Returns (it, epoch, best_prec) on success, None when the file is
        missing.
        """
        filename = "{}.pth.tar".format(filename)
        if os.path.isfile(filename):
            print("==> Loading from checkpoint '{}'".format(filename))
            try:
                checkpoint = torch.load(filename)
            except Exception:
                # FIX: was a bare except. Some checkpoints are plain pickles;
                # fall back to unpickling the file directly.
                checkpoint = pkl.load(open(filename, "rb"))
            epoch = checkpoint["epoch"]
            it = checkpoint.get("it", 0.0)
            best_prec = checkpoint["best_prec"]
            if model is not None and checkpoint["model_state"] is not None:
                model.load_state_dict(checkpoint["model_state"])
            if optimizer is not None and checkpoint[
                    "optimizer_state"] is not None:
                optimizer.load_state_dict(checkpoint["optimizer_state"])
            print("==> Done")
            return it, epoch, best_prec
        else:
            print("==> Checkpoint '{}' not found".format(filename))
            return None

    def define_network(self, weights_path):
        """Build the PVN3D model on GPU, load weights, wrap in DataParallel."""
        model = PVN3D(num_classes=self.config.n_objects,
                      pcld_input_channels=6,
                      pcld_use_xyz=True,
                      num_points=self.config.n_sample_points).cuda()
        # convert batch norm into synchronized batch norm
        model = convert_model(model)
        # model to GPU
        model.cuda()
        # load weights ([:-8] strips the ".pth.tar" suffix expected by
        # load_checkpoint, which re-appends it)
        self.load_checkpoint(model, None, filename=weights_path[:-8])
        # convert model to distributed mode
        model = nn.DataParallel(model)
        return model

    def get_normal(self, cld):
        """Estimate per-point normals for a point cloud via PCL k-NN
        (k=50) normal estimation."""
        cloud = pcl.PointCloud()
        cld = cld.astype(np.float32)
        cloud.from_array(cld)
        ne = cloud.make_NormalEstimation()
        kdtree = cloud.make_kdtree()
        ne.set_SearchMethod(kdtree)
        ne.set_KSearch(50)
        n = ne.compute()
        n = n.to_array()
        return n

    def preprocess_rgbd(self, image, depth, cam_scale=25.0):
        """Preprocess an RGB-D pair into the tensors the network expects and
        cache them on the instance (GPU). Returns None when the depth map
        yields fewer than 400 valid points."""
        # get camera intrinsics
        K = self.config.intrinsic_matrix['ycb_K1']
        # fill missing points in depth map
        dpt = self.bs_utils.fill_missing(depth, cam_scale, 1)
        rgb = np.transpose(image, (2, 0, 1))  # hwc2chw
        # convert depth map to point cloud
        cld, choose = self.bs_utils.dpt_2_cld(dpt, cam_scale, K)
        normal = self.get_normal(cld)[:, :3]
        normal[np.isnan(normal)] = 0.0
        # construct complete RGB point cloud
        rgb_lst = []
        for ic in range(rgb.shape[0]):
            rgb_lst.append(rgb[ic].flatten()[choose].astype(np.float32))
        rgb_pt = np.transpose(np.array(rgb_lst), (1, 0)).copy()
        choose = np.array([choose])
        choose_2 = np.array([i for i in range(len(choose[0, :]))])
        if len(choose_2) < 400:
            return None
        if len(choose_2) > self.config.n_sample_points:
            # Randomly subsample to n_sample_points.
            c_mask = np.zeros(len(choose_2), dtype=int)
            c_mask[:self.config.n_sample_points] = 1
            np.random.shuffle(c_mask)
            choose_2 = choose_2[c_mask.nonzero()]
        else:
            # Pad by wrapping so the sample has exactly n_sample_points.
            choose_2 = np.pad(choose_2,
                              (0, self.config.n_sample_points - len(choose_2)),
                              'wrap')
        cld_rgb_nrm = np.concatenate((cld, rgb_pt, normal), axis=1)
        cld = cld[choose_2, :]
        cld_rgb_nrm = cld_rgb_nrm[choose_2, :]
        choose = choose[:, choose_2]
        # define classes indices to be considered (all 21 YCB objects)
        cls_id_lst = np.array(range(1, 22))
        # convert processed data into torch tensors
        rgb = torch.from_numpy(rgb.astype(np.float32))
        cld = torch.from_numpy(cld.astype(np.float32))
        cld_rgb_nrm = torch.from_numpy(cld_rgb_nrm.astype(np.float32))
        choose = torch.LongTensor(choose.astype(np.int32))
        cls_id_lst = torch.LongTensor(cls_id_lst.astype(np.int32))
        # reshape (add batch dimension) and copy to GPU
        self.rgb = rgb.reshape(
            (1, rgb.shape[0], rgb.shape[1], rgb.shape[2])).cuda()
        self.cld = cld.reshape((1, cld.shape[0], cld.shape[1])).cuda()
        self.cld_rgb_nrm = cld_rgb_nrm.reshape(
            (1, cld_rgb_nrm.shape[0], cld_rgb_nrm.shape[1])).cuda()
        self.choose = choose.reshape(
            (1, choose.shape[0], choose.shape[1])).cuda()
        self.cls_id_lst = cls_id_lst.reshape((1, cls_id_lst.shape[0]))

    def get_poses(self, save_results=True):
        """Run inference on the cached inputs and return predicted object
        poses as (pred_cls_ids, pred_pose_lst). Optionally saves a pose
        visualization image."""
        # model to eval mode
        self.model.eval()
        # perform inference on defined model
        with torch.set_grad_enabled(False):
            # network forward pass
            pred_kp_of, pred_rgbd_seg, pred_ctr_of = self.model(
                self.cld_rgb_nrm, self.rgb, self.choose)
            _, classes_rgbd = torch.max(pred_rgbd_seg, -1)
            # calculate poses by voting, clustering and linear fitting
            pred_cls_ids, pred_pose_lst = cal_frame_poses(
                self.cld[0], classes_rgbd[0], pred_ctr_of[0], pred_kp_of[0],
                True, self.config.n_objects, True)
            # visualize predicted poses
            if save_results:
                np_rgb = self.rgb.cpu().numpy().astype("uint8")[0].transpose(
                    1, 2, 0).copy()
                np_rgb = np_rgb[:, :, ::-1].copy()
                ori_rgb = np_rgb.copy()
                # loop over each class id
                for cls_id in self.cls_id_lst[0].cpu().numpy():
                    idx = np.where(pred_cls_ids == cls_id)[0]
                    if len(idx) == 0:
                        continue
                    pose = pred_pose_lst[idx[0]]
                    obj_id = int(cls_id)
                    # project the object mesh with the predicted pose and
                    # draw it onto the image
                    mesh_pts = self.bs_utils.get_pointxyz(
                        obj_id, ds_type='ycb').copy()
                    mesh_pts = np.dot(mesh_pts, pose[:, :3].T) + pose[:, 3]
                    K = self.config.intrinsic_matrix["ycb_K1"]
                    mesh_p2ds = self.bs_utils.project_p3d(mesh_pts, 1.0, K)
                    color = self.bs_utils.get_label_color(obj_id,
                                                          n_obj=22,
                                                          mode=1)
                    np_rgb = self.bs_utils.draw_p2ds(np_rgb,
                                                     mesh_p2ds,
                                                     color=color)
                # save output visualization
                # FIX: replaced shell-dependent os.system('mkdir -p ...')
                # with the portable stdlib call.
                vis_dir = os.path.join(self.config.log_eval_dir, "pose_vis")
                os.makedirs(vis_dir, exist_ok=True)
                f_pth = os.path.join(vis_dir, "out.jpg")
                cv2.imwrite(f_pth, np_rgb)
        # return prediction
        return pred_cls_ids, pred_pose_lst