def __getitem__(self, index):
    # OUTPUTS:
    # rgb        - input rgb tensor [3, height, width]
    # mask       - ground truth segmentation tensor [height, width]
    # ver        - ground truth vertex tensor [2*nKeypoints, height, width]
    # verWeights - ground truth binary weight tensor [1, height, width]

    # Decide which scene and which image this sample index corresponds to
    sceneIdx, viewIdx = self.idxToSceneView[index]
    sceneDir = self.sceneDirs[sceneIdx - 1]

    # RGB
    rgbPath = os.path.join(sceneDir, 'rgb',
                           str(viewIdx).zfill(self.formats['rgbNLeadingZeros']) + '.' + self.formatList[sceneIdx - 1])
    rgb = read_rgb_np(rgbPath)
    rgb = self.test_img_transforms(Image.fromarray(np.ascontiguousarray(rgb, np.uint8)))
    _, height, width = rgb.shape

    # Mask
    segDir = os.path.join(sceneDir, 'seg')
    segPath = os.path.join(segDir,
                           str(viewIdx).zfill(self.formats['segNLeadingZeros']) + '.' + self.formats['segFormat'])
    segImg = np.asarray(Image.open(segPath))
    instanceIdxPath = os.path.join(segDir, 'classindex.txt')
    instanceIdx = np.loadtxt(instanceIdxPath, delimiter=',')
    if instanceIdx.shape == ():  # Special case for a single instance
        instanceIdx.shape = (1,)
    idxMatch = (instanceIdx == self.classIdx).astype(int)
    idxMatch = np.concatenate(([0], idxMatch))  # Add 'no-class' idx
    mask = torch.tensor(idxMatch[segImg], dtype=torch.int64)

    # Vertex
    nKeypoints = self.keypoints.shape[1]
    ver = np.zeros([height, width, nKeypoints * 2], np.float32)
    instanceSegImg = mask.numpy() * segImg
    nInstances = sum(idxMatch)
    idx = [i for i, j in enumerate(instanceIdx) if j == self.classIdx]
    poses = [parse_pose(self.poseData[sceneIdx - 1], viewIdx - 1, iPose) for iPose in idx]
    for iInstance in range(nInstances):
        thisMask = instanceSegImg == idx[iInstance] + 1
        keypointsProjected = self.K[sceneIdx - 1] @ (poses[iInstance] @ pextend(self.keypoints, vecType='col'))
        keypointsProjected = pflat(keypointsProjected)
        ver = ver + compute_vertex_hcoords(thisMask, keypointsProjected.T)
    ver = torch.tensor(ver, dtype=torch.float32).permute(2, 0, 1)
    verWeights = mask.unsqueeze(0).float()

    return rgb, mask, ver, verWeights
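# --- Hedged sketch (not part of the original file) -------------------------
# Minimal homogeneous-coordinate helpers consistent with how pextend/pflat are
# called in __getitem__ above: pextend appends a row of ones to column vectors
# and pflat normalizes by the last coordinate. The project's real helpers may
# differ in signature and behavior; treat these as illustrative assumptions.
import numpy as np

def pextend_sketch(points, vecType='col'):
    # Append a row of ones (column vectors) or a column of ones (row vectors).
    if vecType == 'col':
        return np.vstack([points, np.ones((1, points.shape[1]), points.dtype)])
    return np.hstack([points, np.ones((points.shape[0], 1), points.dtype)])

def pflat_sketch(points):
    # Divide every homogeneous column vector by its last coordinate.
    return points / points[-1:, :]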
def __getitem__(self, index):
    # OUTPUTS:
    # rgb        - input rgb tensor [3, height, width]
    # mask       - ground truth segmentation tensor [height, width]
    # ver        - ground truth vertex tensor [2*nKeypoints, height, width]
    # verWeights - ground truth binary weight tensor [1, height, width]

    # RGB
    # TODO: Change s.t. we can read from several different folders
    rgbPath = self.rgbDir + '/' + str(index + 1) + '.' + self.rgbFormat
    rgb = read_rgb_np(rgbPath)  # Np array
    rgb = self.test_img_transforms(Image.fromarray(np.ascontiguousarray(rgb, np.uint8)))
    _, height, width = rgb.shape

    # Mask
    segPath = self.segDir + '/' + str(index + 1).zfill(self.nLeadingZerosFormat) + '.' + self.segFormat
    segImg = np.asarray(Image.open(segPath))
    instanceIdxPath = self.segDir + '/' + 'classindex.txt'
    instanceIdx = np.loadtxt(instanceIdxPath, delimiter=',')
    idxMatch = (instanceIdx == self.classIdx).astype(int)
    idxMatch = np.concatenate(([0], idxMatch))  # Add 'no-class' idx
    mask = torch.tensor(idxMatch[segImg], dtype=torch.int64)

    # Vertex
    nKeypoints = self.keypoints.shape[1]
    ver = np.zeros([height, width, nKeypoints * 2], np.float32)
    instanceSegImg = mask.numpy() * segImg
    nInstances = sum(idxMatch)
    idx = [i for i, j in enumerate(instanceIdx) if j == self.classIdx]
    poses = [parse_pose(self.poseData, index, iPose) for iPose in idx]
    for iInstance in range(nInstances):
        thisMask = instanceSegImg == idx[iInstance] + 1
        keypointsProjected = self.K @ (poses[iInstance] @ pextend(self.keypoints, vecType='col'))
        keypointsProjected = pflat(keypointsProjected)
        ver = ver + compute_vertex_hcoords(thisMask, keypointsProjected.T)
    ver = torch.tensor(ver, dtype=torch.float32).permute(2, 0, 1)
    verWeights = mask.unsqueeze(0).float()

    return rgb, mask, ver, verWeights
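# --- Hedged usage sketch (not part of the original file) -------------------
# Shows how a dataset exposing the __getitem__ above is typically consumed with
# a torch DataLoader. Constructing the dataset is project-specific, so it is
# passed in as an argument here; batch size and worker count are arbitrary.
from torch.utils.data import DataLoader

def iterate_samples_sketch(dataset, batch_size=4):
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    for rgb, mask, ver, verWeights in loader:
        # rgb:        [B, 3, H, W] float tensor
        # mask:       [B, H, W] int64 class-index tensor
        # ver:        [B, 2*nKeypoints, H, W] vertex-field tensor
        # verWeights: [B, 1, H, W] binary weight tensor
        pass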
def __getitem__(self, index_tuple):
    index, height, width = index_tuple

    rgb_path = os.path.join(self.data_prefix, self.imagedb[index]['rgb_pth'])
    mask_path = os.path.join(self.data_prefix, self.imagedb[index]['dpt_pth'])
    pose = self.imagedb[index]['RT'].copy()

    rgb = read_rgb_np(rgb_path)
    mask = read_mask_np(mask_path)
    if self.imagedb[index]['rnd_typ'] == 'real' and len(mask.shape) == 3:
        mask = np.sum(mask, 2) > 0
        mask = np.asarray(mask, np.int32)
    if self.imagedb[index]['rnd_typ'] == 'fuse':
        mask = np.asarray(mask == (cfg.linemod_cls_names.index(self.imagedb[index]['cls_typ']) + 1), np.int32)

    hcoords = VotingType.get_data_pts_2d(self.vote_type, self.imagedb[index])

    if self.use_intrinsic:
        K = torch.tensor(self.imagedb[index]['K'].astype(np.float32))

    if self.augment:
        rgb, mask, hcoords = self.augmentation(rgb, mask, hcoords, height, width)

    ver = compute_vertex_hcoords(mask, hcoords, self.use_motion)
    ver = torch.tensor(ver, dtype=torch.float32).permute(2, 0, 1)
    mask = torch.tensor(np.ascontiguousarray(mask), dtype=torch.int64)
    ver_weight = mask.unsqueeze(0).float()

    if self.augment:  # and self.imagedb[index]['rnd_typ'] != 'real':
        # When augmenting (non-real images), optionally blur and color-jitter the image
        if self.cfg['blur'] and np.random.random() < 0.5:
            blur_image(rgb, np.random.choice([3, 5, 7, 9]))
        if self.cfg['jitter']:
            rgb = self.img_transforms(Image.fromarray(np.ascontiguousarray(rgb, np.uint8)))
        else:
            rgb = self.test_img_transforms(Image.fromarray(np.ascontiguousarray(rgb, np.uint8)))
        if self.cfg['use_mask_out'] and np.random.random() < 0.1:
            rgb *= (mask[None, :, :]).float()
    else:
        rgb = self.test_img_transforms(Image.fromarray(np.ascontiguousarray(rgb, np.uint8)))

    if self.imagedb[index]['rnd_typ'] == 'fuse' and self.cfg['ignore_fuse_ms_vertex']:
        ver_weight *= 0.0

    pose = torch.tensor(pose.astype(np.float32))
    hcoords = torch.tensor(hcoords.astype(np.float32))

    if self.use_intrinsic:
        return rgb, mask, ver, ver_weight, pose, hcoords, K
    else:
        return rgb, mask, ver, ver_weight, pose, hcoords
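# --- Hedged sketch (not part of the original file) -------------------------
# A simplified version of what compute_vertex_hcoords presumably does: for each
# foreground pixel, store the unit vector pointing toward each 2D keypoint,
# stacked along the channel dimension. The project's real implementation (and
# its use_motion flag) may differ; this is illustrative only.
import numpy as np

def compute_vertex_hcoords_sketch(mask, hcoords):
    h, w = mask.shape
    n = hcoords.shape[0]
    ver = np.zeros((h, w, n * 2), np.float32)
    ys, xs = np.nonzero(mask)          # foreground pixel coordinates
    for k in range(n):
        dx = hcoords[k, 0] - xs        # x-offset from pixel to keypoint
        dy = hcoords[k, 1] - ys        # y-offset from pixel to keypoint
        norm = np.sqrt(dx * dx + dy * dy) + 1e-6
        ver[ys, xs, 2 * k] = dx / norm
        ver[ys, xs, 2 * k + 1] = dy / norm
    return ver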
# Load camera intrinsic parameter matrix
K = parse_inner_parameters(paths['cameraPath'])

# Load point clouds
print('Loading pointclouds...')
tStart = time.time()
cadX = []
for iClass in range(nClasses):
    # cadX.append(load_model_pointcloud(paths['modelDir'], iClass+1)[::20, :])
    modelPath = os.path.join(paths['modelDir'], str(iClass + 1) + '.ply')
    cadX.append(np.asarray(read_point_cloud(modelPath).points)[::20, :])
print('Finished loading pointclouds after {} seconds.'.format(time.time() - tStart))

# Parse the image width and height (assuming they all share the width/height of image 1)
rgbPath = os.path.join(paths['rgbDir'], '1.png')
rgb = read_rgb_np(rgbPath)
height, width, _ = rgb.shape


def FindNPtsWithinBorders(points, xmin, xmax, ymin, ymax):
    # Count the number of 2D points lying inside the given borders
    n = 0
    for i in range(points.shape[0]):
        p = points[i]
        if (p[0] < xmin) or (p[0] > xmax):
            continue
        elif (p[1] < ymin) or (p[1] > ymax):
            continue
        else:
            n += 1
    return n
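# --- Hedged sketch (not part of the original script) ------------------------
# A vectorized NumPy equivalent of FindNPtsWithinBorders above; same counting
# semantics (inclusive borders), but without the Python-level loop.
import numpy as np

def find_n_pts_within_borders_np(points, xmin, xmax, ymin, ymax):
    inside_x = (points[:, 0] >= xmin) & (points[:, 0] <= xmax)
    inside_y = (points[:, 1] >= ymin) & (points[:, 1] <= ymax)
    return int(np.count_nonzero(inside_x & inside_y))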
def __getitem__(self, index_tuple):
    index, height, width = index_tuple

    rgb_path = os.path.join(self.data_prefix, self.imagedb[index]['rgb_path'])
    mask_path = os.path.join(self.data_prefix, self.imagedb[index]['mask_path'])
    pose = self.imagedb[index]['RT'].copy()

    rgb = read_rgb_np(rgb_path)
    mask = read_mask_ycb(mask_path)

    # if self.imagedb[index]['rnd_type'] == 'render':
    #     rand_ind = np.random.randint(self.background_list_len)
    #     bg_path = self.background_list[rand_ind]
    #     bg = read_rgb_np(bg_path)
    #     mask_bin = np.zeros(mask.shape, dtype=mask.dtype)
    #     mask_bin[mask > 0] = 1
    #     mask_bin = np.expand_dims(mask_bin, axis=2)
    #     bg = cv2.resize(bg, (640, 480))
    #     rgb = rgb * mask_bin + bg * (1 - mask_bin)
    #     rgb = np.asarray(rgb, dtype=np.uint8)
    if self.imagedb[index]['rnd_type'] == 'render':
        mask[mask > 0] = 1
        # print(np.max(rgb), np.min(rgb))  # 0 -- 255
        # noise_mean, noise_std = 0, 0
        # gauss = np.asarray(np.random.normal(noise_mean, noise_std, rgb.shape), dtype=np.int)
        # rgb = np.asarray(rgb, dtype=np.int)
        # rgb = rgb + gauss
        # rgb[rgb > 255] = 255
        # rgb[rgb < 0] = 0
        # rgb = np.asarray(rgb, dtype=np.uint8)
        # cv2.imwrite('img_{}.jpg'.format(index), rgb)
    elif self.imagedb[index]['rnd_type'] == 'syn':
        # Composite the synthetic object onto a random background image
        rand_ind = np.random.randint(self.background_list_len)
        bg_path = self.background_list[rand_ind]
        bg = read_rgb_np(bg_path)
        if bg.ndim == 3:
            mask_bin = np.zeros(rgb.shape, dtype=mask.dtype)
            mask_bin[mask > 0] = 1
            bg = cv2.resize(bg, (640, 480))
            rgb = rgb * mask_bin + bg * (1 - mask_bin)
            rgb = np.asarray(rgb, dtype=np.uint8)
        mask[mask > 0] = 1
    else:
        ind = self.cls_name_id[self.cls_type] + 1
        mask[mask != ind] = 0
        mask[mask == ind] = 1

    # if self.imagedb[index]['rnd_typ'] == 'real' and len(mask.shape) == 3:
    #     mask = np.sum(mask, 2) > 0
    #     mask = np.asarray(mask, np.int32)
    #
    # if self.imagedb[index]['rnd_typ'] == 'syn':
    #     mask = np.asarray(mask == (cfg.ycb_cls_names.index(self.imagedb[index]['cls_typ']) + 1), np.int32)

    hcoords = VotingType.get_data_pts_2d(self.vote_type, self.imagedb[index])
    K = torch.tensor(self.imagedb[index]['K'].astype(np.float32))

    if self.augment:
        rgb, mask, hcoords = self.augmentation(rgb, mask, hcoords, height, width)

    ver = compute_vertex_hcoords(mask, hcoords, self.use_motion)
    ver = torch.tensor(ver, dtype=torch.float32).permute(2, 0, 1)
    mask = torch.tensor(np.ascontiguousarray(mask), dtype=torch.int64)
    ver_weight = mask.unsqueeze(0).float()

    if self.augment:  # and self.imagedb[index]['rnd_typ'] != 'real':
        # When augmenting (non-real images), optionally blur and color-jitter the image
        if self.cfg['blur'] and np.random.random() < 0.5:
            blur_image(rgb, np.random.choice([3, 5, 7, 9]))
        if self.cfg['jitter']:
            rgb = self.img_transforms(Image.fromarray(np.ascontiguousarray(rgb, np.uint8)))
        else:
            rgb = self.test_img_transforms(Image.fromarray(np.ascontiguousarray(rgb, np.uint8)))
        if self.cfg['use_mask_out'] and np.random.random() < 0.1:
            rgb *= (mask[None, :, :]).float()
    else:
        rgb = self.test_img_transforms(Image.fromarray(np.ascontiguousarray(rgb, np.uint8)))

    # if self.imagedb[index]['rnd_typ'] == 'syn' and self.cfg['ignore_fuse_ms_vertex']:
    #     ver_weight *= 0.0

    pose = torch.tensor(pose.astype(np.float32))
    hcoords = torch.tensor(hcoords.astype(np.float32))

    if self.use_intrinsic:
        return rgb, mask, ver, ver_weight, pose, hcoords, K
    else:
        return rgb, mask, ver, ver_weight, pose, hcoords
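# --- Hedged sketch (not part of the original file) -------------------------
# The two __getitem__ variants above take an (index, height, width) tuple, so
# they are presumably driven by a sampler that pairs each index with a target
# resolution for augmentation. The mechanism below is an assumption, not the
# project's actual sampler; names and default sizes are placeholders.
import numpy as np

def iter_index_tuples_sketch(n_items, sizes=((480, 640), (360, 480))):
    # Yield (index, height, width) tuples in random order; the chosen size is
    # the resolution later passed to self.augmentation in __getitem__.
    for idx in np.random.permutation(n_items):
        h, w = sizes[np.random.randint(len(sizes))]
        yield int(idx), h, w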