Exemple #1
0
  def __getitem__helper(self, index):
      """Build the sample dictionary for one pair of views of a scene.

      The pair of frame ids comes from ``self.__getpair__(index)``. Per-frame
      RGB, depth, normal and semantic images and the 4x4 matrices ``R`` are
      read from the key/value store ``self.txn`` under keys of the form
      ``'<room_id>-<frame:06d>-<tag>'``. Optional outputs are switched by the
      instance flags ``fullsize_rgbdn``, ``denseCorres``, ``pointcloud``,
      ``local``, ``eval_local``, ``plane_r`` and ``topdown``.

      Args:
          index: sample index; it is wrapped with ``index % len(self)``.

      Returns:
          ``(rets, True)``, where ``rets`` maps names to arrays that carry a
          leading axis of length 1. Always present: ``PerspectiveValidMask``,
          ``dataMask``, ``norm``, ``rgb``, ``semantic``, ``depth``, ``Q``,
          ``R``, ``R_inv`` and ``imgsPath``. ``R_inv`` holds the per-view
          inverses of ``R``, except in the non-eval ``local`` branch where it
          is the inverse of the 4x4 ``igt`` transform.

      Raises:
          NotImplementedError: if ``self.local`` is set without
              ``self.eval_local`` and ``self.local_method == 'patch'``; the
              plane parameters that mode needs are never computed here.
      """
      rets = {}
      index = index % self.__len__()
      imgs_depth = np.zeros((self.nViews, self.Inputheight, self.Inputwidth), dtype=np.float32)
      imgs_s = np.zeros((self.nViews, self.Inputheight, self.Inputwidth), dtype=np.float32)
      imgs_rgb = np.zeros((self.nViews, self.Inputheight, self.Inputwidth, 3), dtype=np.float32)
      imgs_normal = np.zeros((self.nViews, self.Inputheight, self.Inputwidth, 3), dtype=np.float32)
      # per view: xyz (3) + normal (3) + colour (3) + semantic label (1)
      pointcloud = np.zeros((self.nViews, 3+3+3+1, self.num_points), dtype=np.float32)

      R = np.zeros((self.nViews, 4, 4))
      Q = np.zeros((7))
      assert(self.nViews == 2)
      imgsPath = []
      ct0, ct1 = self.__getpair__(index)
      frame_ids = (ct0, ct1)

      if 'scannet_test_scenes' not in self.list:
        rets['overlap'] = float(self.dataList[index]['overlap'])

      room_id = self.base_this.split('/')[-1]
      basePath = os.path.join(self.base, room_id)

      def _decode(tag, frame, flag):
        # Fetch the encoded image stored under '<room>-<frame>-<tag>' and decode it.
        blob = self.txn.get(('%s-%06d-%s' % (room_id, frame, tag)).encode())
        return cv2.imdecode(np.frombuffer(blob, dtype=np.uint8), flag)

      def _load_f64(key):
        # Raw float64 buffer stored under `key`. np.float64 replaces the
        # removed `np.float` alias (same dtype).
        return np.frombuffer(self.txn.get(key.encode()), np.float64)

      for v, fid in enumerate(frame_ids):
        imgs_rgb[v] = _decode('rgb', fid, cv2.IMREAD_COLOR).astype('float')/255.0
        # depth is divided by 1000 -- presumably millimetres to metres; TODO confirm
        imgs_depth[v] = _decode('depth', fid, cv2.IMREAD_ANYDEPTH).astype('float')/1000.0
        # 8-bit normal map rescaled from [0, 255] to [-1, 1]
        imgs_normal[v] = _decode('normal', fid, cv2.IMREAD_COLOR).astype('float')/255.0*2-1
        imgs_s[v] = _decode('semantic', fid, cv2.IMREAD_UNCHANGED).astype('uint8')[:,:,0]

      # pixels with zero depth are flagged invalid
      PerspectiveValidMask = (imgs_depth!=0)
      rets['PerspectiveValidMask'] = PerspectiveValidMask[None,:,None,:,:]
      rets['dataMask'] = rets['PerspectiveValidMask']

      for v, fid in enumerate(frame_ids):
        R[v] = _load_f64('%s-%06d-R' % (room_id, fid)).reshape(4,4)

      # `view_inv` always holds the per-view inverses used for geometry below;
      # `R_inv` is what gets exported and may be rebound in the local branch.
      view_inv = np.linalg.inv(R)
      R_inv = view_inv
      img2ind = np.zeros([2, self.num_points, 3])
      imgPCid = np.zeros([2, self.num_points, 2])

      if self.fullsize_rgbdn:
        full_root = basePath.replace('ScanNet','ScanNet_360')
        imgs_rgb_full = np.zeros((self.nViews, 480,640, 3), dtype=np.float32)
        imgs_norm_full = np.zeros((self.nViews, 480,640, 3), dtype=np.float32)
        imgs_full = np.zeros((self.nViews, 480,640), dtype=np.float32)
        for v, fid in enumerate(frame_ids):
          fname = '%06d.png' % fid
          imgs_full[v] = self.LoadImage(os.path.join(full_root,'obs_depth',fname)).copy()
          imgs_rgb_full[v] = self.LoadImage(os.path.join(full_root,'obs_rgb',fname),depth=False).copy()/255.
          imgs_norm_full[v] = self.LoadImage(os.path.join(full_root,'obs_normal',fname),depth=False).copy()/255*2-1.
        rets['rgb_full'] = imgs_rgb_full[np.newaxis,:]
        rets['norm_full'] = imgs_norm_full[np.newaxis,:]
        rets['depth_full'] = imgs_full[np.newaxis,:]

      if self.denseCorres:
        # back-project both depth maps (original note: be aware of the order
        # of the points returned by depth2pc) and map them through inv(R)
        pcs, masks = self.depth2pc(imgs_depth[0], needmask=True)
        pct, maskt = self.depth2pc(imgs_depth[1], needmask=True)
        pct = (np.matmul(view_inv[1][:3,:3], pct.T) + view_inv[1][:3,3:4]).T
        pcs = (np.matmul(view_inv[0][:3,:3], pcs.T) + view_inv[0][:3,3:4]).T
        n_pix = imgs_depth[0].shape[0]*imgs_depth[0].shape[1]
        inds = np.arange(n_pix)[masks]
        indt = np.arange(n_pix)[maskt]
        # nearest-neighbour search of 5000 random source points in the target cloud
        tree = KDTree(pct)
        IdxQuery = np.random.choice(pcs.shape[0], 5000)
        pcsQuery = pcs[IdxQuery,:]
        pcsQueryid = inds[IdxQuery]
        nearest_dist, nearest_ind = tree.query(pcsQuery, k=1)
        hasCorres = (nearest_dist < 0.08)
        idxTgtNeg = []
        src_pix = pcsQueryid[hasCorres[:,0]]
        tgt_pix = indt[nearest_ind[hasCorres]]
        # flat pixel index -> (column, row)
        idxSrc = np.stack((src_pix % self.Inputwidth, src_pix // self.Inputwidth), 1)
        idxTgt = np.stack((tgt_pix % self.Inputwidth, tgt_pix // self.Inputwidth), 1)

        if hasCorres.sum() < 200:
          # too few matches: emit a zero-filled placeholder flagged invalid
          rets['denseCorres'] = {'idxSrc':np.zeros([1,500,2]).astype('int'),'idxTgt':np.zeros([1,500,2]).astype('int'),'valid':np.array([0]),'idxTgtNeg':idxTgtNeg}
        else:
          keep = np.random.choice(idxSrc.shape[0], 500)
          idxSrc = idxSrc[keep][np.newaxis,:]
          idxTgt = idxTgt[keep][np.newaxis,:]
          rets['denseCorres'] = {'idxSrc':idxSrc.astype('int'),'idxTgt':idxTgt.astype('int'),'valid':np.array([1]),'idxTgtNeg':idxTgtNeg}

      if self.pointcloud or self.local:
        # fixed 96x128 window of the input images
        crop = (slice(100-48, 100+48), slice(200-64, 200+64))
        for v in range(2):
          pc, mask = self.depth2pc(imgs_depth[v][crop], needmask=True)
          idx_s = np.random.choice(len(pc), self.num_points)
          # NOTE(review): idx_s indexes the masked point list, not the pixel
          # grid, so these are true crop coordinates only when every pixel is
          # valid; np.where(mask)[0][idx_s] would give the pixel index -- confirm intent
          imgPCid[v] = np.stack((idx_s % 128, idx_s // 128)).T
          pointcloud[v,:3,:] = pc[idx_s,:].T
          pc_n = imgs_normal[v][crop].reshape(-1, 3)[mask]
          pointcloud[v,3:6,:] = pc_n[idx_s,:].T
          pc_c = imgs_rgb[v][crop].reshape(-1,3)[mask]
          # channel order reversed (cv2 decodes colour images as BGR)
          pointcloud[v,6:9,:] = pc_c[idx_s,::-1].T
          pc_s = imgs_s[v][crop].reshape(-1)[mask]
          pointcloud[v,9:10,:] = pc_s[idx_s]
        rets['pointcloud'] = pointcloud[None,...]

      if self.plane_r:
        plane_eq_raw = _load_f64('%s-plane' % (room_id)).reshape(-1,9)
        Key = '%s-plane-validnum' % (room_id)
        valid_plane = np.frombuffer(self.txn.get(Key.encode()),np.uint8)[0]
        # columns 0:3 are used as plane centres, 3:7 as plane equations
        room_planes = np.matmul(plane_eq_raw[:,3:7], np.linalg.inv(R[0]))
        plane_center = (np.matmul(R[0][:3,:3], plane_eq_raw[:,:3].T) + R[0][:3,3:4]).T

        rets['plane'] = room_planes[np.newaxis,:]
        rets['plane_raw'] = plane_eq_raw[np.newaxis,:]
        rets['plane_c'] = plane_center[np.newaxis,:]
        rets['valid_plane'] = valid_plane

      if self.local:
        # Errors are allowed to propagate: the former bare `except` opened an
        # interactive debugger and then failed on undefined names.
        # move the source cloud (points and normals) into the target frame
        R_s2t = np.matmul(R[1], view_inv[0])
        pointcloud[0,:3,:] = np.matmul(R_s2t[:3,:3], pointcloud[0,:3,:]) + R_s2t[:3,3:4]
        pointcloud[0,3:6,:] = np.matmul(R_s2t[:3,:3], pointcloud[0,3:6,:])
        N_PAIR_PTS = 6000 if self.eval_local else 1000
        N_PAIR_EXCEED_PTS = N_PAIR_PTS*10
        ANGLE_THRESH = 5.0
        PERP_THRESH = np.cos(np.deg2rad(90-ANGLE_THRESH))
        PARALLEL_THRESH = np.cos(np.deg2rad(ANGLE_THRESH))
        COPLANE_THRESH = 0.05
        rel_cls_pts = np.zeros([N_PAIR_EXCEED_PTS])

        # oversample random (source, target) point pairs
        ind_s = np.random.choice(pointcloud.shape[-1], N_PAIR_EXCEED_PTS)
        ind_t = np.random.choice(pointcloud.shape[-1], N_PAIR_EXCEED_PTS)
        pair_pts = np.stack((ind_s, ind_t), -1)

        # one row per pair
        src_xyz = pointcloud[0, 0:3, pair_pts[:,0]]
        tgt_xyz = pointcloud[1, 0:3, pair_pts[:,1]]
        src_nor = pointcloud[0, 3:6, pair_pts[:,0]]
        tgt_nor = pointcloud[1, 3:6, pair_pts[:,1]]
        normdot = (src_nor * tgt_nor).sum(1)
        offset = src_xyz - tgt_xyz
        # mean absolute offset measured along the two normals
        dst = (np.abs((offset * tgt_nor).sum(1)) + np.abs((offset * src_nor).sum(1)))/2
        abs_dot = np.abs(normdot)
        # class 0: none of the below, 1: perpendicular,
        # 2: parallel and not coplanar, 3: parallel and coplanar
        rel_cls_pts[abs_dot < PERP_THRESH] = 1
        rel_cls_pts[(abs_dot > PARALLEL_THRESH) & (dst > COPLANE_THRESH)] = 2
        rel_cls_pts[(abs_dot > PARALLEL_THRESH) & (dst <= COPLANE_THRESH)] = 3

        if self.split == 'train':
          # balance each class, then resample to exactly N_PAIR_PTS pairs
          N_CLASS = 4
          per_class = []
          for j in range(N_CLASS):
            ind = np.where(rel_cls_pts == j)[0]
            if len(ind):
              per_class.append(ind[np.random.choice(len(ind), N_PAIR_PTS//N_CLASS)])
          pool = np.concatenate(per_class)
          pair_pts_select = pool[np.random.choice(len(pool), N_PAIR_PTS)]
        else:
          pair_pts_select = np.random.choice(len(pair_pts), N_PAIR_PTS)
        pair_pts = pair_pts[pair_pts_select]
        normdot = normdot[pair_pts_select]
        dst = dst[pair_pts_select]
        rel_cls_pts = rel_cls_pts[pair_pts_select]

        rets['normdot2'] = np.power(normdot,2)[None,:]
        rets['dst2'] = np.power(dst,2)[None,:]

        # convert to image coordinates of the 96x128 crop
        # NOTE(review): hfov and the 200/400, 96/128 factors presumably encode
        # the camera and crop geometry -- confirm
        hfov = 120.0
        vfov = 2*np.arctan(np.tan(hfov/2/180*np.pi)*200/400)/np.pi*180

        def _project(pts):
          depth = -pts[:,2]
          ys = (0.5 - (pts[:, 1]/96*200/depth/(np.tan(np.deg2rad(vfov/2))))/2)*96
          xs = (0.5 + (pts[:, 0]/128*400/depth/(np.tan(np.deg2rad(hfov/2))))/2)*128
          return np.stack((xs, ys), -1)

        # source points are mapped back to the source frame before projecting
        back_to_src = np.linalg.inv(R_s2t)
        uv_s = _project((np.matmul(back_to_src[:3,:3], pointcloud[0, :3, pair_pts[:,0]].T)+back_to_src[:3,3:4]).T)
        uv_t = _project(pointcloud[1, :3, pair_pts[:,1]])
        rets['uv_pts'] = np.stack((uv_s, uv_t))[None, :]
        rets['uv_pts'][:, :, :, 0] = rets['uv_pts'][:, :, :, 0].clip(0, 128-1)
        rets['uv_pts'][:, :, :, 1] = rets['uv_pts'][:, :, :, 1].clip(0, 96-1)
        rets['uv_pts'] = rets['uv_pts'].astype('int')

        rets['rel_cls_pts'] = rel_cls_pts[None, :]
        rets['pair_pts'] = pair_pts[None, :]
        if self.eval_local:
          # convert back into the local (source) coordinate frame
          R_t2s = np.matmul(R[0], view_inv[1])
          Kth = self.dataList[index]['Kth']
          pointcloud[0,:3,:] = np.matmul(R_t2s[:3,:3], pointcloud[0,:3,:]) + R_t2s[:3,3:4]
          pointcloud[0,3:6,:] = np.matmul(R_t2s[:3,:3], pointcloud[0,3:6,:])

          eval_key = '%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)
          entry = self.eval_gt_dict[eval_key]
          R_pred = entry['pred_pose']
          gt_pose = entry['gt_pose']
          rets['err_r'] = util.angular_distance_np(R_pred[:3,:3],gt_pose[:3,:3])[0]
          rets['eval_key'] = eval_key

          # source-side positions and normals are moved by the predicted pose
          pos_s_360 = (np.matmul(R_pred[:3,:3], entry['pos_s_360'].T) + R_pred[:3,3:4]).T
          nor_s_360 = np.matmul(R_pred[:3,:3], entry['nor_s_360'].T).T
          rets['pos_s_360'] = (pos_s_360[None,:])
          rets['pos_t_360'] = (entry['pos_t_360'][None,:])
          rets['nor_s_360'] = (nor_s_360[None,:])
          rets['nor_t_360'] = (entry['nor_t_360'][None,:])

          pointcloud[0,:3,:] = np.matmul(R_pred[:3,:3], pointcloud[0,:3,:]) + R_pred[:3,3:4]
          pointcloud[0,3:6,:] = np.matmul(R_pred[:3,:3], pointcloud[0,3:6,:])

          # remaining transform from the predicted alignment to source->target
          igt = np.matmul(R_s2t, np.linalg.inv(R_pred))
          rets['igt'] = igt[None,:]
          rets['pred_pose'] = R_pred[None,:]
          rets['gt_pose'] = gt_pose[None,:]
        else:
          # perturb the aligned source cloud with a random rigid motion about
          # its centroid; the inverse of that motion is the target `igt`
          delta_R = util.randomRotation(epsilon=0.1*3)
          delta_t = np.random.randn(3)*0.1
          centroid = pointcloud[0,:3,:].mean(1)[:,None]

          pointcloud_s_perturb = np.matmul(delta_R, pointcloud[0,:3,:] - centroid) + delta_t[:, None] + centroid
          t_gt = np.matmul(np.eye(3) - delta_R.T, centroid) - np.matmul(delta_R.T, delta_t[:, None])
          R_gt = delta_R.T
          igt = np.eye(4)
          igt[:3,:3] = R_gt
          igt[:3,3] = t_gt.squeeze()
          rets['igt'] = igt[None,:]
          pointcloud_s_n_perturb = np.matmul(delta_R, pointcloud[0,3:6,:])

          if self.local_method == 'patch':
            # this mode needs per-plane parameters that nothing here computes
            raise NotImplementedError(
                "local_method == 'patch' needs plane_params1, which "
                "__getitem__helper never computes")
          Q = np.concatenate((util.rot2Quaternion(R_gt),t_gt.squeeze()))
          # exported R_inv becomes the inverse of igt in this branch
          R_inv = np.linalg.inv(igt)

          pointcloud[0,:3,:] = pointcloud_s_perturb
          pointcloud[0,3:6,:] = pointcloud_s_n_perturb
        rets['pointcloud'] = pointcloud[None,...]

      if self.topdown:
        roompc = _load_f64('%s-pc' % (room_id)).reshape(-1,3)
        roompc = roompc[np.random.choice(roompc.shape[0],20000)]
        rets['roompc'] = roompc[None,:]

        # floor plane mapped by inv(R[v]) for each view, normal renormalised
        floor_eq = _load_f64('%s-floor' % (room_id)).reshape(4)
        plane_eqs = np.zeros([2, 4])
        for v in range(2):
          eq = np.matmul(floor_eq, np.linalg.inv(R[v]))
          eq /= (np.linalg.norm(eq[:3])+1e-16)
          plane_eqs[v, :] = eq

        # result unused; the draw is kept so later random draws are unchanged
        np.random.rand(21,3)

        resolution = 0.03
        height = 224
        width = 224

        def _floor_frame(eq):
          # Origin is the camera position (0,0,0) projected onto the plane;
          # axis_y is [0,0,-1] made orthogonal to the plane normal.
          origin = -eq[:3] * eq[3]
          axis_base = np.array([0,0,-1])
          axis_y = axis_base - np.dot(axis_base, eq[:3]) * eq[:3]
          axis_y /= (np.linalg.norm(axis_y)+1e-16)
          axis_x = np.cross(axis_y, eq[:3])
          axis_x /= (np.linalg.norm(axis_x)+1e-16)
          return origin, axis_x, axis_y, eq[:3]

        def _plane_uv(pts, origin, axis_x, axis_y):
          # Integer top-down pixel coordinates (u, v) of 3D points.
          rel = pts - origin[None,:]
          u = (rel * axis_x[None,:]).sum(1)
          v = (rel * axis_y[None,:]).sum(1)
          return width//2 + (u / resolution).astype('int'), height//2 - (v / resolution).astype('int')

        def _plane_cells(pts, origin, axis_x, axis_y, axis_z):
          # (u, v, height-bin) for each point; bins split at -0.1, 0.7, 1.5.
          u, v = _plane_uv(pts, origin, axis_x, axis_y)
          z = ((pts - origin[None,:]) * axis_z[None,:]).sum(1)
          return np.stack((u, v, np.digitize(z, [-0.1, 0.7, 1.5])), -1)

        pc2ind = np.zeros([2, pointcloud.shape[2], 3])
        npts = np.zeros([2])
        pc2ind_mask = np.zeros([2, pointcloud.shape[2]])

        topdown_c_partial_v = [_decode('topdown_c_partial', fid, cv2.IMREAD_COLOR).astype('float')/255. for fid in frame_ids]
        topdown_c_complete_v = [_decode('topdown_c_complete', fid, cv2.IMREAD_COLOR).astype('float')/255. for fid in frame_ids]
        topdown_s_complete_v = [_decode('topdown_s_complete', fid, cv2.IMREAD_UNCHANGED).astype('uint8') for fid in frame_ids]

        # weight 0.98**d (floored at 0.1), d = distance transform of the
        # empty-pixel mask of each partial top-down image
        edt_w = []
        for partial in topdown_c_partial_v:
          empty = ~partial.sum(2).astype('bool')
          edt = ndimage.distance_transform_edt(empty, return_indices=False)
          edt_w.append(np.maximum(0.1, np.power(0.98, edt)))
        rets['edt_w'] = np.stack(edt_w)[None, ...]

        floor_frames = []
        for v in range(2):
          eq = plane_eqs[v]
          pts = pointcloud[v,0:3,:].T
          # keep points within 1.5 of the floor plane (drops the ceiling)
          near_floor = np.abs((eq[:3][None,:] * pts).sum(1) + eq[3]) < 1.5
          npts[v] = np.count_nonzero(near_floor)
          pc2ind_mask[v] = near_floor
          axes = _floor_frame(eq)
          floor_frames.append(axes)
          pc2ind[v, near_floor] = _plane_cells(pts[near_floor], *axes)
          img2ind[v] = _plane_cells(pts, *axes)
        origin_0, axis_x_0, axis_y_0, _ = floor_frames[0]
        origin_1, axis_x_1, axis_y_1, _ = floor_frames[1]

        rets['img2ind'] = img2ind[None,...]
        rets['imgPCid'] = imgPCid[None,...]
        rets['axis_x'] = np.stack((axis_x_0, axis_x_1))[None,:]
        rets['axis_y'] = np.stack((axis_y_0, axis_y_1))[None,:]
        rets['origin'] = np.stack((origin_0, origin_1))[None,:]

        # sample 100 keypoints on occupied pixels of the source partial top-down
        occupied = (topdown_c_partial_v[0]==0).sum(2) != 3
        vs, us = np.where(occupied)
        if not len(vs):
          vs = np.array([0,0])
          us = np.array([0,0])
        ind = np.random.choice(len(vs), 100)
        u_0 = us[ind]
        v_0 = vs[ind]
        kp_uv_0 = np.stack((u_0,v_0),-1)
        u_0 -= width//2
        v_0 -= height//2

        # lift to 3D on the source floor plane and move into the target frame
        kp_3d_0 = origin_0[None,:] + axis_x_0[None,:] * u_0[:,None] * resolution - axis_y_0[None,:] * v_0[:,None] * resolution
        R01 = np.matmul(R[1], view_inv[0])
        kp_3d_1 = (np.matmul(R01[:3,:3], kp_3d_0.T) + R01[:3,3:4]).T

        # randomly sample target pixels as negative correspondences
        occupied = (topdown_c_partial_v[1]==0).sum(2) != 3
        vs_neg, us_neg = np.where(occupied)
        if not len(vs_neg):
          vs_neg = np.array([0,0])
          us_neg = np.array([0,0])
        ind = np.random.choice(len(vs_neg), 100*100)
        u_neg_1 = us_neg[ind]
        v_neg_1 = vs_neg[ind]
        kp_uv_neg_1 = np.stack((u_neg_1,v_neg_1),-1)
        u_neg_1 -= width//2
        v_neg_1 -= height//2
        kp_3d_neg_1 = origin_1[None,:] + axis_x_1[None,:] * u_neg_1[:,None] * resolution - axis_y_1[None,:] * v_neg_1[:,None] * resolution
        R10 = np.matmul(R[0], view_inv[1])
        kp_3d_neg_0 = (np.matmul(R10[:3,:3], kp_3d_neg_1.T) + R10[:3,3:4]).T
        kp_uv_neg_0 = np.stack(_plane_uv(kp_3d_neg_0, origin_0, axis_x_0, axis_y_0),-1)
        kp_uv_neg_0[:,0] = kp_uv_neg_0[:,0].clip(0, width-1)
        kp_uv_neg_0[:,1] = kp_uv_neg_0[:,1].clip(0, height-1)
        kp_uv_neg_1 = kp_uv_neg_1.reshape(100, 100, 2)
        kp_uv_neg_0 = kp_uv_neg_0.reshape(100, 100, 2)
        # negatives that land close to the keypoint in the source image get low weight
        w_uv_neg_1 = 1 - np.maximum(0.1, np.power(0.98, np.linalg.norm(kp_uv_neg_0 - kp_uv_0[:, None, :], axis=2)))

        kp_uv_1 = np.stack(_plane_uv(kp_3d_1, origin_1, axis_x_1, axis_y_1),-1)

        topdown_c_complete = np.stack(topdown_c_complete_v).transpose(0,3,1,2)
        topdown_s_complete = np.stack(topdown_s_complete_v)
        topdown_c_partial = np.stack(topdown_c_partial_v)

        kp_uv_0[:,0] = kp_uv_0[:,0].clip(0, width-1)
        kp_uv_0[:,1] = kp_uv_0[:,1].clip(0, height-1)
        kp_uv_1[:,0] = kp_uv_1[:,0].clip(0, width-1)
        kp_uv_1[:,1] = kp_uv_1[:,1].clip(0, height-1)
        rets['kp_uv'] = np.stack((kp_uv_0,kp_uv_1))[None,...]
        rets['kp_uv_neg'] = kp_uv_neg_1[None,...]
        rets['w_uv_neg'] = w_uv_neg_1[None,...]
        rets['plane_eq'] = plane_eqs[None,...]
        rets['pc2ind'] = pc2ind[None,...]

        rets['pc2ind_mask'] = pc2ind_mask[None,...]
        rets['topdown'] = topdown_c_complete[None,...]
        rets['topdown_s'] = topdown_s_complete[None,...]
        rets['topdown_partial'] = topdown_c_partial.transpose(0,3,1,2)[None,...]

        # a top-down pixel is valid unless all three colour channels are zero
        TopDownValidMask = ((topdown_c_complete==0).sum(1,keepdims=True)!=3)
        rets['TopDownValidMask'] = TopDownValidMask[None,...]

        rets['npts'] = npts[None,...]

      imgsPath.append(f"{basePath}/{ct0:06d}")
      imgsPath.append(f"{basePath}/{ct1:06d}")

      rets['norm'] = imgs_normal.transpose(0,3,1,2)[None,...]
      rets['rgb'] = imgs_rgb.transpose(0,3,1,2)[None,...]
      rets['semantic'] = imgs_s[None,...]
      rets['depth'] = imgs_depth[None,:,None,:,:]
      rets['Q'] = Q[None,...]
      rets['R'] = R[None,...]
      rets['R_inv'] = R_inv[None,...]
      rets['imgsPath'] = imgsPath

      return rets, True
Exemple #2
0
    def __getitem__(self, index):
        """Load the pair of views selected by ``index`` and pack it into a dict.

        Depth maps and poses are always read for the two frames returned by
        ``self.__getpair__(index)``.  RGB, normals, bird-view images,
        semantic labels, point clouds, dense correspondences and
        reprojections are produced only when the matching instance flag is
        set.  Most array entries carry a leading axis of length 1.

        Args:
            index: sample index forwarded to ``self.__getpair__``.

        Returns:
            dict with the keys ``dataMask``, ``depth``, ``Q``, ``R``,
            ``interval`` and ``imgsPath``, plus ``rgb`` (``self.rgbd``),
            ``norm`` (``self.normal``), ``segm`` (``self.segm``),
            ``denseCorres`` (``self.denseCorres``) and the ``proj_*``
            entries (``self.reproj``).
        """
        rets = {}
        imgs_ = np.zeros((self.nViews, *self.OutputSize[::-1]),
                         dtype=np.float32)
        imgs = np.zeros((self.nViews, self.Inputheight, self.Inputwidth),
                        dtype=np.float32)
        if self.rgbd:
            imgs_rgb = np.zeros(
                (self.nViews, self.Inputheight, self.Inputwidth, 3),
                dtype=np.float32)
            imgs_rgb_ = np.zeros((self.nViews, 3, *self.OutputSize[::-1]),
                                 dtype=np.float32)
        # NOTE(review): hmap, imgs_bv_, pointcloud and AuthenticdepthMap are
        # allocated (and partly filled) below but never stored in ``rets``.
        if self.hmap:
            hmap = np.zeros((self.nViews, 3, 64, 64), dtype=np.float32)
        if self.birdview:
            imgs_bv = np.zeros(
                (self.nViews, self.Inputheight, self.Inputwidth, 3),
                dtype=np.float32)
            imgs_bv_ = np.zeros((self.nViews, 3, *self.OutputSize[::-1]),
                                dtype=np.float32)
        if self.pointcloud:
            pointcloud = np.zeros((self.nViews, 3, self.num_points),
                                  dtype=np.float32)
        R = np.zeros((self.nViews, 4, 4))
        Q = np.zeros((self.nViews, 7))
        assert (self.nViews == 2)
        if self.AuthenticdepthMap:
            AuthenticdepthMap = np.zeros((self.nViews, *self.OutputSize[::-1]),
                                         dtype=np.float32)
        ct0, ct1 = self.__getpair__(index)

        if self.segm:
            segm = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                            dtype=np.float32)
        if self.normal:
            normal = np.zeros(
                (self.nViews, 3, self.Inputheight, self.Inputwidth),
                dtype=np.float32)

        basePath = self.base_this
        frameids = (f"{ct0:06d}", f"{ct1:06d}")

        for v, frameid in enumerate(frameids):
            imgs[v] = self.LoadImage(
                os.path.join(basePath, 'depth',
                             '{}.png'.format(frameid))).copy()
        # A pixel is valid wherever the loaded depth is non-zero.
        # NOTE(review): dataMask is OutputSize-shaped while imgs is
        # Input-shaped; this assignment assumes both sizes agree.
        dataMask = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                            dtype=np.float32)
        dataMask[0, 0, :, :] = (imgs[0] != 0)
        dataMask[1, 0, :, :] = (imgs[1] != 0)
        rets['dataMask'] = dataMask[np.newaxis, :]

        if self.pointcloud:
            for v in (0, 1):
                pc = util.DepthToPointCloud(imgs[v], self.intrinsicUnNorm)
                # Sample num_points rows (with replacement) from the cloud.
                pointcloud[v] = pc[
                    np.random.choice(len(pc), self.num_points), :].T
        if self.birdview:
            for v, frameid in enumerate(frameids):
                imgs_bv[v] = self.LoadImage(os.path.join(
                    basePath, 'BirdView', '{}.birdview.png'.format(frameid)),
                                            depth=False).copy() / 255.
        if self.rgbd:
            for v, frameid in enumerate(frameids):
                imgs_rgb[v] = self.LoadImage(os.path.join(
                    basePath, 'rgb', '{}.png'.format(frameid)),
                                             depth=False).copy() / 255.

        # Pose as a 4x4 matrix (R) and as quaternion + translation (Q).
        for v, frameid in enumerate(frameids):
            R[v] = np.loadtxt(
                os.path.join(basePath, 'pose', frameid + '.pose.txt'))
            Q[v, :4] = rot2Quaternion(R[v][:3, :3])
            Q[v, 4:] = R[v][:3, 3]

        if self.normal:
            for v, frameid in enumerate(frameids):
                # Map the stored 0..255 values to the range [-1, 1].
                normal[v] = self.LoadImage(
                    os.path.join(basePath, 'normal',
                                 '{}.png'.format(frameid)),
                    depth=False).copy().transpose(2, 0, 1) / 255. * 2 - 1
            if self.normal_pyramid:
                a = int(outS(self.height))  #41
                b = int(outS(self.height * 0.5 + 1))  #21
                normal_ = [
                    resize_label_batch(normal.transpose(2, 3, 1, 0),
                                       i).transpose(3, 2, 0, 1)
                    for i in [a, a, b, a]
                ]
                normal_ = [
                    m.reshape(1, self.nViews, 3, m.shape[2], m.shape[3])
                    for m in normal_
                ]
            else:
                normal_ = np.zeros((self.nViews, 3, *self.OutputSize[::-1]),
                                   dtype=np.float32)
                for v in (0, 1):
                    normal_[v] = cv2.resize(
                        normal[v].transpose(1, 2, 0),
                        self.OutputSize,
                        interpolation=cv2.INTER_NEAREST).transpose(2, 0, 1)
                normal_ = normal_[np.newaxis, :]

        if self.denseCorres:
            # get 3d point cloud for each pano
            pcs = self.Pano2PointCloud(
                imgs[0])  # be aware of the order of returned pc!!!
            pct = self.Pano2PointCloud(imgs[1])
            # Apply the inverse pose of each view so both clouds share a frame.
            pct = np.matmul(np.linalg.inv(R[1]),
                            np.concatenate(
                                (pct, np.ones([1, pct.shape[1]]))))[:3, :]
            pcs = np.matmul(np.linalg.inv(R[0]),
                            np.concatenate(
                                (pcs, np.ones([1, pcs.shape[1]]))))[:3, :]
            # find correspondence using kdtree
            tree = KDTree(pct.T)
            # sample 5000 query points
            IdxQuery = np.random.choice(pcs.shape[1], 5000)
            pcsQuery = pcs[:, IdxQuery]
            nearest_dist, nearest_ind = tree.query(pcsQuery.T, k=1)
            hasCorres = (nearest_dist < 0.08)
            idxTgtNeg = []

            idxSrc = self.PanoIdx(IdxQuery[np.where(hasCorres)[0]], 160, 640)
            idxTgt = self.PanoIdx(nearest_ind[hasCorres], 160, 640)
            if hasCorres.sum() < 500:
                # Too few matches: return zero placeholders flagged invalid.
                rets['denseCorres'] = {
                    'idxSrc': np.zeros([1, 2000, 2]),
                    'idxTgt': np.zeros([1, 2000, 2]),
                    'valid': np.array([0]),
                    'idxTgtNeg': idxTgtNeg
                }

            else:
                # only pick 2000 correspondence per pair
                idx2000 = np.random.choice(idxSrc.shape[0], 2000)
                idxSrc = idxSrc[idx2000][np.newaxis, :]
                idxTgt = idxTgt[idx2000][np.newaxis, :]
                rets['denseCorres'] = {
                    'idxSrc': idxSrc,
                    'idxTgt': idxTgt,
                    'valid': np.array([1]),
                    'idxTgtNeg': idxTgtNeg
                }

        # reproject each image into the other image plane
        if self.reproj:
            # NOTE: this branch reads imgs_rgb and normal_ (indexed as an
            # array), so it needs self.rgbd and self.normal to be enabled
            # with self.normal_pyramid disabled.
            h = imgs.shape[1]

            def reproject(src, dst):
                # Project view `src` into view `dst` with the true and a
                # randomly perturbed relative pose.
                colorpct = []
                normalpct = []
                depthpct = []
                for ii in range(4):
                    cols = slice(ii * h, (ii + 1) * h)
                    colorpct.append(imgs_rgb[src, :, cols, :].reshape(-1, 3))
                    normalpct.append(
                        normal_[0, src, :, :, cols].reshape(3, -1))
                    depthpct.append(imgs[src, :, cols].reshape(-1))
                colorpct = np.concatenate(colorpct, 0)
                normalpct = np.concatenate(normalpct, 1)
                depthpct = np.concatenate(depthpct)

                # be aware of the order of returned pc!!!
                pct = self.Pano2PointCloud(imgs[src])
                R_this = np.matmul(R[dst], np.linalg.inv(R[src]))
                # Perturbed copy: small random rotation plus Gaussian shift.
                R_this_p = R_this.copy()
                dR = util.randomRotation(epsilon=0.1)
                R_this_p[:3, :3] = np.matmul(dR, R_this_p[:3, :3])
                R_this_p[:3, 3] += np.random.randn(3) * 0.1
                dr = np.matmul(R_this, np.linalg.inv(R_this_p))

                pct_h = np.concatenate((pct, np.ones([1, pct.shape[1]])))
                pct_reproj = np.matmul(R_this_p, pct_h)[:3, :]
                pct_reproj_org = np.matmul(R_this, pct_h)[:3, :]
                flow = pct_reproj_org - pct_reproj

                # assume always observe the second view(right view)
                face = slice(h * h, h * h * 2)
                colorpct = colorpct[face, :]
                depthpct = depthpct[face]
                normalpct = np.matmul(R_this_p[:3, :3],
                                      normalpct[:, face]).T
                pct_reproj = pct_reproj[:, face]
                pct_reproj_org = pct_reproj_org[:, face]
                flow = flow[:, face].T

                shape = imgs_rgb[0].shape
                rgb = self.reproj_helper(pct_reproj_org, colorpct, shape,
                                         'color')
                rgb_p = self.reproj_helper(pct_reproj, colorpct, shape,
                                           'color')
                n_p = self.reproj_helper(pct_reproj, normalpct, shape,
                                         'normal')
                d_p = self.reproj_helper(pct_reproj, depthpct, shape[:2],
                                         'depth')
                flow_p = self.reproj_helper(pct_reproj, flow, shape, 'color')
                mask_p = (d_p != 0).astype('int')
                return dr, flow_p, rgb, rgb_p, n_p, d_p, mask_p

            def envelope_box(depth):
                # Binary mask spanning the rows/columns with non-zero sums.
                try:
                    tp = np.where(depth.sum(0))[0]
                    w0, w1 = tp[0], tp[-1]
                    tp = np.where(depth.sum(1))[0]
                    h0, h1 = tp[0], tp[-1]
                except IndexError:
                    # Nothing was projected: fall back to the whole image.
                    w0, h0 = 0, 0
                    w1, h1 = depth.shape[1] - 1, depth.shape[0] - 1
                box = np.zeros(depth.shape)
                box[h0:h1, w0:w1] = 1
                return box

            # Second view into the first, then first into the second; the
            # order matters because each call draws random numbers.
            (t2s_dr, t2s_flow_p, t2s_rgb, t2s_rgb_p, t2s_n_p, t2s_d_p,
             t2s_mask_p) = reproject(1, 0)
            (s2t_dr, s2t_flow_p, s2t_rgb, s2t_rgb_p, s2t_n_p, s2t_d_p,
             s2t_mask_p) = reproject(0, 1)

            # compute an envelop box
            t2s_box_p = envelope_box(t2s_d_p)
            s2t_box_p = envelope_box(s2t_d_p)

            rets['proj_dr'] = np.stack((t2s_dr, s2t_dr), 0)[np.newaxis, :]
            rets['proj_flow'] = np.stack((t2s_flow_p, s2t_flow_p),
                                         0).transpose(0, 3, 1,
                                                      2)[np.newaxis, :]
            rets['proj_rgb'] = np.stack((t2s_rgb, s2t_rgb),
                                        0).transpose(0, 3, 1, 2)[np.newaxis, :]
            rets['proj_rgb_p'] = np.stack(
                (t2s_rgb_p, s2t_rgb_p), 0).transpose(0, 3, 1, 2)[np.newaxis, :]
            rets['proj_n_p'] = np.stack((t2s_n_p, s2t_n_p),
                                        0).transpose(0, 3, 1, 2)[np.newaxis, :]
            rets['proj_d_p'] = np.stack((t2s_d_p, s2t_d_p),
                                        0).reshape(1, 2, 1, t2s_d_p.shape[0],
                                                   t2s_d_p.shape[1])
            rets['proj_mask_p'] = np.stack(
                (t2s_mask_p, s2t_mask_p),
                0).reshape(1, 2, 1, t2s_mask_p.shape[0], t2s_mask_p.shape[1])
            rets['proj_box_p'] = np.stack(
                (t2s_box_p, s2t_box_p), 0).reshape(1, 2, 1, t2s_box_p.shape[0],
                                                   t2s_box_p.shape[1])

        if self.segm:
            for v, frameid in enumerate(frameids):
                # Only the first image channel is used as the label.
                segm[v] = (self.LoadImage(
                    os.path.join(basePath, 'semanticLabel',
                                 '{}.png'.format(frameid)),
                    depth=False)[:, :, 0:1].copy()).transpose(2, 0, 1)
            segm_ = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                             dtype=np.float32)
            segm_[0] = segm[0]
            segm_[1] = segm[1]
            segm_ = segm_[np.newaxis, :]

        imgsPath = [f"{basePath}/{frameid}" for frameid in frameids]

        # Resize depth (nearest neighbour) and RGB to the output resolution.
        for v in range(self.nViews):
            imgs_[v] = cv2.resize(imgs[v],
                                  self.OutputSize,
                                  interpolation=cv2.INTER_NEAREST)
            if self.rgbd:
                imgs_rgb_[v] = cv2.resize(imgs_rgb[v],
                                          self.OutputSize).transpose(2, 0, 1)

        imgs_ = imgs_[np.newaxis, :]
        if self.hmap:
            hmap = hmap[np.newaxis, :]
        if self.rgbd:
            imgs_rgb_ = imgs_rgb_[np.newaxis, :]
        if self.birdview:
            imgs_bv_ = imgs_bv_[np.newaxis, :]
        if self.pointcloud:
            pointcloud = pointcloud[np.newaxis, :]
        R = R[np.newaxis, :]
        Q = Q[np.newaxis, :]

        if self.segm:
            rets['segm'] = segm_
        rets['interval'] = self.interval_this
        # normal_ / imgs_rgb_ exist only when the matching flag is set;
        # storing them unconditionally raised UnboundLocalError otherwise.
        if self.normal:
            rets['norm'] = normal_
        if self.rgbd:
            rets['rgb'] = imgs_rgb_
        rets['depth'] = imgs_
        rets['Q'] = Q
        rets['R'] = R
        rets['imgsPath'] = imgsPath

        return rets
Exemple #3
0
    # Redirect file descriptor 1 (stdout) to `logfile`: keep a duplicate of
    # the current descriptor (presumably so it can be restored later), flush
    # pending Python-level output, close fd 1 and open the log file.
    # NOTE(review): this relies on os.open returning the lowest free
    # descriptor, i.e. 1 — assumes fd 0 is still open; confirm.
    old = os.dup(1)
    sys.stdout.flush()
    os.close(1)
    os.open(logfile, os.O_WRONLY)

    # Build the path <SHAPENETPATH>/<CATEGORY>/<MODEL>/models/model_normalized.obj
    # and import that mesh into the Blender scene.
    shape_file = "{2}/{0}/{1}/models/model_normalized.obj".format(
        CATEGORY, MODEL, SHAPENETPATH)
    bpy.ops.import_scene.obj(filepath=shape_file)

    # Mark every material as shadeless.
    for m in bpy.data.materials:
        m.use_shadeless = True

    # Number of random camera poses to generate.
    N = 100
    for i in range(N):
        # uniformly sample rotation angle
        rho, azim, elev, theta = util.randomRotation()
        camPos = util.objectCenteredCamPos(rho, azim, elev)
        # Compose the two camera quaternions into the final orientation q.
        q1 = util.camPosToQuaternion(camPos)
        q2 = util.camRotQuaternion(camPos, theta)
        q = util.quaternionProduct(q2, q1)

        util.setCameraExtrinsics(camera, camPos, q)
        q_extr, t_extr = util.cameraExtrinsicMatrix(q, camPos)

        # for ShapeNetCore.v2 all the objects are rotated 90 degrees
        # comment out this block if ShapeNetCore.v1 is used
        if i == 0:
            # Select everything except the camera, then rotate the selection
            # by -pi/2 about the z axis; done once, on the first iteration.
            for o in bpy.data.objects:
                if o == camera: o.select = False
                else: o.select = True
            bpy.ops.transform.rotate(value=-np.pi / 2, axis=(0, 0, 1))
Exemple #4
0
    def __getitem__(self, index):
        """Load the pair of views selected by ``index`` and pack it into a dict.

        Depth maps and poses are always read for the two frames returned by
        ``self.__getpair__(index)``.  RGB, normals, semantic labels,
        full-size RGB-D, dense correspondences and reprojections are
        produced only when the matching instance flag is set.  Most array
        entries carry a leading axis of length 1.

        Args:
            index: sample index forwarded to ``self.__getpair__``.

        Returns:
            dict with the keys ``dataMask``, ``depth``, ``Q``, ``R``,
            ``interval`` and ``imgsPath``, plus ``rgb`` (``self.rgbd``),
            ``norm`` (``self.normal``), ``segm`` / ``dynamicW``
            (``self.segm`` / ``self.dynamicWeighting``), ``rgb_full`` and
            ``depth_full`` (``self.fullsize_rgbdn``), ``denseCorres``
            (``self.denseCorres``) and the ``proj_*`` entries
            (``self.reproj``).
        """
        rets = {}
        imgs = np.zeros((self.nViews, *self.OutputSize[::-1]),
                        dtype=np.float32)
        if self.rgbd:
            imgs_rgb = np.zeros((self.nViews, *self.OutputSize[::-1], 3),
                                dtype=np.float32)
        if self.segm:
            segm = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                            dtype=np.float32)
            if self.dynamicWeighting:
                # NOTE(review): dynamicW is returned as all zeros; nothing
                # in this method fills it.
                dynamicW = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                                    dtype=np.float32)
        if self.normal:
            normal = np.zeros((self.nViews, *self.OutputSize[::-1], 3),
                              dtype=np.float32)

        R = np.zeros((self.nViews, 4, 4))
        Q = np.zeros((self.nViews, 7))
        assert (self.nViews == 2)
        ct0, ct1 = self.__getpair__(index)
        basePath = self.base_this
        frameids = (f"{ct0:06d}", f"{ct1:06d}")

        if self.fullsize_rgbdn:
            imgs_rgb_full = np.zeros((self.nViews, 480, 640, 3),
                                     dtype=np.float32)
            imgs_full = np.zeros((self.nViews, 480, 640), dtype=np.float32)
            for v, frameid in enumerate(frameids):
                imgs_full[v] = self.LoadImage(
                    os.path.join(basePath, 'obs_depth',
                                 '{}.png'.format(frameid))).copy()
            for v, frameid in enumerate(frameids):
                imgs_rgb_full[v] = self.LoadImage(os.path.join(
                    basePath, 'obs_rgb', '{}.png'.format(frameid)),
                                                  depth=False).copy() / 255.
            rets['rgb_full'] = imgs_rgb_full[np.newaxis, :]
            rets['depth_full'] = imgs_full[np.newaxis, :]

        for v, frameid in enumerate(frameids):
            imgs[v] = self.LoadImage(
                os.path.join(basePath, 'depth',
                             '{}.png'.format(frameid))).copy()
        # A pixel is valid wherever the loaded depth is non-zero.
        dataMask = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                            dtype=np.float32)
        dataMask[0, 0, :, :] = (imgs[0] != 0)
        dataMask[1, 0, :, :] = (imgs[1] != 0)
        rets['dataMask'] = dataMask[np.newaxis, :]

        if self.rgbd:
            for v, frameid in enumerate(frameids):
                imgs_rgb[v] = self.LoadImage(os.path.join(
                    basePath, 'rgb', '{}.png'.format(frameid)),
                                             depth=False).copy() / 255.

        # Pose as a 4x4 matrix (R) and as quaternion + translation (Q).
        for v, frameid in enumerate(frameids):
            R[v] = np.loadtxt(
                os.path.join(basePath, 'pose', frameid + '.pose.txt'))
            Q[v, :4] = rot2Quaternion(R[v][:3, :3])
            Q[v, 4:] = R[v][:3, 3]
        imgsPath = [f"{basePath}/{frameid}" for frameid in frameids]

        if self.normal:
            for v, frameid in enumerate(frameids):
                tp = self.LoadImage(os.path.join(basePath, 'normal',
                                                 '{}.png'.format(frameid)),
                                    depth=False).copy().astype('float')
                # Pixels whose three channels are all zero stay zero; every
                # other pixel is mapped from 0..255 to [-1, 1].
                mask = (tp == 0).sum(2) < 3
                tp[mask] = tp[mask] / 255. * 2 - 1
                normal[v] = tp

        if self.segm:
            for v, frameid in enumerate(frameids):
                # Channel 1 of the image is used as the label.
                tp = (self.LoadImage(os.path.join(basePath, 'semantic_idx',
                                                  '{}.png'.format(frameid)),
                                     depth=False).copy())[:, :, 1]
                segm[v] = tp.reshape(segm[v].shape)

            segm_ = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                             dtype=np.float32)
            segm_[0] = segm[0]
            segm_[1] = segm[1]
            segm_ = segm_[np.newaxis, :]

        if self.denseCorres:
            # get 3d point cloud for each pano
            pcs, masks = self.Pano2PointCloud(
                imgs[0],
                self.representation)  # be aware of the order of returned pc!!!
            pct, maskt = self.Pano2PointCloud(imgs[1], self.representation)

            # Apply the inverse pose of each view so both clouds share a frame.
            pct = np.matmul(np.linalg.inv(R[1]),
                            np.concatenate(
                                (pct, np.ones([1, pct.shape[1]]))))[:3, :]
            pcs = np.matmul(np.linalg.inv(R[0]),
                            np.concatenate(
                                (pcs, np.ones([1, pcs.shape[1]]))))[:3, :]
            # find correspondence using kdtree
            tree = KDTree(pct.T)
            # sample 5000 query points
            IdxQuery = np.random.choice(pcs.shape[1], 5000)
            pcsQuery = pcs[:, IdxQuery]
            nearest_dist, nearest_ind = tree.query(pcsQuery.T, k=1)
            hasCorres = (nearest_dist < 0.08)
            idxTgtNeg = []
            idxSrc = self.PanoIdx(masks[IdxQuery[np.where(hasCorres)[0]]],
                                  imgs.shape[1], imgs.shape[2],
                                  self.representation)
            idxTgt = self.PanoIdx(maskt[nearest_ind[hasCorres]], imgs.shape[1],
                                  imgs.shape[2], self.representation)

            if hasCorres.sum() < 200:
                # Too few matches: return zero placeholders flagged invalid.
                rets['denseCorres'] = {
                    'idxSrc': np.zeros([1, 500, 2]),
                    'idxTgt': np.zeros([1, 500, 2]),
                    'valid': np.array([0]),
                    'idxTgtNeg': idxTgtNeg
                }

            else:
                # only pick 500 correspondence per pair
                idx500 = np.random.choice(idxSrc.shape[0], 500)
                idxSrc = idxSrc[idx500][np.newaxis, :]
                idxTgt = idxTgt[idx500][np.newaxis, :]

                rets['denseCorres'] = {
                    'idxSrc': idxSrc,
                    'idxTgt': idxTgt,
                    'valid': np.array([1]),
                    'idxTgtNeg': idxTgtNeg
                }

        # reproject each image into the other image plane
        if self.reproj:
            # NOTE: this branch reads imgs_rgb and normal, so it needs
            # self.rgbd and self.normal to be enabled.
            assert (imgs.shape[1] == 160 and imgs.shape[2] == 640)

            # Fixed 66x88 window inside the 160x640 image.
            # NOTE(review): presumably the observed region of the second
            # 160-wide panel — confirm against the data layout.
            rows = slice(80 - 33, 80 + 33)
            cols = slice(160 + 80 - 44, 160 + 80 + 44)

            def reproject(src, dst):
                # Project view `src` into view `dst` with the true and a
                # randomly perturbed relative pose.
                # be aware of the order of returned pc!!!
                pct, mask = util.depth2pc(imgs[src, rows, cols], 'scannet')

                colorpct = imgs_rgb[src, rows, cols, :].reshape(-1, 3)[mask]
                normalpct = normal[src, rows, cols, :].reshape(-1, 3)[mask]
                depthpct = imgs[src, rows, cols].reshape(-1)[mask]

                R_this = np.matmul(R[dst], np.linalg.inv(R[src]))
                # Perturbed copy: small random rotation plus Gaussian shift.
                R_this_p = R_this.copy()
                dR = util.randomRotation(epsilon=0.1)
                R_this_p[:3, :3] = np.matmul(dR, R_this_p[:3, :3])
                R_this_p[:3, 3] += np.random.randn(3) * 0.1
                dr = np.matmul(R_this, np.linalg.inv(R_this_p))

                pct_h = np.concatenate((pct.T, np.ones([1, pct.shape[0]])))
                pct_reproj = np.matmul(R_this_p, pct_h)[:3, :]
                pct_reproj_org = np.matmul(R_this, pct_h)[:3, :]
                flow = (pct_reproj_org - pct_reproj).T
                normalpct = np.matmul(R_this_p[:3, :3], normalpct.T).T

                shape = imgs_rgb[0].shape
                rgb = self.reproj_helper(pct_reproj_org, colorpct, shape,
                                         'color')
                rgb_p = self.reproj_helper(pct_reproj, colorpct, shape,
                                           'color')
                n_p = self.reproj_helper(pct_reproj, normalpct, shape,
                                         'normal')
                d_p = self.reproj_helper(pct_reproj, depthpct, shape[:2],
                                         'depth')
                flow_p = self.reproj_helper(pct_reproj, flow, shape, 'color')
                mask_p = (d_p != 0).astype('int')
                return dr, flow_p, rgb, rgb_p, n_p, d_p, mask_p

            def envelope_box(depth):
                # Binary mask spanning the rows/columns with non-zero sums.
                try:
                    tp = np.where(depth.sum(0))[0]
                    w0, w1 = tp[0], tp[-1]
                    tp = np.where(depth.sum(1))[0]
                    h0, h1 = tp[0], tp[-1]
                except IndexError:
                    # Nothing was projected: fall back to the whole image.
                    w0, h0 = 0, 0
                    w1, h1 = depth.shape[1] - 1, depth.shape[0] - 1
                box = np.zeros(depth.shape)
                box[h0:h1, w0:w1] = 1
                return box

            # Second view into the first, then first into the second; the
            # order matters because each call draws random numbers.
            (t2s_dr, t2s_flow_p, t2s_rgb, t2s_rgb_p, t2s_n_p, t2s_d_p,
             t2s_mask_p) = reproject(1, 0)
            (s2t_dr, s2t_flow_p, s2t_rgb, s2t_rgb_p, s2t_n_p, s2t_d_p,
             s2t_mask_p) = reproject(0, 1)

            # compute an envelop box
            t2s_box_p = envelope_box(t2s_d_p)
            s2t_box_p = envelope_box(s2t_d_p)

            rets['proj_dr'] = np.stack((t2s_dr, s2t_dr), 0)[np.newaxis, :]
            rets['proj_flow'] = np.stack((t2s_flow_p, s2t_flow_p),
                                         0).transpose(0, 3, 1,
                                                      2)[np.newaxis, :]
            rets['proj_rgb'] = np.stack((t2s_rgb, s2t_rgb),
                                        0).transpose(0, 3, 1, 2)[np.newaxis, :]
            rets['proj_rgb_p'] = np.stack(
                (t2s_rgb_p, s2t_rgb_p), 0).transpose(0, 3, 1, 2)[np.newaxis, :]
            rets['proj_n_p'] = np.stack((t2s_n_p, s2t_n_p),
                                        0).transpose(0, 3, 1, 2)[np.newaxis, :]
            rets['proj_d_p'] = np.stack((t2s_d_p, s2t_d_p),
                                        0).reshape(1, 2, 1, t2s_d_p.shape[0],
                                                   t2s_d_p.shape[1])
            rets['proj_mask_p'] = np.stack(
                (t2s_mask_p, s2t_mask_p),
                0).reshape(1, 2, 1, t2s_mask_p.shape[0], t2s_mask_p.shape[1])
            rets['proj_box_p'] = np.stack(
                (t2s_box_p, s2t_box_p), 0).reshape(1, 2, 1, t2s_box_p.shape[0],
                                                   t2s_box_p.shape[1])

        imgs = imgs[np.newaxis, :]
        if self.rgbd:
            # Move the channel axis ahead of the two image axes.
            imgs_rgb = imgs_rgb[np.newaxis, :].transpose(0, 1, 4, 2, 3)
        if self.normal:
            normal = normal[np.newaxis, :].transpose(0, 1, 4, 2, 3)
        R = R[np.newaxis, :]
        Q = Q[np.newaxis, :]
        if self.segm:
            rets['segm'] = segm_
            if self.dynamicWeighting:
                rets['dynamicW'] = dynamicW[np.newaxis, :]
        rets['interval'] = self.interval_this
        # normal / imgs_rgb exist only when the matching flag is set;
        # storing them unconditionally raised UnboundLocalError otherwise.
        if self.normal:
            rets['norm'] = normal
        if self.rgbd:
            rets['rgb'] = imgs_rgb
        rets['depth'] = imgs
        rets['Q'] = Q
        rets['R'] = R
        rets['imgsPath'] = imgsPath
        return rets
Exemple #5
0
    def __getitem__(self, index):
        """Load one two-view sample and pack every enabled modality into a dict.

        The frame pair comes from ``self.__getpair__(index)`` and every file is
        read below ``self.base_this``. Depth, validity mask, poses (``R``),
        quaternion + translation (``Q``), frame paths and the interval are
        always produced. Full-size images, segmentation, dense
        correspondences, sampled point clouds, plane parameters and perturbed
        reprojections are added according to the ``self.*`` feature flags.

        Note: ``rets['rgb']`` / ``rets['norm']`` and the reprojection branch
        read ``imgs_rgb`` and ``normal`` unconditionally, so this method only
        works when ``self.rgbd`` and ``self.normal`` are both enabled.

        Args:
            index: sample index, forwarded to ``self.__getpair__``.

        Returns:
            dict mapping a modality name to its data; arrays carry an extra
            leading axis of size 1.

        Raises:
            FileNotFoundError: ``self.plane_r`` is set but the per-scene plane
                file does not exist.
        """
        rets = {}
        imgs = np.zeros((self.nViews, *self.OutputSize[::-1]),
                        dtype=np.float32)
        if self.rgbd:
            imgs_rgb = np.zeros((self.nViews, *self.OutputSize[::-1], 3),
                                dtype=np.float32)
        if self.segm:
            segm = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                            dtype=np.float32)
            if self.dynamicWeighting:
                dynamicW = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                                    dtype=np.float32)
        if self.normal:
            normal = np.zeros((self.nViews, *self.OutputSize[::-1], 3),
                              dtype=np.float32)
        if self.pointcloud:
            pointcloud = np.zeros(
                (self.nViews, 3 + 3 + 3 + 1, self.num_points),
                dtype=np.float32)
            pointcloud_flow = np.zeros((self.nViews, 3, self.num_points),
                                       dtype=np.float32)

        R = np.zeros((self.nViews, 4, 4))
        Q = np.zeros((self.nViews, 7))
        assert (self.nViews == 2)
        ct0, ct1 = self.__getpair__(index)
        basePath = self.base_this
        # Zero-padded frame ids, one per view, used for every file name below.
        frameids = (f"{ct0:06d}", f"{ct1:06d}")
        imgsPath = [f"{basePath}/{frameid}" for frameid in frameids]

        if self.fullsize_rgbdn:
            imgs_rgb_full = np.zeros((self.nViews, 480, 640, 3),
                                     dtype=np.float32)
            imgs_full = np.zeros((self.nViews, 480, 640), dtype=np.float32)
            for view, frameid in enumerate(frameids):
                imgs_full[view] = self.LoadImage(
                    os.path.join(basePath, 'obs_depth',
                                 f'{frameid}.png')).copy()
                imgs_rgb_full[view] = self.LoadImage(
                    os.path.join(basePath, 'obs_rgb', f'{frameid}.png'),
                    depth=False).copy() / 255.
            rets['rgb_full'] = imgs_rgb_full[np.newaxis, :]
            rets['depth_full'] = imgs_full[np.newaxis, :]

        for view, frameid in enumerate(frameids):
            imgs[view] = self.LoadImage(
                os.path.join(basePath, 'depth', f'{frameid}.png')).copy()
        # A pixel is valid wherever its depth is non-zero.
        dataMask = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                            dtype=np.float32)
        dataMask[:, 0, :, :] = (imgs != 0)
        rets['dataMask'] = dataMask[np.newaxis, :]

        if self.rgbd:
            for view, frameid in enumerate(frameids):
                imgs_rgb[view] = self.LoadImage(
                    os.path.join(basePath, 'rgb', f'{frameid}.png'),
                    depth=False).copy() / 255.

        # Poses may live under the 'ScanNet' tree even when images are read
        # from 'ScanNet_360'.
        if self.scannet_new_name:
            tmp_basePath = basePath.replace('ScanNet_360', 'ScanNet')
        else:
            tmp_basePath = basePath

        for view, frameid in enumerate(frameids):
            R[view] = np.loadtxt(
                os.path.join(tmp_basePath, 'pose', frameid + '.pose.txt'))
            # Q packs the rotation quaternion (4) followed by translation (3).
            Q[view, :4] = rot2Quaternion(R[view][:3, :3])
            Q[view, 4:] = R[view][:3, 3]

        if self.normal:
            for view, frameid in enumerate(frameids):
                tp = self.LoadImage(
                    os.path.join(basePath, 'normal', f'{frameid}.png'),
                    depth=False).copy().astype('float')
                # Pixels whose three channels are all zero stay zero; the
                # others are rescaled from [0, 255] to [-1, 1].
                mask = (tp == 0).sum(2) < 3
                tp[mask] = tp[mask] / 255. * 2 - 1
                normal[view] = tp

        if self.segm:
            for view, frameid in enumerate(frameids):
                # The label is taken from channel 1 of the index image.
                tp = (self.LoadImage(
                    os.path.join(basePath, 'semantic_idx', f'{frameid}.png'),
                    depth=False).copy())[:, :, 1]
                segm[view] = tp.reshape(segm[view].shape)
            segm_ = segm[np.newaxis, :]

        if self.denseCorres:
            # get 3d point cloud for each pano
            pcs, masks = self.Pano2PointCloud(
                imgs[0],
                self.representation)  # be aware of the order of returned pc!!!
            pct, maskt = self.Pano2PointCloud(imgs[1], self.representation)
            # Map both clouds through the inverse of their own pose so they
            # share one frame.
            pct = np.matmul(np.linalg.inv(R[1]),
                            np.concatenate(
                                (pct, np.ones([1, pct.shape[1]]))))[:3, :]
            pcs = np.matmul(np.linalg.inv(R[0]),
                            np.concatenate(
                                (pcs, np.ones([1, pcs.shape[1]]))))[:3, :]
            # find correspondence using kdtree
            tree = KDTree(pct.T)
            # sample 5000 query points (with replacement)
            IdxQuery = np.random.choice(range(pcs.shape[1]), 5000)
            pcsQuery = pcs[:, IdxQuery]
            nearest_dist, nearest_ind = tree.query(pcsQuery.T, k=1)
            hasCorres = (nearest_dist < 0.08)
            idxTgtNeg = []
            idxSrc = self.PanoIdx(masks[IdxQuery[np.where(hasCorres)[0]]],
                                  imgs.shape[1], imgs.shape[2],
                                  self.representation)
            idxTgt = self.PanoIdx(maskt[nearest_ind[hasCorres]], imgs.shape[1],
                                  imgs.shape[2], self.representation)

            if hasCorres.sum() < 200:
                # Too few matches: emit zero placeholders flagged invalid.
                rets['denseCorres'] = {
                    'idxSrc': np.zeros([1, 500, 2]),
                    'idxTgt': np.zeros([1, 500, 2]),
                    'valid': np.array([0]),
                    'idxTgtNeg': idxTgtNeg
                }
            else:
                # only pick 500 correspondences per pair (with replacement)
                idx500 = np.random.choice(range(idxSrc.shape[0]), 500)
                idxSrc = idxSrc[idx500][np.newaxis, :]
                idxTgt = idxTgt[idx500][np.newaxis, :]

                rets['denseCorres'] = {
                    'idxSrc': idxSrc,
                    'idxTgt': idxTgt,
                    'valid': np.array([1]),
                    'idxTgtNeg': idxTgtNeg
                }

        # Pixel coordinates of the sampled points; filled below but not
        # currently returned.
        imgPCid = np.zeros([2, self.num_points, 2])

        if self.pointcloud:
            try:
                for view in range(self.nViews):
                    # Only columns 160:320 of each view are sampled.
                    pc = self.depth2pc(imgs[view][:, 160:160 * 2])
                    idx_s = np.random.choice(range(len(pc)), self.num_points)
                    imgPCid[view] = np.stack((idx_s % 160, idx_s // 160)).T
                    pointcloud[view, :3, :] = pc[idx_s, :].T

                    pc_n = normal[view][:, 160:160 * 2, :].reshape(-1, 3)
                    pointcloud[view, 3:6, :] = pc_n[idx_s, :].T

                    # Colour channels are stored in reversed order. The
                    # original code skipped this assignment for view 1, which
                    # left its colour rows at zero.
                    pc_c = imgs_rgb[view, :, 160:160 * 2, :].reshape(-1, 3)
                    pointcloud[view, 6:9, :] = pc_c[idx_s, ::-1].T
            except Exception:
                # Best effort: fall back to an all-zero point cloud.
                pointcloud = np.zeros(
                    (self.nViews, 3 + 3 + 3 + 1, self.num_points),
                    dtype=np.float32)
                pointcloud_flow = np.zeros((self.nViews, 3, self.num_points),
                                           dtype=np.float32)
                print("this pair does not contain point cloud!")

        if self.plane_r:
            scene_id = basePath.split('/')[-1]

            plane_file = '/media/yzp12/wdblue/2020_CVPR_Hybrid/data/ScanNet_plane/train/' + scene_id + '.npy'
            if not os.path.exists(plane_file):
                # Fail loudly instead of dropping into a debugger, which
                # hangs non-interactive runs and later hit an unbound name.
                raise FileNotFoundError(
                    f"Missing plane data: {plane_file}")

            plane_eq_raw = np.load(plane_file)
            if plane_eq_raw.shape[0] < 6:
                # Duplicate the rows so the [-6] lookup below has more rows
                # to index.
                plane_eq_raw = np.concatenate([plane_eq_raw, plane_eq_raw],
                                              axis=0)
            MAX_PLANE = 10
            # Keep the MAX_PLANE rows with the largest value in column 7,
            # then drop rows below half of the 6th-largest value.
            plane_idx = np.argsort(plane_eq_raw[:, 7])
            plane_eq_raw = plane_eq_raw[plane_idx[-MAX_PLANE:]]
            truncate_num = plane_eq_raw[-6, 7] / 2
            plane_eq_raw = plane_eq_raw[plane_eq_raw[:, 7] > truncate_num]

            if plane_eq_raw.shape[0] < MAX_PLANE:
                # Zero-pad to a fixed MAX_PLANE rows; remember the real count.
                valid_plane = plane_eq_raw.shape[0]
                plane_eq_raw = np.concatenate(
                    (plane_eq_raw,
                     np.zeros([
                         MAX_PLANE - plane_eq_raw.shape[0],
                         plane_eq_raw.shape[-1]
                     ])))
            else:
                valid_plane = MAX_PLANE

            # Columns 3:7 hold the plane equation, columns 0:3 the centre.
            plane_eq = plane_eq_raw[:, 3:7]
            plane_eq = np.matmul(plane_eq, np.linalg.inv(R[0]))
            plane_center = plane_eq_raw[:, :3]
            plane_center = (np.matmul(R[0][:3, :3], plane_center.T) +
                            R[0][:3, 3:4]).T

        if self.plane_m:
            scene_id = basePath.split('/')[-1]

            plane_file = '/media/yzp12/wdblue/2020_CVPR_Hybrid/data/ScanNet_manual_plane/%s/' % self.split + scene_id + '.npy'

            plane_raw = np.load(plane_file, allow_pickle=True)

            plane_center = plane_raw[:, :3]
            plane_center = (np.matmul(R[0][:3, :3], plane_center.T) +
                            R[0][:3, 3:4]).T

            plane_normal = plane_raw[:, 3:6]
            plane_normal = np.matmul(plane_normal, np.linalg.inv(R[0][:3, :3]))

            rets['plane_c'] = plane_center[np.newaxis, :]
            rets['plane_n'] = plane_normal[np.newaxis, :]
            rets['plane_raw'] = plane_raw[np.newaxis, :]

        # Reproject each view into the other one under a perturbed pose.
        if self.reproj:
            assert (imgs.shape[1] == 160 and imgs.shape[2] == 640)

            # The t2s call must stay first so the random draws keep their
            # original order.
            (t2s_dr, t2s_rgb, t2s_rgb_p, t2s_n_p, t2s_d_p, t2s_flow_p,
             t2s_mask_p) = self._reproject_view(imgs, imgs_rgb, normal, R,
                                                src=1, dst=0)
            (s2t_dr, s2t_rgb, s2t_rgb_p, s2t_n_p, s2t_d_p, s2t_flow_p,
             s2t_mask_p) = self._reproject_view(imgs, imgs_rgb, normal, R,
                                                src=0, dst=1)

            # compute an envelope box around each reprojected depth map
            t2s_box_p = self._envelope_box(t2s_d_p)
            s2t_box_p = self._envelope_box(s2t_d_p)

            rets['proj_dr'] = np.stack((t2s_dr, s2t_dr), 0)[np.newaxis, :]
            rets['proj_flow'] = np.stack((t2s_flow_p, s2t_flow_p),
                                         0).transpose(0, 3, 1,
                                                      2)[np.newaxis, :]
            rets['proj_rgb'] = np.stack((t2s_rgb, s2t_rgb),
                                        0).transpose(0, 3, 1, 2)[np.newaxis, :]
            rets['proj_rgb_p'] = np.stack(
                (t2s_rgb_p, s2t_rgb_p), 0).transpose(0, 3, 1, 2)[np.newaxis, :]
            rets['proj_n_p'] = np.stack((t2s_n_p, s2t_n_p),
                                        0).transpose(0, 3, 1, 2)[np.newaxis, :]
            rets['proj_d_p'] = np.stack((t2s_d_p, s2t_d_p),
                                        0).reshape(1, 2, 1, t2s_d_p.shape[0],
                                                   t2s_d_p.shape[1])
            rets['proj_mask_p'] = np.stack(
                (t2s_mask_p, s2t_mask_p),
                0).reshape(1, 2, 1, t2s_mask_p.shape[0], t2s_mask_p.shape[1])
            rets['proj_box_p'] = np.stack(
                (t2s_box_p, s2t_box_p), 0).reshape(1, 2, 1, t2s_box_p.shape[0],
                                                   t2s_box_p.shape[1])

        # Add the leading axis; image-like arrays go channels-first.
        imgs = imgs[np.newaxis, :]
        if self.rgbd:
            imgs_rgb = imgs_rgb[np.newaxis, :].transpose(0, 1, 4, 2, 3)
        if self.normal:
            normal = normal[np.newaxis, :].transpose(0, 1, 4, 2, 3)
        R = R[np.newaxis, :]
        Q = Q[np.newaxis, :]
        if self.segm:
            rets['segm'] = segm_
            if self.dynamicWeighting:
                rets['dynamicW'] = dynamicW[np.newaxis, :]

        if self.pointcloud:
            pointcloud = pointcloud[np.newaxis, :]
            pointcloud_flow = pointcloud_flow[np.newaxis, :]
            rets['pointcloud'] = pointcloud
            rets['pointcloud_flow'] = pointcloud_flow

        if self.plane_r:
            rets['plane'] = plane_eq[np.newaxis, :]
            rets['plane_raw'] = plane_eq_raw[np.newaxis, :]
            rets['plane_c'] = plane_center[np.newaxis, :]
            rets['valid_plane'] = valid_plane

        rets['interval'] = self.interval_this
        rets['norm'] = normal
        rets['rgb'] = imgs_rgb
        rets['depth'] = imgs
        rets['Q'] = Q
        rets['R'] = R
        rets['imgsPath'] = imgsPath
        return rets

    def _reproject_view(self, imgs, imgs_rgb, normal, R, src, dst):
        """Reproject a crop of view ``src`` into view ``dst`` under a noisy pose.

        The crop is rows 80 +/- 33 and columns 160 + 80 +/- 44 of view
        ``src``. Its points are moved with the relative pose
        ``R[dst] @ inv(R[src])`` and with a perturbed copy of that pose
        (rotation from ``util.randomRotation(epsilon=0.1)``, translation
        noise ``np.random.randn(3) * 0.1``), then rendered via
        ``self.reproj_helper``.

        Returns:
            Tuple ``(dr, rgb, rgb_p, n_p, d_p, flow_p, mask_p)``: ``dr`` is
            the true pose times the inverse perturbed pose, ``rgb`` uses the
            true pose, every ``*_p`` output uses the perturbed pose, and
            ``mask_p`` marks non-zero reprojected depth.
        """
        rows = slice(80 - 33, 80 + 33)
        cols = slice(160 + 80 - 44, 160 + 80 + 44)

        pct, mask = util.depth2pc(
            imgs[src, rows, cols],
            'scannet')  # be aware of the order of returned pc!!!
        colorpct = imgs_rgb[src, rows, cols, :].reshape(-1, 3)[mask]
        normalpct = normal[src, rows, cols, :].reshape(-1, 3)[mask]
        depthpct = imgs[src, rows, cols].reshape(-1)[mask]

        R_this = np.matmul(R[dst], np.linalg.inv(R[src]))
        R_this_p = R_this.copy()
        dR = util.randomRotation(epsilon=0.1)
        R_this_p[:3, :3] = np.matmul(dR, R_this_p[:3, :3])
        R_this_p[:3, 3] += np.random.randn(3) * 0.1
        dr = np.matmul(R_this, np.linalg.inv(R_this_p))

        # Homogeneous coordinates, shared by both transforms.
        pct_h = np.concatenate((pct.T, np.ones([1, pct.shape[0]])))
        pct_reproj = np.matmul(R_this_p, pct_h)[:3, :]
        pct_reproj_org = np.matmul(R_this, pct_h)[:3, :]
        # Per-point offset between true and perturbed positions.
        flow = (pct_reproj_org - pct_reproj).T
        normalpct = np.matmul(R_this_p[:3, :3], normalpct.T).T

        out_shape = imgs_rgb[0].shape
        rgb = self.reproj_helper(pct_reproj_org, colorpct, out_shape, 'color')
        rgb_p = self.reproj_helper(pct_reproj, colorpct, out_shape, 'color')
        n_p = self.reproj_helper(pct_reproj, normalpct, out_shape, 'normal')
        d_p = self.reproj_helper(pct_reproj, depthpct, out_shape[:2], 'depth')
        flow_p = self.reproj_helper(pct_reproj, flow, out_shape, 'color')
        mask_p = (d_p != 0).astype('int')
        return dr, rgb, rgb_p, n_p, d_p, flow_p, mask_p

    @staticmethod
    def _envelope_box(depth):
        """Return a 0/1 map filled over the bounding box of non-empty depth.

        The box spans the first to last column and row whose depth sum is
        non-zero. When there is none, the full-image extent is used. The fill
        uses end-exclusive slices, as in the original code, so the last row
        and column of the box are left at zero.
        """
        try:
            cols = np.where(depth.sum(0))[0]
            w0, w1 = cols[0], cols[-1]
            rows = np.where(depth.sum(1))[0]
            h0, h1 = rows[0], rows[-1]
        except IndexError:
            # No non-zero column/row: fall back to the whole image.
            w0, h0 = 0, 0
            w1, h1 = depth.shape[1] - 1, depth.shape[0] - 1
        box = np.zeros(depth.shape)
        box[h0:h1, w0:w1] = 1
        return box
Exemple #6
0
    def __getitem__(self, index):
        import ipdb
        ipdb.set_trace()
        rets = {}
        imgs_ = np.zeros((self.nViews, *self.OutputSize[::-1]),
                         dtype=np.float32)
        imgs = np.zeros((self.nViews, self.Inputheight, self.Inputwidth),
                        dtype=np.float32)
        if self.rgbd:
            imgs_rgb = np.zeros(
                (self.nViews, self.Inputheight, self.Inputwidth, 3),
                dtype=np.float32)
            imgs_rgb_ = np.zeros((self.nViews, 3, *self.OutputSize[::-1]),
                                 dtype=np.float32)
        if self.segm:
            segm = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                            dtype=np.float32)
        if self.normal:
            normal = np.zeros(
                (self.nViews, 3, self.Inputheight, self.Inputwidth),
                dtype=np.float32)

        R = np.zeros((self.nViews, 4, 4))
        Q = np.zeros((self.nViews, 7))
        assert (self.nViews == 2)
        ct0, ct1 = self.__getpair__(index)
        imgsPath = []
        basePath = self.base_this
        frameid0 = f"{ct0:06d}"
        frameid1 = f"{ct1:06d}"
        imgs[0] = self.LoadImage(
            os.path.join(basePath, 'depth', '{}.png'.format(frameid0))).copy()
        imgs[1] = self.LoadImage(
            os.path.join(basePath, 'depth', '{}.png'.format(frameid1))).copy()
        dataMask = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                            dtype=np.float32)
        dataMask[0, 0, :, :] = (imgs[0] != 0)
        dataMask[1, 0, :, :] = (imgs[1] != 0)
        rets['dataMask'] = dataMask[np.newaxis, :]
        if self.rgbd:
            imgs_rgb[0] = self.LoadImage(os.path.join(
                basePath, 'rgb', '{}.png'.format(frameid0)),
                                         depth=False).copy() / 255.
            imgs_rgb[1] = self.LoadImage(os.path.join(
                basePath, 'rgb', '{}.png'.format(frameid1)),
                                         depth=False).copy() / 255.
        R[0] = np.loadtxt(
            os.path.join(basePath, 'pose', frameid0 + '.pose.txt'))
        R[1] = np.loadtxt(
            os.path.join(basePath, 'pose', frameid1 + '.pose.txt'))
        Q[0, :4] = rot2Quaternion(R[0][:3, :3])
        Q[0, 4:] = R[0][:3, 3]
        Q[1, :4] = rot2Quaternion(R[1][:3, :3])
        Q[1, 4:] = R[1][:3, 3]
        imgsPath.append(f"{basePath}/{ct0:06d}")
        imgsPath.append(f"{basePath}/{ct1:06d}")

        if self.normal:
            tp = self.LoadImage(os.path.join(basePath, 'normal',
                                             '{}.png'.format(frameid0)),
                                depth=False).copy().astype('float')
            mask = (tp == 0).sum(2) < 3
            tp[mask] = tp[mask] / 255. * 2 - 1
            normal[0] = tp.transpose(2, 0, 1)
            tp = self.LoadImage(os.path.join(basePath, 'normal',
                                             '{}.png'.format(frameid1)),
                                depth=False).copy().astype('float')
            mask = (tp == 0).sum(2) < 3
            tp[mask] = tp[mask] / 255. * 2 - 1
            normal[1] = tp.transpose(2, 0, 1)

            normal_ = np.zeros((self.nViews, 3, *self.OutputSize[::-1]),
                               dtype=np.float32)
            normal_[0] = cv2.resize(normal[0].transpose(1, 2, 0),
                                    self.OutputSize,
                                    interpolation=cv2.INTER_NEAREST).transpose(
                                        2, 0, 1)
            normal_[1] = cv2.resize(normal[1].transpose(1, 2, 0),
                                    self.OutputSize,
                                    interpolation=cv2.INTER_NEAREST).transpose(
                                        2, 0, 1)
            normal_ = normal_[np.newaxis, :]

        if self.segm:
            segm_ = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                             dtype=np.float32)
            tp = (self.LoadImage(os.path.join(basePath, 'semanticLabel',
                                              '{}.png'.format(frameid0)),
                                 depth=False)[:, :, 0].copy())
            segm[0] = tp.reshape(segm[0].shape)
            tp = (self.LoadImage(os.path.join(basePath, 'semanticLabel',
                                              '{}.png'.format(frameid1)),
                                 depth=False)[:, :, 0].copy())
            segm[1] = tp.reshape(segm[1].shape)
            segm_[0] = segm[0]
            segm_[1] = segm[1]
            # truncate semantic class
            segm_[segm_ >= self.snumclass] = 0
            segm_ = segm_[np.newaxis, :]

        if self.denseCorres:
            # get 3d point cloud for each pano

            pcs, masks = self.Pano2PointCloud(
                imgs[0])  # be aware of the order of returned pc!!!
            pct, maskt = self.Pano2PointCloud(imgs[1])

            #pct = np.matmul(R[0],np.matmul(np.linalg.inv(R[1]),np.concatenate((pct,np.ones([1,pct.shape[1]])))))[:3,:]
            pct = np.matmul(np.linalg.inv(R[1]),
                            np.concatenate(
                                (pct, np.ones([1, pct.shape[1]]))))[:3, :]
            pcs = np.matmul(np.linalg.inv(R[0]),
                            np.concatenate(
                                (pcs, np.ones([1, pcs.shape[1]]))))[:3, :]
            # find correspondence using kdtree
            tree = KDTree(pct.T)
            IdxQuery = np.random.choice(range(pcs.shape[1]), 5000)
            # sample 5000 query points
            pcsQuery = pcs[:, IdxQuery]
            nearest_dist, nearest_ind = tree.query(pcsQuery.T, k=1)
            hasCorres = (nearest_dist < 0.08)

            idxTgtNeg = []

            idxSrc = self.PanoIdx(masks[IdxQuery[np.where(hasCorres)[0]]], 160,
                                  640)
            idxTgt = self.PanoIdx(maskt[nearest_ind[hasCorres]], 160, 640)

            if hasCorres.sum() < 500:
                rets['denseCorres'] = {
                    'idxSrc': np.zeros([1, 2000, 2]),
                    'idxTgt': np.zeros([1, 2000, 2]),
                    'valid': np.array([0]),
                    'idxTgtNeg': idxTgtNeg
                }

            else:
                # only pick 2000 correspondence per pair
                idx2000 = np.random.choice(range(idxSrc.shape[0]), 2000)
                idxSrc = idxSrc[idx2000][np.newaxis, :]
                idxTgt = idxTgt[idx2000][np.newaxis, :]

                rets['denseCorres'] = {
                    'idxSrc': idxSrc,
                    'idxTgt': idxTgt,
                    'valid': np.array([1]),
                    'idxTgtNeg': idxTgtNeg
                }

        if self.reproj:
            h = imgs.shape[1]
            pct, mask = util.depth2pc(
                imgs[1, :, 160:160 * 2],
                'matterport')  # be aware of the order of returned pc!!!
            ii = 1
            colorpct = imgs_rgb[1, :,
                                ii * h:(ii + 1) * h, :].reshape(-1, 3)[mask, :]
            normalpct = normal_[0, 1, :, :,
                                ii * h:(ii + 1) * h].reshape(3, -1).T[mask, :]
            depthpct = imgs[1, :, ii * h:(ii + 1) * h].reshape(-1)[mask]
            # get the coordinates of each point in the first coordinate system

            R_this = np.matmul(R[0], np.linalg.inv(R[1]))
            R_this_p = R_this.copy()
            dR = util.randomRotation(epsilon=0.1)
            dRangle = angular_distance_np(dR[np.newaxis, :],
                                          np.eye(3)[np.newaxis, :])[0]

            R_this_p[:3, :3] = np.matmul(dR, R_this_p[:3, :3])
            R_this_p[:3, 3] += np.random.randn(3) * 0.1

            t2s_dr = np.matmul(R_this, np.linalg.inv(R_this_p))

            pct_reproj = np.matmul(
                R_this_p, np.concatenate(
                    (pct.T, np.ones([1, pct.shape[0]]))))[:3, :]
            pct_reproj_org = np.matmul(
                R_this, np.concatenate(
                    (pct.T, np.ones([1, pct.shape[0]]))))[:3, :]
            flow = pct_reproj_org - pct_reproj

            normalpct = np.matmul(R_this_p[:3, :3], normalpct.T).T
            flow = flow.T

            t2s_rgb = self.reproj_helper(pct_reproj_org, colorpct,
                                         imgs_rgb[0].shape, 'color')
            t2s_rgb_p = self.reproj_helper(pct_reproj, colorpct,
                                           imgs_rgb[0].shape, 'color')
            t2s_n_p = self.reproj_helper(pct_reproj, normalpct,
                                         imgs_rgb[0].shape, 'normal')
            t2s_d_p = self.reproj_helper(pct_reproj, depthpct,
                                         imgs_rgb[0].shape[:2], 'depth')

            t2s_flow_p = self.reproj_helper(pct_reproj, flow,
                                            imgs_rgb[0].shape, 'color')
            t2s_mask_p = (t2s_d_p != 0).astype('int')

            pct, mask = util.depth2pc(
                imgs[0, :, 160:160 * 2],
                'matterport')  # be aware of the order of returned pc!!!
            colorpct = imgs_rgb[0, :, ii * h:(ii + 1) * h, :].reshape(-1,
                                                                      3)[mask]
            normalpct = normal_[0, 0, :, :,
                                ii * h:(ii + 1) * h].reshape(3, -1).T[mask]
            depthpct = imgs[0, :, ii * h:(ii + 1) * h].reshape(-1)[mask]
            R_this = np.matmul(R[1], np.linalg.inv(R[0]))
            R_this_p = R_this.copy()
            dR = util.randomRotation(epsilon=0.1)
            dRangle = angular_distance_np(dR[np.newaxis, :],
                                          np.eye(3)[np.newaxis, :])[0]
            R_this_p[:3, :3] = np.matmul(dR, R_this_p[:3, :3])
            R_this_p[:3, 3] += np.random.randn(3) * 0.1
            s2t_dr = np.matmul(R_this, np.linalg.inv(R_this_p))
            pct_reproj = np.matmul(
                R_this_p, np.concatenate(
                    (pct.T, np.ones([1, pct.shape[0]]))))[:3, :]
            pct_reproj_org = np.matmul(
                R_this, np.concatenate(
                    (pct.T, np.ones([1, pct.shape[0]]))))[:3, :]
            flow = pct_reproj_org - pct_reproj
            # assume always observe the second view(right view)
            normalpct = np.matmul(R_this_p[:3, :3], normalpct.T).T
            flow = flow.T

            s2t_rgb = self.reproj_helper(pct_reproj_org, colorpct,
                                         imgs_rgb[0].shape, 'color')
            s2t_rgb_p = self.reproj_helper(pct_reproj, colorpct,
                                           imgs_rgb[0].shape, 'color')
            s2t_n_p = self.reproj_helper(pct_reproj, normalpct,
                                         imgs_rgb[0].shape, 'normal')
            s2t_d_p = self.reproj_helper(pct_reproj, depthpct,
                                         imgs_rgb[0].shape[:2], 'depth')
            s2t_flow_p = self.reproj_helper(pct_reproj, flow,
                                            imgs_rgb[0].shape, 'color')
            s2t_mask_p = (s2t_d_p != 0).astype('int')

            # compute an envelop box
            try:
                tp = np.where(t2s_d_p.sum(0))[0]
                w0, w1 = tp[0], tp[-1]
                tp = np.where(t2s_d_p.sum(1))[0]
                h0, h1 = tp[0], tp[-1]
            except:
                w0, h0 = 0, 0
                w1, h1 = t2s_d_p.shape[1] - 1, t2s_d_p.shape[0] - 1
            t2s_box_p = np.zeros(t2s_d_p.shape)
            t2s_box_p[h0:h1, w0:w1] = 1

            try:
                tp = np.where(s2t_d_p.sum(0))[0]
                w0, w1 = tp[0], tp[-1]
                tp = np.where(s2t_d_p.sum(1))[0]
                h0, h1 = tp[0], tp[-1]
            except:
                w0, h0 = 0, 0
                w1, h1 = s2t_d_p.shape[1] - 1, s2t_d_p.shape[0] - 1
            s2t_box_p = np.zeros(s2t_d_p.shape)
            s2t_box_p[h0:h1, w0:w1] = 1

            rets['proj_dr'] = np.stack((t2s_dr, s2t_dr), 0)[np.newaxis, :]
            rets['proj_flow'] = np.stack((t2s_flow_p, s2t_flow_p),
                                         0).transpose(0, 3, 1,
                                                      2)[np.newaxis, :]
            rets['proj_rgb'] = np.stack((t2s_rgb, s2t_rgb),
                                        0).transpose(0, 3, 1, 2)[np.newaxis, :]
            rets['proj_rgb_p'] = np.stack(
                (t2s_rgb_p, s2t_rgb_p), 0).transpose(0, 3, 1, 2)[np.newaxis, :]
            rets['proj_n_p'] = np.stack((t2s_n_p, s2t_n_p),
                                        0).transpose(0, 3, 1, 2)[np.newaxis, :]
            rets['proj_d_p'] = np.stack((t2s_d_p, s2t_d_p),
                                        0).reshape(1, 2, 1, t2s_d_p.shape[0],
                                                   t2s_d_p.shape[1])
            rets['proj_mask_p'] = np.stack(
                (t2s_mask_p, s2t_mask_p),
                0).reshape(1, 2, 1, t2s_mask_p.shape[0], t2s_mask_p.shape[1])
            rets['proj_box_p'] = np.stack(
                (t2s_box_p, s2t_box_p), 0).reshape(1, 2, 1, t2s_box_p.shape[0],
                                                   t2s_box_p.shape[1])

        for v in range(self.nViews):
            imgs_[v] = cv2.resize(imgs[v],
                                  self.OutputSize,
                                  interpolation=cv2.INTER_NEAREST)
            if self.rgbd:
                imgs_rgb_[v] = cv2.resize(imgs_rgb[v],
                                          self.OutputSize).transpose(2, 0, 1)

        imgs_ = imgs_[np.newaxis, :]
        if self.rgbd:
            imgs_rgb_ = imgs_rgb_[np.newaxis, :]
        R = R[np.newaxis, :]
        Q = Q[np.newaxis, :]

        if self.segm:
            rets['segm'] = segm_
        rets['interval'] = self.interval_this
        rets['norm'] = normal_
        rets['rgb'] = imgs_rgb_
        rets['depth'] = imgs_
        rets['Q'] = Q
        rets['R'] = R
        rets['imgsPath'] = imgsPath
        return rets
    def __getitem__helper(self, index):
        """Build one two-view sample (frame pair) from the LMDB store.

        Reads the RGB, depth, normal and semantic images and the 4x4 pose of
        both frames of pair ``index`` through ``self.txn``. Depending on the
        ``self.pointcloud`` / ``self.local`` / ``self.plane_r`` /
        ``self.topdown`` switches it additionally produces sampled point
        clouds, point-pair relation labels, plane parameters and top-down
        (floor plane) data.

        Args:
            index: sample index; wrapped modulo ``self.__len__()``.

        Returns:
            A tuple ``(rets, True)``. ``rets`` is a dict; most array entries
            carry a leading singleton axis. Always present: ``overlap``,
            ``PerspectiveValidMask``, ``dataMask``, ``norm``, ``rgb``,
            ``semantic``, ``depth``, ``Q``, ``R``, ``R_inv``, ``imgsPath``.
            Conditionally present:

            * ``self.pointcloud or self.local``: ``pointcloud``
            * ``self.plane_r``: ``plane``, ``plane_raw``, ``plane_c``,
              ``valid_plane``
            * ``self.local``: ``normdot2``, ``dst2``, ``uv_pts``,
              ``rel_cls_pts``, ``pair_pts``, ``igt`` and, when
              ``self.eval_local`` is set, ``err_r``, ``eval_key``,
              ``pos_s_360``, ``pos_t_360``, ``nor_s_360``, ``nor_t_360``,
              ``pred_pose``, ``gt_pose``
            * ``self.topdown``: ``roompc``, ``edt_w``, ``img2ind``,
              ``imgPCid``, ``axis_x``, ``axis_y``, ``origin``, ``kp_uv``,
              ``kp_uv_neg``, ``w_uv_neg``, ``plane_eq``, ``pc2ind``,
              ``pc2ind_mask``, ``topdown``, ``topdown_s``,
              ``topdown_partial``, ``TopDownValidMask``, ``npts``

        Raises:
            AssertionError: if ``self.nViews`` is not 2.
        """

        def read_bytes(key):
            # Raw LMDB payload stored under `key`, viewed as a uint8 buffer.
            return np.frombuffer(self.txn.get(key.encode()), dtype=np.uint8)

        def read_float64(key):
            # `np.float` was removed in NumPy 1.24; it was an alias of the
            # builtin float, i.e. float64, so the decoded values are the same.
            return np.frombuffer(self.txn.get(key.encode()), np.float64)

        rets = {}
        index = index % self.__len__()
        view_shape = (self.nViews, self.Inputheight, self.Inputwidth)
        imgs_depth = np.zeros(view_shape, dtype=np.float32)
        imgs_s = np.zeros(view_shape, dtype=np.float32)
        imgs_rgb = np.zeros(view_shape + (3, ), dtype=np.float32)
        imgs_normal = np.zeros(view_shape + (3, ), dtype=np.float32)
        # Channel layout per point: xyz (0:3), normal (3:6), colour (6:9),
        # semantic label (9).
        pointcloud = np.zeros((self.nViews, 3 + 3 + 3 + 1, self.num_points),
                              dtype=np.float32)

        R = np.zeros((self.nViews, 4, 4))
        Q = np.zeros((7))
        assert (self.nViews == 2)
        imgsPath = []
        ct0, ct1 = self.__getpair__(index)
        frames = (ct0, ct1)

        rets['overlap'] = float(self.dataList[index]['overlap'])

        basePath = self.base_this
        scene_id = basePath.split('/')[-2]
        room_id = scene_id + '-' + basePath.split('/')[-1]

        # ---- per-frame images --------------------------------------------
        for v, ct in enumerate(frames):
            imgs_rgb[v] = cv2.imdecode(
                read_bytes('%s-%06d-rgb' % (room_id, ct)),
                cv2.IMREAD_COLOR).astype('float') / 255.0

        for v, ct in enumerate(frames):
            # Flag 2 is cv2.IMREAD_ANYDEPTH; the stored values are scaled by
            # 1/1000 (presumably millimetres -> metres; confirm with the
            # code that wrote the database).
            imgs_depth[v] = cv2.imdecode(
                read_bytes('%s-%06d-depth' % (room_id, ct)),
                2).astype('float') / 1000.0

        for v, ct in enumerate(frames):
            # Normals are stored as 8-bit images; map [0, 255] to [-1, 1].
            imgs_normal[v] = cv2.imdecode(
                read_bytes('%s-%06d-normal' % (room_id, ct)),
                cv2.IMREAD_COLOR).astype('float') / 255.0 * 2 - 1

        for v, ct in enumerate(frames):
            # Only the first channel is used; labels are shifted by one.
            imgs_s[v] = cv2.imdecode(
                read_bytes('%s-%06d-semantic' % (room_id, ct)),
                cv2.IMREAD_UNCHANGED).astype('uint8')[:, :, 0] + 1

        PerspectiveValidMask = (imgs_depth != 0)
        rets['PerspectiveValidMask'] = PerspectiveValidMask[None, :,
                                                            None, :, :]
        rets['dataMask'] = rets['PerspectiveValidMask']

        # ---- per-frame poses ---------------------------------------------
        for v, ct in enumerate(frames):
            R[v] = read_float64('%s-%06d-R' % (room_id, ct)).reshape(4, 4)

        # convert from 3rd view to 4th view
        Rs3_inv = np.linalg.inv(self.Rs[3])
        R[0] = np.matmul(Rs3_inv, R[0])
        R[1] = np.matmul(Rs3_inv, R[1])

        # Per-view inverse poses. Kept under their own name because the
        # `self.local` branch rebinds `R_inv` to a single 4x4 matrix, while
        # the top-down branch further below still needs the per-view ones.
        view_inv = np.linalg.inv(R)
        R_inv = view_inv
        img2ind = np.zeros([2, self.num_points, 3])
        imgPCid = np.zeros([2, self.num_points, 2])

        # ---- sampled point clouds ----------------------------------------
        if self.pointcloud or self.local:
            for v in range(2):
                # Only image columns 160..319 are back-projected.
                pc = self.depth2pc(imgs_depth[v][:, 160:160 * 2]).T
                # util.write_ply('test.ply',np.concatenate((pc,pc1)))
                idx_s = np.random.choice(range(len(pc)), self.num_points)
                # (column, row) of every sampled point inside the crop
                imgPCid[v] = np.stack((idx_s % 160, idx_s // 160)).T
                pointcloud[v, :3, :] = pc[idx_s, :].T
                pc_n = imgs_normal[v][:, 160:160 * 2].reshape(-1, 3)
                pc_n = np.matmul(self.Rs[3][:3, :3].T, pc_n.T).T
                pointcloud[v, 3:6, :] = pc_n[idx_s, :].T
                pc_c = imgs_rgb[v, :, 160:160 * 2, :].reshape(-1, 3)
                # reverse the channel order returned by cv2.imdecode
                pointcloud[v, 6:9, :] = pc_c[idx_s, ::-1].T
                pc_s = imgs_s[v, :, 160:160 * 2].reshape(-1)
                pointcloud[v, 9:10, :] = pc_s[idx_s]

            rets['pointcloud'] = pointcloud[None, ...]

        # ---- room planes --------------------------------------------------
        if self.plane_r:
            plane_eq_raw = read_float64('%s-plane' % (room_id)).reshape(-1, 9)
            valid_plane = read_bytes('%s-plane-validnum' % (room_id))[0]
            # Columns 0:3 are used as plane centres, 3:7 as plane equations.
            plane_eq = plane_eq_raw[:, 3:7]
            plane_eq = np.matmul(plane_eq, np.linalg.inv(R[0]))
            plane_center = plane_eq_raw[:, :3]
            plane_center = (np.matmul(R[0][:3, :3], plane_center.T) +
                            R[0][:3, 3:4]).T

            rets['plane'] = plane_eq[np.newaxis, :]
            rets['plane_raw'] = plane_eq_raw[np.newaxis, :]
            rets['plane_c'] = plane_center[np.newaxis, :]
            rets['valid_plane'] = valid_plane

        # ---- point-pair relations and relative-pose target -----------------
        if self.local:
            # Move the source cloud into the target frame.
            R_s2t = np.matmul(R[1], view_inv[0])
            pointcloud[0, :3, :] = np.matmul(
                R_s2t[:3, :3], pointcloud[0, :3, :]) + R_s2t[:3, 3:4]
            pointcloud[0, 3:6, :] = np.matmul(R_s2t[:3, :3],
                                              pointcloud[0, 3:6, :])

            #util.write_ply('test.ply', np.concatenate((pointcloud[0,:3,:].T,pointcloud[1,:3,:].T)),
            #  normal=np.concatenate((pointcloud[0,3:6,:].T,pointcloud[1,3:6,:].T)))
            N_PAIR_PTS = 1000
            N_PAIR_EXCEED_PTS = N_PAIR_PTS * 10
            ANGLE_THRESH = 5.0
            PERP_THRESH = np.cos(np.deg2rad(90 - ANGLE_THRESH))
            PARALLEL_THRESH = np.cos(np.deg2rad(ANGLE_THRESH))
            COPLANE_THRESH = 0.05
            rel_cls_pts = np.zeros([N_PAIR_EXCEED_PTS])
            ind_s = np.random.choice(pointcloud.shape[-1], N_PAIR_EXCEED_PTS)
            ind_t = np.random.choice(pointcloud.shape[-1], N_PAIR_EXCEED_PTS)
            pair_pts = np.stack((ind_s, ind_t), -1)

            # Mixing the integer view index, a slice and an index array puts
            # the indexed axis first, so each of these is (n_pairs, 3).
            src_xyz = pointcloud[0, 0:3, pair_pts[:, 0]]
            tgt_xyz = pointcloud[1, 0:3, pair_pts[:, 1]]
            src_nrm = pointcloud[0, 3:6, pair_pts[:, 0]]
            tgt_nrm = pointcloud[1, 3:6, pair_pts[:, 1]]

            normdot = (src_nrm * tgt_nrm).sum(1)
            # mean of the two point-to-plane distances of each pair
            offset = src_xyz - tgt_xyz
            dst = (np.abs((offset * tgt_nrm).sum(1)) + np.abs(
                (offset * src_nrm).sum(1))) / 2

            # 0: none of the below, 1: perpendicular normals,
            # 2: parallel normals but offset, 3: parallel and co-planar
            is_parallel = np.abs(normdot) > PARALLEL_THRESH
            rel_cls_pts[(np.abs(normdot) < PERP_THRESH)] = 1
            rel_cls_pts[is_parallel & (dst > COPLANE_THRESH)] = 2
            rel_cls_pts[is_parallel & (dst <= COPLANE_THRESH)] = 3

            if self.split == 'train':
                # balance each class
                N_CLASS = 4
                pair_pts_select = []
                for j in range(N_CLASS):
                    ind = np.where(rel_cls_pts == j)[0]
                    if len(ind):
                        pair_pts_select.append(ind[np.random.choice(
                            len(ind), N_PAIR_PTS // N_CLASS)])
                pair_pts_select = np.concatenate(pair_pts_select)

                pair_pts_select = pair_pts_select[np.random.choice(
                    len(pair_pts_select), N_PAIR_PTS)]
            else:
                pair_pts_select = np.random.choice(len(pair_pts), N_PAIR_PTS)

            pair_pts = pair_pts[pair_pts_select]
            normdot = normdot[pair_pts_select]
            dst = dst[pair_pts_select]
            rel_cls_pts = rel_cls_pts[pair_pts_select]

            rets['normdot2'] = np.power(normdot, 2)[None, :]
            rets['dst2'] = np.power(dst, 2)[None, :]

            # convert to image coordinate
            hfov = 90.0
            vfov = 2 * np.arctan(np.tan(hfov / 2 / 180 * np.pi)) / np.pi * 180

            def project(pts):
                # Perspective projection of (n, 3) points to pixel
                # coordinates of a 160x160 image; depth is taken as -z.
                zs = -pts[:, 2]
                ys = (0.5 - (pts[:, 1] / zs /
                             (np.tan(np.deg2rad(vfov / 2)))) / 2) * 160
                xs = (0.5 + (pts[:, 0] / zs /
                             (np.tan(np.deg2rad(hfov / 2)))) / 2) * 160
                return np.stack((xs, ys), -1)

            # Source points currently live in the target frame; bring them
            # back into the source frame before projecting.
            R_t2s = np.linalg.inv(R_s2t)
            tp = (
                np.matmul(R_t2s[:3, :3], pointcloud[0, :3, pair_pts[:, 0]].T) +
                R_t2s[:3, 3:4]).T
            uv_s = project(tp)
            uv_t = project(pointcloud[1, :3, pair_pts[:, 1]])
            uv_pts = np.stack((uv_s, uv_t))[None, :]
            rets['uv_pts'] = uv_pts.clip(0, 160 - 1).astype('int')

            rets['rel_cls_pts'] = rel_cls_pts[None, :]
            rets['pair_pts'] = pair_pts[None, :]

            if self.eval_local:

                # convert back into local coordinate
                R_t2s = np.matmul(R[0], view_inv[1])
                Kth = self.dataList[index]['Kth']
                pointcloud[0, :3, :] = np.matmul(
                    R_t2s[:3, :3], pointcloud[0, :3, :]) + R_t2s[:3, 3:4]
                pointcloud[0, 3:6, :] = np.matmul(R_t2s[:3, :3],
                                                  pointcloud[0, 3:6, :])

                eval_key = '%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)
                eval_entry = self.eval_gt_dict[eval_key]
                R_pred = eval_entry['pred_pose']
                gt_pose = eval_entry['gt_pose']

                err_r = util.angular_distance_np(R_pred[:3, :3],
                                                 gt_pose[:3, :3])[0]
                rets['err_r'] = err_r
                rets['eval_key'] = eval_key

                rets['pos_s_360'] = (eval_entry['pos_s_360'][None, :])
                rets['pos_t_360'] = (eval_entry['pos_t_360'][None, :])
                rets['nor_s_360'] = (eval_entry['nor_s_360'][None, :])
                rets['nor_t_360'] = (eval_entry['nor_t_360'][None, :])

                # Apply the predicted pose; the residual transform to the
                # ground-truth alignment becomes the target.
                pointcloud[0, :3, :] = np.matmul(
                    R_pred[:3, :3], pointcloud[0, :3, :]) + R_pred[:3, 3:4]
                pointcloud[0, 3:6, :] = np.matmul(R_pred[:3, :3],
                                                  pointcloud[0, 3:6, :])
                igt = np.matmul(R_s2t, np.linalg.inv(R_pred))
                rets['igt'] = igt[None, :]
                rets['pred_pose'] = R_pred[None, :]
                rets['gt_pose'] = gt_pose[None, :]
                R_gt = igt[:3, :3]
                t_gt = igt[:3, 3:4]

            else:
                # Perturb the aligned source cloud by a random rigid motion
                # about its centroid; the target is the inverse motion.
                delta_R = util.randomRotation(epsilon=0.1)
                delta_t = np.random.randn(3) * 0.1

                centroid = pointcloud[0, :3, :].mean(1)[:, None]
                pointcloud_s_perturb = np.matmul(
                    delta_R, pointcloud[0, :3, :] -
                    centroid) + delta_t[:, None] + centroid

                t_gt = np.matmul(np.eye(3) - delta_R.T, centroid) - np.matmul(
                    delta_R.T, delta_t[:, None])
                R_gt = delta_R.T
                igt = np.eye(4)
                igt[:3, :3] = R_gt
                igt[:3, 3] = t_gt.squeeze()
                rets['igt'] = igt[None, :]
                pointcloud_s_n_perturb = np.matmul(delta_R,
                                                   pointcloud[0, 3:6, :])
                pointcloud[0, :3, :] = pointcloud_s_perturb
                pointcloud[0, 3:6, :] = pointcloud_s_n_perturb

            Q = np.concatenate((util.rot2Quaternion(R_gt), t_gt.squeeze()))
            R_ = np.eye(4)
            R_[:3, :3] = R_gt
            R_[:3, 3] = t_gt.squeeze()
            # In local mode the returned 'R_inv' is the inverse of the
            # target transform, not the per-view inverse poses.
            R_inv = np.linalg.inv(R_)

            rets['pointcloud'] = pointcloud[None, ...]

        # ---- top-down (floor plane) representation -------------------------
        if self.topdown:

            roompc = read_float64('%s-pc' % (room_id)).reshape(-1, 3)
            roompc = roompc[np.random.choice(roompc.shape[0], 20000)]
            rets['roompc'] = roompc[None, :]

            plane_eq = read_float64('%s-floor' % (room_id)).reshape(4)
            plane_eqs = np.zeros([2, 4])
            planes = []
            for v in range(2):
                # floor plane expressed in view v, with a unit normal
                eq = np.matmul(plane_eq, np.linalg.inv(R[v]))
                eq /= (np.linalg.norm(eq[:3]) + 1e-16)
                plane_eqs[v, :] = eq.copy()
                planes.append(eq)

            # Not used below; the draw is kept so the global RNG stream (and
            # with it seeded runs) stays the same as before.
            colors = np.random.rand(15 + 1, 3)
            # resolution = 0.02 # 0.2m
            resolution = 0.04

            height = 224
            width = 224

            pc2ind = np.zeros([2, pointcloud.shape[2], 3])
            npts = np.zeros([2])
            pc2ind_mask = np.zeros([2, pointcloud.shape[2]])

            def floor_frame(eq):
                # Origin and axes of a 2D frame on the floor plane `eq`.
                normal = eq[:3]
                # project camera position(0,0,0) to floor plane
                origin = -normal * eq[3]
                # y axis: direction [0, 0, -1] projected onto the plane
                axis_base = np.array([0, 0, -1])
                axis_y = axis_base - np.dot(axis_base, normal) * normal
                axis_y /= (np.linalg.norm(axis_y) + 1e-16)
                axis_x = np.cross(axis_y, normal)
                axis_x /= (np.linalg.norm(axis_x) + 1e-16)
                return origin, axis_x, axis_y, normal

            def to_grid(points, frame):
                # Map (n, 3) points to (u, v, height bin) grid indices.
                origin, axis_x, axis_y, axis_z = frame
                rel = points - origin[None, :]
                u = (rel * axis_x[None, :]).sum(1)
                v = (rel * axis_y[None, :]).sum(1)
                z = (rel * axis_z[None, :]).sum(1)
                u = width // 2 + (u / resolution).astype('int')
                v = height // 2 - (v / resolution).astype('int')
                ind_z = np.digitize(z, [-0.1, 0.7, 1.5])
                return np.stack((u, v, ind_z), -1)

            # the floor plane
            # (0, 1, 0)'x + d = 0
            floor_frames = []
            for v in range(2):
                pts = pointcloud[v, 0:3, :].T
                eq = planes[v]
                # remove partial view's ceiling
                dst = np.abs(((eq[:3][None, :] * pts).sum(1) + eq[3]))
                near_floor = dst < 1.5
                npts[v] = np.count_nonzero(near_floor)
                pc2ind_mask[v] = near_floor

                frame = floor_frame(eq)
                floor_frames.append(frame)
                # write_ply('test.ply',np.stack((u,v,z),-1), color=colors[pc_s])
                pc2ind[v, near_floor] = to_grid(pts[near_floor], frame)
                img2ind[v] = to_grid(pts, frame)

            origin_0, axis_x_0, axis_y_0, _ = floor_frames[0]
            origin_1, axis_x_1, axis_y_1, _ = floor_frames[1]

            def read_topdown(kind, ct, flag):
                # Decode the stored top-down image `kind` of frame `ct`.
                return cv2.imdecode(
                    read_bytes('%s-%06d-%s' % (room_id, ct, kind)), flag)

            topdown_c_partial_0, topdown_c_partial_1 = [
                read_topdown('topdown_c_partial', ct,
                             cv2.IMREAD_COLOR).astype('float') / 255.
                for ct in frames
            ]
            topdown_c_complete_0, topdown_c_complete_1 = [
                read_topdown('topdown_c_complete', ct,
                             cv2.IMREAD_COLOR).astype('float') / 255.
                for ct in frames
            ]
            topdown_s_complete_0, topdown_s_complete_1 = [
                read_topdown('topdown_s_complete', ct,
                             cv2.IMREAD_UNCHANGED).astype('uint8')
                for ct in frames
            ]

            # Per-pixel weight that decays with the distance to the nearest
            # non-empty pixel of the partial top-down image, floored at 0.1.
            edt_weights = []
            for partial in (topdown_c_partial_0, topdown_c_partial_1):
                empty = ~partial.sum(2).astype('bool')
                edt = ndimage.distance_transform_edt(empty,
                                                     return_indices=False)
                edt_weights.append(np.maximum(0.1, np.power(0.98, edt)))
            rets['edt_w'] = np.stack(edt_weights)[None, ...]

            rets['img2ind'] = img2ind[None, ...]
            rets['imgPCid'] = imgPCid[None, ...]
            rets['axis_x'] = np.stack((axis_x_0, axis_x_1))[None, :]
            rets['axis_y'] = np.stack((axis_y_0, axis_y_1))[None, :]
            rets['origin'] = np.stack((origin_0, origin_1))[None, :]

            # sample points on source floor plane:
            occupied = ~((topdown_c_partial_0 == 0).sum(2) == 3)
            vs, us = np.where(occupied)
            if not len(vs):
                vs = np.array([0, 0])
                us = np.array([0, 0])
            ind = np.random.choice(len(vs), 100)
            u_0 = us[ind]
            v_0 = vs[ind]

            kp_uv_0 = np.stack((u_0, v_0), -1)
            u_0 -= width // 2
            v_0 -= height // 2

            kp_3d_0 = origin_0[None, :] + axis_x_0[
                None, :] * u_0[:, None] * resolution - axis_y_0[
                    None, :] * v_0[:, None] * resolution

            # Uses the per-view inverses: `R_inv` may have been rebound to a
            # single 4x4 matrix by the local branch above.
            R01 = np.matmul(R[1], view_inv[0])
            kp_3d_1 = (np.matmul(R01[:3, :3], kp_3d_0.T) + R01[:3, 3:4]).T

            # random sample a set of points as negative correspondences
            occupied = ~((topdown_c_partial_1 == 0).sum(2) == 3)
            vs_neg, us_neg = np.where(occupied)
            if not len(vs_neg):
                vs_neg = np.array([0, 0])
                us_neg = np.array([0, 0])
            ind = np.random.choice(len(vs_neg), 100 * 100)
            u_neg_1 = us_neg[ind]
            v_neg_1 = vs_neg[ind]

            kp_uv_neg_1 = np.stack((u_neg_1, v_neg_1), -1)
            u_neg_1 -= width // 2
            v_neg_1 -= height // 2
            kp_3d_neg_1 = origin_1[None, :] + axis_x_1[
                None, :] * u_neg_1[:, None] * resolution - axis_y_1[
                    None, :] * v_neg_1[:, None] * resolution
            R10 = np.matmul(R[0], view_inv[1])
            kp_3d_neg_0 = (np.matmul(R10[:3, :3], kp_3d_neg_1.T) +
                           R10[:3, 3:4]).T
            u_neg_0 = ((kp_3d_neg_0 - origin_0[None, :]) *
                       axis_x_0[None, :]).sum(1)
            v_neg_0 = ((kp_3d_neg_0 - origin_0[None, :]) *
                       axis_y_0[None, :]).sum(1)
            u_neg_0 = width // 2 + (u_neg_0 / resolution).astype('int')
            v_neg_0 = height // 2 - (v_neg_0 / resolution).astype('int')
            kp_uv_neg_0 = np.stack((u_neg_0, v_neg_0), -1)
            kp_uv_neg_0[:, 0] = kp_uv_neg_0[:, 0].clip(0, width - 1)
            kp_uv_neg_0[:, 1] = kp_uv_neg_0[:, 1].clip(0, height - 1)
            # 100 negative candidates for each of the 100 source keypoints
            kp_uv_neg_1 = kp_uv_neg_1.reshape(100, 100, 2)
            kp_uv_neg_0 = kp_uv_neg_0.reshape(100, 100, 2)
            # The weight grows with the pixel distance between a negative
            # (mapped into view 0) and its source keypoint; capped at 0.9.
            w_uv_neg_1 = 1 - np.maximum(
                0.1,
                np.power(
                    0.98,
                    np.linalg.norm(kp_uv_neg_0 - kp_uv_0[:, None, :], axis=2)))

            u_1 = ((kp_3d_1 - origin_1[None, :]) * axis_x_1[None, :]).sum(1)
            v_1 = ((kp_3d_1 - origin_1[None, :]) * axis_y_1[None, :]).sum(1)
            u_1 = width // 2 + (u_1 / resolution).astype('int')
            v_1 = height // 2 - (v_1 / resolution).astype('int')
            kp_uv_1 = np.stack((u_1, v_1), -1)

            topdown_c_complete = np.stack(
                (topdown_c_complete_0,
                 topdown_c_complete_1)).transpose(0, 3, 1, 2)
            topdown_s_complete = np.stack(
                (topdown_s_complete_0, topdown_s_complete_1))
            topdown_c_partial = np.stack(
                (topdown_c_partial_0, topdown_c_partial_1))

            kp_uv_0[:, 0] = kp_uv_0[:, 0].clip(0, width - 1)
            kp_uv_0[:, 1] = kp_uv_0[:, 1].clip(0, height - 1)
            kp_uv_1[:, 0] = kp_uv_1[:, 0].clip(0, width - 1)
            kp_uv_1[:, 1] = kp_uv_1[:, 1].clip(0, height - 1)
            rets['kp_uv'] = np.stack((kp_uv_0, kp_uv_1))[None, ...]
            rets['kp_uv_neg'] = kp_uv_neg_1[None, ...]
            rets['w_uv_neg'] = w_uv_neg_1[None, ...]
            rets['plane_eq'] = plane_eqs[None, ...]
            rets['pc2ind'] = pc2ind[None, ...]

            rets['pc2ind_mask'] = pc2ind_mask[None, ...]
            rets['topdown'] = topdown_c_complete[None, ...]
            rets['topdown_s'] = topdown_s_complete[None, ...]
            rets['topdown_partial'] = topdown_c_partial.transpose(0, 3, 1,
                                                                  2)[None, ...]
            # a pixel is valid unless all three colour channels are zero
            TopDownValidMask = ((topdown_c_complete == 0).sum(1, keepdims=True)
                                != 3)
            rets['TopDownValidMask'] = TopDownValidMask[None, ...]
            rets['npts'] = npts[None, ...]

        # ---- common outputs -------------------------------------------------
        imgsPath.append(f"{basePath}/{ct0:06d}")
        imgsPath.append(f"{basePath}/{ct1:06d}")

        rets['norm'] = imgs_normal.transpose(0, 3, 1, 2)[None, ...]
        rets['rgb'] = imgs_rgb.transpose(0, 3, 1, 2)[None, ...]
        rets['semantic'] = imgs_s[None, ...]
        rets['depth'] = imgs_depth[None, :, None, :, :]
        rets['Q'] = Q[None, ...]
        rets['R'] = R[None, ...]
        rets['R_inv'] = R_inv[None, ...]
        rets['imgsPath'] = imgsPath

        return rets, True