    def get_model_poses(self, scene_id, ann_id):
        '''
        **Input:**
        - scene_id: int of the scene index.
        - ann_id: int of the annotation index.
        **Output:**
        - obj_list: list of int of object indices.
        - pose_list: list of 4x4 matrices of object poses.
        - camera_pose: 4x4 matrix of the camera pose relative to the first frame.
        - align_mat: 4x4 matrix of the first camera pose relative to the table (cam0_wrt_table).
        '''
        scene_dir = os.path.join(self.root, 'scenes')
        camera_poses_path = os.path.join(self.root, 'scenes',
                                         get_scene_name(scene_id), self.camera,
                                         'camera_poses.npy')
        camera_poses = np.load(camera_poses_path)
        camera_pose = camera_poses[ann_id]
        align_mat_path = os.path.join(self.root, 'scenes',
                                      get_scene_name(scene_id), self.camera,
                                      'cam0_wrt_table.npy')
        align_mat = np.load(align_mat_path)
        scene_reader = xmlReader(
            os.path.join(scene_dir, get_scene_name(scene_id), self.camera,
                         'annotations', '%04d.xml' % (ann_id, )))
        posevectors = scene_reader.getposevectorlist()
        obj_list = []
        pose_list = []
        for posevector in posevectors:
            obj_idx, mat = parse_posevector(posevector)
            obj_list.append(obj_idx)
            pose_list.append(mat)
        return obj_list, pose_list, camera_pose, align_mat
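
    # A minimal usage sketch; the helper below is hypothetical and not part
    # of the original class. It composes the per-view camera pose with the
    # table alignment to express the object poses returned by
    # get_model_poses in the table frame.
    def _example_poses_in_table_frame(self, scene_id, ann_id):
        obj_list, pose_list, camera_pose, align_mat = self.get_model_poses(
            scene_id, ann_id)
        cam_wrt_table = np.matmul(align_mat, camera_pose)
        return obj_list, [np.matmul(cam_wrt_table, pose) for pose in pose_list]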
    def continuous_eval_scene(self,
                              scene_id,
                              annid_offset,
                              views,
                              grasp_group,
                              init=False,
                              test_time=False):
        scene_id = get_sceneId4det(scene_id)
        eval_dir = os.path.join(self.root, 'scenes', get_scene_name(scene_id),
                                self.camera, 'eval', self.offset(annid_offset))
        # exist_ok avoids a check-then-create race when scenes are evaluated in parallel
        os.makedirs(eval_dir, exist_ok=True)

        eval_path = os.path.join(eval_dir, self.views2filename(views))
        if not os.path.exists(eval_path):
            scene_accuracy = self.eval_scene(scene_id,
                                             grasp_group=grasp_group)[0]
            log_str(
                f'Scene: {scene_id:04} +{annid_offset:02} ({eval_path[-20:]}) Accuracy: {np.mean(scene_accuracy):.2f} {views}'
            )
            np.save(eval_path, scene_accuracy)
        # elif test_time:
        #     scene_accuracy = self.eval_scene(scene_id, grasp_group=grasp_group)[0]

        if init:
            return not os.path.exists(eval_path)
        else:
            scene_accuracy = np.load(eval_path)
            return scene_accuracy
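
    # A minimal usage sketch (hypothetical helper, not original API).
    # continuous_eval_scene memoizes per-view accuracy on disk: the first
    # call for a given (scene_id, annid_offset, views) evaluates and saves a
    # .npy file, and later calls with the same arguments simply reload it.
    def _example_cached_accuracy(self, scene_id, views, grasp_group):
        scene_accuracy = self.continuous_eval_scene(scene_id, 0, views,
                                                    grasp_group)
        return np.mean(scene_accuracy)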
    def get_scene_models(self, scene_id, ann_id):
        '''
        Return the models of a scene in the model (object) coordinate frame.
        '''
        model_dir = os.path.join(self.root, 'models')
        scene_reader = xmlReader(
            os.path.join(self.root, 'scenes', get_scene_name(scene_id),
                         self.camera, 'annotations', '%04d.xml' % (ann_id, )))
        posevectors = scene_reader.getposevectorlist()
        obj_list = []
        model_list = []
        dexmodel_list = []
        for posevector in posevectors:
            obj_idx, _ = parse_posevector(posevector)
            obj_list.append(obj_idx)
        for obj_idx in obj_list:
            model = o3d.io.read_point_cloud(
                os.path.join(model_dir, '%03d' % obj_idx, 'nontextured.ply'))
            # prefer the pickled dex-net model cache; fall back to loading
            # the textured model from scratch
            dex_cache_path = os.path.join(self.root, 'dex_models',
                                          '%03d.pkl' % obj_idx)
            if os.path.exists(dex_cache_path):
                with open(dex_cache_path, 'rb') as f:
                    dexmodel = pickle.load(f)
            else:
                dexmodel = load_dexnet_model(
                    os.path.join(model_dir, '%03d' % obj_idx, 'textured'))
            points = np.array(model.points)
            model_list.append(points)
            dexmodel_list.append(dexmodel)
        return model_list, dexmodel_list, obj_list
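
    # A minimal usage sketch (hypothetical helper). eval_scene downsamples
    # the dense model clouds with voxel_sample_points before evaluation; the
    # same preprocessing can be reused on its own, assuming the 0.008 m voxel
    # size used elsewhere in this class.
    def _example_sampled_scene_models(self, scene_id):
        model_list, dexmodel_list, obj_list = self.get_scene_models(
            scene_id, ann_id=0)
        model_sampled_list = [
            voxel_sample_points(model, 0.008) for model in model_list
        ]
        return model_sampled_list, dexmodel_list, obj_list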
    def get_grasp_group(self, scene_id, ann_id):
        camera_poses_path = os.path.join(self.dataset_root, 'scenes',
                                         get_scene_name(scene_id), self.camera,
                                         'camera_poses.npy')
        if self.quick:  # use cached grasp files
            npy_file_path = os.path.join(self.dump_dir,
                                         get_scene_name(scene_id), self.camera,
                                         '%04d.npy' % (ann_id, ))
            gg = AroundViewGraspGroup().from_npy(npy_file_path,
                                                 camera_poses_path)
        else:
            data_idx = scene_id * 256 + ann_id  # 256 annotation views per scene
            data = self.dataset[data_idx]
            data['point_clouds'] = to_var(data['point_clouds']).unsqueeze(0)
            data['cloud_colors'] = to_var(data['cloud_colors']).unsqueeze(0)

            # Forward pass
            with torch.no_grad():
                end_points = self.net(data)
                grasp_preds = pred_decode(end_points)
            preds = grasp_preds[0].detach().cpu().numpy()
            gg = GraspGroup(preds)

            # collision detection
            if self.collision_thresh > 0:
                cloud, _ = self.dataset.get_data(data_idx,
                                                 return_raw_cloud=True)
                mfcdetector = ModelFreeCollisionDetector(
                    cloud, voxel_size=self.voxel_size)
                collision_mask = mfcdetector.detect(
                    gg,
                    approach_dist=0.05,
                    collision_thresh=self.collision_thresh)
                gg = gg[~collision_mask]

            # wrap the predictions into an around-view group: tag every grasp
            # with its source ann_id and attach the camera poses so the group
            # can later be re-projected into other views (cf. to_view)
            gg = AroundViewGraspGroup(np.array([ann_id] * len(gg)),
                                      np.load(camera_poses_path),
                                      gg.grasp_group_array)
        return gg
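
    # A minimal usage sketch (hypothetical helper). get_grasp_group yields an
    # AroundViewGraspGroup either from cached dumps (when self.quick is set)
    # or from a fresh forward pass plus collision filtering, so per-view
    # results can be collected with a plain loop.
    def _example_collect_views(self, scene_id, num_views=3):
        return [
            self.get_grasp_group(scene_id, ann_id)
            for ann_id in range(num_views)
        ]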
    def eval_scene(self,
                   scene_id,
                   dump_folder,
                   TOP_K=50,
                   return_list=False,
                   vis=False,
                   max_width=0.1):
        '''
        **Input:**
        - scene_id: int of the scene index.
        - dump_folder: string of the folder that saves the dumped npy files.
        - TOP_K: int of the number of top-ranked grasps to evaluate.
        - return_list: bool of whether to return the result lists.
        - vis: bool of whether to visualize the result.
        - max_width: float of the maximum gripper width used in evaluation.

        **Output:**
        - scene_accuracy: list of 10 np.arrays of shape (TOP_K, 6), one
          accuracy tensor per evaluated annotation view.
        '''
        config = get_config()
        table = create_table_points(1.0,
                                    1.0,
                                    0.05,
                                    dx=-0.5,
                                    dy=-0.5,
                                    dz=-0.05,
                                    grid_size=0.008)

        list_coe_of_friction = [0.2, 0.4, 0.6, 0.8, 1.0, 1.2]
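        # a grasp's score is the smallest friction coefficient at which it is
        # force-closure, so lower scores mean better grasps; accuracy is
        # reported per threshold in this list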

        model_list, dexmodel_list, _ = self.get_scene_models(scene_id,
                                                             ann_id=0)

        model_sampled_list = list()
        for model in model_list:
            model_sampled = voxel_sample_points(model, 0.008)
            model_sampled_list.append(model_sampled)

        scene_accuracy = []
        grasp_list_list = []
        score_list_list = []
        collision_list_list = []

        # predictions are saved for ann_id == 0; load them once and transform
        # them into each evaluated view below
        camera_poses_path = os.path.join(self.root, 'scenes',
                                         get_scene_name(scene_id), self.camera,
                                         'camera_poses.npy')
        grasp_group_0 = AroundViewGraspGroup().from_npy(
            os.path.join(dump_folder, get_scene_name(scene_id), self.camera,
                         '%04d.npy' % (0, )), camera_poses_path)
        for ann_id in range(0, 10):  # only the first 10 annotation views are evaluated
            grasp_group = grasp_group_0.to_view(ann_id)
            _, pose_list, camera_pose, align_mat = self.get_model_poses(
                scene_id, ann_id)
            table_trans = transform_points(
                table, np.linalg.inv(np.matmul(align_mat, camera_pose)))

            # clip widths to [0, max_width]; column 1 of the grasp array is
            # the gripper width
            gg_array = grasp_group.grasp_group_array
            min_width_mask = (gg_array[:, 1] < 0)
            max_width_mask = (gg_array[:, 1] > max_width)
            gg_array[min_width_mask, 1] = 0
            gg_array[max_width_mask, 1] = max_width
            grasp_group.grasp_group_array = gg_array

            grasp_list, score_list, collision_mask_list = eval_grasp(
                grasp_group,
                model_sampled_list,
                dexmodel_list,
                pose_list,
                config,
                table=table_trans,
                voxel_size=0.008,
                TOP_K=TOP_K)

            # remove empty
            grasp_list = [x for x in grasp_list if len(x) != 0]
            score_list = [x for x in score_list if len(x) != 0]
            collision_mask_list = [
                x for x in collision_mask_list if len(x) != 0
            ]

            if len(grasp_list) == 0:
                grasp_accuracy = np.zeros((TOP_K, len(list_coe_of_friction)))
                scene_accuracy.append(grasp_accuracy)
                grasp_list_list.append([])
                score_list_list.append([])
                collision_list_list.append([])
                print('\rMean Accuracy for scene:%04d ann:%04d = %.3f' %
                      (scene_id, ann_id, 100.0 * np.mean(grasp_accuracy)),
                      end='',
                      flush=True)
                continue

            # concat into scene level
            grasp_list, score_list, collision_mask_list = np.concatenate(
                grasp_list), np.concatenate(score_list), np.concatenate(
                    collision_mask_list)

            if vis:
                t = o3d.geometry.PointCloud()
                t.points = o3d.utility.Vector3dVector(table_trans)
                # use a distinct name so the scene geometry does not shadow
                # the model_list loaded from get_scene_models above
                scene_models = generate_scene_model(self.root,
                                                    'scene_%04d' % scene_id,
                                                    ann_id,
                                                    return_poses=False,
                                                    align=False,
                                                    camera=self.camera)
                import copy
                # build an around-view group with the (ann_ids, camera_poses,
                # grasp array) form used elsewhere in this class
                gg = AroundViewGraspGroup(
                    np.array([ann_id] * len(grasp_list)),
                    np.load(camera_poses_path),
                    copy.deepcopy(grasp_list))
                scores = np.array(score_list)
                scores = scores / 2 + 0.5  # -1 -> 0, 0 -> 0.5, 1 -> 1
                scores[collision_mask_list] = 0.3
                gg.scores = scores
                gg.widths = 0.1 * np.ones((len(gg)), dtype=np.float32)
                grasps_geometry = gg.to_open3d_geometry_list()
                pcd = self.loadScenePointCloud(scene_id, self.camera, ann_id)

                o3d.visualization.draw_geometries([pcd, *grasps_geometry])
                # o3d.visualization.draw_geometries([pcd, *grasps_geometry, *scene_models])
                # o3d.visualization.draw_geometries([*grasps_geometry, *scene_models, t])

            # sort in scene level
            grasp_confidence = grasp_list[:, 0]
            indices = np.argsort(-grasp_confidence)
            grasp_list, score_list, collision_mask_list = grasp_list[
                indices], score_list[indices], collision_mask_list[indices]

            grasp_list_list.append(grasp_list)
            score_list_list.append(score_list)
            collision_list_list.append(collision_mask_list)

            # calculate AP: for each friction threshold fric,
            # grasp_accuracy[k, fric_idx] is the precision of the top-(k + 1)
            # grasps, where a grasp counts as successful if 0 < score <= fric
            grasp_accuracy = np.zeros((TOP_K, len(list_coe_of_friction)))
            for fric_idx, fric in enumerate(list_coe_of_friction):
                for k in range(0, TOP_K):
                    # slicing past the end of score_list returns the whole
                    # array, so this also covers k + 1 > len(score_list)
                    grasp_accuracy[k, fric_idx] = np.sum(
                        ((score_list[0:k + 1] <= fric) &
                         (score_list[0:k + 1] > 0)).astype(int)) / (k + 1)

            print('\rMean Accuracy for scene:%04d ann:%04d = %.3f' %
                  (scene_id, ann_id, 100.0 * np.mean(grasp_accuracy[:, :])),
                  end='',
                  flush=True)
            scene_accuracy.append(grasp_accuracy)
        if not return_list:
            return scene_accuracy
        else:
            return scene_accuracy, grasp_list_list, score_list_list, collision_list_list
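
    # A minimal usage sketch (hypothetical helper). The per-view accuracy
    # arrays returned by eval_scene stack into a (views, TOP_K, 6) tensor
    # whose mean, scaled by 100, is the scene-level AP reported by the
    # progress prints above.
    def _example_scene_ap(self, scene_id, dump_folder):
        scene_accuracy = self.eval_scene(scene_id, dump_folder)
        return 100.0 * np.mean(np.stack(scene_accuracy))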