Example 1
def parse_groundtruths(end_points, config_dict):
    """ Parse groundtruth labels to OBB parameters.
    
    Args:
        end_points: dict
            {center_label, heading_class_label, heading_residual_label,
            size_class_label, size_residual_label, sem_cls_label,
            box_label_mask}
        config_dict: dict
            {dataset_config}

    Returns:
        batch_gt_map_cls: a list of len == batch_size (BS)
            [gt_list_i], i = 0, 1, ..., BS-1
            where gt_list_i = [(gt_sem_cls, gt_box_corners)_j]
            where j = 0, ..., num of objects - 1 at sample input i
    """
    center_label = end_points['center_label']
    heading_class_label = end_points['heading_class_label']
    heading_residual_label = end_points['heading_residual_label']
    size_class_label = end_points['size_class_label']
    size_residual_label = end_points['size_residual_label']
    box_label_mask = end_points['box_label_mask']
    sem_cls_label = end_points['sem_cls_label']
    bsize = center_label.shape[0]

    K2 = center_label.shape[1]  # K2==MAX_NUM_OBJ
    gt_corners_3d_upright_camera = np.zeros((bsize, K2, 8, 3))
    # gt_center_upright_camera = flip_axis_to_camera(center_label[:,:,0:3].detach().cpu().numpy())
    gt_center_upright_camera = center_label[:, :, 0:3].detach().cpu().numpy()
    for i in range(bsize):
        for j in range(K2):
            if box_label_mask[i, j] == 0: continue
            heading_angle = config_dict['dataset_config'].class2angle(
                heading_class_label[i, j].detach().cpu().numpy(),
                heading_residual_label[i, j].detach().cpu().numpy())
            box_size = config_dict['dataset_config'].class2size(
                int(size_class_label[i, j].detach().cpu().numpy()),
                size_residual_label[i, j].detach().cpu().numpy())
            corners_3d_upright_camera = get_3d_box(
                box_size, heading_angle, gt_center_upright_camera[i, j, :])
            gt_corners_3d_upright_camera[i, j] = corners_3d_upright_camera

    batch_gt_map_cls = []
    for i in range(bsize):
        batch_gt_map_cls.append([
            (sem_cls_label[i, j].item(), gt_corners_3d_upright_camera[i, j])
            for j in range(gt_corners_3d_upright_camera.shape[1])
            if box_label_mask[i, j] == 1
        ])
    end_points['batch_gt_map_cls'] = batch_gt_map_cls

    return batch_gt_map_cls
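
A hedged usage sketch for parse_groundtruths follows; the _StubConfig class, bin count, mean sizes, and all tensor values are made-up placeholders, and get_3d_box is assumed to be importable from the surrounding module.

import numpy as np
import torch

class _StubConfig:
    """Made-up stand-in for dataset_config; real bin counts and sizes differ."""
    num_heading_bin = 12
    mean_size_arr = np.ones((18, 3), dtype=np.float32)

    def class2angle(self, cls, residual):
        return cls * (2 * np.pi / self.num_heading_bin) + residual

    def class2size(self, cls, residual):
        return self.mean_size_arr[cls] + residual

B, K2 = 2, 4  # batch size, MAX_NUM_OBJ
end_points = {
    'center_label': torch.rand(B, K2, 3),
    'heading_class_label': torch.zeros(B, K2, dtype=torch.long),
    'heading_residual_label': torch.zeros(B, K2),
    'size_class_label': torch.zeros(B, K2, dtype=torch.long),
    'size_residual_label': torch.zeros(B, K2, 3),
    'sem_cls_label': torch.zeros(B, K2, dtype=torch.long),
    'box_label_mask': torch.ones(B, K2),
}
batch_gt_map_cls = parse_groundtruths(end_points, {'dataset_config': _StubConfig()})
print(len(batch_gt_map_cls), len(batch_gt_map_cls[0]))  # -> B, K2 valid boxes
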
Example 2
def predictions2corners3d(end_points, config_dict):
    """ Convert predictions to OBB parameters (eight corner points)
    Args:
        end_points: dict
            {point_clouds, center, heading_scores, heading_residuals,
            size_scores, size_residuals, sem_cls_scores}
        config_dict: dict
            {dataset_config, remove_empty_box, use_3d_nms, nms_iou,
            use_old_type_nms, conf_thresh, per_class_proposal}
    Returns:
        pred_corners_3d_upright_camera: ndarray (num_batch, num_proposals, 8, 3)
        pred_box_parameters: ndarray (num_batch, num_proposals, 7)
    """
    pred_center = end_points['center'] # B,num_proposal,3
    pred_heading_class = torch.argmax(end_points['heading_scores'], -1) # B,num_proposal
    pred_heading_residual = torch.gather(end_points['heading_residuals'], 2,
        pred_heading_class.unsqueeze(-1)) # B,num_proposal,1
    pred_heading_residual.squeeze_(2)
    pred_size_class = torch.argmax(end_points['size_scores'], -1) # B,num_proposal
    pred_size_residual = torch.gather(end_points['size_residuals'], 2,
        pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(1,1,1,3)) # B,num_proposal,1,3
    pred_size_residual.squeeze_(2)

    num_proposal = pred_center.shape[1]
    # The points live in the upright_depth frame, while the box util functions
    # assume the upright_camera frame, hence the flip_axis_to_camera call below.
    bsize = pred_center.shape[0]
    pred_box_parameters = np.zeros((bsize, num_proposal, 7), dtype=np.float32)
    pred_box_parameters[:,:,0:3] = pred_center.detach().cpu().numpy()
    pred_corners_3d_upright_camera = np.zeros((bsize, num_proposal, 8, 3), dtype=np.float32)
    pred_center_upright_camera = flip_axis_to_camera(pred_center.detach().cpu().numpy())
    for i in range(bsize):
        for j in range(num_proposal):
            heading_angle = config_dict['dataset_config'].class2angle(\
                pred_heading_class[i,j].detach().cpu().numpy(), pred_heading_residual[i,j].detach().cpu().numpy())
            box_size = config_dict['dataset_config'].class2size(\
                int(pred_size_class[i,j].detach().cpu().numpy()), pred_size_residual[i,j].detach().cpu().numpy())
            pred_box_parameters[i,j,3:6] = box_size
            pred_box_parameters[i,j,6] = heading_angle
            corners_3d_upright_camera = get_3d_box(box_size, heading_angle, pred_center_upright_camera[i,j,:])
            pred_corners_3d_upright_camera[i,j] = corners_3d_upright_camera

    return pred_corners_3d_upright_camera, pred_box_parameters
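
The argmax-plus-gather decoding above is worth isolating; this is a standalone sketch with illustrative shapes, not tied to any particular model output.

import torch

B, K, NH = 2, 8, 12                        # batch, proposals, heading bins
heading_scores = torch.randn(B, K, NH)     # per-bin classification logits
heading_residuals = torch.randn(B, K, NH)  # per-bin regressed residuals

# pick the best bin per proposal, then gather that bin's residual
pred_class = torch.argmax(heading_scores, -1)                    # (B, K)
pred_residual = torch.gather(
    heading_residuals, 2, pred_class.unsqueeze(-1)).squeeze(2)   # (B, K)
assert pred_residual.shape == (B, K)
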
Example 3
def get_roi_ptcloud(inputs, batch_pred_boxes_params, enlarge_ratio=1.2, num_point_roi=512, min_num_point=100):
    """ Generate ROI point cloud w.r.t predicted box

    :param inputs: dict {'point_clouds'}
                   input point clouds of the whole scene
           batch_pred_boxes_params: (B, num_proposals, 7), numpy array
                   predicted bounding box from detector
           enlarge_ratio: scalar
                   the value to enlarge the predicted box size
           num_point_roi: scalar
                   the number of points to be sampled in each enlarged box
           min_num_point: scalar
                   the minimum number of in-box points for a non-empty ROI

    :return:
        batch_pc_roi: (B, num_proposals, num_sampled_points, input_pc_features) numpy array
        nonempty_roi_mask: (B, num_proposals) numpy array
    """
    batch_pc = inputs['point_clouds'].detach().cpu().numpy()[:, :, :]  # B,N,C
    bsize = batch_pred_boxes_params.shape[0]
    K = batch_pred_boxes_params.shape[1]
    batch_pc_roi = np.zeros((bsize, K, num_point_roi, batch_pc.shape[2]), dtype=np.float32)
    nonempty_roi_mask = np.ones((bsize, K))

    for i in range(bsize):
        pc = batch_pc[i, :, :]  # (N,C)
        for j in range(K):
            box_params = batch_pred_boxes_params[i, j, :]  # (7)
            center = box_params[0:3]
            center_upright_camera = flip_axis_to_camera(center)
            box_size = box_params[3:6] * enlarge_ratio  # enlarge the box size
            heading_angle = box_params[6]
            box3d = get_3d_box(box_size, heading_angle, center_upright_camera)
            box3d = flip_axis_to_depth(box3d)
            pc_in_box, inds = extract_pc_in_box3d(pc, box3d)
            # print('The number of points in roi box is ', pc_in_box.shape[0])
            if len(pc_in_box) >= min_num_point:
                batch_pc_roi[i, j, :, :] = random_sampling(pc_in_box, num_point_roi)
            else:
                nonempty_roi_mask[i,j] = 0
    return batch_pc_roi, nonempty_roi_mask
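
random_sampling is used above but not shown; a plausible minimal version is sketched below (the actual utility may differ, e.g. in how it decides on sampling with replacement).

import numpy as np

def random_sampling(pc, num_sample, return_choices=False):
    # sample with replacement only when there are fewer points than requested
    replace = pc.shape[0] < num_sample
    choices = np.random.choice(pc.shape[0], num_sample, replace=replace)
    return (pc[choices], choices) if return_choices else pc[choices]
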
Example 4
def groundtruths2corners3d(end_points, config_dict):
    """ Convert predictions to OBB parameters (eight corner points)
    Args:
        end_points: dict
            {center_label, heading_class_label, heading_residual_label,
            size_class_label, size_residual_label, sem_cls_label,
            box_label_mask}
        config_dict: dict
            {dataset_config}
    Returns:
        gt_corners_3d_upright_camera: ndarray (num_batch, MAX_NUM_OBJ, 8, 3)
        gt_box_parameters: ndarray (num_batch, MAX_NUM_OBJ, 7)
    """
    center_label = end_points['center_label']
    heading_class_label = end_points['heading_class_label']
    heading_residual_label = end_points['heading_residual_label']
    size_class_label = end_points['size_class_label']
    size_residual_label = end_points['size_residual_label']
    box_label_mask = end_points['box_label_mask']
    bsize = center_label.shape[0]

    K2 = center_label.shape[1] # K2==MAX_NUM_OBJ
    gt_box_parameters = np.zeros((bsize, K2, 7), dtype=np.float32)
    gt_box_parameters[:, :, 0:3] = center_label.detach().cpu().numpy()
    gt_corners_3d_upright_camera = np.zeros((bsize, K2, 8, 3), dtype=np.float32)
    gt_center_upright_camera = flip_axis_to_camera(center_label[:,:,0:3].detach().cpu().numpy())
    for i in range(bsize):
        for j in range(K2):
            if box_label_mask[i,j] == 0: continue
            heading_angle = config_dict['dataset_config'].class2angle(heading_class_label[i,j].detach().cpu().numpy(), heading_residual_label[i,j].detach().cpu().numpy())
            box_size = config_dict['dataset_config'].class2size(int(size_class_label[i,j].detach().cpu().numpy()), size_residual_label[i,j].detach().cpu().numpy())
            gt_box_parameters[i,j,3:6] = box_size
            gt_box_parameters[i,j,6] = heading_angle
            corners_3d_upright_camera = get_3d_box(box_size, heading_angle, gt_center_upright_camera[i,j,:])
            gt_corners_3d_upright_camera[i,j] = corners_3d_upright_camera

    return gt_corners_3d_upright_camera, gt_box_parameters
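
class2angle and class2size are assumed throughout these examples; a hedged sketch following the usual VoteNet convention is given below, with illustrative bin counts and placeholder mean sizes.

import numpy as np

NUM_HEADING_BIN = 12                                 # illustrative value
mean_size_arr = np.ones((18, 3), dtype=np.float32)   # placeholder class means

def class2angle(pred_cls, residual, to_label_format=True):
    angle_per_class = 2 * np.pi / NUM_HEADING_BIN
    angle = pred_cls * angle_per_class + residual    # bin center + residual
    if to_label_format and angle > np.pi:
        angle = angle - 2 * np.pi                    # wrap to (-pi, pi]
    return angle

def class2size(pred_cls, residual):
    return mean_size_arr[pred_cls] + residual        # class mean + residual
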
Example 5
def dump_results(args, scanrefer, data, config):
    dump_dir = os.path.join(CONF.PATH.OUTPUT, args.folder, "vis")
    os.makedirs(dump_dir, exist_ok=True)

    # from inputs
    ids = data['scan_idx'].detach().cpu().numpy()
    point_clouds = data['point_clouds'].cpu().numpy()
    batch_size = point_clouds.shape[0]

    pcl_color = data["pcl_color"].detach().cpu().numpy()
    if args.use_color:
        pcl_color = (pcl_color * 256 + MEAN_COLOR_RGB).astype(np.int64)

    # from network outputs
    # detection
    pred_objectness = torch.argmax(data['objectness_scores'],
                                   2).float().detach().cpu().numpy()
    pred_center = data['center'].detach().cpu().numpy()  # (B,K,3)
    pred_heading_class = torch.argmax(data['heading_scores'],
                                      -1)  # B,num_proposal
    pred_heading_residual = torch.gather(
        data['heading_residuals'], 2,
        pred_heading_class.unsqueeze(-1))  # B,num_proposal,1
    pred_heading_class = pred_heading_class.detach().cpu().numpy(
    )  # B,num_proposal
    pred_heading_residual = pred_heading_residual.squeeze(
        2).detach().cpu().numpy()  # B,num_proposal
    pred_size_class = torch.argmax(data['size_scores'], -1)  # B,num_proposal
    pred_size_residual = torch.gather(
        data['size_residuals'], 2,
        pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(
            1, 1, 1, 3))  # B,num_proposal,1,3
    pred_size_residual = pred_size_residual.squeeze(
        2).detach().cpu().numpy()  # B,num_proposal,3
    # reference
    pred_ref_scores = data["cluster_ref"].detach().cpu().numpy()
    pred_ref_scores_softmax = F.softmax(
        data["cluster_ref"] *
        torch.argmax(data['objectness_scores'], 2).float() * data['pred_mask'],
        dim=1).detach().cpu().numpy()
    # post-processing
    nms_masks = data['pred_mask'].detach().cpu().numpy()  # B,num_proposal

    # ground truth
    gt_center = data['center_label'].cpu().numpy()  # (B,MAX_NUM_OBJ,3)
    gt_heading_class = data['heading_class_label'].cpu().numpy()  # B,K2
    gt_heading_residual = data['heading_residual_label'].cpu().numpy()  # B,K2
    gt_size_class = data['size_class_label'].cpu().numpy()  # B,K2
    gt_size_residual = data['size_residual_label'].cpu().numpy()  # B,K2,3
    # reference
    gt_ref_labels = data["ref_box_label"].detach().cpu().numpy()

    for i in range(batch_size):
        # basic info
        idx = ids[i]
        scene_id = scanrefer[idx]["scene_id"]
        object_id = scanrefer[idx]["object_id"]
        object_name = scanrefer[idx]["object_name"]
        ann_id = scanrefer[idx]["ann_id"]

        # scene_output
        scene_dump_dir = os.path.join(dump_dir, scene_id)
        if not os.path.exists(scene_dump_dir):
            os.mkdir(scene_dump_dir)

            # Dump the original scene point clouds
            mesh = align_mesh(scene_id)
            mesh.write(os.path.join(scene_dump_dir, 'mesh.ply'))

            write_ply_rgb(point_clouds[i], pcl_color[i],
                          os.path.join(scene_dump_dir, 'pc.ply'))

        # filter out the valid ground truth reference box
        assert gt_ref_labels[i].shape[0] == gt_center[i].shape[0]
        gt_ref_idx = np.argmax(gt_ref_labels[i], 0)

        # visualize the gt reference box
        # NOTE: for each object there should be only one gt reference box
        object_dump_dir = os.path.join(
            dump_dir, scene_id, "gt_{}_{}.ply".format(object_id, object_name))
        gt_obb = config.param2obb(gt_center[i, gt_ref_idx,
                                            0:3], gt_heading_class[i,
                                                                   gt_ref_idx],
                                  gt_heading_residual[i, gt_ref_idx],
                                  gt_size_class[i, gt_ref_idx],
                                  gt_size_residual[i, gt_ref_idx])
        gt_bbox = get_3d_box(gt_obb[3:6], gt_obb[6], gt_obb[0:3])

        if not os.path.exists(object_dump_dir):
            write_bbox(
                gt_obb, 0,
                os.path.join(scene_dump_dir,
                             'gt_{}_{}.ply'.format(object_id, object_name)))

        # find the valid reference prediction
        pred_masks = (nms_masks[i] * pred_objectness[i]) == 1
        assert pred_ref_scores[i].shape[0] == pred_center[i].shape[0]
        pred_ref_idx = np.argmax(pred_ref_scores[i] * pred_masks, 0)
        assigned_gt = torch.gather(
            data["ref_box_label"], 1,
            data["object_assignment"]).detach().cpu().numpy()

        # visualize the predicted reference box
        pred_obb = config.param2obb(pred_center[i, pred_ref_idx, 0:3],
                                    pred_heading_class[i, pred_ref_idx],
                                    pred_heading_residual[i, pred_ref_idx],
                                    pred_size_class[i, pred_ref_idx],
                                    pred_size_residual[i, pred_ref_idx])
        pred_bbox = get_3d_box(pred_obb[3:6], pred_obb[6], pred_obb[0:3])
        iou, _ = box3d_iou(gt_bbox, pred_bbox)

        write_bbox(
            pred_obb, 1,
            os.path.join(
                scene_dump_dir, 'pred_{}_{}_{}_{:.5f}_{:.5f}.ply'.format(
                    object_id, object_name, ann_id,
                    pred_ref_scores_softmax[i, pred_ref_idx], iou)))
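
The masked-argmax selection of the predicted reference box above reduces to the following standalone sketch; all values are synthetic.

import numpy as np

num_proposal = 6
nms_mask = np.array([1, 1, 0, 1, 0, 1])     # survived NMS
objectness = np.array([1, 0, 1, 1, 1, 1])   # predicted as object
ref_scores = np.random.rand(num_proposal)   # non-negative reference scores

valid = (nms_mask * objectness) == 1        # proposals passing both checks
pred_ref_idx = np.argmax(ref_scores * valid)
print(pred_ref_idx)  # index of the best valid proposal
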
Example 6
def predict(args):
    print("predict bounding boxes...")
    # constant
    DC = ScannetDatasetConfig()

    # init training dataset
    print("preparing data...")
    scanrefer, scene_list = get_scanrefer(args)

    # dataloader
    _, dataloader = get_dataloader(args, scanrefer, scene_list, "test", DC)

    # model
    model = get_model(args, DC)

    # config
    POST_DICT = {
        "remove_empty_box": True, 
        "use_3d_nms": True, 
        "nms_iou": 0.25,
        "use_old_type_nms": False, 
        "cls_nms": True, 
        "per_class_proposal": True,
        "conf_thresh": 0.05,
        "dataset_config": DC
    } if not args.no_nms else None

    # predict
    print("predicting...")
    pred_bboxes = []
    for data_dict in tqdm(dataloader):
        for key in data_dict:
            data_dict[key] = data_dict[key].cuda()

        # feed
        data_dict = model(data_dict)
        _, data_dict = get_loss(
            data_dict=data_dict, 
            config=DC, 
            detection=False,
            reference=True
        )

        objectness_preds_batch = torch.argmax(data_dict['objectness_scores'], 2).long()

        if POST_DICT:
            _ = parse_predictions(data_dict, POST_DICT)
            nms_masks = torch.LongTensor(data_dict['pred_mask']).cuda()

            # construct valid mask
            pred_masks = (nms_masks * objectness_preds_batch == 1).float()
        else:
            # construct valid mask
            pred_masks = (objectness_preds_batch == 1).float()

        pred_ref = torch.argmax(data_dict['cluster_ref'] * pred_masks, 1) # (B,)
        pred_center = data_dict['center'] # (B,K,3)
        pred_heading_class = torch.argmax(data_dict['heading_scores'], -1) # B,num_proposal
        pred_heading_residual = torch.gather(data_dict['heading_residuals'], 2, pred_heading_class.unsqueeze(-1)) # B,num_proposal,1
        pred_heading_residual = pred_heading_residual.squeeze(2) # B,num_proposal
        pred_size_class = torch.argmax(data_dict['size_scores'], -1) # B,num_proposal
        pred_size_residual = torch.gather(data_dict['size_residuals'], 2, pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(1,1,1,3)) # B,num_proposal,1,3
        pred_size_residual = pred_size_residual.squeeze(2) # B,num_proposal,3

        for i in range(pred_ref.shape[0]):
            # compute the iou
            pred_ref_idx = pred_ref[i]
            pred_obb = DC.param2obb(
                pred_center[i, pred_ref_idx, 0:3].detach().cpu().numpy(), 
                pred_heading_class[i, pred_ref_idx].detach().cpu().numpy(), 
                pred_heading_residual[i, pred_ref_idx].detach().cpu().numpy(),
                pred_size_class[i, pred_ref_idx].detach().cpu().numpy(), 
                pred_size_residual[i, pred_ref_idx].detach().cpu().numpy()
            )
            pred_bbox = get_3d_box(pred_obb[3:6], pred_obb[6], pred_obb[0:3])

            # construct the multiple mask
            multiple = data_dict["unique_multiple"][i].item()

            # construct the others mask
            others = 1 if data_dict["object_cat"][i] == 17 else 0

            # store data
            scanrefer_idx = data_dict["scan_idx"][i].item()
            pred_data = {
                "scene_id": scanrefer[scanrefer_idx]["scene_id"],
                "object_id": scanrefer[scanrefer_idx]["object_id"],
                "ann_id": scanrefer[scanrefer_idx]["ann_id"],
                "bbox": pred_bbox.tolist(),
                "unique_multiple": multiple,
                "others": others
            }
            pred_bboxes.append(pred_data)

    # dump
    print("dumping...")
    pred_path = os.path.join(CONF.PATH.OUTPUT, args.folder, "pred.json")
    with open(pred_path, "w") as f:
        json.dump(pred_bboxes, f, indent=4)

    print("done!")
Example 7
    def __getitem__(self, idx):
        start = time.time()
        scene_id = self.scanrefer[idx]["scene_id"]
        object_id = int(self.scanrefer[idx]["object_id"])
        object_name = " ".join(self.scanrefer[idx]["object_name"].split("_"))
        ann_id = self.scanrefer[idx]["ann_id"]

        # get language features
        lang_feat = self.lang[scene_id][str(object_id)][ann_id]
        lang_len = len(self.scanrefer[idx]["token"]) + 2
        lang_len = min(lang_len, CONF.TRAIN.MAX_DES_LEN + 2)

        # get pc
        mesh_vertices = self.scene_data[scene_id]["mesh_vertices"]
        instance_labels = self.scene_data[scene_id]["instance_labels"]
        semantic_labels = self.scene_data[scene_id]["semantic_labels"]
        instance_bboxes = self.scene_data[scene_id]["instance_bboxes"]

        if not self.use_color:
            point_cloud = mesh_vertices[:, 0:3]  # do not use color for now
            pcl_color = mesh_vertices[:, 3:6]
        else:
            point_cloud = mesh_vertices[:, 0:6]
            point_cloud[:,
                        3:6] = (point_cloud[:, 3:6] - MEAN_COLOR_RGB) / 256.0
            pcl_color = point_cloud[:, 3:6]

        if self.use_normal:
            normals = mesh_vertices[:, 6:9]
            point_cloud = np.concatenate([point_cloud, normals], 1)

        if self.use_multiview:
            # load multiview database
            if self.multiview_data == {}:
                self.multiview_data = h5py.File(MULTIVIEW_DATA,
                                                "r",
                                                libver="latest")

            multiview = self.multiview_data[scene_id]
            point_cloud = np.concatenate([point_cloud, multiview], 1)

        if self.use_height:
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)

        point_cloud, choices = random_sampling(point_cloud,
                                               self.num_points,
                                               return_choices=True)
        instance_labels = instance_labels[choices]
        semantic_labels = semantic_labels[choices]
        pcl_color = pcl_color[choices]

        # ------------------------------- LABELS ------------------------------
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
        target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))

        ref_box_label = np.zeros(
            MAX_NUM_OBJ)  # bbox label for reference target
        ref_center_label = np.zeros(3)  # bbox center for reference target
        ref_heading_class_label = 0
        ref_heading_residual_label = 0
        ref_size_class_label = 0
        ref_size_residual_label = np.zeros(
            3)  # bbox size residual for reference target
        ref_box_corner_label = np.zeros((8, 3))

        if self.split != "test":
            num_bbox = min(instance_bboxes.shape[0], MAX_NUM_OBJ)
            target_bboxes_mask[0:num_bbox] = 1
            target_bboxes[0:num_bbox, :] = instance_bboxes[:MAX_NUM_OBJ, 0:6]

            point_votes = np.zeros([self.num_points, 3])
            point_votes_mask = np.zeros(self.num_points)

            # ------------------------------- DATA AUGMENTATION ------------------------------
            if self.augment:
                if np.random.random() > 0.5:
                    # Flipping along the YZ plane
                    point_cloud[:, 0] = -1 * point_cloud[:, 0]
                    target_bboxes[:, 0] = -1 * target_bboxes[:, 0]

                if np.random.random() > 0.5:
                    # Flipping along the XZ plane
                    point_cloud[:, 1] = -1 * point_cloud[:, 1]
                    target_bboxes[:, 1] = -1 * target_bboxes[:, 1]

                # Rotation along X-axis
                rot_angle = (np.random.random() * np.pi /
                             18) - np.pi / 36  # -5 ~ +5 degree
                rot_mat = rotx(rot_angle)
                point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                             np.transpose(rot_mat))
                target_bboxes = rotate_aligned_boxes_along_axis(
                    target_bboxes, rot_mat, "x")

                # Rotation along Y-axis
                rot_angle = (np.random.random() * np.pi /
                             18) - np.pi / 36  # -5 ~ +5 degree
                rot_mat = roty(rot_angle)
                point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                             np.transpose(rot_mat))
                target_bboxes = rotate_aligned_boxes_along_axis(
                    target_bboxes, rot_mat, "y")

                # Rotation along up-axis/Z-axis
                rot_angle = (np.random.random() * np.pi /
                             18) - np.pi / 36  # -5 ~ +5 degree
                rot_mat = rotz(rot_angle)
                point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                             np.transpose(rot_mat))
                target_bboxes = rotate_aligned_boxes_along_axis(
                    target_bboxes, rot_mat, "z")

                # Translation
                point_cloud, target_bboxes = self._translate(
                    point_cloud, target_bboxes)

            # compute votes *AFTER* augmentation
            # generate votes
            # Note: since there is no mapping between bbox instance labels and
            # pc instance_labels (it was filtered out in the data preparation
            # step), we compute the instance bbox from the points sharing the
            # same instance label.
            for i_instance in np.unique(instance_labels):
                # find all points belong to that instance
                ind = np.where(instance_labels == i_instance)[0]
                # find the semantic label
                if semantic_labels[ind[0]] in DC.nyu40ids:
                    x = point_cloud[ind, :3]
                    center = 0.5 * (x.min(0) + x.max(0))
                    point_votes[ind, :] = center - x
                    point_votes_mask[ind] = 1.0
            point_votes = np.tile(point_votes,
                                  (1, 3))  # make 3 votes identical

            class_ind = [
                DC.nyu40id2class[int(x)]
                for x in instance_bboxes[:num_bbox, -2]
            ]
            # NOTE: set size class as semantic class. Consider using size2class.
            size_classes[0:num_bbox] = class_ind
            size_residuals[0:num_bbox, :] = target_bboxes[
                0:num_bbox, 3:6] - DC.mean_size_arr[class_ind, :]

            # construct the reference target label for each bbox
            ref_box_label = np.zeros(MAX_NUM_OBJ)
            for i, gt_id in enumerate(instance_bboxes[:num_bbox, -1]):
                if gt_id == object_id:
                    ref_box_label[i] = 1
                    ref_center_label = target_bboxes[i, 0:3]
                    ref_heading_class_label = angle_classes[i]
                    ref_heading_residual_label = angle_residuals[i]
                    ref_size_class_label = size_classes[i]
                    ref_size_residual_label = size_residuals[i]

                    # construct ground truth box corner coordinates
                    ref_obb = DC.param2obb(ref_center_label,
                                           ref_heading_class_label,
                                           ref_heading_residual_label,
                                           ref_size_class_label,
                                           ref_size_residual_label)
                    ref_box_corner_label = get_3d_box(ref_obb[3:6], ref_obb[6],
                                                      ref_obb[0:3])

            # construct all GT bbox corners
            all_obb = DC.param2obb_batch(
                target_bboxes[:num_bbox,
                              0:3], angle_classes[:num_bbox].astype(np.int64),
                angle_residuals[:num_bbox],
                size_classes[:num_bbox].astype(np.int64),
                size_residuals[:num_bbox])
            all_box_corner_label = get_3d_box_batch(all_obb[:, 3:6],
                                                    all_obb[:, 6],
                                                    all_obb[:, 0:3])

            # store
            gt_box_corner_label = np.zeros((MAX_NUM_OBJ, 8, 3))
            gt_box_masks = np.zeros((MAX_NUM_OBJ, ))
            gt_box_object_ids = np.zeros((MAX_NUM_OBJ, ))

            gt_box_corner_label[:num_bbox] = all_box_corner_label
            gt_box_masks[:num_bbox] = 1
            gt_box_object_ids[:num_bbox] = instance_bboxes[:num_bbox, -1]
        else:
            num_bbox = 1
            point_votes = np.zeros([self.num_points,
                                    9])  # make 3 votes identical
            point_votes_mask = np.zeros(self.num_points)

        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        target_object_ids = np.zeros(
            (MAX_NUM_OBJ, ))  # object ids of all objects
        try:
            target_bboxes_semcls[0:num_bbox] = [
                DC.nyu40id2class[int(x)]
                for x in instance_bboxes[:, -2][0:num_bbox]
            ]
            target_object_ids[0:num_bbox] = instance_bboxes[:, -1][0:num_bbox]
        except KeyError:
            pass

        object_cat = self.raw2label[
            object_name] if object_name in self.raw2label else 17

        data_dict = {}
        data_dict["point_clouds"] = point_cloud.astype(
            np.float32
        )  # point cloud data including features    [B,max_num_points,3]
        data_dict["lang_feat"] = lang_feat.astype(
            np.float32)  # language feature vectors     [B,32,300]
        data_dict["lang_len"] = np.array(lang_len).astype(
            np.int64)  # length of each description    [B]
        data_dict["lang_ids"] = np.array(
            self.lang_ids[scene_id][str(object_id)][ann_id]).astype(
                np.int64)  #     [B,32,300]
        #all data with MAX_NUM_OBJ are mostly filled with zeros
        data_dict["center_label"] = target_bboxes.astype(
            np.float32
        )[:, 0:3]  # (MAX_NUM_OBJ, 3) for GT box center XYZ  # [B,128,3]
        data_dict["heading_class_label"] = angle_classes.astype(
            np.int64
        )  # (MAX_NUM_OBJ,) with int values in 0,...,NUM_HEADING_BIN-1  [B,128]
        data_dict["heading_residual_label"] = angle_residuals.astype(
            np.float32)  # (MAX_NUM_OBJ,) [B,128]
        data_dict["size_class_label"] = size_classes.astype(
            np.int64
        )  # (MAX_NUM_OBJ,) with int values in 0,...,NUM_SIZE_CLUSTER  [B,128]
        data_dict["size_residual_label"] = size_residuals.astype(
            np.float32)  # (MAX_NUM_OBJ, 3) [B,128,3]
        data_dict["num_bbox"] = np.array(num_bbox).astype(np.int64)  # [B]
        data_dict["sem_cls_label"] = target_bboxes_semcls.astype(
            np.int64)  # (MAX_NUM_OBJ,) semantic class index
        data_dict["scene_object_ids"] = target_object_ids.astype(
            np.int64)  # (MAX_NUM_OBJ,) object ids of all objects
        data_dict["box_label_mask"] = target_bboxes_mask.astype(
            np.float32)  # (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
        data_dict["vote_label"] = point_votes.astype(np.float32)  # [B,40000,9]
        data_dict["vote_label_mask"] = point_votes_mask.astype(
            np.int64)  # [B,40000]
        data_dict["dataset_idx"] = np.array(idx).astype(
            np.int64)  # [B] object indices from self.scanrefer
        data_dict["pcl_color"] = pcl_color
        data_dict["ref_box_label"] = ref_box_label.astype(
            np.int64)  # 0/1 reference labels for each object bbox
        data_dict["ref_center_label"] = ref_center_label.astype(np.float32)
        data_dict["ref_heading_class_label"] = np.array(
            int(ref_heading_class_label)).astype(np.int64)
        data_dict["ref_heading_residual_label"] = np.array(
            int(ref_heading_residual_label)).astype(np.int64)
        data_dict["ref_size_class_label"] = np.array(
            int(ref_size_class_label)).astype(np.int64)
        data_dict["ref_size_residual_label"] = ref_size_residual_label.astype(
            np.float32)
        data_dict["ref_box_corner_label"] = ref_box_corner_label.astype(
            np.float64)  # target box corners NOTE type must be
        data_dict["gt_box_corner_label"] = gt_box_corner_label.astype(
            np.float64)  # all GT box corners NOTE type must be double
        data_dict["gt_box_masks"] = gt_box_masks.astype(
            np.int64)  # valid bbox masks
        data_dict["gt_box_object_ids"] = gt_box_object_ids.astype(
            np.int64)  # valid bbox object ids
        data_dict["object_id"] = np.array(int(object_id)).astype(
            np.int64)  # [B] target object_ids
        data_dict["ann_id"] = np.array(int(ann_id)).astype(np.int64)  # [B]
        data_dict["object_cat"] = np.array(object_cat).astype(
            np.int64)  # [B] target object classes
        data_dict["unique_multiple"] = np.array(
            self.unique_multiple_lookup[scene_id][str(
                object_id)][ann_id]).astype(np.int64)
        data_dict["pcl_color"] = pcl_color  # [B,40000,3]
        data_dict["load_time"] = time.time() - start

        return data_dict
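
The vote-generation loop buried in __getitem__ can be read in isolation; a minimal numpy sketch with synthetic points follows.

import numpy as np

num_points = 8
point_cloud = np.random.rand(num_points, 3)
instance_labels = np.array([0, 0, 0, 1, 1, 1, 1, 1])

point_votes = np.zeros((num_points, 3))
for i_instance in np.unique(instance_labels):
    ind = np.where(instance_labels == i_instance)[0]
    x = point_cloud[ind, :3]
    center = 0.5 * (x.min(0) + x.max(0))  # bbox center from point extremes
    point_votes[ind, :] = center - x      # offset from each point to center
point_votes = np.tile(point_votes, (1, 3))  # duplicate into 3 identical votes
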
Example 8
def get_loss(data_dict,
             config,
             reference=False,
             use_lang_classifier=False,
             use_max_iou=False,
             post_processing=None):
    """ Loss functions

    Args:
        data_dict: dict
        config: dataset config instance
        reference: flag (False/True)
        use_lang_classifier: flag (False/True)
        use_max_iou: flag (False/True)
        post_processing: config dict
    Returns:
        loss: pytorch scalar tensor
        data_dict: dict
    """

    # Vote loss
    vote_loss = compute_vote_loss(data_dict)
    data_dict['vote_loss'] = vote_loss

    # Obj loss
    objectness_loss, objectness_label, objectness_mask, object_assignment = compute_objectness_loss(
        data_dict)
    data_dict['objectness_loss'] = objectness_loss
    data_dict['objectness_label'] = objectness_label
    data_dict['objectness_mask'] = objectness_mask
    data_dict['object_assignment'] = object_assignment
    total_num_proposal = objectness_label.shape[0] * objectness_label.shape[1]
    data_dict['pos_ratio'] = torch.sum(
        objectness_label.float().cuda()) / float(total_num_proposal)
    data_dict['neg_ratio'] = torch.sum(objectness_mask.float()) / float(
        total_num_proposal) - data_dict['pos_ratio']

    # Box loss and sem cls loss
    center_loss, heading_cls_loss, heading_reg_loss, size_cls_loss, size_reg_loss, sem_cls_loss = compute_box_and_sem_cls_loss(
        data_dict, config)
    data_dict['center_loss'] = center_loss
    data_dict['heading_cls_loss'] = heading_cls_loss
    data_dict['heading_reg_loss'] = heading_reg_loss
    data_dict['size_cls_loss'] = size_cls_loss
    data_dict['size_reg_loss'] = size_reg_loss
    data_dict['sem_cls_loss'] = sem_cls_loss
    box_loss = center_loss + 0.1 * heading_cls_loss + heading_reg_loss + 0.1 * size_cls_loss + size_reg_loss
    data_dict['box_loss'] = box_loss

    if reference:
        # Reference loss
        ref_loss, lang_loss, cluster_preds_scores, cluster_labels = compute_reference_loss(
            data_dict, config, use_lang_classifier, use_max_iou)
        data_dict["ref_loss"] = ref_loss
        data_dict["lang_loss"] = lang_loss

        objectness_preds_batch = torch.argmax(data_dict['objectness_scores'],
                                              2).long()
        objectness_labels_batch = objectness_label.long()

        if post_processing:
            _ = parse_predictions(data_dict, post_processing)
            nms_masks = torch.LongTensor(data_dict['pred_mask']).cuda()

            # construct valid mask
            pred_masks = (nms_masks * objectness_preds_batch == 1).float()
            label_masks = (objectness_labels_batch == 1).float()
        else:
            # construct valid mask
            pred_masks = (objectness_preds_batch == 1).float()
            label_masks = (objectness_labels_batch == 1).float()

        data_dict["pred_mask"] = pred_masks
        data_dict["label_mask"] = label_masks

        cluster_preds = torch.argmax(cluster_preds_scores * pred_masks,
                                     1).long().unsqueeze(1).repeat(
                                         1, pred_masks.shape[1])
        preds = torch.zeros(pred_masks.shape).cuda()
        preds = preds.scatter_(1, cluster_preds, 1)
        cluster_preds = preds
        cluster_labels = cluster_labels.float()
        cluster_labels *= label_masks

        # compute classification scores
        corrects = torch.sum((cluster_preds == 1) * (cluster_labels == 1),
                             dim=1).float()
        labels = torch.ones(corrects.shape[0]).cuda()

        ref_acc = corrects / (labels + 1e-8)

        # store
        data_dict["ref_acc"] = ref_acc.cpu().numpy().tolist()

        # compute localization metrics
        pred_ref = torch.argmax(
            data_dict['cluster_ref'] * data_dict['pred_mask'],
            1).detach().cpu().numpy()  # (B,)
        pred_center = data_dict['center'].detach().cpu().numpy()  # (B,K,3)
        pred_heading_class = torch.argmax(data_dict['heading_scores'],
                                          -1)  # B,num_proposal
        pred_heading_residual = torch.gather(
            data_dict['heading_residuals'], 2,
            pred_heading_class.unsqueeze(-1))  # B,num_proposal,1
        pred_heading_class = pred_heading_class.detach().cpu().numpy(
        )  # B,num_proposal
        pred_heading_residual = pred_heading_residual.squeeze(
            2).detach().cpu().numpy()  # B,num_proposal
        pred_size_class = torch.argmax(data_dict['size_scores'],
                                       -1)  # B,num_proposal
        pred_size_residual = torch.gather(
            data_dict['size_residuals'], 2,
            pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(
                1, 1, 1, 3))  # B,num_proposal,1,3
        pred_size_class = pred_size_class.detach().cpu().numpy()
        pred_size_residual = pred_size_residual.squeeze(
            2).detach().cpu().numpy()  # B,num_proposal,3

        gt_ref = torch.argmax(data_dict["ref_box_label"],
                              1).detach().cpu().numpy()
        gt_center = data_dict['center_label'].cpu().numpy(
        )  # (B,MAX_NUM_OBJ,3)
        gt_heading_class = data_dict['heading_class_label'].cpu().numpy(
        )  # B,K2
        gt_heading_residual = data_dict['heading_residual_label'].cpu().numpy(
        )  # B,K2
        gt_size_class = data_dict['size_class_label'].cpu().numpy()  # B,K2
        gt_size_residual = data_dict['size_residual_label'].cpu().numpy(
        )  # B,K2,3

        ious = []
        multiple = []
        for i in range(pred_ref.shape[0]):
            # compute the iou
            pred_ref_idx, gt_ref_idx = pred_ref[i], gt_ref[i]
            pred_obb = config.param2obb(pred_center[i, pred_ref_idx, 0:3],
                                        pred_heading_class[i, pred_ref_idx],
                                        pred_heading_residual[i, pred_ref_idx],
                                        pred_size_class[i, pred_ref_idx],
                                        pred_size_residual[i, pred_ref_idx])
            gt_obb = config.param2obb(gt_center[i, gt_ref_idx, 0:3],
                                      gt_heading_class[i, gt_ref_idx],
                                      gt_heading_residual[i, gt_ref_idx],
                                      gt_size_class[i, gt_ref_idx],
                                      gt_size_residual[i, gt_ref_idx])
            pred_bbox = get_3d_box(pred_obb[3:6], pred_obb[6], pred_obb[0:3])
            gt_bbox = get_3d_box(gt_obb[3:6], gt_obb[6], gt_obb[0:3])
            iou, _ = box3d_iou(pred_bbox, gt_bbox)
            ious.append(iou)

            # construct the multiple mask
            num_bbox = data_dict["num_bbox"][i]
            sem_cls_label = data_dict["sem_cls_label"][i]
            sem_cls_label[num_bbox:] -= 1
            num_choices = torch.sum(
                data_dict["object_cat"][i] == sem_cls_label)
            if num_choices > 1:
                multiple.append(1)
            else:
                multiple.append(0)

        # store
        data_dict["ref_iou"] = ious
        data_dict["ref_iou_rate_0.25"] = np.array(ious)[
            np.array(ious) >= 0.25].shape[0] / np.array(ious).shape[0]
        data_dict["ref_iou_rate_0.5"] = np.array(ious)[
            np.array(ious) >= 0.5].shape[0] / np.array(ious).shape[0]
        data_dict["ref_multiple_mask"] = multiple
    else:
        ref_loss = torch.zeros(1)[0].cuda()
        lang_loss = torch.zeros(1)[0].cuda()

    # Final loss function
    if use_max_iou:
        loss = vote_loss + 0.5 * objectness_loss + box_loss + 0.1 * sem_cls_loss + 0.1 * ref_loss + lang_loss
    else:
        loss = vote_loss + 0.5 * objectness_loss + box_loss + 0.1 * sem_cls_loss + 0.01 * ref_loss + lang_loss

    loss *= 10  # amplify

    data_dict['loss'] = loss

    # --------------------------------------------
    # Some other statistics
    obj_pred_val = torch.argmax(data_dict['objectness_scores'], 2)  # B,K
    obj_acc = torch.sum((obj_pred_val == objectness_label.long()).float() *
                        objectness_mask) / (torch.sum(objectness_mask) + 1e-6)
    data_dict['obj_acc'] = obj_acc
    # precision, recall, f1
    corrects = torch.sum((obj_pred_val == 1) * (objectness_label == 1),
                         dim=1).float()
    preds = torch.sum(obj_pred_val == 1, dim=1).float()
    labels = torch.sum(objectness_label == 1, dim=1).float()
    precisions = corrects / (labels + 1e-8)
    recalls = corrects / (preds + 1e-8)
    f1s = 2 * precisions * recalls / (precisions + recalls + 1e-8)
    data_dict["objectness_precision"] = precisions.cpu().numpy().tolist()
    data_dict["objectness_recall"] = recalls.cpu().numpy().tolist()
    data_dict["objectness_f1"] = f1s.cpu().numpy().tolist()
    # lang
    if use_lang_classifier:
        data_dict["lang_acc"] = (torch.argmax(
            data_dict['lang_scores'],
            1) == data_dict["object_cat"]).float().mean()
    else:
        data_dict["lang_acc"] = torch.zeros(1)[0].cuda()

    return loss, data_dict
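
The scatter_-based one-hot construction used in the reference branch is easy to misread; here is a minimal sketch with illustrative shapes.

import torch

B, K = 2, 5
scores = torch.rand(B, K)   # e.g. cluster_ref
masks = torch.ones(B, K)    # e.g. pred_masks

best = torch.argmax(scores * masks, 1)            # (B,) winning proposal ids
index = best.unsqueeze(1).repeat(1, K)            # (B, K), same id per row
onehot = torch.zeros(B, K).scatter_(1, index, 1)  # exactly one 1 per row
assert onehot.sum().item() == B
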
Example 9
def get_eval(data_dict,
             config,
             reference,
             use_lang_classifier=False,
             use_oracle=False,
             use_cat_rand=False,
             use_best=False,
             post_processing=None):
    """ Loss functions

    Args:
        data_dict: dict
        config: dataset config instance
        reference: flag (False/True)
        post_processing: config dict
    Returns:
        loss: pytorch scalar tensor
        data_dict: dict
    """

    batch_size, num_words, _ = data_dict["lang_feat"].shape

    objectness_preds_batch = torch.argmax(data_dict['objectness_scores'],
                                          2).long()
    objectness_labels_batch = data_dict['objectness_label'].long()

    if post_processing:
        _ = parse_predictions(data_dict, post_processing)
        nms_masks = torch.LongTensor(data_dict['pred_mask']).cuda()

        # construct valid mask
        pred_masks = (nms_masks * objectness_preds_batch == 1).float()
        label_masks = (objectness_labels_batch == 1).float()
    else:
        # construct valid mask
        pred_masks = (objectness_preds_batch == 1).float()
        label_masks = (objectness_labels_batch == 1).float()

    cluster_preds = torch.argmax(data_dict["cluster_ref"] * pred_masks,
                                 1).long().unsqueeze(1).repeat(
                                     1, pred_masks.shape[1])
    preds = torch.zeros(pred_masks.shape).cuda()
    preds = preds.scatter_(1, cluster_preds, 1)
    cluster_preds = preds
    cluster_labels = data_dict["cluster_labels"].float()
    cluster_labels *= label_masks

    # compute classification scores
    corrects = torch.sum((cluster_preds == 1) * (cluster_labels == 1),
                         dim=1).float()
    labels = torch.ones(corrects.shape[0]).cuda()
    ref_acc = corrects / (labels + 1e-8)

    # store
    data_dict["ref_acc"] = ref_acc.cpu().numpy().tolist()

    # compute localization metrics
    if use_best:
        pred_ref = torch.argmax(data_dict["cluster_labels"], 1)  # (B,)
        # store the calibrated predictions and masks
        data_dict['cluster_ref'] = data_dict["cluster_labels"]
    elif use_cat_rand:
        cluster_preds = torch.zeros(cluster_labels.shape).cuda()
        for i in range(cluster_preds.shape[0]):
            num_bbox = data_dict["num_bbox"][i]
            sem_cls_label = data_dict["sem_cls_label"][i]
            # sem_cls_label = torch.argmax(end_points["sem_cls_scores"], 2)[i]
            sem_cls_label[num_bbox:] -= 1
            candidate_masks = torch.gather(
                sem_cls_label == data_dict["object_cat"][i], 0,
                data_dict["object_assignment"][i])
            candidates = torch.arange(cluster_labels.shape[1])[candidate_masks]
            try:
                chosen_idx = torch.randperm(candidates.shape[0])[0]
                chosen_candidate = candidates[chosen_idx]
                cluster_preds[i, chosen_candidate] = 1
            except IndexError:
                cluster_preds[i, candidates] = 1

        pred_ref = torch.argmax(cluster_preds, 1)  # (B,)
        # store the calibrated predictions and masks
        data_dict['cluster_ref'] = cluster_preds
    else:
        pred_ref = torch.argmax(data_dict['cluster_ref'] * pred_masks,
                                1)  # (B,)
        # store the calibrated predictions and masks
        data_dict['cluster_ref'] = data_dict['cluster_ref'] * pred_masks

    if use_oracle:
        pred_center = data_dict['center_label']  # (B,MAX_NUM_OBJ,3)
        pred_heading_class = data_dict['heading_class_label']  # B,K2
        pred_heading_residual = data_dict['heading_residual_label']  # B,K2
        pred_size_class = data_dict['size_class_label']  # B,K2
        pred_size_residual = data_dict['size_residual_label']  # B,K2,3

        # assign
        pred_center = torch.gather(
            pred_center, 1,
            data_dict["object_assignment"].unsqueeze(2).repeat(1, 1, 3))
        pred_heading_class = torch.gather(pred_heading_class, 1,
                                          data_dict["object_assignment"])
        pred_heading_residual = torch.gather(
            pred_heading_residual, 1,
            data_dict["object_assignment"]).unsqueeze(-1)
        pred_size_class = torch.gather(pred_size_class, 1,
                                       data_dict["object_assignment"])
        pred_size_residual = torch.gather(
            pred_size_residual, 1,
            data_dict["object_assignment"].unsqueeze(2).repeat(1, 1, 3))
    else:
        pred_center = data_dict['center']  # (B,K,3)
        pred_heading_class = torch.argmax(data_dict['heading_scores'],
                                          -1)  # B,num_proposal
        pred_heading_residual = torch.gather(
            data_dict['heading_residuals'], 2,
            pred_heading_class.unsqueeze(-1))  # B,num_proposal,1
        pred_heading_residual = pred_heading_residual.squeeze(
            2)  # B,num_proposal
        pred_size_class = torch.argmax(data_dict['size_scores'],
                                       -1)  # B,num_proposal
        pred_size_residual = torch.gather(
            data_dict['size_residuals'], 2,
            pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(
                1, 1, 1, 3))  # B,num_proposal,1,3
        pred_size_residual = pred_size_residual.squeeze(2)  # B,num_proposal,3

    # store
    data_dict["pred_mask"] = pred_masks
    data_dict["label_mask"] = label_masks
    data_dict['pred_center'] = pred_center
    data_dict['pred_heading_class'] = pred_heading_class
    data_dict['pred_heading_residual'] = pred_heading_residual
    data_dict['pred_size_class'] = pred_size_class
    data_dict['pred_size_residual'] = pred_size_residual

    gt_ref = torch.argmax(data_dict["ref_box_label"], 1)
    gt_center = data_dict['center_label']  # (B,MAX_NUM_OBJ,3)
    gt_heading_class = data_dict['heading_class_label']  # B,K2
    gt_heading_residual = data_dict['heading_residual_label']  # B,K2
    gt_size_class = data_dict['size_class_label']  # B,K2
    gt_size_residual = data_dict['size_residual_label']  # B,K2,3

    ious = []
    multiple = []
    others = []
    pred_bboxes = []
    gt_bboxes = []
    for i in range(pred_ref.shape[0]):
        # compute the iou
        pred_ref_idx, gt_ref_idx = pred_ref[i], gt_ref[i]
        pred_obb = config.param2obb(
            pred_center[i, pred_ref_idx, 0:3].detach().cpu().numpy(),
            pred_heading_class[i, pred_ref_idx].detach().cpu().numpy(),
            pred_heading_residual[i, pred_ref_idx].detach().cpu().numpy(),
            pred_size_class[i, pred_ref_idx].detach().cpu().numpy(),
            pred_size_residual[i, pred_ref_idx].detach().cpu().numpy())
        gt_obb = config.param2obb(
            gt_center[i, gt_ref_idx, 0:3].detach().cpu().numpy(),
            gt_heading_class[i, gt_ref_idx].detach().cpu().numpy(),
            gt_heading_residual[i, gt_ref_idx].detach().cpu().numpy(),
            gt_size_class[i, gt_ref_idx].detach().cpu().numpy(),
            gt_size_residual[i, gt_ref_idx].detach().cpu().numpy())
        pred_bbox = get_3d_box(pred_obb[3:6], pred_obb[6], pred_obb[0:3])
        gt_bbox = get_3d_box(gt_obb[3:6], gt_obb[6], gt_obb[0:3])
        iou = eval_ref_one_sample(pred_bbox, gt_bbox)
        ious.append(iou)

        # NOTE: get_3d_box() will return problematic bboxes
        pred_bbox = construct_bbox_corners(pred_obb[0:3], pred_obb[3:6])
        gt_bbox = construct_bbox_corners(gt_obb[0:3], gt_obb[3:6])
        pred_bboxes.append(pred_bbox)
        gt_bboxes.append(gt_bbox)

        # construct the multiple mask
        multiple.append(data_dict["unique_multiple"][i].item())

        # construct the others mask
        flag = 1 if data_dict["object_cat"][i] == 17 else 0
        others.append(flag)

    # lang
    if reference and use_lang_classifier:
        data_dict["lang_acc"] = (torch.argmax(
            data_dict['lang_scores'],
            1) == data_dict["object_cat"]).float().mean()
    else:
        data_dict["lang_acc"] = torch.zeros(1)[0].cuda()

    # store
    data_dict["ref_iou"] = ious
    data_dict["ref_iou_rate_0.25"] = np.array(ious)[
        np.array(ious) >= 0.25].shape[0] / np.array(ious).shape[0]
    data_dict["ref_iou_rate_0.5"] = np.array(ious)[
        np.array(ious) >= 0.5].shape[0] / np.array(ious).shape[0]
    data_dict["ref_multiple_mask"] = multiple
    data_dict["ref_others_mask"] = others
    data_dict["pred_bboxes"] = pred_bboxes
    data_dict["gt_bboxes"] = gt_bboxes

    # --------------------------------------------
    # Some other statistics
    obj_pred_val = torch.argmax(data_dict['objectness_scores'], 2)  # B,K
    obj_acc = torch.sum(
        (obj_pred_val == data_dict['objectness_label'].long()).float() *
        data_dict['objectness_mask']) / (
            torch.sum(data_dict['objectness_mask']) + 1e-6)
    data_dict['obj_acc'] = obj_acc
    # detection semantic classification
    sem_cls_label = torch.gather(
        data_dict['sem_cls_label'], 1,
        data_dict['object_assignment'])  # select (B,K) from (B,K2)
    sem_cls_pred = data_dict['sem_cls_scores'].argmax(-1)  # (B,K)
    sem_match = (sem_cls_label == sem_cls_pred).float()
    data_dict["sem_acc"] = (sem_match * data_dict["pred_mask"]
                            ).sum() / data_dict["pred_mask"].sum()

    return data_dict
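
Since the corners from construct_bbox_corners above are axis-aligned, a simple axis-aligned 3D IoU over (8, 3) corner arrays may help as a reference point; it ignores heading, so it is only a simplification of the rotated-box IoU used for the metrics.

import numpy as np

def aabb_iou_from_corners(corners_a, corners_b):
    # reduce each (8, 3) corner set to its axis-aligned extent
    min_a, max_a = corners_a.min(0), corners_a.max(0)
    min_b, max_b = corners_b.min(0), corners_b.max(0)
    overlap = np.clip(np.minimum(max_a, max_b) - np.maximum(min_a, min_b), 0, None)
    inter = np.prod(overlap)
    vol_a = np.prod(max_a - min_a)
    vol_b = np.prod(max_b - min_b)
    return inter / (vol_a + vol_b - inter + 1e-8)
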
Example 10
def parse_predictions(end_points, config_dict):
    """ Parse predictions to OBB parameters and suppress overlapping boxes
    
    Args:
        end_points: dict
            {point_clouds, center, heading_scores, heading_residuals,
            size_scores, size_residuals, sem_cls_scores}
        config_dict: dict
            {dataset_config, remove_empty_box, use_3d_nms, nms_iou,
            use_old_type_nms, conf_thresh, per_class_proposal}

    Returns:
        batch_pred_map_cls: a list of len == batch size (BS)
            [pred_list_i], i = 0, 1, ..., BS-1
            where pred_list_i = [(pred_sem_cls, box_params, box_score)_j]
            where j = 0, ..., num of valid detections - 1 from sample input i
    """
    pred_center = end_points['center']  # B,num_proposal,3
    pred_heading_class = torch.argmax(end_points['heading_scores'],
                                      -1)  # B,num_proposal
    pred_heading_residual = torch.gather(
        end_points['heading_residuals'], 2,
        pred_heading_class.unsqueeze(-1))  # B,num_proposal,1
    pred_heading_residual.squeeze_(2)
    pred_size_class = torch.argmax(end_points['size_scores'],
                                   -1)  # B,num_proposal
    pred_size_residual = torch.gather(
        end_points['size_residuals'], 2,
        pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(
            1, 1, 1, 3))  # B,num_proposal,1,3
    pred_size_residual.squeeze_(2)
    pred_sem_cls = torch.argmax(end_points['sem_cls_scores'],
                                -1)  # B,num_proposal
    sem_cls_probs = softmax(end_points['sem_cls_scores'].detach().cpu().numpy(
    ))  # B,num_proposal,10
    pred_sem_cls_prob = np.max(sem_cls_probs, -1)  # B,num_proposal

    num_proposal = pred_center.shape[1]
    # The points live in the upright_depth frame, while the box util functions
    # assume the upright_camera frame; this variant keeps the centers as-is.
    bsize = pred_center.shape[0]
    pred_corners_3d_upright_camera = np.zeros((bsize, num_proposal, 8, 3))
    # pred_center_upright_camera = flip_axis_to_camera(pred_center.detach().cpu().numpy())
    pred_center_upright_camera = pred_center.detach().cpu().numpy()
    for i in range(bsize):
        for j in range(num_proposal):
            heading_angle = config_dict['dataset_config'].class2angle(\
                pred_heading_class[i,j].detach().cpu().numpy(), pred_heading_residual[i,j].detach().cpu().numpy())
            box_size = config_dict['dataset_config'].class2size(\
                int(pred_size_class[i,j].detach().cpu().numpy()), pred_size_residual[i,j].detach().cpu().numpy())
            corners_3d_upright_camera = get_3d_box(
                box_size, heading_angle, pred_center_upright_camera[i, j, :])
            pred_corners_3d_upright_camera[i, j] = corners_3d_upright_camera

    K = pred_center.shape[1]  # K==num_proposal
    nonempty_box_mask = np.ones((bsize, K))

    if config_dict['remove_empty_box']:
        # -------------------------------------
        # Remove predicted boxes without any point within them.
        batch_pc = end_points['point_clouds'].cpu().numpy()[:, :, 0:3]  # B,N,3
        for i in range(bsize):
            pc = batch_pc[i, :, :]  # (N,3)
            for j in range(K):
                box3d = pred_corners_3d_upright_camera[i, j, :, :]  # (8,3)
                # box3d = flip_axis_to_depth(box3d)
                pc_in_box, inds = extract_pc_in_box3d(pc, box3d)
                if len(pc_in_box) < 5:
                    nonempty_box_mask[i, j] = 0
        # -------------------------------------

    obj_logits = end_points['objectness_scores'].detach().cpu().numpy()
    obj_prob = softmax(obj_logits)[:, :, 1]  # (B,K)
    if not config_dict['use_3d_nms']:
        # ---------- NMS input: boxes_2d_with_prob in (B,K,5) -----------
        pred_mask = np.zeros((bsize, K))
        for i in range(bsize):
            boxes_2d_with_prob = np.zeros((K, 5))
            for j in range(K):
                boxes_2d_with_prob[j, 0] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 0])
                boxes_2d_with_prob[j, 2] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 0])
                boxes_2d_with_prob[j, 1] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 2])
                boxes_2d_with_prob[j, 3] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 2])
                boxes_2d_with_prob[j, 4] = obj_prob[i, j]
            nonempty_box_inds = np.where(nonempty_box_mask[i, :] == 1)[0]
            pick = nms_2d_faster(
                boxes_2d_with_prob[nonempty_box_mask[i, :] == 1, :],
                config_dict['nms_iou'], config_dict['use_old_type_nms'])
            assert (len(pick) > 0)
            pred_mask[i, nonempty_box_inds[pick]] = 1
        end_points['pred_mask'] = pred_mask
        # ---------- NMS output: pred_mask in (B,K) -----------
    elif config_dict['use_3d_nms'] and (not config_dict['cls_nms']):
        # ---------- NMS input: pred_with_prob in (B,K,7) -----------
        pred_mask = np.zeros((bsize, K))
        for i in range(bsize):
            boxes_3d_with_prob = np.zeros((K, 7))
            for j in range(K):
                boxes_3d_with_prob[j, 0] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 0])
                boxes_3d_with_prob[j, 1] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 1])
                boxes_3d_with_prob[j, 2] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 2])
                boxes_3d_with_prob[j, 3] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 0])
                boxes_3d_with_prob[j, 4] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 1])
                boxes_3d_with_prob[j, 5] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 2])
                boxes_3d_with_prob[j, 6] = obj_prob[i, j]
            nonempty_box_inds = np.where(nonempty_box_mask[i, :] == 1)[0]
            pick = nms_3d_faster(
                boxes_3d_with_prob[nonempty_box_mask[i, :] == 1, :],
                config_dict['nms_iou'], config_dict['use_old_type_nms'])
            assert (len(pick) > 0)
            pred_mask[i, nonempty_box_inds[pick]] = 1
        end_points['pred_mask'] = pred_mask
        # ---------- NMS output: pred_mask in (B,K) -----------
    elif config_dict['use_3d_nms'] and config_dict['cls_nms']:
        # ---------- NMS input: pred_with_prob in (B,K,8) -----------
        pred_mask = np.zeros((bsize, K))
        for i in range(bsize):
            boxes_3d_with_prob = np.zeros((K, 8))
            for j in range(K):
                boxes_3d_with_prob[j, 0] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 0])
                boxes_3d_with_prob[j, 1] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 1])
                boxes_3d_with_prob[j, 2] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 2])
                boxes_3d_with_prob[j, 3] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 0])
                boxes_3d_with_prob[j, 4] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 1])
                boxes_3d_with_prob[j, 5] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 2])
                boxes_3d_with_prob[j, 6] = obj_prob[i, j]
                boxes_3d_with_prob[j, 7] = pred_sem_cls[i, j]  # only suppress boxes of the same class
            nonempty_box_inds = np.where(nonempty_box_mask[i, :] == 1)[0]
            pick = nms_3d_faster_samecls(
                boxes_3d_with_prob[nonempty_box_mask[i, :] == 1, :],
                config_dict['nms_iou'], config_dict['use_old_type_nms'])
            assert (len(pick) > 0)
            pred_mask[i, nonempty_box_inds[pick]] = 1
        end_points['pred_mask'] = pred_mask
        # ---------- NMS output: pred_mask in (B,K) -----------

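    # With per_class_proposal, each surviving box is emitted once per class,
    # scored by sem_cls_probs * obj_prob (the usual convention for mAP
    # evaluation); otherwise only the argmax class and obj_prob are kept.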
    # a list (len: batch_size) of lists (len: num predictions per sample)
    # of (pred_cls, pred_box, conf) tuples, with conf in [0, 1]
    batch_pred_map_cls = []
    for i in range(bsize):
        if config_dict['per_class_proposal']:
            cur_list = []
            for ii in range(config_dict['dataset_config'].num_class):
                cur_list += [(ii, pred_corners_3d_upright_camera[i,j], sem_cls_probs[i,j,ii]*obj_prob[i,j]) \
                    for j in range(pred_center.shape[1]) if pred_mask[i,j]==1 and obj_prob[i,j]>config_dict['conf_thresh']]
            batch_pred_map_cls.append(cur_list)
        else:
            batch_pred_map_cls.append([(pred_sem_cls[i,j].item(), pred_corners_3d_upright_camera[i,j], obj_prob[i,j]) \
                for j in range(pred_center.shape[1]) if pred_mask[i,j]==1 and obj_prob[i,j]>config_dict['conf_thresh']])
    end_points['batch_pred_map_cls'] = batch_pred_map_cls

    return batch_pred_map_cls
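
For context, here is a minimal, hypothetical sketch of how the parsed predictions and groundtruths are typically fed to an AP evaluator in VoteNet-style codebases; the names parse_predictions and APCalculator are assumptions and may differ in the surrounding repository:

# Hypothetical evaluation-loop sketch (VoteNet-style conventions assumed).
def evaluate(model, dataloader, dataset_config):
    config_dict = {
        'dataset_config': dataset_config,
        'remove_empty_box': False,
        'use_3d_nms': True,
        'cls_nms': True,
        'nms_iou': 0.25,
        'use_old_type_nms': False,
        'conf_thresh': 0.05,
        'per_class_proposal': True,
    }
    ap_calculator = APCalculator(ap_iou_thresh=0.25,
                                 class2type_map=dataset_config.class2type)
    for batch in dataloader:
        end_points = model(batch['point_clouds'])
        end_points.update(batch)  # the parsers also need the GT label tensors
        batch_pred_map_cls = parse_predictions(end_points, config_dict)
        batch_gt_map_cls = parse_groundtruths(end_points, config_dict)
        ap_calculator.step(batch_pred_map_cls, batch_gt_map_cls)
    return ap_calculator.compute_metrics()  # per-class AP and overall mAP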
Example n. 11
0
def get_pseudo_labels(end_points, ema_end_points, pred_center, pred_sem_cls,
                      pred_objectness, pred_heading_scores,
                      pred_heading_residuals, pred_size_scores,
                      pred_size_residuals, pred_vote_xyz, config_dict):
    batch_size, num_proposal = pred_center.shape[:2]
    label_mask = torch.zeros((batch_size, MAX_NUM_OBJ),
                             dtype=torch.long).cuda()

    # obj score threshold
    pred_objectness = nn.Softmax(dim=2)(pred_objectness)
    # the second channel is the positive (object) score
    pos_obj = pred_objectness[:, :, 1]
    neg_obj = pred_objectness[:, :, 0]
    objectness_mask = pos_obj > config_dict['obj_threshold']
    neg_objectness_mask = neg_obj > 0.9  # marked deprecated, but still used for false_center_label below

    # cls score threshold
    pred_sem_cls = nn.Softmax(dim=2)(pred_sem_cls)
    max_cls, argmax_cls = torch.max(pred_sem_cls, dim=2)
    cls_mask = max_cls > config_dict['cls_threshold']

    supervised_mask = end_points['supervised_mask']
    unsupervised_inds = torch.nonzero(1 - supervised_mask).squeeze(1).long()

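    # The IoU estimates come from the EMA (teacher) model's IoU head; they
    # act as the confidence signal for the pseudo-label filtering below.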
    iou_pred = nn.Sigmoid()(
        ema_end_points['iou_scores'][unsupervised_inds, ...])
    if iou_pred.shape[2] > 1:
        iou_pred = torch.gather(iou_pred, 2, argmax_cls.unsqueeze(-1)).squeeze(
            -1)  # use pred semantic labels
    else:
        iou_pred = iou_pred.squeeze(-1)

    if config_dict['view_stats']:
        # GT-based IoU labels ("cheating"); used only to analyze performance
        iou_labels, objectness_label, object_assignment = compute_iou_labels(
            end_points, unsupervised_inds, pred_vote_xyz, pred_center,
            pred_sem_cls, pred_objectness, pred_heading_scores,
            pred_heading_residuals, pred_size_scores, pred_size_residuals,
            config_dict)
        end_points['unlabeled_iou_labels'] = iou_labels
        end_points['unlabeled_pred_iou_value'] = torch.sum(
            iou_labels) / iou_labels.view(-1).shape[0]
        end_points['unlabeled_pred_iou_obj_value'] = torch.sum(
            iou_labels * objectness_label) / (torch.sum(objectness_label) +
                                              1e-6)

        iou_acc = torch.abs(iou_pred - iou_labels)  # elementwise absolute IoU error
        end_points['unlabeled_iou_acc'] = torch.sum(iou_acc) / iou_acc.view(
            -1).shape[0]
        obj_true_num = (torch.sum(objectness_label) + 1e-6)
        end_points['unlabeled_iou_obj_acc'] = torch.sum(
            iou_acc * objectness_label) / obj_true_num

        # for coverage calculation: associate every GT box with the pseudo labels
        gt_to_pseudo_iou = compute_iou_labels(end_points,
                                              unsupervised_inds,
                                              pred_vote_xyz,
                                              pred_center,
                                              pred_sem_cls,
                                              pred_objectness,
                                              pred_heading_scores,
                                              pred_heading_residuals,
                                              pred_size_scores,
                                              pred_size_residuals,
                                              config_dict,
                                              reverse=True)

    iou_threshold = config_dict['iou_threshold']
    iou_mask = iou_pred > iou_threshold
    before_iou_mask = torch.logical_and(cls_mask, objectness_mask)
    final_mask = torch.logical_and(before_iou_mask, iou_mask)
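    # a proposal becomes a pseudo label only if it passes all three filters:
    # class confidence, objectness, and predicted IoU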

    # we keep at most MAX_NUM_OBJ predictions; after filtering, the number of
    # survivors can still exceed this, so we keep the ones with the largest
    # pos_obj * max_cls
    inds = torch.argsort(pos_obj * max_cls * final_mask,
                         dim=1,
                         descending=True)

    inds = inds[:, :MAX_NUM_OBJ].long()
    final_mask_sorted = torch.gather(final_mask, dim=1, index=inds)
    end_points['pseudo_gt_ratio'] = torch.sum(
        final_mask_sorted).float() / final_mask_sorted.view(-1).shape[0]

    neg_objectness_mask = torch.gather(neg_objectness_mask, dim=1, index=inds)

    max_size, argmax_size = torch.max(pred_size_scores, dim=2)
    size_inds = argmax_size.unsqueeze(-1).unsqueeze(-1).expand(-1, -1, -1, 3)
    max_heading, argmax_heading = torch.max(pred_heading_scores, dim=2)
    heading_inds = argmax_heading.unsqueeze(-1)

    # keep only the residuals of the argmax class (one per proposal)
    pred_heading_residuals = torch.gather(pred_heading_residuals,
                                          dim=2,
                                          index=heading_inds).squeeze(2)
    pred_size_residuals = torch.gather(pred_size_residuals,
                                       dim=2,
                                       index=size_inds).squeeze(2)

    if config_dict['use_lhs']:
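        # Re-derive oriented boxes for the top-MAX_NUM_OBJ proposals, then run
        # same-class suppression (lhs_3d_faster_samecls) to de-duplicate
        # overlapping pseudo labels.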
        pred_center_ = torch.gather(pred_center,
                                    dim=1,
                                    index=inds.unsqueeze(-1).expand(-1, -1, 3))
        pred_heading_class_ = torch.gather(argmax_heading, dim=1, index=inds)
        pred_heading_residual_ = torch.gather(pred_heading_residuals,
                                              dim=1,
                                              index=inds)
        pred_size_class_ = torch.gather(argmax_size, dim=1, index=inds)
        pred_size_residual_ = torch.gather(pred_size_residuals,
                                           dim=1,
                                           index=inds.unsqueeze(-1).expand(
                                               -1, -1, 3))
        num_proposal = pred_center_.shape[1]
        bsize = pred_center_.shape[0]
        pred_box_parameters = np.zeros((bsize, num_proposal, 7),
                                       dtype=np.float32)
        pred_box_parameters[:, :, 0:3] = pred_center_.detach().cpu().numpy()
        pred_corners_3d_upright_camera = np.zeros((bsize, num_proposal, 8, 3),
                                                  dtype=np.float32)
        pred_center_upright_camera = flip_axis_to_camera(
            pred_center_.detach().cpu().numpy())
        for i in range(bsize):
            for j in range(num_proposal):
                heading_angle = config_dict['dataset_config'].class2angle(
                    pred_heading_class_[i, j].detach().cpu().numpy(),
                    pred_heading_residual_[i, j].detach().cpu().numpy())
                box_size = config_dict['dataset_config'].class2size(
                    int(pred_size_class_[i, j].detach().cpu().numpy()),
                    pred_size_residual_[i, j].detach().cpu().numpy())
                pred_box_parameters[i, j, 3:6] = box_size
                pred_box_parameters[i, j, 6] = heading_angle
                corners_3d_upright_camera = get_3d_box(
                    box_size, heading_angle,
                    pred_center_upright_camera[i, j, :])
                pred_corners_3d_upright_camera[i, j] = corners_3d_upright_camera

        # pred_corners_3d_upright_camera, _ = predictions2corners3d(end_points, config_dict)
        pred_mask = np.ones((batch_size, MAX_NUM_OBJ))
        nonempty_box_mask = np.ones((batch_size, MAX_NUM_OBJ))
        pos_obj_numpy = torch.gather(pos_obj, dim=1,
                                     index=inds).detach().cpu().numpy()
        pred_sem_cls_numpy = torch.gather(argmax_cls, dim=1,
                                          index=inds).detach().cpu().numpy()
        iou_numpy = torch.gather(iou_pred, dim=1,
                                 index=inds).detach().cpu().numpy()
        for i in range(batch_size):
            boxes_3d_with_prob = np.zeros((MAX_NUM_OBJ, 8))
            for j in range(MAX_NUM_OBJ):
                boxes_3d_with_prob[j, 0] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 0])
                boxes_3d_with_prob[j, 1] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 1])
                boxes_3d_with_prob[j, 2] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 2])
                boxes_3d_with_prob[j, 3] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 0])
                boxes_3d_with_prob[j, 4] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 1])
                boxes_3d_with_prob[j, 5] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 2])
                boxes_3d_with_prob[j, 6] = pos_obj_numpy[i, j] * iou_numpy[i, j]
                boxes_3d_with_prob[j, 7] = pred_sem_cls_numpy[i, j]  # only suppress boxes of the same class
            nonempty_box_inds = np.where(nonempty_box_mask[i, :] == 1)[0]

            # orientation is not considered here, in accordance with test-time NMS
            pick = lhs_3d_faster_samecls(
                boxes_3d_with_prob[nonempty_box_mask[i, :] == 1, :],
                config_dict['nms_iou'], config_dict['use_old_type_nms'])
            assert (len(pick) > 0)
            pred_mask[i, nonempty_box_inds[pick]] = 0
        # end_points['pred_mask'] = pred_mask
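        # boxes kept by lhs_3d_faster_samecls end up with pred_mask == 0 and
        # survive; all other entries are removed from the pseudo-label mask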
        final_mask_sorted[torch.from_numpy(pred_mask).bool().cuda()] = 0

    if config_dict['view_stats']:
        # ground truth coverage calculation
        selected_objectness_label = torch.gather(objectness_label,
                                                 dim=1,
                                                 index=inds)
        selected_object_assignment = torch.gather(object_assignment,
                                                  dim=1,
                                                  index=inds)
        gt_count = end_points['box_label_mask'].sum()

        picked_iou_labels = torch.gather(iou_labels, dim=1, index=inds)
        end_points['final_iou_avg_value'] = torch.sum(
            picked_iou_labels *
            final_mask_sorted).float() / (torch.sum(final_mask_sorted) + 1e-6)
        end_points['final_iou_avg_obj_value'] = torch.sum(
            picked_iou_labels * final_mask_sorted *
            selected_objectness_label).float() / (torch.sum(
                final_mask_sorted * selected_objectness_label) + 1e-6)
        selected_cls_pred = torch.gather(argmax_cls, dim=1, index=inds)
        selected_cls_gt = torch.gather(
            end_points['sem_cls_label'][unsupervised_inds, ...],
            dim=1,
            index=selected_object_assignment)
        correct_cls = selected_cls_pred == selected_cls_gt
        end_points['final_cls_value'] = torch.sum(
            correct_cls *
            final_mask_sorted).float() / (torch.sum(final_mask_sorted) + 1e-6)
        end_points['final_cls_obj_value'] = torch.sum(
            correct_cls * final_mask_sorted *
            selected_objectness_label).float() / (torch.sum(
                final_mask_sorted * selected_objectness_label) + 1e-6)

        # the hard-coded 64 in the original equals MAX_NUM_OBJ (GT box slots)
        gt_to_pseudo_iou = torch.gather(
            gt_to_pseudo_iou,
            dim=2,
            index=inds.unsqueeze(1).expand(-1, MAX_NUM_OBJ, -1))
        gt_to_pseudo_iou = gt_to_pseudo_iou * final_mask_sorted.unsqueeze(1)
        gt_to_pseudo_iou = gt_to_pseudo_iou.max(dim=2)[0]
        range_25 = (gt_to_pseudo_iou > 0.25).float()
        range_5 = (gt_to_pseudo_iou > 0.5).float()
        end_points['final_coverage_0.25_value'] = torch.sum(
            range_25) / gt_count
        end_points['final_coverage_0.5_value'] = torch.sum(range_5) / gt_count

    label_mask[final_mask_sorted] = 1
    heading_label = torch.gather(argmax_heading, dim=1, index=inds)
    heading_residual_label = torch.gather(pred_heading_residuals.squeeze(-1),
                                          dim=1,
                                          index=inds)
    size_label = torch.gather(argmax_size, dim=1, index=inds)
    size_residual_label = torch.gather(pred_size_residuals,
                                       dim=1,
                                       index=inds.unsqueeze(-1).expand(
                                           -1, -1, 3))
    sem_cls_label = torch.gather(argmax_cls, dim=1, index=inds)
    center_label = torch.gather(pred_center,
                                dim=1,
                                index=inds.unsqueeze(-1).expand(-1, -1, 3))
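    # -1000 presumably acts as a far-away sentinel: centers of filtered-out
    # proposals (and, below, of non-negative votes) are pushed outside the
    # scene so they cannot be matched during loss computation.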
    center_label[(1 - label_mask).unsqueeze(-1).expand(-1, -1,
                                                       3).bool()] = -1000
    false_center_label = torch.gather(pred_vote_xyz,
                                      dim=1,
                                      index=inds.unsqueeze(-1).expand(
                                          -1, -1, 3))
    false_center_label[torch.logical_not(neg_objectness_mask).unsqueeze(
        -1).expand(-1, -1, 3).bool()] = -1000

    iou_label = torch.gather(iou_pred, dim=1, index=inds)

    return (label_mask, center_label, sem_cls_label, heading_label,
            heading_residual_label, size_label, size_residual_label,
            false_center_label, iou_label)
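
For context, a minimal, hypothetical sketch of how the returned pseudo labels might be wired into the unlabeled branch of a teacher-student training loop; compute_detection_loss is an assumed name, not part of this example:

# Hypothetical wiring sketch -- the loss helper name is an assumption.
(label_mask, center_label, sem_cls_label, heading_label,
 heading_residual_label, size_label, size_residual_label,
 false_center_label, iou_label) = get_pseudo_labels(
     end_points, ema_end_points, pred_center, pred_sem_cls,
     pred_objectness, pred_heading_scores, pred_heading_residuals,
     pred_size_scores, pred_size_residuals, pred_vote_xyz, config_dict)

# Treat the surviving teacher predictions as ground truth for the student's
# unlabeled samples.
end_points['box_label_mask'] = label_mask.float()
end_points['center_label'] = center_label
end_points['sem_cls_label'] = sem_cls_label
end_points['heading_class_label'] = heading_label
end_points['heading_residual_label'] = heading_residual_label
end_points['size_class_label'] = size_label
end_points['size_residual_label'] = size_residual_label
unlabeled_loss = compute_detection_loss(end_points)  # assumed helper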