Ejemplo n.º 1
0
def compute_reference_loss(data_dict, config):
    """ Compute cluster reference loss

    For every sample, the proposal whose decoded box has the highest IoU with
    the ground-truth reference box is marked as the single positive, and the
    predicted reference scores are compared against these one-hot labels with
    SoftmaxRankingLoss.

    Args:
        data_dict: dict (read-only); reads "cluster_ref", "center",
            "heading_scores", "heading_residuals", "size_scores",
            "size_residuals" and the "ref_*_label" ground-truth entries
        config: dataset config instance providing param2obb_batch

    Returns:
        loss, cluster_preds, cluster_labels
    """

    # unpack
    cluster_preds = data_dict["cluster_ref"]  # (B, num_proposal)
    batch_size, num_proposals = cluster_preds.shape

    # predicted bbox: pick the arg-max heading / size bin and its residual
    pred_center = data_dict['center'].detach().cpu().numpy()  # (B,K,3)
    pred_heading_class = torch.argmax(data_dict['heading_scores'], -1)  # B,num_proposal
    pred_heading_residual = torch.gather(
        data_dict['heading_residuals'], 2,
        pred_heading_class.unsqueeze(-1))  # B,num_proposal,1
    pred_heading_class = pred_heading_class.detach().cpu().numpy()  # B,num_proposal
    pred_heading_residual = pred_heading_residual.squeeze(2).detach().cpu().numpy()  # B,num_proposal
    pred_size_class = torch.argmax(data_dict['size_scores'], -1)  # B,num_proposal
    pred_size_residual = torch.gather(
        data_dict['size_residuals'], 2,
        pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(1, 1, 1, 3))  # B,num_proposal,1,3
    pred_size_class = pred_size_class.detach().cpu().numpy()
    pred_size_residual = pred_size_residual.squeeze(2).detach().cpu().numpy()  # B,num_proposal,3

    # ground truth bbox
    gt_center = data_dict['ref_center_label'].cpu().numpy()  # (B,3)
    gt_heading_class = data_dict['ref_heading_class_label'].cpu().numpy()  # B
    gt_heading_residual = data_dict['ref_heading_residual_label'].cpu().numpy()  # B
    gt_size_class = data_dict['ref_size_class_label'].cpu().numpy()  # B
    gt_size_residual = data_dict['ref_size_residual_label'].cpu().numpy()  # B,3
    # convert gt bbox parameters to bbox corners
    gt_obb_batch = config.param2obb_batch(gt_center[:, 0:3], gt_heading_class,
                                          gt_heading_residual, gt_size_class,
                                          gt_size_residual)
    gt_bbox_batch = get_3d_box_batch(gt_obb_batch[:, 3:6], gt_obb_batch[:, 6],
                                     gt_obb_batch[:, 0:3])

    # compute the iou score of every proposal against the reference box
    labels = np.zeros((batch_size, num_proposals))
    for i in range(batch_size):
        # convert the bbox parameters to bbox corners
        pred_obb_batch = config.param2obb_batch(pred_center[i, :, 0:3],
                                                pred_heading_class[i],
                                                pred_heading_residual[i],
                                                pred_size_class[i],
                                                pred_size_residual[i])
        pred_bbox_batch = get_3d_box_batch(pred_obb_batch[:, 3:6],
                                           pred_obb_batch[:, 6],
                                           pred_obb_batch[:, 0:3])
        ious = box3d_iou_batch(
            pred_bbox_batch, np.tile(gt_bbox_batch[i], (num_proposals, 1, 1)))
        labels[i, ious.argmax()] = 1  # treat the bbox with highest iou score as the gt

    # build the labels on the same device as the predictions instead of
    # unconditionally moving them to the default CUDA device
    cluster_labels = torch.as_tensor(labels,
                                     dtype=torch.float32,
                                     device=cluster_preds.device)

    # reference loss; the criterion receives a copy so the returned labels
    # cannot be modified by it
    criterion = SoftmaxRankingLoss()
    loss = criterion(cluster_preds, cluster_labels.clone())

    return loss, cluster_preds, cluster_labels
Ejemplo n.º 2
0
    def decode_pred_box(self, data_dict):
        """Decode the network's box predictions into corner coordinates.

        For every proposal the arg-max heading bin and size bin are selected
        together with their residuals, converted to an oriented box with
        ``DC.param2obb_batch`` and then to corners with ``get_3d_box_batch``.

        Args:
            data_dict: dict holding the tensors "center", "heading_scores",
                "heading_residuals", "size_scores" and "size_residuals".

        Returns:
            Tensor of box corners (batch_size, num_proposals, 8, 3), placed on
            the same device as ``data_dict["center"]``.
        """
        center = data_dict["center"]
        # keep the result on the prediction's device rather than hard-coding CUDA
        device = center.device

        # predicted bbox
        pred_center = center.detach().cpu().numpy()  # (B,K,3)
        pred_heading_class = torch.argmax(data_dict["heading_scores"], -1)  # B,num_proposal
        pred_heading_residual = torch.gather(
            data_dict["heading_residuals"], 2,
            pred_heading_class.unsqueeze(-1))  # B,num_proposal,1
        pred_heading_class = pred_heading_class.detach().cpu().numpy()  # B,num_proposal
        pred_heading_residual = pred_heading_residual.squeeze(2).detach().cpu().numpy()  # B,num_proposal
        pred_size_class = torch.argmax(data_dict["size_scores"], -1)  # B,num_proposal
        pred_size_residual = torch.gather(
            data_dict["size_residuals"], 2,
            pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(1, 1, 1, 3))  # B,num_proposal,1,3
        pred_size_class = pred_size_class.detach().cpu().numpy()
        pred_size_residual = pred_size_residual.squeeze(2).detach().cpu().numpy()  # B,num_proposal,3

        batch_size = pred_center.shape[0]
        pred_bboxes = []
        for i in range(batch_size):
            # convert the bbox parameters to bbox corners
            pred_obb_batch = DC.param2obb_batch(pred_center[i, :, 0:3],
                                                pred_heading_class[i],
                                                pred_heading_residual[i],
                                                pred_size_class[i],
                                                pred_size_residual[i])
            pred_bbox_batch = get_3d_box_batch(pred_obb_batch[:, 3:6],
                                               pred_obb_batch[:, 6],
                                               pred_obb_batch[:, 0:3])
            pred_bboxes.append(torch.from_numpy(pred_bbox_batch).to(device))

        # stack the per-sample corner arrays along a new leading batch axis
        pred_bboxes = torch.stack(pred_bboxes, dim=0)  # batch_size, num_proposals, 8, 3

        return pred_bboxes
Ejemplo n.º 3
0
    def __getitem__(self, idx):
        """Assemble the sample dictionary for annotation ``idx``.

        Reads the annotation ``self.scanrefer[idx]``, builds the (optionally
        coloured / normal / multiview / height augmented) point cloud of its
        scene, sub-samples it to ``self.num_points`` points and, unless
        ``self.split == "test"``, derives box, vote and reference-target
        labels (after optional random flips, small rotations and translation
        when ``self.augment`` is set).

        Args:
            idx: index into ``self.scanrefer``.

        Returns:
            dict of NumPy arrays / scalars (point cloud, language features,
            padded per-object labels of length MAX_NUM_OBJ, reference-target
            labels, vote labels, bookkeeping ids and ``load_time``).
        """
        start = time.time()
        scene_id = self.scanrefer[idx]["scene_id"]
        object_id = int(self.scanrefer[idx]["object_id"])
        object_name = " ".join(self.scanrefer[idx]["object_name"].split("_"))
        ann_id = self.scanrefer[idx]["ann_id"]

        # get language features
        lang_feat = self.lang[scene_id][str(object_id)][ann_id]
        # token count + 2, clamped to the maximum description length + 2
        lang_len = min(
            len(self.scanrefer[idx]["token"]) + 2,
            CONF.TRAIN.MAX_DES_LEN + 2)

        # get pc
        mesh_vertices = self.scene_data[scene_id]["mesh_vertices"]
        instance_labels = self.scene_data[scene_id]["instance_labels"]
        semantic_labels = self.scene_data[scene_id]["semantic_labels"]
        instance_bboxes = self.scene_data[scene_id]["instance_bboxes"]

        if not self.use_color:
            point_cloud = mesh_vertices[:, 0:3]  # do not use color for now
            pcl_color = mesh_vertices[:, 3:6]
        else:
            # Work on a copy: slicing returns a view, so normalising the
            # colours in place would rewrite the cached scene data and
            # re-normalise it on every subsequent access of this scene.
            point_cloud = mesh_vertices[:, 0:6].copy()
            point_cloud[:, 3:6] = (point_cloud[:, 3:6] -
                                   MEAN_COLOR_RGB) / 256.0
            pcl_color = point_cloud[:, 3:6]

        if self.use_normal:
            normals = mesh_vertices[:, 6:9]
            point_cloud = np.concatenate([point_cloud, normals], 1)

        if self.use_multiview:
            # load multiview database lazily on first use
            if self.multiview_data == {}:
                self.multiview_data = h5py.File(MULTIVIEW_DATA,
                                                "r",
                                                libver="latest")

            multiview = self.multiview_data[scene_id]
            point_cloud = np.concatenate([point_cloud, multiview], 1)

        if self.use_height:
            # the 0.99th percentile of z is used as the floor height
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)

        point_cloud, choices = random_sampling(point_cloud,
                                               self.num_points,
                                               return_choices=True)
        instance_labels = instance_labels[choices]
        semantic_labels = semantic_labels[choices]
        pcl_color = pcl_color[choices]

        # ------------------------------- LABELS ------------------------------
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
        target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))

        ref_box_label = np.zeros(
            MAX_NUM_OBJ)  # bbox label for reference target
        ref_center_label = np.zeros(3)  # bbox center for reference target
        ref_heading_class_label = 0
        ref_heading_residual_label = 0
        ref_size_class_label = 0
        ref_size_residual_label = np.zeros(
            3)  # bbox size residual for reference target
        ref_box_corner_label = np.zeros((8, 3))

        # Defined up front so they also exist (as all-zero padding) on the
        # "test" split, where no ground-truth boxes are processed.
        gt_box_corner_label = np.zeros((MAX_NUM_OBJ, 8, 3))
        gt_box_masks = np.zeros((MAX_NUM_OBJ, ))
        gt_box_object_ids = np.zeros((MAX_NUM_OBJ, ))

        if self.split != "test":
            num_bbox = min(instance_bboxes.shape[0], MAX_NUM_OBJ)
            target_bboxes_mask[0:num_bbox] = 1
            target_bboxes[0:num_bbox, :] = instance_bboxes[:MAX_NUM_OBJ, 0:6]

            point_votes = np.zeros([self.num_points, 3])
            point_votes_mask = np.zeros(self.num_points)

            # ------------------------------- DATA AUGMENTATION ------------------------------
            if self.augment:
                if np.random.random() > 0.5:
                    # Flipping along the YZ plane
                    point_cloud[:, 0] = -1 * point_cloud[:, 0]
                    target_bboxes[:, 0] = -1 * target_bboxes[:, 0]

                if np.random.random() > 0.5:
                    # Flipping along the XZ plane
                    point_cloud[:, 1] = -1 * point_cloud[:, 1]
                    target_bboxes[:, 1] = -1 * target_bboxes[:, 1]

                # Rotation along X-axis
                rot_angle = (np.random.random() * np.pi /
                             18) - np.pi / 36  # -5 ~ +5 degree
                rot_mat = rotx(rot_angle)
                point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                             np.transpose(rot_mat))
                target_bboxes = rotate_aligned_boxes_along_axis(
                    target_bboxes, rot_mat, "x")

                # Rotation along Y-axis
                rot_angle = (np.random.random() * np.pi /
                             18) - np.pi / 36  # -5 ~ +5 degree
                rot_mat = roty(rot_angle)
                point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                             np.transpose(rot_mat))
                target_bboxes = rotate_aligned_boxes_along_axis(
                    target_bboxes, rot_mat, "y")

                # Rotation along up-axis/Z-axis
                rot_angle = (np.random.random() * np.pi /
                             18) - np.pi / 36  # -5 ~ +5 degree
                rot_mat = rotz(rot_angle)
                point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                             np.transpose(rot_mat))
                target_bboxes = rotate_aligned_boxes_along_axis(
                    target_bboxes, rot_mat, "z")

                # Translation
                point_cloud, target_bboxes = self._translate(
                    point_cloud, target_bboxes)

            # compute votes *AFTER* augmentation
            # generate votes
            # Note: since there's no map between bbox instance labels and
            # pc instance_labels (it had been filtered
            # in the data preparation step) we'll compute the instance bbox
            # from the points sharing the same instance label.
            for i_instance in np.unique(instance_labels):
                # find all points belong to that instance
                ind = np.where(instance_labels == i_instance)[0]
                # find the semantic label
                if semantic_labels[ind[0]] in DC.nyu40ids:
                    x = point_cloud[ind, :3]
                    center = 0.5 * (x.min(0) + x.max(0))
                    point_votes[ind, :] = center - x
                    point_votes_mask[ind] = 1.0
            point_votes = np.tile(point_votes,
                                  (1, 3))  # make 3 votes identical

            class_ind = [
                DC.nyu40id2class[int(x)]
                for x in instance_bboxes[:num_bbox, -2]
            ]
            # NOTE: set size class as semantic class. Consider use size2class.
            size_classes[0:num_bbox] = class_ind
            size_residuals[0:num_bbox, :] = target_bboxes[
                0:num_bbox, 3:6] - DC.mean_size_arr[class_ind, :]

            # construct the reference target label for each bbox
            for i, gt_id in enumerate(instance_bboxes[:num_bbox, -1]):
                if gt_id == object_id:
                    ref_box_label[i] = 1
                    ref_center_label = target_bboxes[i, 0:3]
                    ref_heading_class_label = angle_classes[i]
                    ref_heading_residual_label = angle_residuals[i]
                    ref_size_class_label = size_classes[i]
                    ref_size_residual_label = size_residuals[i]

                    # construct ground truth box corner coordinates
                    ref_obb = DC.param2obb(ref_center_label,
                                           ref_heading_class_label,
                                           ref_heading_residual_label,
                                           ref_size_class_label,
                                           ref_size_residual_label)
                    ref_box_corner_label = get_3d_box(ref_obb[3:6], ref_obb[6],
                                                      ref_obb[0:3])

            # construct all GT bbox corners
            all_obb = DC.param2obb_batch(
                target_bboxes[:num_bbox, 0:3],
                angle_classes[:num_bbox].astype(np.int64),
                angle_residuals[:num_bbox],
                size_classes[:num_bbox].astype(np.int64),
                size_residuals[:num_bbox])
            all_box_corner_label = get_3d_box_batch(all_obb[:, 3:6],
                                                    all_obb[:, 6],
                                                    all_obb[:, 0:3])

            # store
            gt_box_corner_label[:num_bbox] = all_box_corner_label
            gt_box_masks[:num_bbox] = 1
            # slice to num_bbox so scenes with more than MAX_NUM_OBJ boxes
            # do not raise a shape-mismatch error
            gt_box_object_ids[:num_bbox] = instance_bboxes[:num_bbox, -1]
        else:
            num_bbox = 1
            point_votes = np.zeros([self.num_points,
                                    9])  # make 3 votes identical
            point_votes_mask = np.zeros(self.num_points)

        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        target_object_ids = np.zeros(
            (MAX_NUM_OBJ, ))  # object ids of all objects
        # best effort: an unknown semantic id leaves both arrays as filled so far
        try:
            target_bboxes_semcls[0:num_bbox] = [
                DC.nyu40id2class[int(x)]
                for x in instance_bboxes[:, -2][0:num_bbox]
            ]
            target_object_ids[0:num_bbox] = instance_bboxes[:, -1][0:num_bbox]
        except KeyError:
            pass

        # names missing from raw2label fall back to class 17
        object_cat = self.raw2label[
            object_name] if object_name in self.raw2label else 17

        data_dict = {}
        data_dict["point_clouds"] = point_cloud.astype(
            np.float32
        )  # point cloud data including features    [B,max_num_points,3]
        data_dict["lang_feat"] = lang_feat.astype(
            np.float32)  # language feature vectors     [B,32,300]
        data_dict["lang_len"] = np.array(lang_len).astype(
            np.int64)  # length of each description    [B]
        data_dict["lang_ids"] = np.array(
            self.lang_ids[scene_id][str(object_id)][ann_id]).astype(
                np.int64)  # presumably per-token ids -- TODO confirm shape
        # all data with MAX_NUM_OBJ are mostly filled with zeros
        data_dict["center_label"] = target_bboxes.astype(
            np.float32
        )[:, 0:3]  # (MAX_NUM_OBJ, 3) for GT box center XYZ  # [B,128,3]
        data_dict["heading_class_label"] = angle_classes.astype(
            np.int64
        )  # (MAX_NUM_OBJ,) with int values in 0,...,NUM_HEADING_BIN-1  [B,128]
        data_dict["heading_residual_label"] = angle_residuals.astype(
            np.float32)  # (MAX_NUM_OBJ,) [B,128]
        data_dict["size_class_label"] = size_classes.astype(
            np.int64
        )  # (MAX_NUM_OBJ,) with int values in 0,...,NUM_SIZE_CLUSTER  [B,128]
        data_dict["size_residual_label"] = size_residuals.astype(
            np.float32)  # (MAX_NUM_OBJ, 3) [B,128,3]
        data_dict["num_bbox"] = np.array(num_bbox).astype(np.int64)  # [B]
        data_dict["sem_cls_label"] = target_bboxes_semcls.astype(
            np.int64)  # (MAX_NUM_OBJ,) semantic class index
        data_dict["scene_object_ids"] = target_object_ids.astype(
            np.int64)  # (MAX_NUM_OBJ,) object ids of all objects
        data_dict["box_label_mask"] = target_bboxes_mask.astype(
            np.float32)  # (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
        data_dict["vote_label"] = point_votes.astype(np.float32)  # [B,40000,9]
        data_dict["vote_label_mask"] = point_votes_mask.astype(
            np.int64)  # [B,40000]
        data_dict["dataset_idx"] = np.array(idx).astype(
            np.int64)  # [B] object indices from self.scanrefer
        data_dict["pcl_color"] = pcl_color  # [B,40000,3]
        data_dict["ref_box_label"] = ref_box_label.astype(
            np.int64)  # 0/1 reference labels for each object bbox
        data_dict["ref_center_label"] = ref_center_label.astype(np.float32)
        data_dict["ref_heading_class_label"] = np.array(
            int(ref_heading_class_label)).astype(np.int64)
        # NOTE(review): int() truncates the residual; harmless here because
        # angle_residuals is never written and therefore always zero.
        data_dict["ref_heading_residual_label"] = np.array(
            int(ref_heading_residual_label)).astype(np.int64)
        data_dict["ref_size_class_label"] = np.array(
            int(ref_size_class_label)).astype(np.int64)
        data_dict["ref_size_residual_label"] = ref_size_residual_label.astype(
            np.float32)
        data_dict["ref_box_corner_label"] = ref_box_corner_label.astype(
            np.float64)  # target box corners NOTE type must be double
        data_dict["gt_box_corner_label"] = gt_box_corner_label.astype(
            np.float64)  # all GT box corners NOTE type must be double
        data_dict["gt_box_masks"] = gt_box_masks.astype(
            np.int64)  # valid bbox masks
        data_dict["gt_box_object_ids"] = gt_box_object_ids.astype(
            np.int64)  # valid bbox object ids
        data_dict["object_id"] = np.array(int(object_id)).astype(
            np.int64)  # [B] target object_ids
        data_dict["ann_id"] = np.array(int(ann_id)).astype(np.int64)  # [B]
        data_dict["object_cat"] = np.array(object_cat).astype(
            np.int64)  # [B] target object classes
        data_dict["unique_multiple"] = np.array(
            self.unique_multiple_lookup[scene_id][str(
                object_id)][ann_id]).astype(np.int64)
        data_dict["load_time"] = time.time() - start

        return data_dict
Ejemplo n.º 4
0
def compute_reference_loss(data_dict,
                           config,
                           use_lang_classifier=False,
                           use_max_iou=False):
    """ Compute cluster reference loss

    Args:
        data_dict: dict (read-only)
        config: dataset config instance providing param2obb_batch
        use_lang_classifier: Boolean, whether the language classifier is applied here or not
        use_max_iou: Boolean, whether marking the bbox with highest iou score as the only positive or not

    Returns:
        ref_loss, lang_loss, cluster_preds, cluster_labels
    """

    # unpack
    cluster_preds = data_dict["cluster_ref"]  # (B, num_proposal)
    # every tensor created below follows the predictions' device instead of
    # being moved to the default CUDA device unconditionally
    device = cluster_preds.device

    # select assigned reference boxes
    if use_max_iou:
        # predicted bbox: arg-max heading / size bin and matching residual
        pred_center = data_dict['center'].detach().cpu().numpy()  # (B,K,3)
        pred_heading_class = torch.argmax(data_dict['heading_scores'],
                                          -1)  # B,num_proposal
        pred_heading_residual = torch.gather(
            data_dict['heading_residuals'], 2,
            pred_heading_class.unsqueeze(-1))  # B,num_proposal,1
        pred_heading_class = pred_heading_class.detach().cpu().numpy(
        )  # B,num_proposal
        pred_heading_residual = pred_heading_residual.squeeze(
            2).detach().cpu().numpy()  # B,num_proposal
        pred_size_class = torch.argmax(data_dict['size_scores'],
                                       -1)  # B,num_proposal
        pred_size_residual = torch.gather(
            data_dict['size_residuals'], 2,
            pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(
                1, 1, 1, 3))  # B,num_proposal,1,3
        pred_size_class = pred_size_class.detach().cpu().numpy()
        pred_size_residual = pred_size_residual.squeeze(
            2).detach().cpu().numpy()  # B,num_proposal,3

        # ground truth bbox
        gt_center = data_dict['ref_center_label'].cpu().numpy()  # (B,3)
        gt_heading_class = data_dict['ref_heading_class_label'].cpu().numpy(
        )  # B
        gt_heading_residual = data_dict['ref_heading_residual_label'].cpu(
        ).numpy()  # B
        gt_size_class = data_dict['ref_size_class_label'].cpu().numpy()  # B
        gt_size_residual = data_dict['ref_size_residual_label'].cpu().numpy(
        )  # B,3
        # convert gt bbox parameters to bbox corners
        gt_obb_batch = config.param2obb_batch(gt_center[:, 0:3],
                                              gt_heading_class,
                                              gt_heading_residual,
                                              gt_size_class, gt_size_residual)
        gt_bbox_batch = get_3d_box_batch(gt_obb_batch[:, 3:6],
                                         gt_obb_batch[:, 6],
                                         gt_obb_batch[:, 0:3])

        # compute the iou score of every proposal against the reference box
        batch_size, num_proposals = cluster_preds.shape
        labels = np.zeros((batch_size, num_proposals))
        for i in range(batch_size):
            # convert the bbox parameters to bbox corners
            pred_obb_batch = config.param2obb_batch(pred_center[i, :, 0:3],
                                                    pred_heading_class[i],
                                                    pred_heading_residual[i],
                                                    pred_size_class[i],
                                                    pred_size_residual[i])
            pred_bbox_batch = get_3d_box_batch(pred_obb_batch[:, 3:6],
                                               pred_obb_batch[:, 6],
                                               pred_obb_batch[:, 0:3])
            ious = box3d_iou_batch(
                pred_bbox_batch,
                np.tile(gt_bbox_batch[i], (num_proposals, 1, 1)))
            # treat the bbox with highest iou score as the gt
            labels[i, ious.argmax()] = 1

        cluster_labels = torch.as_tensor(labels,
                                         dtype=torch.float32,
                                         device=device)

        # reference loss
        # NOTE(review): the weight uses the module-level NUM_PROPOSALS, not
        # this batch's num_proposals -- confirm the two always agree.
        REFERENCE_CLS_WEIGHTS = [1 / NUM_PROPOSALS,
                                 1]  # put larger weights on positive reference
        criterion = SoftmaxRankingLoss(REFERENCE_CLS_WEIGHTS)
        # pass a copy so the returned labels cannot be modified by the criterion
        ref_loss = criterion(cluster_preds, cluster_labels.clone())
    else:
        # look up the 0/1 reference label of the GT object assigned to each proposal
        object_assignment = data_dict["object_assignment"]  # (B, num_proposal)
        cluster_labels = data_dict["ref_box_label"]  # (B, num_max_obj)
        cluster_labels = torch.gather(cluster_labels, 1,
                                      object_assignment)  # (B, num_proposal)

        # reference loss
        REFERENCE_CLS_WEIGHTS = [0.01,
                                 1]  # put larger weights on positive reference
        criterion = SoftmaxRankingLoss(REFERENCE_CLS_WEIGHTS)
        ref_loss = criterion(cluster_preds, cluster_labels.float())

    # language loss
    if use_lang_classifier:
        criterion = torch.nn.CrossEntropyLoss()
        lang_loss = criterion(data_dict["lang_scores"],
                              data_dict["object_cat"])
    else:
        # zero-dimensional placeholder so callers can still add it to the total
        lang_loss = torch.zeros((), device=device)

    return ref_loss, lang_loss, cluster_preds, cluster_labels
Ejemplo n.º 5
0
def get_loss(data_dict, config):
    """ Loss functions

    Combines the language classification loss, the scene mask (segmentation)
    loss and a contrastive reference loss, and stores them in data_dict.

    Args:
        data_dict: dict
        config: dataset config instance
    Returns:
        data_dict: the input dict with "lang_loss", "ref_loss", "loss",
            "seg_loss", "seg_acc" and "cluster_label" added
    """
    lang_loss = compute_lang_classification_loss(data_dict)
    data_dict["lang_loss"] = lang_loss
    seg_loss, seg_acc = compute_scene_mask_loss(data_dict)

    # get ref gt
    ref_center_label = data_dict["ref_center_label"].detach().cpu().numpy()
    ref_heading_class_label = data_dict["ref_heading_class_label"].detach().cpu().numpy()
    ref_heading_residual_label = data_dict["ref_heading_residual_label"].detach().cpu().numpy()
    ref_size_class_label = data_dict["ref_size_class_label"].detach().cpu().numpy()
    ref_size_residual_label = data_dict["ref_size_residual_label"].detach().cpu().numpy()

    ref_gt_obb = config.param2obb_batch(ref_center_label, ref_heading_class_label, ref_heading_residual_label,
                                        ref_size_class_label, ref_size_residual_label)
    ref_gt_bbox = get_3d_box_batch(ref_gt_obb[:, 3:6], ref_gt_obb[:, 6], ref_gt_obb[:, 0:3])

    attribute_scores = data_dict['attribute_scores']
    relation_scores = data_dict['relation_scores']
    scene_scores = data_dict['scene_scores']

    pred_obb_batch = data_dict['pred_obb_batch']
    batch_size = len(pred_obb_batch)
    cluster_label = []

    criterion = ContrastiveLoss(margin=0.2, gamma=5)
    ref_loss = torch.zeros(1).cuda().requires_grad_(True)
    # offset of the current sample's candidates inside the flat score tensors
    start_idx = 0
    for i in range(batch_size):
        pred_obb = pred_obb_batch[i]  # (num, 7)
        num_filtered_obj = pred_obb.shape[0]
        if num_filtered_obj == 0:
            # no candidate box for this sample: nothing to label or score
            cluster_label.append([])
            continue

        label = np.zeros(num_filtered_obj)
        pred_bbox = get_3d_box_batch(pred_obb[:, 3:6], pred_obb[:, 6], pred_obb[:, 0:3])
        ious = box3d_iou_batch(pred_bbox, np.tile(ref_gt_bbox[i], (num_filtered_obj, 1, 1)))
        label[ious.argmax()] = 1  # treat the bbox with highest iou score as the gt

        label = torch.FloatTensor(label).cuda()
        cluster_label.append(label)
        # NOTE(review): start_idx is not advanced for samples with zero or
        # one candidate; this presumably mirrors a model that emits no scores
        # for such samples -- confirm against the score producer.
        if num_filtered_obj == 1:
            continue

        score_slice = slice(start_idx, start_idx + num_filtered_obj)
        score = (attribute_scores[score_slice] + relation_scores[score_slice] +
                 scene_scores[score_slice])
        start_idx += num_filtered_obj

        # skip samples whose best candidate barely overlaps the reference box
        if ious.max() < 0.2:
            continue

        ref_loss = ref_loss + criterion(score, label)

    # averaged over the whole batch, including skipped samples
    ref_loss = ref_loss / batch_size
    data_dict['ref_loss'] = ref_loss

    data_dict['loss'] = 10 * ref_loss + lang_loss + seg_loss
    data_dict['seg_loss'] = seg_loss
    data_dict['seg_acc'] = seg_acc
    data_dict['cluster_label'] = cluster_label

    return data_dict
Ejemplo n.º 6
0
def compute_reference_loss(data_dict, config):
    """ Compute cluster reference loss

    For every sample, the proposal whose decoded box has the highest IoU with
    the ground-truth reference box is marked as the single positive, and the
    predicted reference scores are compared against these one-hot labels with
    SoftmaxRankingLoss.

    Args:
        data_dict: dict (read-only)
        config: dataset config instance providing param2obb_batch

    Returns:
        loss, cluster_preds, cluster_labels
    """

    # unpack
    cluster_preds = data_dict["cluster_ref"]  # (B, num_proposal)
    batch_size, num_proposals = cluster_preds.shape

    # predicted bbox: arg-max heading / size bin and matching residual
    pred_center = data_dict['center'].detach().cpu().numpy()  # (B,K,3)
    pred_heading_class = torch.argmax(data_dict['heading_scores'],
                                      -1)  # B,num_proposal
    pred_heading_residual = torch.gather(
        data_dict['heading_residuals'], 2,
        pred_heading_class.unsqueeze(-1))  # B,num_proposal,1
    pred_heading_class = pred_heading_class.detach().cpu().numpy(
    )  # B,num_proposal
    pred_heading_residual = pred_heading_residual.squeeze(
        2).detach().cpu().numpy()  # B,num_proposal
    pred_size_class = torch.argmax(data_dict['size_scores'],
                                   -1)  # B,num_proposal
    pred_size_residual = torch.gather(
        data_dict['size_residuals'], 2,
        pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(
            1, 1, 1, 3))  # B,num_proposal,1,3
    pred_size_class = pred_size_class.detach().cpu().numpy()
    pred_size_residual = pred_size_residual.squeeze(
        2).detach().cpu().numpy()  # B,num_proposal,3

    # ground truth bbox
    gt_center = data_dict['ref_center_label'].cpu().numpy()  # (B,3)
    gt_heading_class = data_dict['ref_heading_class_label'].cpu().numpy()  # B
    gt_heading_residual = data_dict['ref_heading_residual_label'].cpu().numpy(
    )  # B
    gt_size_class = data_dict['ref_size_class_label'].cpu().numpy()  # B
    gt_size_residual = data_dict['ref_size_residual_label'].cpu().numpy(
    )  # B,3
    # convert gt bbox parameters to bbox corners
    gt_obb_batch = config.param2obb_batch(gt_center[:, 0:3], gt_heading_class,
                                          gt_heading_residual, gt_size_class,
                                          gt_size_residual)
    gt_bbox_batch = get_3d_box_batch(gt_obb_batch[:, 3:6], gt_obb_batch[:, 6],
                                     gt_obb_batch[:, 0:3])

    # compute the iou score of every proposal against the reference box
    labels = np.zeros((batch_size, num_proposals))
    for i in range(batch_size):
        # convert the bbox parameters to bbox corners
        pred_obb_batch = config.param2obb_batch(pred_center[i, :, 0:3],
                                                pred_heading_class[i],
                                                pred_heading_residual[i],
                                                pred_size_class[i],
                                                pred_size_residual[i])
        pred_bbox_batch = get_3d_box_batch(pred_obb_batch[:, 3:6],
                                           pred_obb_batch[:, 6],
                                           pred_obb_batch[:, 0:3])
        ious = box3d_iou_batch(
            pred_bbox_batch, np.tile(gt_bbox_batch[i], (num_proposals, 1, 1)))
        # treat the bbox with highest iou score as the gt
        labels[i, ious.argmax()] = 1

    # build the labels on the same device as the predictions instead of
    # unconditionally moving them to the default CUDA device
    cluster_labels = torch.as_tensor(labels,
                                     dtype=torch.float32,
                                     device=cluster_preds.device)

    # reference loss; the criterion receives a copy so the returned labels
    # cannot be modified by it
    criterion = SoftmaxRankingLoss()
    loss = criterion(cluster_preds, cluster_labels.clone())

    return loss, cluster_preds, cluster_labels