Example #1
            for j in range(1, imdb.num_classes):
                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if args.class_agnostic:
                        cls_boxes = pred_boxes[inds, :]
                    else:
                        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)),
                                         1)
                    # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                    cls_dets = cls_dets[order]
                    keep = nms(cls_boxes[order, :], cls_scores[order],
                               cfg.TEST.NMS)
                    cls_dets = cls_dets[keep.view(-1).long()]
                    all_boxes[j][i] = cls_dets.cpu().numpy()
                else:
                    all_boxes[j][i] = empty_array

            # Limit to max_per_image detections *over all classes*
            if max_per_image > 0:
                image_scores = np.hstack([
                    all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)
                ])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        keep = np.where(
                            all_boxes[j][i][:, -1] >= image_thresh)[0]
                        all_boxes[j][i] = all_boxes[j][i][keep, :]
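The capping step above keeps only the max_per_image highest-scoring boxes across all classes of one image. A minimal, self-contained sketch of the same logic (the boxes and scores below are made up):

import numpy as np

# hypothetical per-class detections for one image: (x1, y1, x2, y2, score)
all_boxes_img = [
    np.array([[0., 0., 10., 10., 0.9], [5., 5., 20., 20., 0.4]]),  # class 1
    np.array([[2., 2., 8., 8., 0.7]]),                             # class 2
]
max_per_image = 2

# pool the scores of every class and find the score of the k-th best box
image_scores = np.hstack([dets[:, -1] for dets in all_boxes_img])
if len(image_scores) > max_per_image:
    image_thresh = np.sort(image_scores)[-max_per_image]
    # drop boxes below that threshold, class by class
    all_boxes_img = [dets[dets[:, -1] >= image_thresh]
                     for dets in all_boxes_img]

print([d.shape[0] for d in all_boxes_img])  # -> [1, 1]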
Example #2
    def forward(self, input):

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        scores = input[0][:, self._num_anchors:, :, :]
        bbox_deltas = input[1]
        im_info = input[2]
        cfg_key = input[3]

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        batch_size = bbox_deltas.size(0)

        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(
            np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                       shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()

        A = self._num_anchors
        K = shifts.size(0)

        self._anchors = self._anchors.type_as(scores)
        # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous()
        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:

        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

        # Same story for the scores:
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)
        # proposals = clip_boxes_batch(proposals, im_info, batch_size)

        # assign the score to 0 if it's not kept.
        # keep = self._filter_boxes(proposals, min_size * im_info[:, 2])

        # trim the keep index to make it equal over the batch
        # keep_idx = torch.cat(tuple(keep_idx), 0)

        # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size)
        # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4)

        # _, order = torch.sort(scores_keep, 1, True)

        scores_keep = scores
        proposals_keep = proposals
        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)
            keep_idx_i = nms(proposals_single, scores_single.squeeze(1),
                             nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # padding 0 at the end.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output
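The shift-and-broadcast step in the forward pass above turns A base anchors into K * A anchors, one set per feature-map cell. A small sketch that can be run on its own to check the shapes (the stride, feature size, and base anchors here are made up):

import numpy as np
import torch

feat_height, feat_width, feat_stride = 2, 3, 16
# one 4-coordinate shift (x, y, x, y) per feature-map cell
shift_x = np.arange(0, feat_width) * feat_stride
shift_y = np.arange(0, feat_height) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = torch.from_numpy(
    np.vstack((shift_x.ravel(), shift_y.ravel(),
               shift_x.ravel(), shift_y.ravel())).transpose()).float()

# two hypothetical base anchors centered at the origin
base_anchors = torch.tensor([[-8., -8., 8., 8.], [-16., -16., 16., 16.]])
A, K = base_anchors.size(0), shifts.size(0)

# broadcast: (1, A, 4) + (K, 1, 4) -> (K, A, 4), then flatten to K*A anchors
anchors = (base_anchors.view(1, A, 4) + shifts.view(K, 1, 4)).view(K * A, 4)
print(anchors.shape)  # torch.Size([12, 4])  (K = 6 cells x A = 2 anchors)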
Example #3
    def generate_proposal(self, rpn_cls_probs, anchors, rpn_bbox_preds,
                          im_info):
        # TODO create a new Function
        """
        Args:
        rpn_cls_probs: FloatTensor,shape(N,2*num_anchors,H,W)
        rpn_bbox_preds: FloatTensor,shape(N,num_anchors*4,H,W)
        anchors: FloatTensor,shape(N,4,H,W)

        Returns:
        proposals_batch: FloatTensor, shape(N,post_nms_topN,4)
        fg_probs_batch: FloatTensor, shape(N,post_nms_topN)
        """
        # assert len(
        # rpn_bbox_preds) == 1, 'just one feature maps is supported now'
        # rpn_bbox_preds = rpn_bbox_preds[0]
        # do not backward
        rpn_cls_probs = rpn_cls_probs.detach()
        rpn_bbox_preds = rpn_bbox_preds.detach()

        batch_size = rpn_bbox_preds.shape[0]

        coders = bbox_coders.build(
            self.target_generators.target_generator_config['coder_config'])
        proposals = coders.decode_batch(rpn_bbox_preds, anchors)

        # filter and clip
        proposals = box_ops.clip_boxes(proposals, im_info)

        # fg prob
        fg_probs = rpn_cls_probs[:, :, 1]

        # sort fg
        _, fg_probs_order = torch.sort(fg_probs, dim=1, descending=True)

        # fg_probs_batch = torch.zeros(batch_size,
        # self.post_nms_topN).type_as(rpn_cls_probs)
        proposals_batch = torch.zeros(batch_size, self.post_nms_topN,
                                      4).type_as(rpn_bbox_preds)
        proposals_order = torch.zeros(
            batch_size, self.post_nms_topN).fill_(-1).type_as(fg_probs_order)

        for i in range(batch_size):
            proposals_single = proposals[i]
            fg_probs_single = fg_probs[i]
            fg_order_single = fg_probs_order[i]
            # pre nms
            if self.pre_nms_topN > 0:
                fg_order_single = fg_order_single[:self.pre_nms_topN]
            proposals_single = proposals_single[fg_order_single]
            fg_probs_single = fg_probs_single[fg_order_single]

            # nms
            keep_idx_i = nms(proposals_single, fg_probs_single,
                             self.nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            # post nms
            if self.post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:self.post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            fg_probs_single = fg_probs_single[keep_idx_i]
            fg_order_single = fg_order_single[keep_idx_i]

            # padding 0 at the end.
            num_proposal = keep_idx_i.numel()
            proposals_batch[i, :num_proposal, :] = proposals_single
            # fg_probs_batch[i, :num_proposal] = fg_probs_single
            proposals_order[i, :num_proposal] = fg_order_single
        return proposals_batch, proposals_order
Example #4
def select_boxes(rois,
                 cls_prob,
                 bbox_pred,
                 im_infos,
                 thresh=0.05,
                 max_per_image=5):
    """ Select bounding boxes of objects from the predicted results of faster rcnn """
    n_classes = cls_prob.shape[2]
    all_boxes = []
    for i in range(rois.shape[0]):
        boxes = rois[i, :, 1:5].view(1, -1, 4)
        scores = cls_prob[i, :, :].view(1, -1, n_classes)
        box_deltas = bbox_pred[i, :, :].view(1, -1, 4 * n_classes)
        im_info = im_infos[i, :].view(1, 3)
        # De-normalize box deltas by the precomputed mean and std
        bbox_normalize_means = (0.0, 0.0, 0.0, 0.0)
        bbox_normalize_stds = (0.1, 0.1, 0.2, 0.2)
        box_deltas = box_deltas.view(-1, 4) * torch.Tensor(bbox_normalize_stds).cuda() \
                     + torch.Tensor(bbox_normalize_means).cuda()
        # box_deltas = box_deltas.view(-1, 4) * torch.Tensor(bbox_normalize_stds) \
        #              + torch.Tensor(bbox_normalize_means)
        # 21 is the number of classes in the Pascal VOC dataset
        box_deltas = box_deltas.view(1, -1, 4 * n_classes)

        # Compute predicted boxes by predicted rois and the corresponding box deltas
        # Clip borders of predicted boxes if they cross the border of the resized image
        pred_boxes = bbox_transform_inv(boxes, box_deltas, batch_size=1)
        # padding = 5
        # pred_boxes[:, :, 0::4] -= padding
        # pred_boxes[:, :, 1::4] -= padding
        # pred_boxes[:, :, 2::4] += padding
        # pred_boxes[:, :, 3::4] += padding
        pred_boxes = clip_boxes(pred_boxes, im_info, batch_size=1)

        # pred_boxes.shape: (300, 4 * n_classes)
        # scores.shape: (300, n_classes)
        pred_boxes = pred_boxes.squeeze()
        scores = scores.squeeze()

        all_box = []

        for j in range(1, 21):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, dim=0, descending=True)
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                # Concatenate boxes coordinates and class scores
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]

                # Non-maximum suppression (suppress boxes with IoU >= 0.3) for selecting predicted boxes for each object class in the image
                keep = nms(cls_boxes[order, :], cls_scores[order], 0.3)
                cls_dets = cls_dets[keep.view(-1).long()]

                # Concatenate the class ids of boxes
                class_id = torch.ones(cls_dets.shape[0], 1,
                                      dtype=torch.float).cuda() * j
                # class_id = torch.ones(cls_dets.shape[0], 1, dtype=torch.float) * j
                cls_dets = torch.cat((cls_dets, class_id), dim=1)
                # Add each box (use k here so the outer image index i is not shadowed)
                for k in range(cls_dets.shape[0]):
                    all_box.append(cls_dets[k, :].cpu().detach().numpy())
                    # all_box.append(cls_dets[k, :].detach().numpy())

        all_box = np.array(all_box)
        if all_box.shape[0] != 0:
            # all_box = np.concatenate((all_box, all_box), axis=0)
            # Rank all boxes based on scores in the descending order
            index = all_box[:, 4].argsort()[::-1]
            all_box = all_box[index, :]
            # Limit to max_per_image detections *over all classes*
            if all_box.shape[0] > max_per_image:
                all_box = all_box[:max_per_image, :]

            # all_box.shape: (num_boxes, 6), 6: (x_min, y_min, x_max, y_max, score, class_id)
            # print(all_box)
            # print(all_box.shape[0])
            # print('\n')
        all_boxes.append(all_box)

    return all_boxes
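The delta de-normalization near the top of select_boxes undoes the (delta - mean) / std normalization applied when the regression targets were built. The same step in isolation (the deltas are made up; the means and stds mirror the values hard-coded above):

import torch

bbox_normalize_means = torch.tensor([0.0, 0.0, 0.0, 0.0])
bbox_normalize_stds = torch.tensor([0.1, 0.1, 0.2, 0.2])

# hypothetical network output: normalized deltas for 2 boxes
box_deltas = torch.tensor([[1.0, -1.0, 0.5, 0.5],
                           [0.0, 2.0, -0.5, 1.0]])
# undo the (delta - mean) / std normalization used when building targets
box_deltas = box_deltas * bbox_normalize_stds + bbox_normalize_means
print(box_deltas)
# tensor([[ 0.1000, -0.1000,  0.1000,  0.1000],
#         [ 0.0000,  0.2000, -0.1000,  0.2000]])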
Example #5
    def test_2d(self, dataloader, model, logger):
        self.logger.info('Start testing')
        num_samples = len(dataloader)

        if self.feat_vis:
            # enable it before forward pass
            model.enable_feat_vis()
        end_time = 0

        for step, data in enumerate(dataloader):
            # start_time = time.time()
            data = common.to_cuda(data)
            image_path = data[constants.KEY_IMAGE_PATH]

            with torch.no_grad():
                prediction, _, _ = model(data)
            # duration_time = time.time() - start_time

            if self.feat_vis:
                featmaps_dict = model.get_feat()
                from utils.visualizer import FeatVisualizer
                feat_visualizer = FeatVisualizer()
                feat_visualizer.visualize_maps(featmaps_dict)

            # initialize dets for each class
            # dets = [[] for class_ind in range(self.n_classes)]
            dets = [[]]

            scores = prediction[constants.KEY_CLASSES]
            boxes_2d = prediction[constants.KEY_BOXES_2D]

            batch_size = scores.shape[0]
            scores = scores.view(-1, self.n_classes)
            new_scores = torch.zeros_like(scores)
            _, scores_argmax = scores.max(dim=-1)
            row = torch.arange(0, scores_argmax.numel()).type_as(scores_argmax)
            new_scores[row, scores_argmax] = scores[row, scores_argmax]
            scores = new_scores.view(batch_size, -1, self.n_classes)

            #  if step == 6:
            #  import ipdb
            #  ipdb.set_trace()

            for batch_ind in range(batch_size):
                boxes_2d_per_img = boxes_2d[batch_ind]
                scores_per_img = scores[batch_ind]
                for class_ind in range(1, self.n_classes):
                    # cls thresh
                    inds = torch.nonzero(
                        scores_per_img[:, class_ind] > self.thresh).view(-1)
                    threshed_scores_per_img = scores_per_img[inds, class_ind]
                    if inds.numel() > 0:
                        # if self.class_agnostic:
                        threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
                        # else:
                        # threshed_boxes_2d_per_img = boxes_2d_per_img[
                        # inds, class_ind * 4:class_ind * 4 + 4]
                        # concat boxes and scores
                        threshed_dets_per_img = torch.cat([
                            threshed_boxes_2d_per_img,
                            threshed_scores_per_img.unsqueeze(-1),
                        ],
                                                          dim=-1)

                        # sort by scores
                        _, order = torch.sort(threshed_scores_per_img, 0, True)
                        threshed_dets_per_img = threshed_dets_per_img[order]

                        # nms
                        keep = nms(threshed_dets_per_img[:, :4],
                                   threshed_dets_per_img[:, 4],
                                   self.nms).view(-1).long()
                        nms_dets_per_img = threshed_dets_per_img[keep].detach(
                        ).cpu().numpy()

                        dets.append(nms_dets_per_img)
                    else:
                        dets.append([])

                duration_time = time.time() - end_time
                label_path = self._generate_label_path(image_path[batch_ind])
                self.save_dets(dets, label_path, image_path[batch_ind])
                sys.stdout.write('\r{}/{},duration: {}'.format(
                    step + 1, num_samples, duration_time))
                sys.stdout.flush()

                end_time = time.time()
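The new_scores block near the top of test_2d keeps, for each box, only the probability of its argmax class and zeroes the rest, so a box can survive at most one per-class NMS pass. The same trick on a toy score matrix:

import torch

scores = torch.tensor([[0.1, 0.7, 0.2],
                       [0.5, 0.3, 0.2]])  # (num_boxes, n_classes)
new_scores = torch.zeros_like(scores)
_, scores_argmax = scores.max(dim=-1)
row = torch.arange(0, scores_argmax.numel()).type_as(scores_argmax)
new_scores[row, scores_argmax] = scores[row, scores_argmax]
print(new_scores)
# tensor([[0.0000, 0.7000, 0.0000],
#         [0.5000, 0.0000, 0.0000]])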
Example #6
    def test_corners_3d(self, dataloader, model, logger):
        self.logger.info('Start testing')
        num_samples = len(dataloader)

        if self.feat_vis:
            # enable it before forward pass
            model.enable_feat_vis()
        end_time = 0

        for step, data in enumerate(dataloader):
            # start_time = time.time()
            data = common.to_cuda(data)
            image_path = data[constants.KEY_IMAGE_PATH]

            with torch.no_grad():
                prediction, _, _ = model(data)
            # duration_time = time.time() - start_time

            if self.feat_vis:
                featmaps_dict = model.get_feat()
                from utils.visualizer import FeatVisualizer
                feat_visualizer = FeatVisualizer()
                feat_visualizer.visualize_maps(featmaps_dict)

            # initialize dets for each class
            # dets = [[] for class_ind in range(self.n_classes)]

            scores = prediction[constants.KEY_CLASSES]
            boxes_2d = prediction[constants.KEY_BOXES_2D]
            #  dims = prediction[constants.KEY_DIMS]
            corners_2d = prediction[constants.KEY_CORNERS_2D]
            #  import ipdb
            #  ipdb.set_trace()
            p2 = data[constants.KEY_STEREO_CALIB_P2_ORIG]

            # rcnn_3d = prediction['rcnn_3d']
            batch_size = scores.shape[0]
            scores = scores.view(-1, self.n_classes)
            new_scores = torch.zeros_like(scores)
            _, scores_argmax = scores.max(dim=-1)
            row = torch.arange(0, scores_argmax.numel()).type_as(scores_argmax)
            new_scores[row, scores_argmax] = scores[row, scores_argmax]
            scores = new_scores.view(batch_size, -1, self.n_classes)

            #  if step == 6:
            #  import ipdb
            #  ipdb.set_trace()

            for batch_ind in range(batch_size):
                boxes_2d_per_img = boxes_2d[batch_ind]
                scores_per_img = scores[batch_ind]
                #  dims_per_img = dims[batch_ind]
                corners_2d_per_img = corners_2d[batch_ind]
                p2_per_img = p2[batch_ind]

                num_cols = corners_2d.shape[-1]
                dets = [np.zeros((0, 8, num_cols), dtype=np.float32)]
                dets_2d = [np.zeros((0, 4), dtype=np.float32)]

                for class_ind in range(1, self.n_classes):
                    # cls thresh
                    inds = torch.nonzero(
                        scores_per_img[:, class_ind] > self.thresh).view(-1)
                    threshed_scores_per_img = scores_per_img[inds, class_ind]
                    if inds.numel() > 0:
                        # if self.class_agnostic:
                        threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
                        #  threshed_dims_per_img = dims_per_img[inds]
                        threshed_corners_2d_per_img = corners_2d_per_img[inds]
                        # threshed_rcnn_3d_per_img = rcnn_3d_per_img[inds]
                        # else:
                        # threshed_boxes_2d_per_img = boxes_2d_per_img[
                        # inds, class_ind * 4:class_ind * 4 + 4]
                        # concat boxes and scores
                        threshed_dets_per_img = torch.cat(
                            [
                                threshed_boxes_2d_per_img,
                                threshed_scores_per_img.unsqueeze(-1),
                                #  threshed_dims_per_img,
                            ],
                            dim=-1)

                        # sort by scores
                        _, order = torch.sort(threshed_scores_per_img, 0, True)
                        threshed_dets_per_img = threshed_dets_per_img[order]
                        threshed_corners_2d_per_img = threshed_corners_2d_per_img[
                            order]

                        # nms
                        keep = nms(threshed_dets_per_img[:, :4],
                                   threshed_dets_per_img[:, 4],
                                   self.nms).view(-1).long()
                        nms_dets_per_img = threshed_dets_per_img[keep].detach(
                        ).cpu().numpy()
                        nms_corners_2d_per_img = threshed_corners_2d_per_img[
                            keep].detach().cpu().numpy()

                        dets.append(nms_corners_2d_per_img)
                        dets_2d.append(nms_dets_per_img[:, :4])
                    else:
                        dets.append(
                            np.zeros((0, 8, num_cols), dtype=np.float32))
                        dets_2d.append(np.zeros((0, 4)))

                # import ipdb
                # ipdb.set_trace()
                corners = np.concatenate(dets, axis=0)
                dets_2d = np.concatenate(dets_2d, axis=0)
                corners_2d = None
                corners_3d = None
                if num_cols == 3:
                    corners_3d = corners
                else:
                    corners_2d = corners

                self.visualizer.render_image_corners_2d(
                    image_path[0],
                    boxes_2d=dets_2d,
                    corners_2d=corners_2d,
                    corners_3d=corners_3d,
                    p2=p2_per_img.cpu().numpy())

                duration_time = time.time() - end_time
                #  label_path = self._generate_label_path(image_path[batch_ind])
                #  self.save_mono_3d_dets(dets, label_path)
                sys.stdout.write('\r{}/{},duration: {}'.format(
                    step + 1, num_samples, duration_time))
                sys.stdout.flush()

                end_time = time.time()
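Unlike Example #5, this method seeds dets and dets_2d with zero-row arrays instead of empty lists, so the np.concatenate calls at the end work even when no class produces detections. A minimal illustration:

import numpy as np

dets = [np.zeros((0, 4), dtype=np.float32)]      # placeholder so concat never fails
dets.append(np.zeros((0, 4), dtype=np.float32))  # a class with no detections
dets.append(np.array([[0., 0., 10., 10.]], dtype=np.float32))  # one detection

merged = np.concatenate(dets, axis=0)
print(merged.shape)  # (1, 4)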
Example #7
    def inference(self, im, p2):
        """
        Args:
            im: shape(N, 3, H, W)

        Returns:
            dets: shape(N, M, 8)
        """
        config = self.config
        args = self.args
        eval_config = config['eval_config']
        model_config = config['model_config']
        data_config = config['eval_data_config']

        np.random.seed(eval_config['rng_seed'])

        self.logger.info('Using config:')
        pprint.pprint({
            'model_config': model_config,
            'data_config': data_config,
            'eval_config': eval_config
        })

        eval_out = eval_config['eval_out']
        if not os.path.exists(eval_out):
            self.logger.info('create eval out directory {}'.format(eval_out))
            os.makedirs(eval_out)
        else:
            self.logger.warning('dir {} exists already!'.format(eval_out))

        # restore from random or checkpoint
        restore = True
        # two ways to load the model
        # 1. load from any other dir; it just needs the config and model path
        # 2. load from the training dir
        if args.model is not None:
            # assert args.model is not None, 'please determine model or checkpoint'
            # it should be a path to model
            checkpoint_name = os.path.basename(args.model)
            input_dir = os.path.dirname(args.model)
        elif args.checkpoint is not None:
            checkpoint_name = 'detector_{}.pth'.format(args.checkpoint)
            assert args.load_dir is not None, 'please choose a directory to load checkpoint'
            eval_config['load_dir'] = args.load_dir
            input_dir = os.path.join(eval_config['load_dir'],
                                     model_config['type'], data_config['name'])
            if not os.path.exists(input_dir):
                raise Exception(
                    'There is no input directory for loading network from {}'.
                    format(input_dir))
        else:
            restore = False

        # log for restore
        if restore:
            self.logger.info("restore from checkpoint")
        else:
            self.logger.info("use pytorch default initialization")

        # model
        model = detectors.build(model_config)
        model.eval()

        if restore:
            # saver
            saver = Saver(input_dir)
            saver.load({'model': model}, checkpoint_name)

        model = model.cuda()

        #  dataloader = dataloaders.make_data_loader(data_config, training=False)

        self.logger.info('Start testing')
        #  num_samples = len(dataloader)

        #  for step, data in enumerate(dataloader):
        data = self.preprocess(im, p2)
        data = self.to_batch(data)
        data = common.to_cuda(data)
        #  image_path = data[constants.KEY_IMAGE_PATH]

        with torch.no_grad():
            prediction = model(data)

        # initialize dets for each class
        dets = [[]]

        scores = prediction[constants.KEY_CLASSES]
        boxes_2d = prediction[constants.KEY_BOXES_2D]
        dims = prediction[constants.KEY_DIMS]
        orients = prediction[constants.KEY_ORIENTS_V2]
        p2 = data[constants.KEY_STEREO_CALIB_P2_ORIG]

        # rcnn_3d = prediction['rcnn_3d']
        batch_size = scores.shape[0]
        scores = scores.view(-1, self.n_classes)
        new_scores = torch.zeros_like(scores)
        _, scores_argmax = scores.max(dim=-1)
        row = torch.arange(0, scores_argmax.numel()).type_as(scores_argmax)
        new_scores[row, scores_argmax] = scores[row, scores_argmax]
        scores = new_scores.view(batch_size, -1, self.n_classes)

        boxes_2d_per_img = boxes_2d[0]
        scores_per_img = scores[0]
        dims_per_img = dims[0]
        orients_per_img = orients[0]
        p2_per_img = p2[0]
        # rcnn_3d_per_img = rcnn_3d[batch_ind]
        # import ipdb
        # ipdb.set_trace()
        for class_ind in range(1, self.n_classes):
            # cls thresh
            inds = torch.nonzero(
                scores_per_img[:, class_ind] > self.thresh).view(-1)
            threshed_scores_per_img = scores_per_img[inds, class_ind]
            if inds.numel() > 0:
                threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
                threshed_dims_per_img = dims_per_img[inds]
                threshed_orients_per_img = orients_per_img[inds]
                threshed_dets_per_img = torch.cat([
                    threshed_boxes_2d_per_img,
                    threshed_scores_per_img.unsqueeze(-1),
                    threshed_dims_per_img,
                    threshed_orients_per_img.unsqueeze(-1)
                ],
                                                  dim=-1)

                # sort by scores
                _, order = torch.sort(threshed_scores_per_img, 0, True)
                threshed_dets_per_img = threshed_dets_per_img[order]

                # nms
                keep = nms(threshed_dets_per_img[:, :4],
                           threshed_dets_per_img[:, 4],
                           self.nms).view(-1).long()
                nms_dets_per_img = threshed_dets_per_img[keep].detach().cpu(
                ).numpy()

                # calculate location
                location = geometry_utils.calc_location(
                    nms_dets_per_img[:, 5:8], nms_dets_per_img[:, :5],
                    nms_dets_per_img[:, 8], p2_per_img.cpu().numpy())

                nms_dets_per_img = np.concatenate(
                    [
                        nms_dets_per_img[:, :5], nms_dets_per_img[:, 5:8],
                        location, nms_dets_per_img[:, -1:]
                    ],
                    axis=-1)

                dets.append(nms_dets_per_img)
            else:
                dets.append([])

            #  duration_time = time.time() - end_time
            #  label_path = self._generate_label_path(image_path[batch_ind])
            #  self.save_mono_3d_dets(dets, label_path)
            #  sys.stdout.write('\r{}/{},duration: {}'.format(
            #  step + 1, num_samples, duration_time))
            #  sys.stdout.flush()

            #  end_time = time.time()

            #  xmin, ymin, xmax, ymax, cf, h, w, l, x, y, z, ry
        return dets
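Per the layout noted in the trailing comment (xmin, ymin, xmax, ymax, cf, h, w, l, x, y, z, ry), each returned row can be unpacked by column slices; a small sketch with one hypothetical row:

import numpy as np

# hypothetical 12-column detection row, matching the layout noted above
det = np.array([[10., 20., 110., 220., 0.9,
                 1.5, 1.6, 3.9, 1.0, 1.5, 20.0, 0.1]])
box_2d = det[:, :4]      # 2D box corners
score = det[:, 4]        # confidence
dims = det[:, 5:8]       # h, w, l
location = det[:, 8:11]  # x, y, z in camera coordinates
ry = det[:, 11]          # rotation around the camera y-axis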
Example #8
def evaluation(name, net=None, vis=False, cuda=True, class_agnostic=False):
    cfg.TRAIN.USE_FLIPPED = False

    imdb, roidb, ratio_list, ratio_index = combined_roidb(name, False)
    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))

    if not net:

        input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
        # input_dir = 'weight'
        if not os.path.exists(input_dir):
            raise Exception(
                'There is no input directory for loading network from ' +
                input_dir)
        # load_name = os.path.join(input_dir,
        #                          'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))

        load_name = os.path.join(
            input_dir, 'faster_rcnn_{}_best.pth'.format(cfg['POOLING_MODE']))

        # initialize the network here.
        if args.net == 'vgg16':
            fasterRCNN = vgg16(imdb.classes,
                               pretrained=False,
                               class_agnostic=args.class_agnostic)
        elif args.net == 'res101':
            fasterRCNN = resnet(imdb.classes,
                                101,
                                pretrained=False,
                                class_agnostic=args.class_agnostic)
        elif args.net == 'res50':
            fasterRCNN = resnet(imdb.classes,
                                50,
                                pretrained=False,
                                class_agnostic=args.class_agnostic)
        elif args.net == 'res152':
            fasterRCNN = resnet(imdb.classes,
                                152,
                                pretrained=False,
                                class_agnostic=args.class_agnostic)
        else:
            print("network is not defined")
            pdb.set_trace()

        fasterRCNN.create_architecture()

        print("load checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        fasterRCNN.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']

        print('load model successfully!')

    else:

        fasterRCNN = net

    # initialize the tensor holders here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if cuda:
        cfg.CUDA = True

    if cuda:
        fasterRCNN.cuda()

    start = time.time()
    max_per_image = 100

    # vis = args.vis

    if vis:
        thresh = 0.05
    else:
        thresh = 0.0

    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, \
                             imdb.num_classes, training=False, normalize=False)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)

    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):

        data = next(data_iter)
        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally de-normalize targets by the precomputed mean and stdev
                if class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                         .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

        if vis:
            cv2.imwrite('result.png', im2show)
            pdb.set_trace()
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    mAP = imdb.evaluate_detections(all_boxes, output_dir)  # avoid shadowing the builtin map
    # print(mAP)
    end = time.time()
    print("test time: %0.4fs" % (end - start))
    return mAP
Example #9
def validate_voc(val_loader, S_RAD, epoch, num_class, num_segments, session,
                 batch_size, cfg, log, dataset, pathway, eval_metrics):
    val_iters_per_epoch = int(np.round(len(val_loader)))
    S_RAD.eval()
    all_boxes = [[[[] for _ in range(num_class)]
                  for _ in range(batch_size * num_segments)]
                 for _ in range(val_iters_per_epoch)]
    bbox = [[[[] for _ in range(num_class)]
             for _ in range(batch_size * num_segments)]
            for _ in range(val_iters_per_epoch)]
    #limit the number of proposals per image across all classes
    max_per_image = cfg.MAX_DET_IMG

    #confusion matrix
    conf_mat = ConfusionMatrix(num_classes=num_class,
                               CONF_THRESHOLD=0.8,
                               IOU_THRESHOLD=0.2,
                               dataset=dataset)

    num_gt = [0 for _ in range(num_class)]

    #data_iter = iter(val_loader)
    for step, data in enumerate(val_loader):

        #evaluate /inference code
        #start_time = time.time()
        rois, cls_prob, bbox_pred = S_RAD(data)
        #torch.cuda.synchronize()
        #end_time = time.time() - start_time

        if dataset == 'ucfsport':
            class_dict = act2id
        elif dataset == 'jhmdb':
            class_dict = jhmdbact2id
        elif dataset == 'ucf24':
            class_dict = ucf24act2id
        elif dataset == 'urfall':
            class_dict = fallactivity2id
        elif dataset == 'imfd':
            class_dict = imfallactivity2id
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        box_deltas = bbox_pred.data
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                           + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(scores.shape[0], -1, 4 * num_class)

        #transform boxes to x1,y1,x2,y2 format and clip the coords to the image
        pred_boxes = bbox_transform_inv(boxes, box_deltas, scores.shape[0])
        if pathway == "two_pathway":
            im_info = data[0][3].view(-1, 3).to(device="cuda")
            gt_boxes = (data[0][1].view(-1, cfg.MAX_NUM_GT_BOXES,
                                        num_class + 4)).to(device="cuda")
        else:
            im_info = data[3].view(-1, 3).to(device="cuda")
            gt_boxes = (data[1].view(-1, cfg.MAX_NUM_GT_BOXES,
                                     num_class + 4)).to(device="cuda")
            pred_boxes = clip_boxes(pred_boxes, im_info.data, scores.shape[0])

        #gt boxes
        gtbb = gt_boxes[:, :, 0:4]
        gtlabels = gt_boxes[:, :, 4:]

        #move the ground truth to cpu
        gtbb = gtbb.cpu().numpy()
        gtlabels = gtlabels.cpu().numpy()
        #count = 0

        for image in range(pred_boxes.shape[0]):
            for class_id in range(1, num_class):
                inds = torch.nonzero(scores[image, :, class_id] > 0).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[image, inds, class_id]
                    #arranging in descending order
                    _, order = torch.sort(cls_scores, 0, True)
                    cls_boxes = pred_boxes[image, inds,
                                           class_id * 4:(class_id + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)),
                                         1)
                    cls_dets = cls_dets[order, :]
                    keep = nms(cls_boxes[order, :], cls_scores[order],
                               cfg.TEST.NMS)
                    cls_dets = cls_dets[keep.view(-1)]
                    all_boxes[step][image][class_id] = cls_dets.cpu().numpy()

                #collect ground truth boxes for the image
                index = np.unique(np.nonzero(gtbb[image])[0])
                gtbox = gtbb[image][index]
                label = gtlabels[image][index]

                #take a ground truth box only if its label == 1 for that class
                bbox[step][image][class_id] = [
                    gtbox[i] for i in range(len(label)) if label[i, class_id]
                ]
                num_gt[class_id] += np.sum(len(bbox[step][image][class_id]))
                if eval_metrics:
                    if len(bbox[step][image][class_id]) > 0 and len(
                            all_boxes[step][image][class_id]) > 0:
                        conf_mat.process_batch(all_boxes[step][image],
                                               bbox[step][image])

    if eval_metrics:
        result = conf_mat.return_matrix()
        print(result)
        conf_mat.plot(result)

    ap = [None for _ in range(num_class)]

    #calculate fp and tp for each detection
    for cls_id in range(1, num_class):

        tpfp = []
        class_det = []
        for video in range(len(all_boxes)):
            for batch in range(len(all_boxes[0])):
                tp_fp = (tpfp_default(all_boxes[video][batch][cls_id],\
                   bbox[video][batch][cls_id],iou_thr=0.5))
                if (len(tp_fp) > 0
                        and len(all_boxes[video][batch][cls_id]) > 0):
                    tpfp.append(tp_fp)
                    class_det.append(all_boxes[video][batch][cls_id])
        assert len(tpfp) == len(class_det)
        tp, fp = tuple(zip(*tpfp))

        # sort all det bboxes by score, also sort tp and fp
        cls_det = np.vstack(class_det)
        num_dets = cls_det.shape[0]
        sort_inds = np.argsort(-cls_det[:, -1])
        tp = np.hstack(tp)[:, sort_inds]
        fp = np.hstack(fp)[:, sort_inds]

        # calculate recall and precision with tp and fp
        tp = np.cumsum(tp, axis=1)
        fp = np.cumsum(fp, axis=1)
        eps = np.finfo(np.float32).eps
        recalls = tp / np.maximum(num_gt[cls_id], eps)
        precisions = tp / np.maximum((tp + fp), eps)

        #Precision-Recall curve visualisation
        if eval_metrics:
            import matplotlib.pyplot as plt
            colors = [
                'ac', 'navy', 'gold', 'turquoise', 'red', 'green', 'black',
                'brown', 'darkorange', 'cornflowerblue', 'teal'
            ]
            plt.plot(recalls[0, :],
                     precisions[0, :],
                     color=colors[cls_id],
                     lw=2,
                     label='class {}'.format(cls_id))

        ap[cls_id] = average_precision(recalls[0, :],
                                       precisions[0, :],
                                       mode='area')

    #Plot the Precision-Recall curve
    if eval_metrics:
        fig = plt.gcf()
        fig.subplots_adjust(bottom=0.25)
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title('Extension of Precision-Recall curve to multi-class')
        plt.legend(loc="best")
        plt.show()

    for k, v in class_dict.items():
        #print("Average precision per class:")
        out = ("class [{0}]:{1}   |gt:{2}".format(k, ap[v], num_gt[v]))
        print(out)
        log.write(out + '\n')
    mAP = ("mAP for epoch [{0}] is : {1}".format(epoch, mean(ap[1:])))
    print(mAP)
    log.write(mAP + '\n')
    log.flush()
    print("----------------------------------------------")
Example #10
def validate_virat(val_loader, S_RAD, epoch, num_class, num_segments, vis,
                   session, batch_size, input_data, cfg, log, dataset):
    val_iters_per_epoch = int(np.round(len(val_loader)))
    im_data, im_info, num_boxes, gt_boxes = input_data
    S_RAD.eval()
    all_boxes = [[[[] for _ in range(num_class)]
                  for _ in range(batch_size * num_segments)]
                 for _ in range(val_iters_per_epoch)]
    #limit the number of proposals per image across all classes
    max_per_image = cfg.MAX_DET_IMG

    #dict with matched detections and their scores @class_idx
    eval_target = {one: 1 for one in activity2id_person}
    e = {one: {} for one in eval_target}  # cat_id -> imgid -> {"dm","dscores"}

    #unique image id
    imgid = 0
    num_gt = [0 for _ in range(num_class)]
    for step, data in enumerate(val_loader):

        im_data.resize_(data[0].size()).copy_(data[0])
        gt_boxes.resize_(data[1].size()).copy_(data[1])
        num_boxes.resize_(data[2].size()).copy_(data[2])
        im_info.resize_(data[3].size()).copy_(data[3])
        im_data = im_data.view(-1, im_data.size(2), im_data.size(3),
                               im_data.size(4))
        im_info = im_info.view(-1, 3)
        gt_boxes = gt_boxes.view(-1, cfg.MAX_NUM_GT_BOXES, num_class + 4)
        num_boxes = num_boxes.view(-1)

        #evaluate / inference code
        start = time.time()
        rois, cls_prob, bbox_pred = S_RAD(im_data, im_info, gt_boxes,
                                          num_boxes)
        torch.cuda.synchronize()
        end_time = time.time() - start
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        #batch_size = rois.shape[0]
        box_deltas = bbox_pred.data
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                           + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(scores.shape[0], -1, 4 * num_class)

        #transform boxes to x1,y1,x2,y2 format and clip the coords to the image
        pred_boxes = bbox_transform_inv(boxes, box_deltas, scores.shape[0])
        pred_boxes = clip_boxes(pred_boxes, im_info.data, scores.shape[0])

        #gt boxes
        gtbb = gt_boxes[:, :, 0:4]
        gtlabels = gt_boxes[:, :, 4:]
        #pred_boxes /= data[3][0][1][2].item()
        #gtbb /= data[3][0][1][2].item()

        #move the ground truth to cpu
        gtbb = gtbb.cpu().numpy()
        gtlabels = gtlabels.cpu().numpy()
        #count = 0

        for image in range(pred_boxes.shape[0]):
            box = [None for _ in range(num_class)]
            imgid += 1
            for class_id in range(1, num_class):
                inds = torch.nonzero(
                    scores[image, :,
                           class_id] > cfg.VIRAT.SCORE_THRES).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[image, inds, class_id]
                    #arranging in descending order
                    _, order = torch.sort(cls_scores, 0, True)
                    cls_boxes = pred_boxes[image, inds,
                                           class_id * 4:(class_id + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)),
                                         1)
                    cls_dets = cls_dets[order, :]
                    keep = nms(cls_boxes[order, :], cls_scores[order],
                               cfg.TEST.NMS)
                    cls_dets = cls_dets[keep.view(-1)]
                    all_boxes[step][image][class_id] = cls_dets.cpu().numpy()

                #collect ground truth boxes for the image
                index = np.unique(np.nonzero(gtbb[image])[0])
                gtbox = gtbb[image][index]
                label = gtlabels[image][index]

                #take a ground truth box only if its label == 1 for that class
                box[class_id] = [
                    gtbox[i] for i in range(len(label)) if label[i, class_id]
                ]
                num_gt[class_id] += np.sum(len(box[class_id]))
            match_dt_gt(e, imgid, all_boxes[step][image], box,
                        activity2id_person)
            if (step + 1) % 50 == 0:
                output = ('Test: [{0}/{1}]\t'.format(step,
                                                     (val_iters_per_epoch)))
                print(output)

    aps = aggregate_eval(e, maxDet=max_per_image)
    mAP = (mean(aps[target] for target in aps.keys()))

    for k, v in aps.items():
        output = ('class: [{0}] - {1}'.format(k, v))
        log.write(output + '\n')
        print(output)
    mAPout = ('mAP at epoch {0}: {1}'.format(epoch, mAP))
    print('mAP at epoch {0}: {1} \n'.format(epoch, mAP))
    log.write(mAPout + '\n')
    log.flush()
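validate_virat folds the segment dimension into the batch before the forward pass, so each frame is detected independently. The reshapes amount to the following (the tensor sizes are hypothetical):

import torch

batch_size, num_segments = 2, 4
im_data = torch.randn(batch_size, num_segments, 3, 224, 224)

# fold segments into the batch dimension: (N, T, C, H, W) -> (N*T, C, H, W)
im_data = im_data.view(-1, im_data.size(2), im_data.size(3), im_data.size(4))
print(im_data.shape)  # torch.Size([8, 3, 224, 224])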
Example #11
    def postprocess(self, instance, im_info):
        # TODO create a new Function
        """
        Args:
        rpn_cls_probs: FloatTensor,shape(N,2*num_anchors,H,W)
        rpn_bbox_preds: FloatTensor,shape(N,num_anchors*4,H,W)
        anchors: FloatTensor,shape(N,4,H,W)

        Returns:
        proposals_batch: FloatTensor, shape(N,post_nms_topN,4)
        fg_probs_batch: FloatTensor, shape(N,post_nms_topN)
        """
        proposals = instance[constants.KEY_BOXES_2D]
        rpn_cls_probs = instance[constants.KEY_OBJECTNESS]

        batch_size = rpn_cls_probs.shape[0]

        # filter and clip
        proposals = box_ops.clip_boxes(proposals, im_info)

        # fg prob
        fg_probs = rpn_cls_probs[..., 1]

        # sort fg
        _, fg_probs_order = torch.sort(fg_probs, dim=1, descending=True)

        proposals_batch = torch.zeros(batch_size, self.post_nms_topN,
                                      4).type_as(proposals)
        proposals_order = torch.zeros(
            batch_size, self.post_nms_topN).fill_(-1).type_as(fg_probs_order)

        for i in range(batch_size):
            proposals_single = proposals[i]
            fg_probs_single = fg_probs[i]
            fg_order_single = fg_probs_order[i]
            # pre nms
            if self.pre_nms_topN > 0:
                fg_order_single = fg_order_single[:self.pre_nms_topN]
            proposals_single = proposals_single[fg_order_single]
            fg_probs_single = fg_probs_single[fg_order_single]

            # nms
            keep_idx_i = nms(proposals_single, fg_probs_single,
                             self.nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            # post nms
            if self.post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:self.post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            fg_probs_single = fg_probs_single[keep_idx_i]
            fg_order_single = fg_order_single[keep_idx_i]

            # padding 0 at the end.
            num_proposal = keep_idx_i.numel()
            proposals_batch[i, :num_proposal, :] = proposals_single
            # fg_probs_batch[i, :num_proposal] = fg_probs_single
            proposals_order[i, :num_proposal] = fg_order_single

        instance[constants.KEY_BOXES_2D] = proposals_batch
        # TODO(assign rpn_cls_probs)

        return instance
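The pre-top-N, NMS, post-top-N pattern that postprocess (and most examples here) implements can also be written against torchvision's NMS; a minimal sketch under that assumption (torchvision.ops.nms takes (x1, y1, x2, y2) boxes plus scores and returns keep indices sorted by descending score):

import torch
from torchvision.ops import nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
pre_nms_topN, post_nms_topN, nms_thresh = 2, 1, 0.5

# 1. keep the top pre_nms_topN proposals by score
order = scores.sort(descending=True).indices[:pre_nms_topN]
boxes, scores = boxes[order], scores[order]

# 2. NMS, then 3. keep the top post_nms_topN survivors
keep = nms(boxes, scores, nms_thresh)[:post_nms_topN]
print(boxes[keep])  # the single highest-scoring non-overlapping box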
Example #12
def mono_test_keypoint(eval_config, data_loader, model):
    """
    Only one image in batch is supported
    """
    num_samples = len(data_loader)
    for i, data in enumerate(data_loader):
        img_file = data['img_name']
        start_time = time.time()
        pred_boxes, scores, rois, anchors, rcnn_3d, keypoints = im_detect(
            model, to_cuda(data), eval_config, im_orig=data['img_orig'])
        duration_time = time.time() - start_time

        # import ipdb
        # ipdb.set_trace()
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        rois = rois.squeeze()
        rcnn_3d = rcnn_3d.squeeze()
        keypoints = keypoints.squeeze()
        # anchors = anchors.squeeze()

        classes = eval_config['classes']
        thresh = eval_config['thresh']

        dets = []
        res_rois = []
        res_anchors = []
        dets_3d = []
        keypoint_dets = []
        # import ipdb
        # ipdb.set_trace()
        # nms
        for j in range(1, len(classes)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                # NOTE: this example assumes class_agnostic=True; without this
                # branch, cls_boxes below would be undefined
                if eval_config['class_agnostic']:
                    cls_boxes = pred_boxes[inds, :]
                    rois_boxes = rois[inds, :]
                    anchors_boxes = anchors[inds, :]
                    rcnn_3d = rcnn_3d[inds]
                    keypoints = keypoints[inds]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                rois_dets = torch.cat((rois_boxes, cls_scores.unsqueeze(1)), 1)
                anchors_dets = torch.cat(
                    (anchors_boxes, cls_scores.unsqueeze(1)), 1)

                cls_dets = cls_dets[order]
                rois_dets = rois_dets[order]
                anchors_dets = anchors_dets[order]
                rcnn_3d = rcnn_3d[order]
                keypoints = keypoints[order]

                keep = nms(cls_dets, eval_config['nms'])

                cls_dets = cls_dets[keep.view(-1).long()]
                rois_dets = rois_dets[keep.view(-1).long()]
                anchors = anchors_dets[keep.view(-1).long()]
                rcnn_3d = rcnn_3d[keep.view(-1).long()]
                keypoints = keypoints[keep.view(-1).long()]

                cls_dets = cls_dets.detach().cpu().numpy()
                res_rois.append(rois_dets.detach().cpu().numpy())
                res_anchors.append(anchors.detach().cpu().numpy())

                coords = data['coords'][0].detach().cpu().numpy()
                gt_boxes = data['gt_boxes'][0].detach().cpu().numpy()
                gt_boxes_3d = data['gt_boxes_3d'][0].detach().cpu().numpy()
                points_3d = data['points_3d'][0].detach().cpu().numpy()
                local_angles_gt = data['local_angle'][0].detach().cpu().numpy()
                local_angle_oritation_gt = data['local_angle_oritation'][
                    0].detach().cpu().numpy()
                encoded_side_points = data['encoded_side_points'][0].detach(
                ).cpu().numpy()
                points_3d = points_3d.T

                p2 = data['p2'][0].detach().cpu().numpy()
                rcnn_3d = rcnn_3d.detach().cpu().numpy()
                keypoints = keypoints.detach().cpu().numpy()
                # rcnn_3d_gt = rcnn_3d_gt.detach().cpu().numpy()

                # use gt
                use_gt = False

                if use_gt:
                    keypoints_gt = data['keypoint_gt'][0].detach().cpu().numpy(
                    )
                    #  import ipdb
                    #  ipdb.set_trace()

                    center_x = (gt_boxes[:, 0] + gt_boxes[:, 2]) / 2
                    center_y = (gt_boxes[:, 1] + gt_boxes[:, 3]) / 2
                    gt_boxes_w = gt_boxes[:, 2] - gt_boxes[:, 0] + 1
                    gt_boxes_h = gt_boxes[:, 3] - gt_boxes[:, 1] + 1
                    center = np.stack([center_x, center_y], axis=-1)
                    gt_boxes_dims = np.stack([gt_boxes_w, gt_boxes_h], axis=-1)

                    point1 = encoded_side_points[:, :2] * gt_boxes_dims + center
                    point2 = encoded_side_points[:,
                                                 2:] * gt_boxes_dims + center

                    global_angles_gt = gt_boxes_3d[:, -1:]

                    rcnn_3d_gt = np.concatenate(
                        [gt_boxes_3d[:, :3], point1, point2], axis=-1)
                    # just for debug
                    if len(rcnn_3d_gt):
                        cls_dets_gt = np.concatenate(
                            [gt_boxes,
                             np.zeros_like(gt_boxes[:, -1:])],
                            axis=-1)
                        rcnn_3d_gt, _ = mono_3d_postprocess_bbox(
                            rcnn_3d_gt, cls_dets_gt, p2)

                        dets.append(
                            np.concatenate([cls_dets_gt, rcnn_3d_gt], axis=-1))
                        keypoint_dets.append(keypoints_gt)
                    else:
                        dets.append([])
                        res_rois.append([])
                        res_anchors.append([])
                        dets_3d.append([])
                        keypoint_dets.append([])
                else:
                    # Debug alternatives (disabled): gate on a specific
                    # sample name, or substitute ground-truth translation and
                    # orientation into rcnn_3d before post-processing.
                    rcnn_3d, location = mono_3d_postprocess_bbox(
                        rcnn_3d, cls_dets, p2)
                    # Alternative post-processing variants (disabled):
                    # mono_3d_postprocess_angle / mono_3d_postprocess_depth,
                    # overwriting the translation with the recovered location,
                    # or zeroing out rcnn_3d entirely.
                    dets.append(np.concatenate([cls_dets, rcnn_3d], axis=-1))
                    keypoints = keypoints.reshape((keypoints.shape[0], -1))
                    keypoint_dets.append(keypoints)

            else:
                dets.append([])
                res_rois.append([])
                res_anchors.append([])
                dets_3d.append([])
                keypoint_dets.append([])

        save_dets(dets, img_file[0], 'kitti', eval_config['eval_out'])
        save_keypoints(keypoint_dets[0], img_file[0])
        # Optionally save intermediate rois/anchors as well (disabled):
        # save_dets(res_rois[0], img_file[0], 'kitti', eval_config['eval_out_rois'])
        # save_dets(res_anchors[0], img_file[0], 'kitti', eval_config['eval_out_anchors'])

        sys.stdout.write('\r{}/{}, duration: {}'.format(
            i + 1, num_samples, duration_time))
        sys.stdout.flush()
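
These test loops rely on per-class NMS; the three-argument call
nms(boxes, scores, iou_threshold) matches torchvision's signature. A minimal
sketch of that call, assuming only that torchvision is available:

import torch
from torchvision.ops import nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
# indices of kept boxes, sorted by descending score
keep = nms(boxes, scores, iou_threshold=0.5)
# -> tensor([0, 2]); box 1 overlaps box 0 with IoU ~ 0.68 and is suppressed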
Example #13
def mono_test(eval_config, data_loader, model):
    """
    Only one image per batch is supported
    """
    num_samples = len(data_loader)
    end_time = time.time()
    for i, data in enumerate(data_loader):
        data_time = time.time() - end_time
        img_file = data['img_name']
        start_time = time.time()
        pred_boxes, scores, rois, anchors, rcnn_3d = im_detect(
            model, to_cuda(data), eval_config, im_orig=data['img_orig'])
        det_time = time.time() - start_time

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        rois = rois.squeeze()
        rcnn_3d = rcnn_3d.squeeze()
        # anchors = anchors.squeeze()

        classes = eval_config['classes']
        thresh = eval_config['thresh']

        dets = []
        res_rois = []
        res_anchors = []
        dets_3d = []
        # Alternative (disabled): keep only the argmax-class score per RoI
        # before thresholding:
        # new_scores = torch.zeros_like(scores)
        # _, scores_argmax = scores.max(dim=-1)
        # row = torch.arange(0, scores.shape[0]).type_as(scores_argmax)
        # new_scores[row, scores_argmax] = scores[row, scores_argmax]
        for j in range(1, len(classes) + 1):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            post_start_time = time.time()
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]

                cls_boxes = pred_boxes[inds, :]
                #  rois_boxes = rois[inds, :]
                #  anchors_boxes = anchors[inds, :]
                # if not eval_config['class_agnostic_3d']:
                #  rcnn_3d_dets = torch.cat(
                #  [rcnn_3d[inds, j * 3:j * 3 + 3], rcnn_3d[inds, -4:]],
                #  dim=-1)
                # else:
                rcnn_3d_dets = rcnn_3d[inds]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                #  rois_dets = torch.cat((rois_boxes, cls_scores.unsqueeze(1)), 1)
                #  anchors_dets = torch.cat(
                #  (anchors_boxes, cls_scores.unsqueeze(1)), 1)

                # sort
                _, order = torch.sort(cls_scores, 0, True)

                cls_dets = cls_dets[order]
                #  rois_dets = rois_dets[order]
                #  anchors_dets = anchors_dets[order]
                rcnn_3d_dets = rcnn_3d_dets[order]

                keep = nms(cls_dets[:, :4], cls_dets[:, -1],
                           eval_config['nms'])

                cls_dets = cls_dets[keep.view(-1).long()]
                #  rois_dets = rois_dets[keep.view(-1).long()]
                #  anchors = anchors_dets[keep.view(-1).long()]
                rcnn_3d_dets = rcnn_3d_dets[keep.view(-1).long()]

                cls_dets = cls_dets.detach().cpu().numpy()
                #  res_rois.append(rois_dets.detach().cpu().numpy())
                #  res_anchors.append(anchors.detach().cpu().numpy())

                coords = data['coords'][0].detach().cpu().numpy()
                gt_boxes = data['gt_boxes'][0].detach().cpu().numpy()
                gt_boxes_2d_proj = data['gt_boxes_proj'][0].detach().cpu().numpy()
                gt_boxes_3d = data['gt_boxes_3d'][0].detach().cpu().numpy()
                points_3d = data['points_3d'][0].detach().cpu().numpy()
                local_angles_gt = data['local_angle'][0].detach().cpu().numpy()
                local_angle_oritation_gt = data['local_angle_oritation'][
                    0].detach().cpu().numpy()
                encoded_side_points = data['encoded_side_points'][0].detach(
                ).cpu().numpy()
                points_3d = points_3d.T

                p2 = data['orig_p2'][0].detach().cpu().numpy()
                rcnn_3d_dets = rcnn_3d_dets.detach().cpu().numpy()
                cls_orient_gt = data['cls_orient'][0].detach().cpu().numpy()
                reg_orient_gt = data['reg_orient'][0].detach().cpu().numpy()
                # rcnn_3d_gt = rcnn_3d_gt.detach().cpu().numpy()

                # debug switch: substitute ground truth for the predictions
                use_gt = False
                post_time = 0

                if use_gt:
                    # The side-point decoding used in the previous variant is
                    # disabled here; orientation targets come directly from
                    # cls_orient / reg_orient instead.

                    rcnn_3d_gt = np.concatenate(
                        [gt_boxes_3d[:, :3], cls_orient_gt[..., np.newaxis],
                         reg_orient_gt], axis=-1)
                    # just for debug
                    if len(gt_boxes):
                        cls_dets_gt = np.concatenate(
                            [gt_boxes,
                             np.zeros_like(gt_boxes[:, -1:])],
                            axis=-1)
                        cls_dets_2d_proj_gt = np.concatenate(
                            [gt_boxes_2d_proj,
                             np.zeros_like(gt_boxes[:, -1:])], axis=-1)
                        rcnn_3d_gt, _ = mono_3d_postprocess_bbox(
                            rcnn_3d_gt, cls_dets_2d_proj_gt, p2)

                        dets.append(
                            np.concatenate([cls_dets_2d_proj_gt, rcnn_3d_gt],
                                           axis=-1))
                    else:
                        dets.append([])
                        res_rois.append([])
                        res_anchors.append([])
                        dets_3d.append([])
                else:
                    # Debug alternatives (disabled): gate on a specific
                    # sample name, or substitute ground-truth values. Note
                    # that the 3D post-processing itself is also disabled in
                    # this variant:
                    # rcnn_3d_dets, location = mono_3d_postprocess_bbox(
                    #     rcnn_3d_dets, cls_dets, p2)

                    post_time = time.time() - post_start_time
                    # Alternative post-processing variants (disabled):
                    # mono_3d_postprocess_angle / mono_3d_postprocess_depth.
                    dets.append(
                        np.concatenate([cls_dets, rcnn_3d_dets], axis=-1))

            else:
                dets.append([])
                res_rois.append([])
                res_anchors.append([])
                dets_3d.append([])
                post_time = 0

        duration_time = time.time() - end_time

        save_dets(dets,
                  img_file[0],
                  'kitti',
                  eval_config['eval_out'],
                  classes_name=eval_config['classes'])
        # Optionally save intermediate rois/anchors as well (disabled):
        # save_dets(res_rois[0], img_file[0], 'kitti', eval_config['eval_out_rois'])
        # save_dets(res_anchors[0], img_file[0], 'kitti', eval_config['eval_out_anchors'])

        sys.stdout.write(
            '\r{}/{}, duration: {}, det_time: {}, post_time: {}, data_time: {}'
            .format(i + 1, num_samples, duration_time, det_time, post_time,
                    data_time))
        sys.stdout.flush()

        end_time = time.time()
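
The loop body of mono_test follows the standard Faster R-CNN post-processing
pattern per class: threshold the scores, gather the boxes, sort by score, run
NMS, and append the score column. A condensed sketch of that pattern as a
standalone helper; the name filter_class_dets is ours, and torchvision's nms
is assumed in place of the repository's own kernel:

import torch
from torchvision.ops import nms

def filter_class_dets(boxes, scores, thresh, nms_thresh):
    """boxes: [N, 4], scores: [N] for one class; returns [M, 5] detections."""
    inds = torch.nonzero(scores > thresh).view(-1)
    if inds.numel() == 0:
        return boxes.new_zeros((0, 5))
    boxes, scores = boxes[inds], scores[inds]
    order = scores.sort(descending=True)[1]   # high to low
    boxes, scores = boxes[order], scores[order]
    keep = nms(boxes, scores, nms_thresh)
    return torch.cat((boxes[keep], scores[keep].unsqueeze(1)), 1)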
Example #14
def test_2d(eval_config, data_loader, model):
    """
    Only one image in batch is supported
    """
    num_samples = len(data_loader)
    for i, data in enumerate(data_loader):
        img_file = data['img_name']
        start_time = time.time()
        pred_boxes, scores, rois, anchors = im_detect_2d(
            model, to_cuda(data), eval_config, im_orig=data['img_orig'])
        duration_time = time.time() - start_time

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        rois = rois.squeeze()

        classes = eval_config['classes']
        thresh = eval_config['thresh']

        dets = []
        res_rois = []
        res_anchors = []
        dets_3d = []

        n_classes = len(classes) + 1  # including background

        # per-class thresholding + NMS
        for j in range(1, len(classes) + 1):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[inds, j]

                if not eval_config['class_agnostic']:
                    pred_boxes_per_class = pred_boxes.contiguous().view(
                        -1, 4 * n_classes)[:, j * 4:(j + 1) * 4]
                    cls_boxes = pred_boxes_per_class[inds, :]
                else:
                    cls_boxes = pred_boxes[inds, :]
                #  rois_boxes = rois[inds, :]
                #  anchors_boxes = anchors[inds, :]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                #  rois_dets = torch.cat((rois_boxes, cls_scores.unsqueeze(1)), 1)
                #  anchors_dets = torch.cat(
                #  (anchors_boxes, cls_scores.unsqueeze(1)), 1)

                # sort
                _, order = torch.sort(cls_scores, 0, True)

                cls_dets = cls_dets[order]
                #  rois_dets = rois_dets[order]
                #  anchors_dets = anchors_dets[order]

                keep = nms(cls_dets[:, :4], cls_dets[:, -1],
                           eval_config['nms'])

                cls_dets = cls_dets[keep.view(-1).long()]
                #  rois_dets = rois_dets[keep.view(-1).long()]
                #  anchors = anchors_dets[keep.view(-1).long()]

                # move to numpy before concatenating with the zero padding
                cls_dets = cls_dets.detach().cpu().numpy()
                #  res_rois.append(rois_dets.detach().cpu().numpy())
                #  res_anchors.append(anchors.detach().cpu().numpy())

                # pad empty 3D parameters so the output matches mono_test
                rcnn_3d = np.zeros((cls_dets.shape[0], 7))
                dets.append(np.concatenate([cls_dets, rcnn_3d], axis=-1))

            else:
                dets.append([])
                res_rois.append([])
                res_anchors.append([])
                dets_3d.append([])

        save_dets(dets,
                  img_file[0],
                  'kitti',
                  eval_config['eval_out'],
                  classes_name=eval_config['classes'])

        sys.stdout.write('\r{}/{}, duration: {}'.format(
            i + 1, num_samples, duration_time))
        sys.stdout.flush()
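
When the head is not class-agnostic, the box regressor emits 4 coordinates
per class, and test_2d slices columns [j*4:(j+1)*4] out of the flattened
prediction for class j. A tiny illustration of that memory layout, with
made-up shapes:

import torch

n_classes = 3                                    # background + 2 foreground
pred_boxes = torch.arange(2 * 4 * n_classes, dtype=torch.float32)
pred_boxes = pred_boxes.view(2, 4 * n_classes)   # [num_rois, 4 * n_classes]

j = 2                                            # second foreground class
cls_boxes = pred_boxes[:, j * 4:(j + 1) * 4]     # [num_rois, 4]
# row 0 -> [8., 9., 10., 11.]: the 4 coordinates reserved for class 2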
Example #15
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox, fg_scores, rpn_reg_loss = \
            self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

        rpn_prior_loss = torch.FloatTensor([0.]).cuda()

        # if in the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))

            if self.rpn_prior_weight != 0.:
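                # Entropy prior over the normalized foreground scores: with
                # p_k = s_k / sum(s), accumulate -sum_k p_k * log(p_k) per
                # image, divided by the number of ground-truth boxes;
                # minimizing it presumably pushes the RPN to concentrate its
                # foreground mass on a few confident proposals.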
                for i in range(batch_size):
                    gt_num = num_boxes[i].detach().cpu().item()
                    score = fg_scores[i]
                    score_sum = score.sum().detach().cpu().item()
                    score = score / score_sum
                    log_score = score * torch.log(score + 1e-6)  # p * log(p)
                    rpn_prior_loss += (-1. * log_score.sum() / float(gt_num))

                rpn_prior_loss /= batch_size
                rpn_prior_loss *= self.rpn_prior_weight
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = torch.FloatTensor([0.]).cuda()
            rpn_loss_bbox = torch.FloatTensor([0.]).cuda()

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        head_reg_loss = torch.FloatTensor([0.]).cuda()
        if self.training and self.head_reg_weight != 0.:
            head_reg_loss = (pooled_feat**2).mean() * self.head_reg_weight

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)

        # sample loc data
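        # The bbox head packs log(sigma^2) into the first 4 channels and the
        # mean mu into the rest; an offset is then drawn with the
        # reparameterization trick, x = mu + sigma * eps with eps ~ N(0, I)
        # (scaled by self.sample_sigma), which keeps the sample
        # differentiable w.r.t. mu and sigma.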
        normal_dist = torch.randn(bbox_pred.size(0), 4).float().cuda()
        log_sigma_2 = bbox_pred[:, :4]
        miu = bbox_pred[:, 4:]
        sigma = torch.exp(log_sigma_2 / 2.)
        sample_loc_data = normal_dist * sigma * self.sample_sigma + miu
        bbox_pred = sample_loc_data

        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = torch.FloatTensor([0.]).cuda()
        RCNN_loss_bbox = torch.FloatTensor([0.]).cuda()

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        head_prior_loss = torch.FloatTensor([0.]).cuda()
        if self.training and self.head_prior_weight != 0.:
            scores = cls_prob.data  # [batch, num_rois, classes]
            scores_gradient = cls_prob  # [batch, num_rois, classes]
            boxes = rois.data[:, :, 1:5]  # [batch, num_rois, 4]
            if cfg.TRAIN.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data  # [batch, num_rois, 4]
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    if self.class_agnostic:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(batch_size, -1, 4)
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(batch_size, -1,
                                                     4 * len(self.classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, batch_size)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, batch_size)
            else:
                # No bbox regression: repeat each RoI box once per class so
                # the class-specific slicing below still applies
                print("no bbox regression in IB; repeating boxes per class")
                pred_boxes = boxes.repeat(1, 1, scores.size(2))

            pred_boxes /= im_info[:, 2].data[:, None, None]  # [batch, num_rois, 4]
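            # (im_info[:, 2] is presumably the image resize scale, putting
            # pred_boxes back into original-image coordinates)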
            loss_count = 0.
            gt_classes = gt_boxes[:, :, -1].data  # [batch, num(0 pad to 20)]
            for i in range(batch_size):
                for j in range(1, len(self.classes)):  # skip background class
                    if not (gt_classes[i] == j).any():  # no such class in gt
                        continue
                    # there are gt for this class
                    inds = torch.nonzero(
                        scores[i, :, j] > self.nms_threshold).view(-1)
                    if inds.numel() == 0:
                        continue
                    cls_scores = scores[i, :, j][inds]  # [num]
                    cls_scores_gradient = scores_gradient[i, :, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if self.class_agnostic:
                        cls_boxes = pred_boxes[i, inds, :]  # [num, 4]
                    else:
                        cls_boxes = pred_boxes[i, inds][:, j * 4:(j + 1) * 4]
                    cls_scores_gradient = cls_scores_gradient[order]
                    keep = nms(cls_boxes[order, :], cls_scores[order],
                               cfg.TEST.NMS)
                    score = cls_scores_gradient[keep.view(-1).long()]  # [num_keep]
                    gt_num = (gt_classes[i] == j).sum().detach().cpu().item()
                    if score.size(0) <= gt_num:
                        continue
                    score_sum = score.sum().detach().cpu().item()
                    score = score / score_sum
                    log_score = score * torch.log(score + 1e-6)
                    head_prior_loss += (-1. * log_score.sum() / float(gt_num))
                    loss_count += 1.

            # guard against division by zero when no class contributed
            if loss_count > 0:
                head_prior_loss /= loss_count
                head_prior_loss *= self.head_prior_weight

        return rois, cls_prob, bbox_pred, \
               rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, \
               rpn_prior_loss, rpn_reg_loss, head_prior_loss, head_reg_loss
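
Both prior losses in Example #15 reduce to the same entropy term over a set
of kept foreground scores. A self-contained sketch of that computation,
assuming only that scores is a 1-D tensor of positive class probabilities and
gt_num the number of ground-truth boxes for that class (the helper name
score_entropy_prior is ours):

import torch

def score_entropy_prior(scores, gt_num, eps=1e-6):
    """Entropy prior: -sum(p * log(p)) / gt_num, p = scores / sum(scores).

    As in the original, the normalizer is detached, so gradients flow only
    through the numerator; minimizing the term favors a peaked (low-entropy)
    score distribution.
    """
    p = scores / scores.sum().detach()
    return -(p * torch.log(p + eps)).sum() / float(gt_num)

# usage: three surviving detections, one ground-truth box
loss = score_entropy_prior(torch.tensor([0.9, 0.8, 0.1]), gt_num=1)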