Example #1
    def forward_rcnn_batch(self, base_feat, branch, rois, wgt_boxes, wnum_boxes, gt_boxes, num_boxes, im_info, image_classes, output_refine=False):
        batch_size = base_feat.size(0)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(
                rois, wgt_boxes, wnum_boxes, gt_boxes, num_boxes)
            out_rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            out_rois = rois
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None

        out_rois = Variable(out_rois)

        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(
                out_rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(
                base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, out_rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, out_rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat, branch)

        # compute bbox offset
        bbox_pred = branch.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(
                bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(
                rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = branch.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:

            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(
                bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

            # add image-level label regularization
            rois_batch_size = out_rois.size(1)
            rois_prob = F.softmax(cls_score, 1).view(batch_size, rois_batch_size, -1)

            valid_rois_prob = (rois_label > 0).view(batch_size, rois_batch_size, -1).float()
            rois_attention = rois_prob * valid_rois_prob

            # ignore background
            rois_prob = rois_prob[:, :, 1:]
            rois_attention = rois_attention[:, :, 1:]

            # rois_attention_prob = torch.sum(rois_prob * rois_attention, dim=1) / (torch.sum(rois_attention, dim=1) + 1e-10)
            rois_attention_prob, _ = torch.max(rois_prob, dim=1)
            image_loss_cls = F.binary_cross_entropy(rois_attention_prob, image_classes[:, 1:])
        else:
            image_loss_cls = None

        if self.training:
            cls_prob = cls_prob.view(batch_size, out_rois.size(1), -1)
            bbox_pred = bbox_pred.view(batch_size, out_rois.size(1), -1)
        else:
            cls_prob = cls_prob.view(1, out_rois.size(1), -1)
            bbox_pred = bbox_pred.view(1, out_rois.size(1), -1)

        if self.training and output_refine:
            # get transformation for wgt_boxes
            wgt_rois = wgt_boxes.new(wgt_boxes.size()).zero_()
            wgt_rois[:, :, 1:5] = wgt_boxes[:, :, :4]
            batch_size = base_feat.size(0)
            for i in range(batch_size):
                wgt_rois[i, :, 0] = i  # batch index for each image's boxes

            # do roi pooling based on predicted rois
            if cfg.POOLING_MODE == 'crop':
                # pdb.set_trace()
                # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
                grid_xy = _affine_grid_gen(
                    wgt_rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
                grid_yx = torch.stack(
                    [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
                gt_pooled_feat = self.RCNN_roi_crop(
                    base_feat, Variable(grid_yx).detach())
                if cfg.CROP_RESIZE_WITH_MAX_POOL:
                    gt_pooled_feat = F.max_pool2d(gt_pooled_feat, 2, 2)
            elif cfg.POOLING_MODE == 'align':
                gt_pooled_feat = self.RCNN_roi_align(
                    base_feat, wgt_rois.view(-1, 5))
            elif cfg.POOLING_MODE == 'pool':
                gt_pooled_feat = self.RCNN_roi_pool(
                    base_feat, wgt_rois.view(-1, 5))

            # feed pooled features to top model
            gt_pooled_feat = self._head_to_tail(gt_pooled_feat, branch)

            # compute bbox offset
            wgt_bbox_delta = branch.RCNN_bbox_pred(gt_pooled_feat)
            wgt_bbox_delta = wgt_bbox_delta.view(-1, 4) * torch.FloatTensor(
                cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            # 21 = 20 PASCAL VOC foreground classes + background (hardcoded)
            wgt_bbox_delta = wgt_bbox_delta.view(batch_size, -1, 4 * 21)
            wgt_bbox_out_rois = bbox_transform_inv(
                wgt_boxes, wgt_bbox_delta, batch_size)

            wgt_bbox_out_rois = clip_boxes(
                wgt_bbox_out_rois, im_info.data, batch_size)

            wgt_bbox_out = wgt_boxes.new(wgt_boxes.size()).zero_()

            wgt_cls = Variable(
                wgt_boxes[:, :, 4].data, requires_grad=False).long()
            for i in range(batch_size):
                for j in range(20):  # 20 PASCAL VOC foreground classes (hardcoded)
                    cls_ind = wgt_cls[i, j]
                    wgt_bbox_out[i, j, :4] = wgt_bbox_out_rois[
                        i, j, cls_ind * 4:cls_ind * 4 + 4]

            wgt_bbox_out[:, :, 4] = wgt_boxes[:, :, 4]

            # padded (all-zero) gt entries end up with width == height == 1
            wgt_boxes_x = (wgt_boxes[:, :, 2] - wgt_boxes[:, :, 0] + 1)
            wgt_boxes_y = (wgt_boxes[:, :, 3] - wgt_boxes[:, :, 1] + 1)
            wgt_area_zero = (wgt_boxes_x == 1) & (wgt_boxes_y == 1)
            wgt_bbox_out.masked_fill_(wgt_area_zero.view(
                batch_size, wgt_area_zero.size(1), 1).expand(wgt_boxes.size()), 0)
            wgt_bbox_out = wgt_bbox_out.detach()
        else:
            wgt_bbox_out = None

        return (out_rois, cls_prob, bbox_pred, RCNN_loss_cls, RCNN_loss_bbox, rois_label, image_loss_cls), wgt_bbox_out
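# A self-contained sketch (illustrative values, not from the source) of the
# torch.gather trick used above: each RoI keeps only the 4 box deltas that
# belong to its own class out of the (num_rois, num_classes * 4) prediction.
import torch

num_rois, num_classes = 2, 3
bbox_pred = torch.arange(num_rois * num_classes * 4, dtype=torch.float32)
bbox_pred = bbox_pred.view(num_rois, num_classes * 4)
rois_label = torch.tensor([2, 0])                     # class index per RoI
bbox_pred_view = bbox_pred.view(num_rois, num_classes, 4)
select = torch.gather(
    bbox_pred_view, 1,
    rois_label.view(num_rois, 1, 1).expand(num_rois, 1, 4))
print(select.squeeze(1))  # row 0 -> deltas 8..11, row 1 -> deltas 12..15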
Example #2
        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        for j in range(1, imdb.num_classes):
def inference(_test_img_path,
              _check_point,
              _score_threshold=0.3,
              class_agnostic=False):
    test_img_path = _test_img_path
    check_point = _check_point
    score_threshold = _score_threshold

    device = torch.device("cuda: 0" if torch.cuda.is_available() else "cpu")

    fasterRCNN = resnet(cfg.backbone,
                        is_training=False,
                        pretrained=False,
                        class_agnostic=class_agnostic)
    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (check_point))
    checkpoint = torch.load(check_point)
    fasterRCNN.load_state_dict(checkpoint['model_state_dict'])
    print('load model successfully!')

    fasterRCNN.eval()
    fasterRCNN.to(device)

    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    im_data = im_data.cuda()
    im_info = im_info.cuda()

    start_time = time.time()

    test_img = cv2.imread(test_img_path)

    test_img_copy = copy.deepcopy(test_img)
    test_img_copy, scale = image_preprocess(test_img_copy)
    test_img_copy = torch.from_numpy(test_img_copy)
    # im_info holds (H, W); the resize scale is kept separately in `scale`
    im_info_tensor = torch.Tensor(
        [[[test_img_copy.size(2), test_img_copy.size(3)]]])

    im_data.resize_(test_img_copy.shape).copy_(test_img_copy)
    im_info.resize_(im_info_tensor.shape).copy_(im_info_tensor)

    rois, cls_prob, bbox_pred, _, _, _, _, _ = fasterRCNN(im_data,
                                                          None)  # no ground truth at inference
    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    box_deltas = bbox_pred.data
    if cfg.bbox_normalize_targets_precomputed:
        if class_agnostic:
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.bbox_normalize_std).cuda() \
                         + torch.FloatTensor(cfg.bbox_normalize_means).cuda()
            box_deltas = box_deltas.view(1, -1, 4)
        else:
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.bbox_normalize_std).cuda() \
                         + torch.FloatTensor(cfg.bbox_normalize_means).cuda()
            box_deltas = box_deltas.view(1, -1, 4 * len(cfg.class_to_ind))
    pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
    pred_boxes = clip_boxes(pred_boxes, (im_data.size(2), im_data.size(3)), 1)
    pred_boxes = pred_boxes / scale

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    for j in range(1, len(cfg.class_to_ind)):
        inds = torch.nonzero(scores[:, j] > score_threshold).view(-1)
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)

            if class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = cls_boxes[order]
            cls_scores = cls_scores[order]

            keep = nms(cls_dets, cls_scores, cfg.test_nms_threshold)
            cls_dets = cls_dets[keep.view(-1).long()]  # boxes kept for the current class
            cls_scores = cls_scores[keep.view(-1).long()]
            test_img = draw_target(test_img, cls_dets, cls_scores, j)

    end_time = time.time()
    print('detect time:{}s'.format(end_time - start_time))

    cv2.imshow('result', test_img)
    cv2.waitKey(0)
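# A minimal usage sketch for inference(); the image and checkpoint paths are
# placeholders (assumptions), not paths from the source.
if __name__ == '__main__':
    inference('demo/test.jpg',
              'weights/faster_rcnn_final.pth',
              _score_threshold=0.3,
              class_agnostic=False)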
Example #4
    def forward(self, input):
        # Take the RPN's predicted foreground scores along the channel axis.
        # Note: of the _num_anchors * 2 channels, the first _num_anchors hold
        # background probabilities and the last _num_anchors hold foreground
        # probabilities.
        scores = input[0][:, self._num_anchors:, :, :]
        bbox_deltas = input[1]
        im_info = input[2]
        is_training = input[3]

        if is_training:
            pre_nms_topN = cfg.train_rpn_pre_nms_top_N
            post_nms_topN = cfg.train_rpn_post_nms_top_N
            nms_thresh = cfg.rpn_nms_thresh
        else:
            pre_nms_topN = cfg.test_rpn_pre_nms_top_N
            post_nms_topN = cfg.test_rpn_post_nms_top_N
            nms_thresh = cfg.rpn_nms_thresh

        batch_size = bbox_deltas.size(0)

        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
                                  shift_x.ravel(), shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()

        A = self._num_anchors
        K = shifts.size(0)

        self._anchors = self._anchors.type_as(scores)
        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # clip proposals so they stay within the image boundary
        proposals = clip_boxes(proposals, im_info, batch_size)

        scores_keep = scores
        proposals_keep = proposals
        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]  # keep the pre_nms_topN highest-scoring proposals

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)
            keep_idx_i = nms(proposals_single, scores_single.squeeze(1), nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i   # batch index this proposal belongs to
            output[i, :num_proposal, 1:] = proposals_single  # proposal coordinates

        return output
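# A self-contained sketch (toy anchor templates are assumptions) of the
# broadcast used above: a (1, A, 4) anchor template plus a (K, 1, 4) grid of
# shifts yields all K * A anchors over the feature map.
import numpy as np
import torch

feat_stride, feat_h, feat_w = 16, 2, 2
base_anchors = torch.tensor([[-8., -8., 8., 8.],     # A = 3 toy templates
                             [-16., -8., 16., 8.],
                             [-8., -16., 8., 16.]])
sx = np.arange(feat_w) * feat_stride
sy = np.arange(feat_h) * feat_stride
sx, sy = np.meshgrid(sx, sy)
shifts = torch.from_numpy(
    np.vstack((sx.ravel(), sy.ravel(), sx.ravel(), sy.ravel())).T).float()
anchors = base_anchors.view(1, 3, 4) + shifts.view(-1, 1, 4)  # (K, A, 4)
print(anchors.view(-1, 4).shape)  # torch.Size([12, 4]) == K * A anchors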
def evalue(check_point,
           cache_path='./result.pkl',
           class_agnostic=False,
           ovthresh=0.5,
           use_07_metric=False):

    ind_class = {v: k for k, v in cfg.class_to_ind.items()}
    # store every class's detection results
    class_result_dic = {k: [] for k in cfg.class_to_ind.keys()}

    imagenames = []

    if not os.path.exists(cache_path):

        test_set = PASCAL_VOC(cfg.testset_root_path, 'test')
        dataloader = DataLoader(test_set,
                                batch_size=cfg.batch_size,
                                shuffle=True,
                                num_workers=4)

        device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")

        fasterRCNN = resnet(cfg.backbone,
                            is_training=False,
                            pretrained=False,
                            class_agnostic=class_agnostic)
        fasterRCNN.create_architecture()

        print("load checkpoint %s" % (check_point))

        checkpoint = torch.load(check_point)
        fasterRCNN.load_state_dict(checkpoint['model_state_dict'])

        print('load model successfully!')

        fasterRCNN.eval()
        fasterRCNN.to(device)

        im_data = torch.FloatTensor(1)
        im_info = torch.FloatTensor(1)
        gt_boxes = torch.FloatTensor(1)
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        gt_boxes = gt_boxes.cuda()

        #detect for result
        for batch_data in tqdm(dataloader):
            # batch_data = dataloader.next()
            with torch.no_grad():
                im_data.resize_(batch_data['image'].size()).copy_(
                    batch_data['image'])
                gt_boxes.resize_(batch_data['gt_boxes'].size()).copy_(
                    batch_data['gt_boxes'])
                im_info.resize_(batch_data['im_info'].size()).copy_(
                    batch_data['im_info'])

                image_name = os.path.basename(
                    batch_data['imname'][0]).split('.')[0]
                imagenames.append(image_name)

                rois, cls_prob, bbox_pred, _, _, _, _, _ = fasterRCNN(
                    im_data, gt_boxes)

                scores = cls_prob.data
                boxes = rois.data[:, :, 1:5]

                box_deltas = bbox_pred.data

                if cfg.bbox_normalize_targets_precomputed:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.bbox_normalize_std).cuda() \
                                 + torch.FloatTensor(cfg.bbox_normalize_means).cuda()
                    if class_agnostic:
                        box_deltas = box_deltas.view(1, -1, 4)
                    else:
                        box_deltas = box_deltas.view(
                            1, -1, 4 * len(cfg.class_to_ind))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info, 1)
                pred_boxes = pred_boxes / batch_data['im_info'][0, 2]

                scores = scores.squeeze()
                pred_boxes = pred_boxes.squeeze()

                for j in range(1, len(cfg.class_to_ind)):
                    inds = torch.nonzero(scores[:, j] > 0).view(-1)
                    if inds.numel() > 0:
                        cls_scores = scores[:, j][inds]
                        _, order = torch.sort(cls_scores, 0, True)

                        if class_agnostic:
                            cls_boxes = pred_boxes[inds, :]
                        else:
                            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                        cls_dets = cls_boxes[order]
                        cls_scores = cls_scores[order]

                        keep = nms(cls_dets, cls_scores,
                                   cfg.test_nms_threshold)
                        cls_dets = cls_dets[keep.view(
                            -1).long()]  # boxes kept for the current class
                        cls_scores = cls_scores[keep.view(-1).long()]

                        for score, bbox in zip(cls_scores, cls_dets):
                            class_result_dic[ind_class[j]].append({
                                'image_name':
                                image_name,
                                'score':
                                score,
                                'bbox': [bbox[0], bbox[1], bbox[2], bbox[3]]
                            })

        print('writing result cache ...')
        with open(cache_path, 'wb') as fp:
            pickle.dump(class_result_dic, fp)
    else:
        with open(
                os.path.join(cfg.testset_root_path, 'ImageSets', 'Main',
                             'test.txt')) as fp:
            for line in fp:
                imagenames.append(line.strip())
        with open(cache_path, 'rb') as fp:
            class_result_dic = pickle.load(fp)

    print('compute mAP ... ')
    # compute mAP
    recs = {}
    for i, imagename in enumerate(imagenames):
        recs[imagename] = parse_rec(
            os.path.join(cfg.testset_root_path, 'Annotations',
                         imagename + '.xml'))

    # extract gt objects for this class
    mAP = 0
    for classname in cfg.class_to_ind.keys():
        if classname == 'BG':
            continue
        print(classname, end=' ')
        class_recs = {}
        npos = 0
        for imagename in imagenames:
            R = [obj for obj in recs[imagename] if obj['name'] == classname]
            bbox = np.array([x['bbox'] for x in R])
            difficult = np.array([x['difficult'] for x in R]).astype(bool)
            det = [False] * len(R)
            npos = npos + sum(~difficult)
            class_recs[imagename] = {
                'bbox': bbox,
                'difficult': difficult,
                'det': det
            }

        class_result = class_result_dic[classname]
        image_ids = [r['image_name'] for r in class_result]
        confidence = np.array([float(r['score']) for r in class_result])
        BB = np.array([r['bbox'] for r in class_result])

        # sort by confidence
        sorted_ind = np.argsort(-confidence)
        BB = BB[sorted_ind, :]
        image_ids = [image_ids[x] for x in sorted_ind]

        # go down dets and mark TPs and FPs
        nd = len(image_ids)
        tp = np.zeros(nd)
        fp = np.zeros(nd)
        for d in range(nd):
            R = class_recs[image_ids[d]]
            bb = BB[d, :].astype(float)
            ovmax = -np.inf
            BBGT = R['bbox'].astype(float)
            if BBGT.size > 0:
                # compute overlaps
                # intersection
                ixmin = np.maximum(BBGT[:, 0], bb[0])
                iymin = np.maximum(BBGT[:, 1], bb[1])
                ixmax = np.minimum(BBGT[:, 2], bb[2])
                iymax = np.minimum(BBGT[:, 3], bb[3])
                iw = np.maximum(ixmax - ixmin + 1., 0.)
                ih = np.maximum(iymax - iymin + 1., 0.)
                inters = iw * ih

                # union
                uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                       (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                       (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

                overlaps = inters / uni
                ovmax = np.max(overlaps)
                jmax = np.argmax(overlaps)

            if ovmax > ovthresh:
                if not R['difficult'][jmax]:
                    if not R['det'][jmax]:
                        tp[d] = 1.
                        R['det'][jmax] = 1
                    else:
                        fp[d] = 1.
            else:
                fp[d] = 1.

        # compute precision recall
        fp = np.cumsum(fp)
        tp = np.cumsum(tp)
        rec = tp / float(npos)
        # avoid divide by zero in case the first detection matches a difficult
        # ground truth
        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        ap = voc_ap(rec, prec, use_07_metric)
        print(ap)
        mAP += ap
    mAP = mAP / (len(cfg.class_to_ind) - 1)

    print('mAP:', mAP)
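# A minimal usage sketch for evalue(); the checkpoint path is a placeholder.
# Reusing cache_path lets repeated runs skip the detection pass entirely.
if __name__ == '__main__':
    evalue('weights/faster_rcnn_final.pth',
           cache_path='./result.pkl',
           ovthresh=0.5,
           use_07_metric=False)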