Example #1
 def predict_on_img(self, img, preset='evaluate', use_softnms=False, return_img=False, with_scores=False, original_size=False):
     '''
     inputs :
     img : PIL Image
     return : PIL Image (if return_img), else bboxes, labels and scores
     '''
     self.eval()
     self.use_preset(preset)
     with torch.no_grad():
         orig_size = img.size # W,H
         img = np.asarray(img).transpose(2,0,1)
         img, scale = prepare_img(self.conf, img, -1)
         img = torch.tensor(img).unsqueeze(0)
         img_size = (img.shape[2], img.shape[3]) # H,W
         detections = self.forward(img, scale)
         n_sample = len(detections.roi_cls_locs)
         n_class = self.conf.class_num + 1
         roi_cls_locs = detections.roi_cls_locs.reshape((n_sample, -1, 4)).reshape([-1,4])
         roi_cls_locs = roi_cls_locs * torch.tensor(self.loc_normalize_std, device=self.conf.device) + torch.tensor(self.loc_normalize_mean, device=self.conf.device)
         rois = torch.tensor(detections.rois.repeat(n_class, 0), dtype=torch.float).to(self.conf.device)  # each roi repeated once per class
         raw_cls_bboxes = loc2bbox(rois, roi_cls_locs)
         torch.clamp(raw_cls_bboxes[:, 0::2], 0, img_size[1], out=raw_cls_bboxes[:, 0::2])
         torch.clamp(raw_cls_bboxes[:, 1::2], 0, img_size[0], out=raw_cls_bboxes[:, 1::2])
         raw_cls_bboxes = raw_cls_bboxes.reshape([n_sample, n_class, 4])
         raw_prob = F.softmax(detections.roi_scores, dim=1)
         bboxes, labels, scores = self._suppress(raw_cls_bboxes, raw_prob, use_softnms)
         if len(bboxes) == len(labels) == len(scores) == 0:
             if not return_img:  
                 return [], [], []
             else:
                 return to_img(self.conf, img[0])
         _, indices = scores.sort(descending=True)
         bboxes = bboxes[indices]
         labels = labels[indices]
         scores = scores[indices]
         if len(bboxes) > self.max_n_predict:
             bboxes = bboxes[:self.max_n_predict]
             labels = labels[:self.max_n_predict]
             scores = scores[:self.max_n_predict]
     # move the results to the CPU and optionally draw them
     bboxes = bboxes.cpu().numpy()
     labels = labels.cpu().numpy()
     scores = scores.cpu().numpy()
     if original_size:
         bboxes = adjust_bbox(scale, bboxes, detect=True)
     if not return_img:        
         return bboxes, labels, scores
     else:
         if with_scores:
             scores_ = scores
         else:
             scores_ = []
         predicted_img = to_img(self.conf, img[0])
         if original_size:
             predicted_img = predicted_img.resize(orig_size)
         if len(bboxes) != 0 and len(labels) != 0:
             predicted_img = draw_bbox_class(self.conf, 
                                             predicted_img, 
                                             labels, 
                                             bboxes, 
                                             self.conf.correct_id_2_class, 
                                             self.class_2_color, 
                                             scores = scores_)
         
         return predicted_img
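
A quick aside on the rois.repeat(n_class, 0) step above: it duplicates each RoI once per class, so the rows line up with roi_cls_locs after its (n_sample, n_class, 4) -> (-1, 4) flattening. A minimal runnable numpy illustration (dummy values, for demonstration only):

import numpy as np

rois = np.array([[10., 10., 50., 50.],
                 [20., 30., 60., 80.]])
n_class = 3
tiled = rois.repeat(n_class, 0)  # each row repeated n_class times consecutively
print(tiled.shape)               # (6, 4)
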
def test(**kwargs):
    opt._parse(kwargs)
    device = opt.device

    cnn = AlexNet()
    in_features = cnn.classifier[6].in_features
    cnn.classifier[6] = t.nn.Linear(in_features, 21)
    cnn.load(opt.load_trained_path)
    cnn.to(device)
    svms = SVMs(method='load')
    lrs = LRs(method='load')

    dataset = NormalDataset(opt.voc_data_dir, split='test')
    dataloader = t.utils.data.DataLoader(dataset,
                                         batch_size=1,
                                         num_workers=0,
                                         shuffle=False)
    pred_bbox_last_, pred_label_last_, pred_score_last_, gt_bbox_, gt_label_ = [],[],[],[],[]
    for ii, (img, bbox_imgs, bboxes, gt_bbox, gt_label,
             _) in tqdm(enumerate(dataloader)):
        # for ii in tqdm(range(len(dataset))):
        # img,bbox_imgs,bboxes,gt_bbox,gt_label,_ = dataset.getitem(ii)
        img = img[0].numpy()
        bboxes = bboxes[0].numpy()
        gt_bbox = gt_bbox[0].numpy()
        gt_label = gt_label[0].numpy()
        inputs = bbox_imgs.to(device)[0]
        features = cnn.get_features(inputs).cpu().detach().numpy()
        '''
        imOut = cv2.UMat(img.numpy().transpose((1,2,0))*255).get()
        for i, rect in enumerate(bboxes):
            ymin, xmin, ymax, xmax = rect
            cv2.rectangle(imOut, (xmin, ymin), (xmax, ymax), (0, 255, 0), 1, cv2.LINE_AA)
        cv2.imwrite('./pic/1.jpg',imOut)
        
        pred_label,pred_score,pred_bg = svms.predict(features)
        mask = np.where(pred_bg == False)[0]
        
        counts += 1
        if mask.size == 0:
            null_counts += 1
        if ii == 10:
            print("null per:{}".format(1.0*null_counts/counts))
            break
        
        pred_label = pred_label[mask]
        pred_score = pred_score[mask]
        pred_bboxes_unregress = bboxes[mask]  # np.ndarray
        bbox_features = features[mask]
        '''
        pred_label, pred_score = svms.predict(features)
        pred_bboxes_unregress = bboxes
        bbox_features = features
        # group bboxes, scores and features by predicted class, for per-class NMS
        pred_bboxes_2_nms = dict()
        pred_score_2_nms = dict()
        bbox_features_2_nms = dict()
        for lab in np.unique(pred_label):
            lab_mask = np.where(pred_label == lab)[0]
            pred_bboxes_2_nms[opt.VOC_BBOX_LABEL_NAMES[
                lab]] = pred_bboxes_unregress[lab_mask]
            pred_score_2_nms[
                opt.VOC_BBOX_LABEL_NAMES[lab]] = pred_score[lab_mask]
            bbox_features_2_nms[
                opt.VOC_BBOX_LABEL_NAMES[lab]] = bbox_features[lab_mask]

        # nms & regression
        pred_bbox_last = []
        pred_label_last = []
        pred_score_last = []
        for cat, bbox_nms in pred_bboxes_2_nms.items():
            mask = np.where(pred_score_2_nms[cat] > opt.nms_thresh)[0]
            if mask.size == 0:
                continue
            else:
                bbox_nms = bbox_nms[mask]
                pred_score_2_nms[cat] = pred_score_2_nms[cat][mask]
                bbox_features_2_nms[cat] = bbox_features_2_nms[cat][mask]
            keep_mask = nms(
                t.from_numpy(bbox_nms[:, [1, 0, 3, 2]]).float(),  # reorder to (x1, y1, x2, y2) for nms
                t.from_numpy(pred_score_2_nms[cat]).float(),
                opt.iou_thresh).numpy()
            loc = lrs.predict(cat, bbox_features_2_nms[cat][keep_mask])
            pred_bbox_cat = loc2bbox(bbox_nms[keep_mask], loc)

            pred_score_last.append(pred_score_2_nms[cat][keep_mask])
            pred_bbox_last.append(pred_bbox_cat)
            # store integer class ids so the labels are comparable with gt_label
            pred_label_last.extend([opt.VOC_BBOX_LABEL_NAMES.index(cat)] *
                                   pred_bbox_cat.shape[0])
        if len(pred_label_last) > 0:
            wrong_2_draw(img, np.vstack(pred_bbox_last),
                         np.array(pred_label_last), gt_bbox, gt_label)

            # append per-image arrays, the layout eval_detection_voc expects
            pred_bbox_last_.append(np.vstack(pred_bbox_last))
            pred_label_last_.append(np.array(pred_label_last))
            pred_score_last_.append(np.hstack(pred_score_last))
            gt_bbox_.append(gt_bbox)
            gt_label_.append(gt_label)
    # evaluation
    res = eval_detection_voc(pred_bbox_last_, pred_label_last_,
                             pred_score_last_, gt_bbox_, gt_label_)
    print(res)
    pd.DataFrame(res).to_excel('./res.xlsx')
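
A note on the layout built above: eval_detection_voc (chainercv-style) takes one array per image, which is why results are appended per image rather than concatenated across images. A small sketch of the expected structure, with dummy values (the exact signature is assumed here; check your local helper):

import numpy as np

pred_bboxes = [np.array([[10., 10., 50., 50.]])]  # per image: (R, 4) boxes, (ymin, xmin, ymax, xmax)
pred_labels = [np.array([3])]                     # per image: (R,) integer class ids
pred_scores = [np.array([0.9])]                   # per image: (R,) confidences
gt_bboxes = [np.array([[12., 8., 48., 52.]])]
gt_labels = [np.array([3])]
# res = eval_detection_voc(pred_bboxes, pred_labels, pred_scores,
#                          gt_bboxes, gt_labels)
# res['map'] would then be the mean AP over classes.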
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        """input should  be ndarray
        Propose RoIs.

        Inputs :obj:`loc, score, anchor` refer to the same anchor when indexed
        by the same index.

        On notations, :math:`R` is the total number of anchors. This is equal
        to product of the height and the width of an image and the number of
        anchor bases per pixel.

        Type of the output is same as the inputs.

        Args:
            loc (array): Predicted offsets and scaling to anchors.
                Its shape is :math:`(R, 4)`.
            score (array): Predicted foreground probability for anchors.
                Its shape is :math:`(R,)`.
            anchor (array): Coordinates of anchors. Its shape is
                :math:`(R, 4)`.
            img_size (tuple of ints): A tuple :obj:`height, width`,
                which contains image size after scaling.
            scale (float): The scaling factor used to scale an image after
                reading it from a file.

        Example shapes observed at runtime::

            loc.shape, score.shape, anchor.shape, img_size, scale =
            ((16650, 4), (16650,), (16650, 4), (600, 800),
             tensor([1.6000], dtype=torch.float64))
            # 16650 = 37 (hh) * 50 (ww) * 9

        Returns:
            array:
            An array of coordinates of proposal boxes.
            Its shape is :math:`(S, 4)`. :math:`S` is less than
            :obj:`self.n_test_post_nms` in test time and less than
            :obj:`self.n_train_post_nms` in train time. :math:`S` depends on
            the size of the predicted bounding boxes and the number of
            bounding boxes discarded by NMS.

        """
        # NOTE: at test time, remember to call faster_rcnn.eval()
        # so that self.training is set to False
        if self.parent_model.training:
            n_pre_nms = self.n_train_pre_nms
            n_post_nms = self.n_train_post_nms
        else:
            n_pre_nms = self.n_test_pre_nms
            n_post_nms = self.n_test_post_nms

        # Convert anchors into proposal via bbox transformations
        roi = loc2bbox(anchor, loc)
        # this loc2bbox outputs rois in (x1, y1, x2, y2) format
        """
        loc2bbox combines the hh*ww*15 locs computed by the RPN with the
        hh*ww*15 anchors, applies the decoding formulas, and produces the
        final hh*ww*15 predicted bboxes, which are simply called rois here.
        """

        # Clip predicted boxes to image.
        roi[:, 0:4:2] = np.clip(roi[:, 0:4:2], 0, img_size[1])
        # roi[:, [0, 2]] is the same as roi[:, slice(0, 4, 2)]
        # after computing [x1, x2], np.clip trims away the parts of the
        # bboxes that extend beyond the image boundaries
        # note: img_size is the size of the rescaled image that is fed to
        # the network, not the original image size
        roi[:, 1:4:2] = np.clip(roi[:, 1:4:2], 0, img_size[0])

        # Remove predicted boxes with either height or width < threshold.

        # scale (e.g. 1.6) is the factor by which the original image was
        # enlarged on its way into the network, so 16 px in the original
        # become 25.6 px at the input; that is the discard threshold here
        min_size = self.min_size * scale
        ws = roi[:, 2] - roi[:, 0]
        hs = roi[:, 3] - roi[:, 1]
        keep = np.where((hs >= min_size) & (ws >= min_size))[0]
        # recompute height and width to drop boxes smaller than min_size (e.g. 25.6)
        roi = roi[keep, :]
        score = score[keep]
        # keep the surviving rois and their scores; score is the
        # probability that the roi contains foreground

        # Sort all (proposal, score) pairs by score from highest to lowest.
        # Take top pre_nms_topN (e.g. 6000).
        order = score.ravel().argsort()[::-1]  # indices that sort scores in descending order
        if n_pre_nms > 0:  # n_pre_nms should always be positive anyway, right?
            order = order[:n_pre_nms]
        roi = roi[order, :]  # keep the n_pre_nms highest-scoring rois
        #         score = score[order]
        # Apply nms (e.g. threshold = 0.7).
        # Take after_nms_topN (e.g. 300).

        keep = nms(
            torch.cat(
                (torch.tensor(roi), torch.tensor(score[order]).unsqueeze(1)),
                dim=1).cuda(), self.nms_thresh).tolist()
        # calls the CuPy version of nms
        if n_post_nms > 0:
            keep = keep[:n_post_nms]
        roi = roi[keep]
        # finally output at most n_post_nms rois
        return roi
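
loc2bbox itself is not shown in these snippets. Below is a minimal numpy sketch of the standard Faster R-CNN decoding it performs, written in the (y1, x1, y2, x2) convention used by Example #4; the snippet above states its rois are (x1, y1, x2, y2), so swap the axes accordingly. This is an illustration of the formula, not the exact library function:

import numpy as np

def loc2bbox_sketch(src_bbox, loc):
    # src_bbox: (R, 4) anchors/rois as (y1, x1, y2, x2)
    # loc: (R, 4) predicted offsets (dy, dx, dh, dw)
    h = src_bbox[:, 2] - src_bbox[:, 0]
    w = src_bbox[:, 3] - src_bbox[:, 1]
    ctr_y = src_bbox[:, 0] + 0.5 * h
    ctr_x = src_bbox[:, 1] + 0.5 * w

    dy, dx, dh, dw = loc.T
    new_ctr_y = dy * h + ctr_y  # shift the box center
    new_ctr_x = dx * w + ctr_x
    new_h = np.exp(dh) * h      # rescale the box size
    new_w = np.exp(dw) * w

    dst_bbox = np.empty_like(loc)
    dst_bbox[:, 0] = new_ctr_y - 0.5 * new_h
    dst_bbox[:, 1] = new_ctr_x - 0.5 * new_w
    dst_bbox[:, 2] = new_ctr_y + 0.5 * new_h
    dst_bbox[:, 3] = new_ctr_x + 0.5 * new_w
    return dst_bbox
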
Example #4
    def predict(self, imgs, sizes=None, visualize=False):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in an image. \
               Each bounding box is organized by \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """
        self.eval()
        if visualize:
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                size = img.shape[1:]
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
        bboxes = list()
        labels = list()
        scores = list()
        for img, size in zip(prepared_imgs, sizes):
            img = at.totensor(img[None]).float()
            scale = img.shape[3] / size[1]
            roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(
                at.tonumpy(roi).reshape((-1, 4)),
                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clip bounding box
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = (F.softmax(at.totensor(roi_score), dim=1))

            bbox, label, score = self._suppress(cls_bbox, prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        self.use_preset('evaluate')
        self.train()
        return bboxes, labels, scores
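
A runnable illustration of the de-normalization step in predict: roi_cls_loc comes out flattened as (R, n_class * 4), so the 4-vector mean/std are tiled once per class before broadcasting. The mean/std values below are the usual defaults, assumed for the demo:

import torch

n_class, R = 3, 2
loc_normalize_mean = (0., 0., 0., 0.)
loc_normalize_std = (0.1, 0.1, 0.2, 0.2)
roi_cls_loc = torch.randn(R, n_class * 4)
mean = torch.Tensor(loc_normalize_mean).repeat(n_class)[None]  # (1, 12)
std = torch.Tensor(loc_normalize_std).repeat(n_class)[None]    # (1, 12)
roi_cls_loc = (roi_cls_loc * std + mean).view(-1, n_class, 4)
print(roi_cls_loc.shape)  # torch.Size([2, 3, 4])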
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        """input should  be ndarray
        Propose RoIs.

        Inputs :obj:`loc, score, anchor` refer to the same anchor when indexed
        by the same index.

        On notations, :math:`R` is the total number of anchors. This is equal
        to product of the height and the width of an image and the number of
        anchor bases per pixel.

        Type of the output is same as the inputs.

        Args:
            loc (array): Predicted offsets and scaling to anchors.
                Its shape is :math:`(R, 4)`.
            score (array): Predicted foreground probability for anchors.
                Its shape is :math:`(R,)`.
            anchor (array): Coordinates of anchors. Its shape is
                :math:`(R, 4)`.
            img_size (tuple of ints): A tuple :obj:`height, width`,
                which contains image size after scaling.
            scale (float): The scaling factor used to scale an image after
                reading it from a file.

        Returns:
            array:
            An array of coordinates of proposal boxes.
            Its shape is :math:`(S, 4)`. :math:`S` is less than
            :obj:`self.n_test_post_nms` in test time and less than
            :obj:`self.n_train_post_nms` in train time. :math:`S` depends on
            the size of the predicted bounding boxes and the number of
            bounding boxes discarded by NMS.

        """
        # NOTE: at test time, remember to call r_fcn.eval()
        # so that self.training is set to False
        if self.rpn_model.training:
            n_pre_nms = self.n_train_pre_nms
            n_post_nms = self.n_train_post_nms
        else:
            n_pre_nms = self.n_test_pre_nms
            n_post_nms = self.n_test_post_nms

        # Convert the anchors to the ROIs
        rois = loc2bbox(anchor, loc)

        # clip rois
        rois[:, slice(0, 4, 2)] = np.clip(rois[:, slice(0, 4, 2)], 0,
                                          img_size[0])
        rois[:, slice(1, 4, 2)] = np.clip(rois[:, slice(1, 4, 2)], 0,
                                          img_size[1])

        # remove small rois
        min_size = self.min_size * scale
        hs = rois[:, 2] - rois[:, 0]  # height
        ws = rois[:, 3] - rois[:, 1]  # width
        keep = np.where((hs >= min_size) & (ws >= min_size))[0]

        rois = rois[keep, :]
        score = score[keep]

        # sorted by score
        # get topN anchors to NMS, e.g.N=12000(training),6000(testing)
        order = score.ravel().argsort()[::-1]  # [::-1] reverses to descending order
        if n_pre_nms > 0:
            order = order[:n_pre_nms]  # shape:(n_pre_nms, )
        rois = rois[order, :]
        score = score[order]
        keep = torch.ops.torchvision.nms(
            torch.from_numpy(rois).cuda(),
            torch.from_numpy(score).cuda(), self.nms_thresh)

        if n_post_nms > 0:
            keep = keep[:n_post_nms]

        rois = rois[keep.cpu().numpy()]
        # rois_score = score[keep.cpu().numpy()]

        return rois
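
For reference, torch.ops.torchvision.nms is the same kernel that torchvision exposes publicly as torchvision.ops.nms. NMS only compares IoUs, which are unchanged when the x and y axes are swapped consistently, so feeding it the (y1, x1, y2, x2)-ordered rois above still works. A self-contained CPU check with dummy boxes:

import torch
from torchvision.ops import nms

# two heavily overlapping boxes and one far away, as (x1, y1, x2, y2)
boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
keep = nms(boxes, scores, iou_threshold=0.5)
print(keep)  # tensor([0, 2]): the lower-scoring overlapping box is suppressed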