Code example #1
    def filter_results(self, boxlist, num_classes, return_idxs=False):
        """Returns bounding-box detection results by thresholding on scores and
        applying non-maximum suppression (NMS).
        """
        # unwrap the boxlist to avoid additional overhead.
        # if we had multi-class NMS, we could perform this directly on the boxlist
        boxes = boxlist.bbox.reshape(-1, num_classes * 4)
        scores = boxlist.get_field("scores").reshape(-1, num_classes)

        device = scores.device
        result = []
        idxs = []
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        inds_all = scores > self.score_thresh
        for j in range(1, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)
            scores_j = scores[inds, j]
            boxes_j = boxes[inds, j * 4:(j + 1) * 4]
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            if return_idxs:
                boxlist_for_class, boxlist_idxs = boxlist_nms(
                    boxlist_for_class, self.nms, return_idxs=True)
                inds_j = inds[boxlist_idxs]
            else:
                boxlist_for_class = boxlist_nms(boxlist_for_class,
                                                self.nms,
                                                return_idxs=False)
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels, ), j, dtype=torch.int64,
                           device=device))
            result.append(boxlist_for_class)
            if return_idxs:
                idxs.append(inds_j)

        result = cat_boxlist(result)
        number_of_detections = len(result)
        if return_idxs:
            idxs = torch.cat(idxs, dim=0)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.detections_per_img > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.detections_per_img + 1)
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
            if return_idxs:
                idxs = idxs[keep]

        if return_idxs:
            return result, idxs
        return result
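The detections_per_img cap above relies on torch.kthvalue: with N detections and a budget of K, the (N - K + 1)-th smallest score is exactly the K-th largest, so keeping everything at or above it retains the top K. A self-contained sketch of just that step, with illustrative names that are not part of the original class:

import torch

def cap_detections(scores: torch.Tensor, max_dets: int) -> torch.Tensor:
    """Indices of (roughly) the max_dets highest-scoring detections."""
    num = scores.numel()
    if max_dets <= 0 or num <= max_dets:
        return torch.arange(num)
    # the (num - max_dets + 1)-th smallest score == the max_dets-th largest
    image_thresh, _ = torch.kthvalue(scores.cpu(), num - max_dets + 1)
    return torch.nonzero(scores >= image_thresh.item()).squeeze(1)

scores = torch.tensor([0.9, 0.1, 0.8, 0.75, 0.3, 0.2])
print(cap_detections(scores, 3).tolist())  # [0, 2, 3]

Ties at the threshold can keep slightly more than max_dets boxes, which matches the behaviour of the snippets in this listing.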
Code example #2
    def select_over_all_levels(self, boxlists):
        num_images = len(boxlists)
        results = []
        for i in range(num_images):
            scores = boxlists[i].get_field("scores")
            scores_all = boxlists[i].get_field("scores_all")
            labels = boxlists[i].get_field("labels")
            boxes = boxlists[i].bbox
            boxlist = boxlists[i]
            result = []
            # skip the background
            for j in range(1, self.num_classes):
                inds = (labels == j).nonzero().view(-1)

                scores_j = scores[inds]
                scores_j_all = scores_all[inds]
                boxes_j = boxes[inds, :].view(-1, 4)
                boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
                boxlist_for_class.add_field("scores", scores_j)
                boxlist_for_class.add_field("scores_all", scores_j_all)
                boxlist_for_class = boxlist_nms(
                    boxlist_for_class, self.nms_thresh,
                    score_field="scores", iou_flag=True
                )
                if self.use_nms_iom:
                    boxlist_for_class = boxlist_nms(
                            boxlist_for_class, self.nms_iom, score_field="scores", iou_flag=False
                    )
                num_labels = len(boxlist_for_class)
                boxlist_for_class.add_field(
                    "labels", torch.full((num_labels,), j,
                                         dtype=torch.int64,
                                         device=scores.device)
                )
                result.append(boxlist_for_class)

            result = cat_boxlist(result)
            if self.use_nms_inter_class:
                result = boxlist_nms(
                    result, self.nms_inter_class, score_field="scores", iou_flag=True
                )
            number_of_detections = len(result)

            # Limit to max_per_image detections **over all classes**
            if number_of_detections > self.fpn_post_nms_top_n > 0:
                cls_scores = result.get_field("scores")
                image_thresh, _ = torch.kthvalue(
                    cls_scores.cpu(),
                    number_of_detections - self.fpn_post_nms_top_n + 1
                )
                keep = cls_scores >= image_thresh.item()
                keep = torch.nonzero(keep).squeeze(1)
                result = result[keep]
            results.append(result)
        return results
Code example #3
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()
        # objectness: (B, H*W*A)

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)
        #box_regression: (B, H*W*A, 4)
        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)
        # select the pre_nms_top_n highest-scoring anchors

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]
        # select the corresponding box_regression

        # anchors: list of B BoxLists, each with H/4*W/4*len(aspect_ratios) boxes
        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        # concat_anchors: (B, H/4*W/4*len(aspect_ratios), 4)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]
        # concat_anchors: (B, pre_nms_top_n, 4)

        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))
        # proposals: (B*pre_nms_top_n, 4)

        proposals = proposals.view(N, -1, 4)
        # proposals: (B, pre_nms_top_n, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        # result: [Boxlist(with objectness)*B]
        return result
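Several of the RPN examples here call permute_and_flatten to bring the (N, A*C, H, W) head outputs into the same per-anchor layout as the anchors. In maskrcnn-benchmark it is essentially the reshape below; treat this standalone copy as a sketch rather than the project's exact code:

import torch

def permute_and_flatten(layer, N, A, C, H, W):
    # (N, A*C, H, W) -> (N, A, C, H, W) -> (N, H, W, A, C) -> (N, H*W*A, C)
    layer = layer.view(N, -1, C, H, W)
    layer = layer.permute(0, 3, 4, 1, 2)
    return layer.reshape(N, -1, C)

# e.g. box regression for a 2-image batch, 3 anchors per cell, on a 5x5 feature map
box_regression = torch.randn(2, 3 * 4, 5, 5)
print(permute_and_flatten(box_regression, 2, 3, 4, 5, 5).shape)  # torch.Size([2, 75, 4])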
Code example #4
    def _class_independ_nms(self, boxlist, nms_thresh):
        """
        Per-class (class-independent) non-maximum suppression (NMS).
        :param boxlist (BoxList): input detection boxes
        :param nms_thresh (float): NMS threshold
        :return:
        """
        scores = boxlist.get_field("scores")
        labels = boxlist.get_field("labels")
        boxes = boxlist.bbox

        num_classes = self.spire_anno.num_classes
        result = []
        for i in range(1, num_classes + 1):  # boxlist contains background class 0
            inds = (labels == i).nonzero().view(-1)

            scores_i = scores[inds]
            boxes_i = boxes[inds, :].view(-1, 4)
            boxlist_for_class = BoxList(boxes_i, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_i)
            boxlist_for_class = boxlist_nms(boxlist_for_class,
                                            nms_thresh,
                                            score_field="scores")
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels, ),
                           i,
                           dtype=torch.int64,
                           device=scores.device))
            result.append(boxlist_for_class)

        result = cat_boxlist(result)
        return result
Code example #5
File: inference.py Project: RyanXLi/OneshotDet
    def select_over_all_levels(self, boxlists):
        num_images = len(boxlists)
        results = []
        for i in range(num_images):
            scores = boxlists[i].get_field("scores")
            boxes = boxlists[i].bbox
            boxlist = boxlists[i]
            result = []

            scores_j = scores  #[inds]
            boxes_j = boxes  #[inds, :].view(-1, 4)
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)

            boxlist_for_class = boxlist_nms(boxlist_for_class,
                                            self.nms_thresh,
                                            score_field="scores")

            num_labels = len(boxlist_for_class)

            result.append(boxlist_for_class)

            result = cat_boxlist(result)
            number_of_detections = len(result)

            # Limit to max_per_image detections **over all classes**
            if number_of_detections > self.fpn_post_nms_top_n > 0:
                cls_scores = result.get_field("scores")
                sorted_cls_scores, sorted_cls_indices = torch.sort(
                    cls_scores, descending=True)
                keep = sorted_cls_indices[:self.fpn_post_nms_top_n]

                result = result[keep]
            results.append(result)
        return results
Code example #6
    def select_over_all_levels(self, boxlists):
        num_images = len(boxlists)
        results = []
        for i in range(num_images):
            scores = boxlists[i].get_field("scores")
            coeffs = boxlists[i].get_field("coeffs")
            boxes = boxlists[i].bbox
            boxlist = boxlists[i]
            if cfg.MODEL.YOLACT.USE_FAST_NMS:
                scores, boxes, coeffs, labels = self.fast_nms(scores, boxes, coeffs)
                result = BoxList(boxes, boxlist.size, mode="xyxy")
                result.add_field("scores", scores)
                result.add_field("coeffs", coeffs)
                result.add_field("labels", labels)
            else:
                labels = boxlists[i].get_field("labels")
                result = []
                # skip the background
                for j in range(1, self.num_classes):
                    inds = (labels == j).nonzero().squeeze(1)

                    # if inds.numel() == 0:
                    #     continue

                    scores_j = scores[inds]
                    coeffs_j = coeffs[inds, :]
                    boxes_j = boxes[inds, :]
                    boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
                    boxlist_for_class.add_field("scores", scores_j)
                    boxlist_for_class.add_field("coeffs", coeffs_j)
                    # per class nms
                    boxlist_for_class = boxlist_nms(
                        boxlist_for_class, self.nms_thresh,
                        score_field="scores"
                    )
                    num_labels = len(boxlist_for_class)
                    boxlist_for_class.add_field(
                        "labels", torch.full((num_labels,), j,
                                            dtype=torch.int64,
                                            device=scores.device)
                    )
                    result.append(boxlist_for_class)
                result = cat_boxlist(result)
            
                # Limit to max_per_image detections **over all classes**
                number_of_detections = len(result)
                if number_of_detections > self.fpn_post_nms_top_n > 0:
                    cls_scores = result.get_field("scores")
                    image_thresh, _ = torch.kthvalue(
                        cls_scores.cpu(),
                        number_of_detections - self.fpn_post_nms_top_n + 1
                    )
                    keep = cls_scores >= image_thresh.item()
                    keep = torch.nonzero(keep).squeeze(1)
                    result = result[keep]

            results.append(result)
        return results
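cfg.MODEL.YOLACT.USE_FAST_NMS switches to YOLACT's Fast NMS, which replaces the sequential greedy loop with one matrix of pairwise IoUs, trading a little accuracy for speed. A single-class sketch of the idea (the real self.fast_nms works per class and also returns labels and mask coefficients):

import torch
from torchvision.ops import box_iou

def fast_nms_single_class(boxes, scores, iou_thresh=0.5):
    """YOLACT-style Fast NMS for one class: drop a box if any
    higher-scoring box overlaps it by more than iou_thresh."""
    order = scores.argsort(descending=True)
    iou = box_iou(boxes[order], boxes[order]).triu(diagonal=1)
    max_iou, _ = iou.max(dim=0)  # best overlap with any higher-scoring box
    return order[max_iou <= iou_thresh]

boxes = torch.tensor([[0., 0., 10., 10.], [1., 1., 11., 11.], [40., 40., 50., 50.]])
scores = torch.tensor([0.6, 0.9, 0.8])
print(fast_nms_single_class(boxes, scores).tolist())  # [1, 2]: box 0 is suppressed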
Code example #7
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression, box_orien):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # print(objectness.size(),box_regression.size(),box_orien.size(),'==============================')
        # put in the same format as anchors
        objectness = objectness.permute(0, 2, 3, 1).reshape(N, -1)
        objectness = objectness.sigmoid()
        box_orien = box_orien.view(N, -1, 2, H, W).permute(0, 3, 4, 1, 2)
        box_orien = box_orien.reshape(N, -1, 2)
        box_regression = box_regression.view(N, -1, 4, H,
                                             W).permute(0, 3, 4, 1, 2)
        box_regression = box_regression.reshape(N, -1, 4)

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)
        # print(objectness.size(), box_orien.size(),topk_idx.size() ,'==============================oo')
        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]
        box_orien = box_orien[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))

        proposals = proposals.view(N, -1, 4)

        result = []
        # print(proposals.size(), objectness.size(), box_orien.size(), '==============================oo')
        for proposal, score, im_shape, orien in zip(proposals, objectness,
                                                    image_shapes, box_orien):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist.add_field("rotations", orien)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size, self.max_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
Code example #8
def track_per_video(video_name):
    print(video_name)
    json_path = os.path.join(root, video_name+'.json')
    with open(json_path, 'r') as f:
        proposal_dict = json.load(f)
    gt = None
    frame_num = len(proposal_dict)
    boxes = np.zeros((frame_num, 4))  # xywh
    times = np.zeros(frame_num)
    i = 0
    for img_name, proposals_ in proposal_dict.items():
        start_time = time.time()
        if img_name == '00000001.jpg':
            gt = proposals_[0][:-1]
            boxes[0] = gt
            times[0] = time.time() - start_time
            gt = torch.Tensor(gt).reshape(1, 4)
            gt = BoxList(gt, (-1,-1), mode="xywh").convert("xyxy")
            i += 1
            continue
        proposals = [proposal[:-1] for proposal in proposals_]
        scores = [proposal[-1] for proposal in proposals_]
        scores = torch.Tensor(scores)
        proposals = torch.Tensor(proposals)
        proposals = BoxList(proposals, (-1,-1), mode="xywh").convert("xyxy")
        proposals.add_field('objectness', scores)
        '''Run NMS on the proposals and keep the top_n samples.'''
        proposals_nms = boxlist_nms(
            proposals,
            0.1,
            max_proposals=10,
            score_field="objectness",
        )
        last_box = torch.Tensor(boxes[i - 1]).reshape(1, 4)
        last_box = BoxList(last_box, (-1, -1), mode="xywh").convert("xyxy")
        overlaps = boxlist_iou(proposals_nms, last_box).squeeze(0)
        selected_id = torch.argmax(overlaps)
        if overlaps[selected_id] == 0:
            print('target lost')
            selected_id = torch.argmax(proposals_nms.extra_fields['objectness'])
        proposals_nms = proposals_nms.convert("xywh")
        res_box = proposals_nms.bbox[selected_id].cpu().numpy()
        boxes[i] = res_box
        # visualization(video_name, img_name, proposals_nms.bbox, res_box, boxes[i - 1])
        times[i] = time.time() - start_time
        i += 1
    '''Save the tracking results.'''
    record_file = os.path.join(cfg.OUTPUT_DIR,
        'result', video_name,
        '%s_%03d.txt' % (video_name, 1))
    record_dir = os.path.dirname(record_file)
    if not os.path.isdir(record_dir):
        os.makedirs(record_dir)
    np.savetxt(record_file, boxes, fmt='%.3f', delimiter=',')
    '''Save the timing file.'''
    time_file = record_file[:record_file.rfind('_')] + '_time.txt'
    times = times[:, np.newaxis]
    np.savetxt(time_file, times, fmt='%.8f', delimiter=',')
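The tracker above keeps the NMS-surviving proposal that best overlaps the previous frame's box, falling back to the highest objectness when nothing overlaps. boxlist_iou provides the pairwise IoU matrix; a generic xyxy IoU sketch of that selection step (the project's boxlist_iou may differ in pixel conventions, and the names here are illustrative):

import torch

def pairwise_iou(boxes_a: torch.Tensor, boxes_b: torch.Tensor) -> torch.Tensor:
    """IoU between every box in boxes_a (M, 4) and boxes_b (K, 4), xyxy format."""
    area_a = (boxes_a[:, 2] - boxes_a[:, 0]) * (boxes_a[:, 3] - boxes_a[:, 1])
    area_b = (boxes_b[:, 2] - boxes_b[:, 0]) * (boxes_b[:, 3] - boxes_b[:, 1])
    lt = torch.max(boxes_a[:, None, :2], boxes_b[None, :, :2])  # (M, K, 2)
    rb = torch.min(boxes_a[:, None, 2:], boxes_b[None, :, 2:])  # (M, K, 2)
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area_a[:, None] + area_b[None, :] - inter)

proposals = torch.tensor([[0., 0., 10., 10.], [50., 50., 60., 60.]])
scores = torch.tensor([0.3, 0.9])
last_box = torch.tensor([[1., 1., 11., 11.]])
overlaps = pairwise_iou(proposals, last_box).squeeze(1)
selected = overlaps.argmax() if overlaps.max() > 0 else scores.argmax()
print(int(selected))  # 0: the overlapping proposal wins despite its lower score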
Code example #9
    def filter_results(self, boxlist, num_classes, feature=None):
        """Returns bounding-box detection results by thresholding on scores and
        applying non-maximum suppression (NMS).
        """
        # unwrap the boxlist to avoid additional overhead.
        # if we had multi-class NMS, we could perform this directly on the boxlist
        boxes = boxlist.bbox.reshape(-1, num_classes * 4)
        scores = boxlist.get_field("scores").reshape(-1, num_classes)

        device = scores.device
        result = []
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        inds_all = scores > self.score_thresh
        boxlist_empty = self.prepare_empty_boxlist(boxlist)
        for j in range(1, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)

            if len(inds)>0:
                scores_j = scores[inds, j]
                boxes_j = boxes[inds, j * 4 : (j + 1) * 4]
                boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
                boxlist_for_class.add_field("scores", scores_j)
                
                if self.output_feature:
                    feature_j = feature[inds]
                    boxlist_for_class.add_field("box_features", feature_j)
                    
                    scores_all = scores[inds]
                    boxlist_for_class.add_field("scores_all", scores_all)
                    boxlist_for_class.add_field("boxes_all",
                                                boxes[inds].view(-1, num_classes, 4))

                boxlist_for_class = boxlist_nms(
                    boxlist_for_class, self.nms
                )
                num_labels = len(boxlist_for_class)
                boxlist_for_class.add_field(
                    "labels", torch.full((num_labels,), j, dtype=torch.int64, device=device)
                )
                result.append(boxlist_for_class)
            else:
                result.append(boxlist_empty)

        result = cat_boxlist(result)
        number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.detections_per_img > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(), number_of_detections - self.detections_per_img + 1
            )
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
        return result
Code example #10
File: inference.py Project: zhwzhong/NAS-FCOS
    def select_over_all_levels(self, boxlists):
        num_images = len(boxlists)
        results = []
        for i in range(num_images):
            scores = boxlists[i].get_field("scores")
            labels = boxlists[i].get_field("labels")
            boxes = boxlists[i].bbox
            boxlist = boxlists[i]
            result = []
            # skip the background
            for j in range(1, 81):
                inds = (labels == j).nonzero().view(-1)
                if len(inds) == 0:
                    continue

                scores_j = scores[inds]
                boxes_j = boxes[inds, :].view(-1, 4)
                boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
                boxlist_for_class.add_field("scores", scores_j)
                boxlist_for_class = boxlist_nms(boxlist_for_class,
                                                self.nms_thresh,
                                                score_field="scores")
                num_labels = len(boxlist_for_class)
                boxlist_for_class.add_field(
                    "labels",
                    torch.full((num_labels, ),
                               j,
                               dtype=torch.int64,
                               device=scores.device))
                result.append(boxlist_for_class)

            if len(result):
                result = cat_boxlist(result)
                number_of_detections = len(result)

                # Limit to max_per_image detections **over all classes**
                if number_of_detections > self.fpn_post_nms_top_n > 0:
                    cls_scores = result.get_field("scores")
                    image_thresh, _ = torch.kthvalue(
                        cls_scores.cpu(),
                        number_of_detections - self.fpn_post_nms_top_n + 1)
                    keep = cls_scores >= image_thresh.item()
                    keep = torch.nonzero(keep).squeeze(1)
                    result = result[keep]
                results.append(result)

            else:
                device = boxlist.bbox.device
                empty_boxlist = BoxList(
                    torch.zeros(1, 4).to(device), boxlist.size)
                empty_boxlist.add_field("labels",
                                        torch.LongTensor([1]).to(device))
                empty_boxlist.add_field("scores",
                                        torch.Tensor([0.01]).to(device))
                results.append(empty_boxlist)
        return results
Code example #11
    def filter_results(self, boxlist, num_classes):
        """首先移除 score<=score_thresh 的box, 然后对box进行nms, 最后对所有保留的box按score
        排序, 并保留置信度最大的前detections_per_img个box
        """
        # [roi_per_img*81, 4] --> [roi_per_img, 81*4]
        boxes = boxlist.bbox.reshape(-1, num_classes * 4)
        # [roi_per_img*81] --> [roi_per_img, 81]
        scores = boxlist.get_field("scores").reshape(-1, num_classes)

        device = scores.device

        # result holds one BoxList per class
        result = []

        # [roi_per_img, 81]; 1 where the score exceeds the threshold, 0 otherwise
        inds_all = scores > self.score_thresh  # 0.05

        for j in range(1, num_classes):  # 0 is the background class
            # indices of the RoIs whose score for class j exceeds the threshold
            inds = inds_all[:, j].nonzero().squeeze(1)

            # scores for class j that exceed the threshold
            scores_j = scores[inds, j]

            # boxes for class j whose score exceeds the threshold
            boxes_j = boxes[inds, j * 4:(j + 1) * 4]

            # build the BoxList for the current class
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            boxlist_for_class = boxlist_nms(boxlist_for_class, self.nms)
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels, ), j, dtype=torch.int64,
                           device=device))
            result.append(boxlist_for_class)

        # merge the per-class BoxLists into one
        result = cat_boxlist(result)

        # number of boxes left after the score thresholding and NMS above (out of roi_per_img*81 before filtering)
        number_of_detections = len(result)

        # self.detections_per_img defaults to 100, i.e. keep at most 100 boxes per image over all classes
        if number_of_detections > self.detections_per_img > 0:
            cls_scores = result.get_field("scores")
            # kthvalue returns the k-th smallest element and its index
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.detections_per_img + 1)
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
        return result
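All of these snippets delegate the actual suppression to boxlist_nms. Assuming it wraps the usual greedy IoU-based NMS, torchvision's nms operator reproduces the same per-class behaviour; the helper below is a hedged sketch, not the project's boxlist_nms implementation:

import torch
from torchvision.ops import nms

def per_class_nms(boxes, scores, labels, iou_thresh=0.5):
    """Greedy NMS applied independently within each class label.
    boxes: (N, 4) xyxy, scores: (N,), labels: (N,) int64."""
    keep = []
    for cls in labels.unique():
        cls_idx = torch.nonzero(labels == cls).squeeze(1)
        kept = nms(boxes[cls_idx], scores[cls_idx], iou_thresh)
        keep.append(cls_idx[kept])
    keep = torch.cat(keep)
    # return the kept indices sorted by score, highest first
    return keep[scores[keep].argsort(descending=True)]

torchvision.ops.batched_nms performs the same per-class suppression in a single call by offsetting boxes per label, which is usually faster than looping over classes.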
Code example #12
    def my_filter_results(self, boxlist, num_classes):
        boxes = boxlist.bbox.reshape(-1, num_classes * 4)
        scores = boxlist.get_field("scores").reshape(-1, num_classes)
        num_boxes = boxes.size(0)
        features = boxlist.get_field('features').reshape(num_boxes, -1)

        device = scores.device
        result = []

        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        thresh = 0.005

        while True:
            result = []
            inds_all = scores > thresh
            for j in range(1, num_classes):
                inds = inds_all[:, j].nonzero().squeeze(1)
                scores_j = scores[inds, j]
                boxes_j = boxes[inds, j * 4:(j + 1) * 4]
                features_j = features[inds, :]

                boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
                boxlist_for_class.add_field("scores", scores_j)
                boxlist_for_class.add_field('features', features_j)
                boxlist_for_class, keep = boxlist_nms(boxlist_for_class,
                                                      self.nms,
                                                      return_idxs=True)

                num_labels = len(boxlist_for_class)
                boxlist_for_class.add_field(
                    "labels",
                    torch.full((num_labels, ),
                               j,
                               dtype=torch.int64,
                               device=device))
                result.append(boxlist_for_class)

            result = cat_boxlist(result)
            number_of_detections = len(result)

            # Limit to max_per_image detections **over all classes**
            if number_of_detections > 100:
                cls_scores = result.get_field("scores")
                image_thresh, _ = torch.kthvalue(
                    cls_scores.cpu(),
                    number_of_detections - self.detections_per_img + 1)
                keep = cls_scores >= image_thresh.item()
                keep = torch.nonzero(keep).squeeze(1)
                result = result[keep]
                break

            if thresh == 0:
                # already at the lowest threshold; stop to avoid looping forever
                break
            thresh = 0

        return result
Code example #13
    def sliding_window_wsi(self,
                           pil_image,
                           nms_thresh=0.3,
                           model_size=600,
                           overlap=200):
        step = model_size - overlap
        w, h = pil_image.size
        l_bboxes = []
        l_scores = []
        l_labels = []
        for i in range(0, w, step):
            for j in range(0, h, step):
                if i + model_size > w:
                    i = w - model_size
                if j + model_size > h:
                    j = h - model_size

                image1 = pil_image.crop((i, j, i + model_size, j + model_size))
                pil_image_rgb = image1.convert("RGB")
                image = np.array(pil_image_rgb)[:, :, [2, 1, 0]]
                boxlist = self.compute_prediction(image)
                bbox = boxlist.bbox
                bbox_delta = torch.Tensor([i, j, i, j]).expand_as(bbox)
                l_bboxes.append(bbox + bbox_delta)
                if boxlist.has_field('scores'):
                    l_scores.append(boxlist.get_field('scores'))
                if boxlist.has_field('objectness'):
                    l_scores.append(boxlist.get_field('objectness'))
                    # l_labels.append(boxlist.get_field('objectness') > 0.5)
                if boxlist.has_field('labels'):
                    l_labels.append(boxlist.get_field('labels'))
        if l_scores:
            bboxes = torch.cat(l_bboxes, 0)
            scores = torch.cat(l_scores, 0)
            if l_labels:
                labels = torch.cat(l_labels, 0)
            else:
                labels = scores > 0.5

            boxlist = BoxList(bboxes, pil_image.size, mode="xyxy")
            boxlist.add_field("scores", scores)
            boxlist.add_field("labels", labels)

            boxlist = boxlist_nms(
                boxlist,
                nms_thresh=nms_thresh,
                max_proposals=-1,
                score_field="scores",
            )

            return boxlist
        else:
            return None
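The sliding-window example predicts on each crop in local crop coordinates and shifts the boxes back into whole-slide coordinates before the final NMS. The shift is just adding the crop origin to every box; a small sketch with hypothetical names:

import torch

def shift_boxes_to_global(local_boxes: torch.Tensor, crop_x: int, crop_y: int) -> torch.Tensor:
    """Translate xyxy boxes predicted inside a crop whose top-left corner
    is (crop_x, crop_y) into the coordinate frame of the full image."""
    offset = torch.tensor([crop_x, crop_y, crop_x, crop_y],
                          dtype=local_boxes.dtype).expand_as(local_boxes)
    return local_boxes + offset

# a box found at (10, 20)-(50, 60) inside the crop starting at (600, 400)
print(shift_boxes_to_global(torch.tensor([[10., 20., 50., 60.]]), 600, 400))
# tensor([[610., 420., 650., 460.]])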
Code example #14
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        # N = number of images in the batch, A = number of anchor ratios, H/W = feature-map height/width at this level
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        # Reshape the objectness map into the anchor layout: keep the batch dimension and flatten the anchor, height and width dimensions into one.
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        # Top K
        # take the objectness scores of the pre_nms_top_n highest-scoring anchors, plus their indices into the anchor list
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
Code example #15
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList], anchor size (N, H*W*ratios, 4); N is the batch size, H and W are the size of the current feature level, 4 is x1y1x2y2
            objectness: tensor of size N, A, H, W, reshaped to (N, H*W*ratios)
            box_regression: tensor of size N, A * 4, H, W, reshaped to (N, H*W*ratios, 4)
        Purpose: select the top pre_nms_top_n anchors by objectness score, decode them together with box_regression
        (the learned offsets dx, dy, dw, dh) into predicted boxes in xyxy format, then filter further with NMS and other
        conditions to obtain the final BoxList (the objectness scores are stored in extra_fields).
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()  # normalise to 0-1; the top pre_nms_top_n (e.g. 2000) are taken below

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W
        # number of anchors selected per image on each feature map
        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]
        # decode the proposals: combine the anchors with box_regression (the learned offsets dx, dy, dw, dh) to get the predicted boxes
        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(
                boxlist, self.min_size)  # make sure the proposal's w & h > min_size
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result  # list of N BoxLists (one per image in the batch)
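The docstring above mentions decoding anchors with the learned offsets (dx, dy, dw, dh). box_coder.decode follows the standard Faster R-CNN box parameterisation; the sketch below omits the per-coordinate weights and the dw/dh clamping of the real BoxCoder:

import torch

def decode_boxes(deltas: torch.Tensor, anchors: torch.Tensor) -> torch.Tensor:
    """Apply (dx, dy, dw, dh) regression deltas to xyxy anchors (simplified)."""
    widths = anchors[:, 2] - anchors[:, 0]
    heights = anchors[:, 3] - anchors[:, 1]
    ctr_x = anchors[:, 0] + 0.5 * widths
    ctr_y = anchors[:, 1] + 0.5 * heights

    dx, dy, dw, dh = deltas.unbind(dim=1)
    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = torch.exp(dw) * widths
    pred_h = torch.exp(dh) * heights

    return torch.stack([pred_ctr_x - 0.5 * pred_w,
                        pred_ctr_y - 0.5 * pred_h,
                        pred_ctr_x + 0.5 * pred_w,
                        pred_ctr_y + 0.5 * pred_h], dim=1)

# zero deltas reproduce the anchor
print(decode_boxes(torch.zeros(1, 4), torch.tensor([[0., 0., 10., 20.]])))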
Code example #16
File: inference.py Project: qgking/MMT-PSM
    def filter_results(self, boxlist, num_classes):
        """Returns bounding-box detection results by thresholding on scores and
        applying non-maximum suppression (NMS).
        """
        # unwrap the boxlist to avoid additional overhead.
        # if we had multi-class NMS, we could perform this directly on the boxlist
        boxes = boxlist.bbox.reshape(-1, num_classes * 4)
        scores = boxlist.get_field("scores").reshape(-1, num_classes)
        if 'head_id' in boxlist.fields():
            head_id =  boxlist.get_field('head_id')
        else:
            head_id = torch.zeros(scores.shape[0])
        # pdb.set_trace()
        device = scores.device
        result = []
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        inds_all = scores > self.score_thresh
        # import pdb;pdb.set_trace()
        for j in range(1, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)
            scores_j = scores[inds, j]
            head_id_j = head_id[inds]
            boxes_j = boxes[inds, j * 4 : (j + 1) * 4]
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            boxlist_for_class.add_field("objectness", scores_j)
            boxlist_for_class.add_field('head_id', head_id_j)
            if self.cfg.MODEL.ROI_BOX_HEAD.K_HEAD > 1 and \
                    self.cfg.MODEL.ROI_HEADS.NMS_TYPE == 'set':
                boxlist_for_class = set_cpu_nms(boxlist_for_class, self.nms,
                                                score_field="scores")
            else:
                boxlist_for_class = boxlist_nms(
                    boxlist_for_class, self.nms, score_field="scores"
                )
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels", torch.full((num_labels,), j, dtype=torch.int64, device=device)
            )
            result.append(boxlist_for_class)

        result = cat_boxlist(result)
        number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.detections_per_img > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(), number_of_detections - self.detections_per_img + 1
            )
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
        return result
Code example #17
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            # uncomment this when rpn only !!!!
            #             boxlist.add_field("labels", score>0.5)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
Code example #18
def detect_masked(img, coco_demo, preds, args, masking_color=128):
    masking_thresh = args.masking_thresh
    if args.all_at_once:
        if args.masking_type == 'bbox':
            mask = mask_from_boxes(img, preds, args)
            # mask = get_blocked_image(img,preds.bbox,inflation=masking_args.inflation_factor,is_abs=False,
            # block_color=masking_color)
        else:
            # mask = mask_from_masks(img,preds) # inflation factor not used here.
            if len(preds) == 0:
                mask = img * 0
            else:
                mask = mask_from_masks(img, preds, args)
        if args.negative_masking:
            mask = 1 - mask
        masked_img = img * mask + (1 - mask) * masking_color
        preds_masked = coco_demo.compute_prediction(masked_img[:, :, ::-1])
        return preds_masked
    else:
        all_preds = []
        ff = preds.extra_fields
        # if 'mask' in preds.extra_fields:
        # del preds.extra_fields['mask']
        n_above = sum(ff['scores'].numpy() >= args.masking_thresh)
        for bb, score, mask in tqdm(zip(preds.bbox, ff['scores'], ff['mask']),
                                    desc='masking one by one...',
                                    total=n_above):
            if score < args.masking_thresh:
                continue
            if args.masking_type == 'bbox':
                cur_mask = img * 0
                mask_from_a_box(cur_mask, bb, args.inflation_factor)
            else:
                cur_mask = np.repeat(mask.numpy().transpose(1, 2, 0), 3, 2)
            if args.negative_masking:
                cur_mask = 1 - cur_mask
            masked_img = img * cur_mask + (1 - cur_mask) * masking_color

            preds_masked = coco_demo.compute_prediction(masked_img[:, :, ::-1])

            all_preds.append(preds_masked)
            if 'mask' in preds_masked.extra_fields:
                del preds_masked.extra_fields['mask']
        if len(all_preds) > 0:
            all_preds = cat_boxlist(all_preds)
            all_preds = boxlist_nms(all_preds,
                                    .5,
                                    max_proposals=100,
                                    score_field="scores")
        else:
            all_preds = BoxList(torch.Tensor(0, 4), img.shape[:2])
        return all_preds
Code example #19
    def select_over_all_levels(self, boxlists):
        num_images = len(boxlists)
        # different behavior during training and during testing:
        # during training, post_nms_top_n is over *all* the proposals combined, while
        # during testing, it is over the proposals for each image
        # NOTE: it should be per image, and not per batch. However, to be consistent
        # with Detectron, the default is per batch (see Issue #672)
        for i in range(num_images):
            boxlist = boxlists[i]
            objectness = boxlist.get_field("objectness")
            # objectness = objectness[:, 1]
            pre_nms_top_n = min(self.pre_nms_top_n, len(objectness))
            _, inds_sorted = torch.topk(objectness,
                                        pre_nms_top_n,
                                        dim=0,
                                        sorted=True)
            boxlist = boxlist[inds_sorted]

            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            # boxlist.add_field('objectness', boxlist.get_field('objectness_ori'))
            boxlists[i] = boxlist

        if self.training and self.fpn_post_nms_per_batch:
            objectness = torch.cat(
                [boxlist.get_field("objectness") for boxlist in boxlists],
                dim=0)
            box_sizes = [len(boxlist) for boxlist in boxlists]
            post_nms_top_n = min(self.fpn_post_nms_top_n, len(objectness))
            _, inds_sorted = torch.topk(objectness,
                                        post_nms_top_n,
                                        dim=0,
                                        sorted=True)
            inds_mask = torch.zeros_like(objectness, dtype=torch.bool)
            inds_mask[inds_sorted] = 1
            inds_mask = inds_mask.split(box_sizes)
            for i in range(num_images):
                boxlists[i] = boxlists[i][inds_mask[i]]
        else:
            for i in range(num_images):
                objectness = boxlists[i].get_field("objectness")
                post_nms_top_n = min(self.fpn_post_nms_top_n, len(objectness))
                _, inds_sorted = torch.topk(objectness,
                                            post_nms_top_n,
                                            dim=0,
                                            sorted=True)
                boxlists[i] = boxlists[i][inds_sorted]
        return boxlists
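During training, the example above caps proposals over the whole batch rather than per image: it concatenates the objectness scores, marks the global top-N in a boolean mask, and splits the mask back along the per-image box counts. A compact sketch of that mask-and-split pattern with made-up numbers:

import torch

# hypothetical per-image objectness scores for a 2-image batch
per_image_scores = [torch.tensor([0.9, 0.1, 0.4]), torch.tensor([0.8, 0.7])]
fpn_post_nms_top_n = 3

objectness = torch.cat(per_image_scores, dim=0)
box_sizes = [len(s) for s in per_image_scores]
top_n = min(fpn_post_nms_top_n, len(objectness))

_, inds_sorted = torch.topk(objectness, top_n, dim=0, sorted=True)
inds_mask = torch.zeros_like(objectness, dtype=torch.bool)
inds_mask[inds_sorted] = True
# split the flat mask back into one mask per image
per_image_masks = inds_mask.split(box_sizes)
print([m.tolist() for m in per_image_masks])  # [[True, False, False], [True, True]]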
Code example #20
    def forward_for_single_feature_map(self, anchors, objectness, box_regression):
        """
        Arguments:
            anchors: list[BoxList], (assume list number = batchSize N)
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # modify tensor shape [N, A*1, H, W] => [N, H*W*A, 1] => [N, H*W*A]
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        # modify tensor shape [N, A*4, H, W] => [N, H*W*A, 4]
        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W
        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True) # [N, top_k_elems(H*W*A)]

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx] # [N, top_k_elems(H*W*A), 4]

        image_shapes = [box.size for box in anchors] # list(tuple)
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0) # list of N tensors of shape (A*H*W, 4) => (N*A*H*W, 4)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        # box offsets + original anchors => proposals (N*top_k, 4)
        proposals = self.box_coder.decode(
            box_regression.view(-1, 4), concat_anchors.view(-1, 4)
        )

        proposals = proposals.view(N, -1, 4) # => (N, top_k, 4)

        result = []
        # for each image in the batch (N); image_shapes => input image sizes
        for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            # clip proposals to image_shapes
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result # N*BoxList
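The box_regression[batch_idx, topk_idx] step used throughout these RPN examples is broadcasted advanced indexing: an (N, 1) batch index combined with an (N, K) top-k index picks K rows per image out of an (N, M, C) tensor. A tiny illustration:

import torch

N, M, C, K = 2, 5, 4, 3
scores = torch.rand(N, M)
box_regression = torch.rand(N, M, C)

# per-image top-K scores and their positions
topk_scores, topk_idx = scores.topk(K, dim=1, sorted=True)   # both (N, K)

# (N, 1) batch indices broadcast against the (N, K) top-k indices,
# so row i of the result gathers box_regression[i, topk_idx[i]]
batch_idx = torch.arange(N)[:, None]
selected = box_regression[batch_idx, topk_idx]                # (N, K, C)
print(selected.shape)  # torch.Size([2, 3, 4])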
Code example #21
File: inference.py Project: henrywang1/maskrcnn-few
    def filter_results(self, boxlist, num_classes, unique_labels):
        """Returns bounding-box detection results by thresholding on scores and
        applying non-maximum suppression (NMS).
        """
        # unwrap the boxlist to avoid additional overhead.
        # if we had multi-class NMS, we could perform this directly on the boxlist
        boxes = boxlist.bbox  #.reshape(-1, num_classes * 4)
        scores = boxlist.get_field("scores")  #.reshape(-1, num_classes)
        result = []
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        # inds_all = scores > self.score_thresh
        k = scores.numel() // len(unique_labels)
        for j in range(0, num_classes):
            score_thresh = self.score_thresh
            scores_j = scores[j * k:(j + 1) * k]
            boxes_j = boxes[j * k:(j + 1) * k]
            inds = scores_j > (score_thresh)
            scores_j = scores_j[inds]
            boxes_j = boxes_j[inds]
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            if scores_j.numel():
                box_len = len(boxlist_for_class)
                labels = unique_labels[j].repeat(box_len)
                proto_idx = (torch.zeros(box_len) + (j + 1)).long().cuda()
            else:  # assign empty tensor
                labels = scores_j.long()
                proto_idx = scores_j.long()

            boxlist_for_class.add_field("labels", labels)
            boxlist_for_class.add_field("proto_labels", proto_idx)
            boxlist_for_class = boxlist_nms(boxlist_for_class,
                                            self.nms,
                                            score_field="scores")
            result.append(boxlist_for_class)

        result = cat_boxlist(result)
        number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.detections_per_img > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.detections_per_img + 1)
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
        return result
Code example #22
    def get_nms_boxes(self, detection):
        detection = boxlist_nms(detection, nms_thresh=0.5)

        _ids = detection.get_field('ids')
        _scores = detection.get_field('scores')

        # adjust the scores to the right range
        # _scores -= torch.floor(_scores) * (_ids >= 0) * (torch.floor(_scores) != _scores)
        # _scores[_scores >= 1.] = 1.

        _scores[_scores >= 2.] = _scores[_scores >= 2.] - 2.
        _scores[_scores >= 1.] = _scores[_scores >= 1.] - 1.

        return detection, _ids, _scores
Code example #23
File: inference.py Project: amazon-research/siam-mot
    def filter_results(self, boxlist, num_classes):
        """Returns bounding-box detection results by thresholding on scores and
        applying non-maximum suppression (NMS).
        """
        # unwrap the boxlist to avoid additional overhead.
        # if we had multi-class NMS, we could perform this directly on the boxlist
        boxes = boxlist.bbox.reshape(-1, num_classes * 4)
        scores = boxlist.get_field("scores").reshape(-1, num_classes)
        device = scores.device

        assert (boxlist.has_field('ids'))
        ids = boxlist.get_field('ids')

        result = [self.create_empty_boxlist(device=device)
                  for _ in range(1, num_classes)]

        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        inds_all = scores > self.score_thresh
        for j in range(1, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)
            scores_j = scores[inds, j]
            boxes_j = boxes[inds, j * 4: (j + 1) * 4]
            ids_j = ids[inds]

            det_idx = ids_j < 0
            det_boxlist = BoxList(boxes_j[det_idx, :], boxlist.size, mode="xyxy")
            det_boxlist.add_field("scores", scores_j[det_idx])
            det_boxlist.add_field("ids", ids_j[det_idx])
            det_boxlist = boxlist_nms(det_boxlist, self.nms)

            track_idx = ids_j >= 0
            # track_box is available
            if torch.any(track_idx > 0):
                track_boxlist = BoxList(boxes_j[track_idx, :], boxlist.size, mode="xyxy")
                track_boxlist.add_field("scores", scores_j[track_idx])
                track_boxlist.add_field("ids", ids_j[track_idx])
                det_boxlist = cat_boxlist([det_boxlist, track_boxlist])

            num_labels = len(det_boxlist)
            det_boxlist.add_field(
                "labels", torch.full((num_labels,), j, dtype=torch.int64, device=device)
            )
            result[j-1] = det_boxlist

        result = cat_boxlist(result)
        return result
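The siam-mot variant runs NMS only on fresh detections (ids < 0) and lets propagated track boxes (ids >= 0) bypass suppression before concatenating the two sets. A minimal index-splitting sketch using torchvision's nms for illustration (not the project's boxlist_nms):

import torch
from torchvision.ops import nms

boxes = torch.tensor([[0., 0., 10., 10.], [1., 1., 11., 11.], [40., 40., 50., 50.]])
scores = torch.tensor([0.9, 0.8, 0.95])
ids = torch.tensor([-1, -1, 7])  # negative = new detection, >= 0 = active track

det_idx = torch.nonzero(ids < 0).squeeze(1)
trk_idx = torch.nonzero(ids >= 0).squeeze(1)

# suppress only among the new detections; track boxes skip NMS entirely
kept_det = det_idx[nms(boxes[det_idx], scores[det_idx], iou_threshold=0.5)]
keep = torch.cat([kept_det, trk_idx])
print(keep.tolist())  # [0, 2]: one of the two overlapping detections is dropped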
Code example #24
    def filter_results(self, boxlist, num_classes):
        """Returns bounding-box detection results by thresholding on scores and
        applying non-maximum suppression (NMS).
        """
        # unwrap the boxlist to avoid additional overhead.
        # if we had multi-class NMS, we could perform this directly on the boxlist
        boxes = boxlist.bbox.reshape(-1, num_classes * 4)
        scores = boxlist.get_field("scores").reshape(-1, num_classes)

        device = scores.device
        result = []
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        inds_all = scores > self.score_thresh
        for j in range(1, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)  # indices of this image's proposals for class j above the threshold
            scores_j = scores[inds, j]  # scores of those proposals for class j
            boxes_j = boxes[inds, j * 4: (j + 1) * 4]  # actual coordinates of those proposals for class j
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            boxlist_for_class = boxlist_nms(  # NMS is applied only within the same class
                boxlist_for_class, self.nms
            )
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels", torch.full((num_labels,), j, dtype=torch.int64, device=device)
            )
            result.append(boxlist_for_class)

        result = cat_boxlist(result)  # all proposals of every class that survived NMS for this image
        number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.detections_per_img > 0:  # filter once more by score
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(), number_of_detections - self.detections_per_img + 1
            )
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
        return result
Code example #25
File: flickr.py Project: youngfly11/LCMCG-PyTorch
    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: Tuple (image, target). target is a list of captions for the image.
        """

        img_id, sent_id = self.ids[index].split(
            '\t')[0], self.ids[index].split('\t')[1]

        topN_box = self.topN_box_anno[img_id][int(sent_id)]
        filename = os.path.join(self.img_root, img_id + '.jpg')
        img = Image.open(filename).convert('RGB')

        sent_sg = self.sg_anno[img_id]['relations'][int(sent_id)]
        _, feature_map, precompute_bbox, img_scale, precompute_score, cls_label = self.get_precompute_img_feat(
            img_id)

        precompute_bbox = BoxList(precompute_bbox, img.size, mode='xyxy')

        if cfg.MODEL.VG.USE_BOTTOMUP_NMS:
            precompute_bbox.add_field("scores",
                                      torch.FloatTensor(precompute_score))
            precompute_bbox, keep_inds = boxlist_nms(
                precompute_bbox,
                cfg.MODEL.VG.BOTTOMUP_NMS_THRESH,
                require_keep_idx=True)
            precompute_score = precompute_score[keep_inds.numpy()]

        sentence = self.get_sentence(img_id, int(sent_id))
        phrase_ids, gt_boxes = self.get_gt_boxes(img_id)
        target = BoxList(gt_boxes, img.size, mode="xyxy")

        vocab_label_elmo = self.vocab_embed[cls_label]

        if self.transforms is not None:
            img, target, precompute_bbox, img_scale = self.transforms(
                img, target, precompute_bbox, img_scale)

        return None, target, img_id, phrase_ids, sent_id, sentence, precompute_bbox, precompute_score, feature_map, vocab_label_elmo, sent_sg, topN_box
Code example #26
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """

        device = objectness.device
        N, A, H, W = objectness.shape

        objectness, topk_idx, box_regression = self.objectness_top_k(
            objectness, box_regression)

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]
        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
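`self.objectness_top_k` is not shown in this snippet, but Code Example #30 below performs the equivalent steps inline: flatten the per-anchor logits, apply a sigmoid, and keep the pre-NMS top-k scores together with the matching regression rows. A rough torch-only sketch of that step (the function name and signature are illustrative, not the repository's):

import torch

def objectness_top_k_sketch(objectness, box_regression, pre_nms_top_n):
    # objectness: (N, A, H, W); box_regression: (N, A * 4, H, W)
    N, A, H, W = objectness.shape
    objectness = objectness.permute(0, 2, 3, 1).reshape(N, -1).sigmoid()
    box_regression = (box_regression.view(N, A, 4, H, W)
                      .permute(0, 3, 4, 1, 2).reshape(N, -1, 4))
    k = min(pre_nms_top_n, objectness.shape[1])
    objectness, topk_idx = objectness.topk(k, dim=1, sorted=True)
    return objectness, topk_idx, box_regression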
Code Example #27
    def filter_results(self, boxlist, num_classes, target_id=None):
        """Returns bounding-box detection results by thresholding on scores and
        applying non-maximum suppression (NMS).
        """
        # unwrap the boxlist to avoid additional overhead.
        # if we had multi-class NMS, we could perform this directly on the boxlist
        boxes = boxlist.bbox.reshape(-1, num_classes * 4)
        scores = boxlist.get_field("scores").reshape(-1, num_classes)

        device = scores.device
        result = []
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        inds_all = scores > self.score_thresh
        for j in range(1, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)
            scores_j = scores[inds, j]
            boxes_j = boxes[inds, j * 4 : (j + 1) * 4]
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            boxlist_for_class = boxlist_nms(
                boxlist_for_class, self.nms
            )

            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels", torch.full((num_labels,), target_id, dtype=torch.int64, device=device)
            )
            result.append(boxlist_for_class)

        result = cat_boxlist(result)
        number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.detections_per_img > 0:
            cls_scores = result.get_field("scores")
            sorted_cls_scores, sorted_cls_indices = torch.sort(cls_scores, descending=True)
            keep = sorted_cls_indices[:self.detections_per_img]
            result = result[keep]
        return result
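Note that this variant caps the detections by sorting, which keeps exactly `detections_per_img` boxes, whereas the kthvalue-based variant earlier in this listing thresholds at the k-th largest score and can keep a few more when scores tie. A small torch-only comparison (toy values):

import torch

scores = torch.tensor([0.9, 0.8, 0.8, 0.1])
k = 2

# sort-based cap (as above): exactly k detections
_, order = torch.sort(scores, descending=True)
keep_sort = order[:k]                                     # 2 indices

# kthvalue-based threshold (as in the earlier filter_results): ties may keep more
thresh, _ = torch.kthvalue(scores, scores.numel() - k + 1)
keep_kth = torch.nonzero(scores >= thresh).squeeze(1)     # 3 indices here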
Code Example #28
File: inference.py  Project: youngfly11/LCMCG-PyTorch
    def select_over_all_levels(self, boxlists):
        num_images = len(boxlists)
        # different behavior during training and during testing:
        # during training, post_nms_top_n is over *all* the proposals combined, while
        # during testing, it is over the proposals for each image
        # TODO resolve this difference and make it consistent. It should be per image,
        # and not per batch
        if self.training:
            objectness = torch.cat(
                [boxlist.get_field("objectness") for boxlist in boxlists],
                dim=0)
            box_sizes = [len(boxlist) for boxlist in boxlists]
            post_nms_top_n = min(self.fpn_post_nms_top_n, len(objectness))
            _, inds_sorted = torch.topk(objectness,
                                        post_nms_top_n,
                                        dim=0,
                                        sorted=True)
            inds_mask = torch.zeros_like(objectness, dtype=torch.uint8)
            inds_mask[inds_sorted] = 1
            inds_mask = inds_mask.split(box_sizes)
            for i in range(num_images):
                boxlists[i] = boxlists[i][inds_mask[i]]
        else:
            for i in range(num_images):
                objectness = boxlists[i].get_field("objectness")
                post_nms_top_n = min(self.fpn_post_nms_top_n, len(objectness))
                _, inds_sorted = torch.topk(objectness,
                                            post_nms_top_n,
                                            dim=0,
                                            sorted=True)

                boxlist = boxlists[i][inds_sorted]
                boxlist = boxlist_nms(
                    boxlist,
                    self.nms_thresh,
                    max_proposals=self.post_nms_top_n,
                    score_field="objectness",
                )
                boxlists[i] = boxlist
        return boxlists
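In the training branch, the top-k is taken over the objectness of all images concatenated, and the boolean keep mask is then split back into per-image chunks with `split(box_sizes)`. A toy torch-only illustration of that scatter-back pattern (using `torch.bool` rather than the now-deprecated `uint8` masks):

import torch

objectness = torch.tensor([0.9, 0.2, 0.8, 0.1, 0.7, 0.3])  # 2 images, 3 proposals each
box_sizes = [3, 3]
post_nms_top_n = 3

_, inds_sorted = torch.topk(objectness, post_nms_top_n, dim=0, sorted=True)
inds_mask = torch.zeros_like(objectness, dtype=torch.bool)
inds_mask[inds_sorted] = True
per_image_masks = inds_mask.split(box_sizes)
# per_image_masks[0] -> tensor([ True, False,  True])
# per_image_masks[1] -> tensor([False,  True, False])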
Code Example #29
File: inference.py  Project: DeqiangWang/DetectionHub
    def filter_results(self, boxlist, num_classes):
        """Returns bounding-box detection results by thresholding on scores and
        applying non-maximum suppression (NMS).
        """
        # unwrap the boxlist to avoid additional overhead.
        # if we had multi-class NMS, we could perform this directly on the boxlist
        boxes = boxlist.bbox.reshape(-1, num_classes * 4)
        scores = boxlist.get_field("scores").reshape(-1, num_classes)

        # device = scores.device
        result = []
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        inds_all = scores > self.score_thresh
        for j in range(1, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)
            scores_j = scores[inds, j]
            boxes_j = boxes[inds, j * 4 : (j + 1) * 4]
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            boxlist_for_class = boxlist_nms(
                boxlist_for_class, self.nms
            )
            boxlist_for_class.add_field(
                # we use full_like to allow tracing with flexible shape
                "labels", torch.full_like(boxlist_for_class.bbox[:, 0], j, dtype=torch.int64)
            )
            result.append(boxlist_for_class)

        result = cat_boxlist(result)
        scores = result.get_field("scores")
        if self.onnx_export:
            keep = self.detections_to_keep_onnx(scores)
        else:
            keep = self.detections_to_keep(scores)
        result = result[keep]
        return result
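`detections_to_keep` and `detections_to_keep_onnx` are not included in this snippet. A purely hypothetical sketch of the eager-mode variant, assuming it mirrors the kthvalue-based per-image cap used in the other `filter_results` implementations above:

import torch

def detections_to_keep_sketch(scores, detections_per_img):
    # hypothetical helper, not DetectionHub's implementation
    num = scores.numel()
    if 0 < detections_per_img < num:
        thresh, _ = torch.kthvalue(scores.cpu(), num - detections_per_img + 1)
        return torch.nonzero(scores >= thresh.item()).squeeze(1)
    return torch.arange(num, device=scores.device)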
Code Example #30
File: inference.py  Project: youngfly11/LCMCG-PyTorch
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        apply the RPN results to the anchors generated for a single feature
        level of one batch (which may contain multiple images)
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """

        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        # reduce the number of candidate anchors before NMS
        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        # filter the proposal boxes by objectness score, keeping only the
        # high-objectness proposals for the following operations
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        # gather the regression outputs corresponding to the high-objectness anchors
        box_regression = box_regression[batch_idx, topk_idx]

        # reshape the per-image anchors so they can be gathered with the top-k indices
        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        # apply the regression on the anchor boxes
        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))
        proposals = proposals.view(N, -1, 4)

        result = []
        # collect the decoded boxes into BoxList form
        # and apply NMS to generate the final proposals
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
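`self.box_coder.decode` applies the standard Faster R-CNN box transform: the predicted (dx, dy, dw, dh) shift an anchor's centre and rescale its width and height. A minimal torch-only sketch of that transform in xyxy coordinates (it omits the per-coordinate weights, the exp clamping, and the one-pixel offset that full implementations usually add):

import torch

def decode_sketch(rel_codes, anchors):
    # rel_codes: (K, 4) regression outputs; anchors: (K, 4) boxes in xyxy format
    widths = anchors[:, 2] - anchors[:, 0]
    heights = anchors[:, 3] - anchors[:, 1]
    ctr_x = anchors[:, 0] + 0.5 * widths
    ctr_y = anchors[:, 1] + 0.5 * heights

    dx, dy, dw, dh = rel_codes.unbind(dim=1)
    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = torch.exp(dw) * widths
    pred_h = torch.exp(dh) * heights

    return torch.stack([pred_ctr_x - 0.5 * pred_w,
                        pred_ctr_y - 0.5 * pred_h,
                        pred_ctr_x + 0.5 * pred_w,
                        pred_ctr_y + 0.5 * pred_h], dim=1)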