Example #1
    def __getitem__(self, index):

        # if _DEBUG:
        #     index = 0

        # img_id = self.ids[index]

        im_path = self.annobase[index]['image']  # os.path.join(self.root, img_id + '.jpg')
        img = Image.open(im_path).convert("RGB")
        # im = cv2.imread(im_path)
        anno = self.annobase[index]
        # Build a rotated-box target (x, y, w, h, angle) in image coordinates.
        target = RBoxList(torch.from_numpy(anno["boxes"]),
                          (anno['width'], anno['height']), mode="xywha")
        target.add_field("labels", torch.from_numpy(anno["gt_classes"]))
        target.add_field("difficult", torch.zeros(len(anno["gt_classes"])))

        target = target.clip_to_image(remove_empty=True)
        # print('target:', target, im_path)
        if self.transforms is not None:
            # off = int(self.num_samples * np.random.rand())
            # mix_index = (off + index) % self.num_samples
            # img_mix = Image.open(self.annobase[mix_index]['image']).convert("RGB")
            # img, target = self.mixup(img, img_mix, target)
            img, target = self.transforms(img, target)
        if _DEBUG:
            if target is not None:
                self.show_boxes(img, target)

        return img, target, index
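
A minimal usage sketch for a dataset exposing the __getitem__ above. The dataset name and constructor arguments are assumptions; the custom collate_fn is needed because RBoxList targets cannot go through PyTorch's default collate.

from torch.utils.data import DataLoader

def collate_fn(batch):
    # Keep images and RBoxList targets as plain lists instead of stacking.
    images, targets, indices = zip(*batch)
    return list(images), list(targets), list(indices)

dataset = RotatedTextDataset(...)  # assumption: stands in for the real dataset class
loader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=collate_fn)
for images, targets, indices in loader:
    pass  # feed images/targets to the detector
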
Example #2
    def __getitem__(self, index):

        if _DEBUG:
            index = 0

        # Round-robin over the annotation databases, then index into the
        # chosen database modulo its length.
        db_id = index % self.database_num
        anno = self.annobase[db_id][(index // self.database_num) %
                                    len(self.annobase[db_id])]
        im_path = anno['image']
        img = Image.open(im_path).convert("RGB")
        # print('im_path:', im_path)
        # Encode the ground-truth words into one flat label sequence plus the
        # length of each word.
        text, text_len = self.wk_converter.encode(anno['gt_words'])

        # Split the flat label sequence back into per-word pieces, then
        # zero-pad every word to the length of the longest one.
        text_label_split = []
        off_cnt = 0
        mx_len = np.max(text_len)
        word_num = len(text_len)

        for i in range(word_num):
            text_label_split.append(text[off_cnt:off_cnt + text_len[i]])
            off_cnt += text_len[i]

        padding_words = np.zeros((word_num, mx_len))
        for i in range(word_num):
            padding_words[i, :text_len[i]] = text_label_split[i]

        if anno["boxes"].shape[0] > 0:
            target = RBoxList(torch.from_numpy(anno["boxes"]),
                              (anno['width'], anno['height']),
                              mode="xywha")
            target.add_field("labels", torch.from_numpy(anno["gt_classes"]))
            target.add_field("difficult",
                             torch.zeros(len(anno["gt_classes"]),
                                         dtype=torch.long))
            target.add_field("words", torch.from_numpy(padding_words))
            target.add_field("word_length", torch.tensor(text_len))
            target = target.clip_to_image(remove_empty=True)
        else:
            # No boxes in this image: pass only the padded word labels on.
            target = torch.from_numpy(padding_words)

        if self.transforms is not None:
            img, target = self.transforms(img, target)
        if _DEBUG:
            self.show_boxes(img, target)

        return img, target, index
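
The word-label handling above (encode to a flat sequence, split at per-word offsets, zero-pad to the longest word) can be reproduced standalone. The toy encoder below is an assumption standing in for self.wk_converter.encode:

import numpy as np

def toy_encode(words):
    # Assumed stand-in: returns one flat label sequence plus per-word
    # lengths; label 0 is reserved for padding.
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    flat = [alphabet.index(c) + 1 for w in words for c in w]
    lengths = [len(w) for w in words]
    return flat, lengths

text, text_len = toy_encode(["east", "rbox"])
mx_len = np.max(text_len)
padding_words = np.zeros((len(text_len), mx_len))
off_cnt = 0
for i, n in enumerate(text_len):
    padding_words[i, :n] = text[off_cnt:off_cnt + n]
    off_cnt += n
# padding_words is now a (word_num, mx_len) matrix, one zero-padded row per word.
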
Example #3
    def forward_for_single_feature_map(self, anchors, objectness_,
                                       box_regression_, scale):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 5, H, W

        """
        device = objectness_.device
        N, A, H, W = objectness_.shape

        width, height = anchors[0].size
        # scale = width / W

        # put in the same format as anchors
        objectness = objectness_.permute(0, 2, 3, 1)
        objectness = objectness.reshape(N, -1)
        # get the first 5 channels
        box_regression = box_regression_[:, :5].view(N, -1, 5, H,
                                                     W).permute(0, 3, 4, 1, 2)
        box_regression = box_regression.reshape(N, -1, 5)

        all_proposals = eastbox2rbox(box_regression, self.base_size, (H, W),
                                     scale)

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        # Broadcast the batch indices against the per-image top-k indices to
        # gather the selected proposals for every image at once.
        batch_idx = torch.arange(N, device=device)[:, None]
        proposals = all_proposals.view(N, -1, 5)[batch_idx, topk_idx]
        image_shapes = [box.size for box in anchors]

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):

            if not self.training:
                # At inference time, drop proposals below the score threshold.
                keep = score > self.score_thresh
                proposal = proposal[keep]
                score = score[keep]
            boxlist = RBoxList(proposal, im_shape, mode="xywha")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = self.nms_fn(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
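
The per-image top-k gather in the middle of this method relies on advanced indexing with a broadcast batch index; a minimal sketch with toy shapes in place of the real N, A, H, W:

import torch

N, num_anchors, k = 2, 6, 3
objectness = torch.rand(N, num_anchors)
all_proposals = torch.rand(N, num_anchors, 5)  # one xywha box per location

scores, topk_idx = objectness.topk(k, dim=1, sorted=True)  # both (N, k)
batch_idx = torch.arange(N)[:, None]            # (N, 1), broadcasts against (N, k)
proposals = all_proposals[batch_idx, topk_idx]  # (N, k, 5)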