Example 1
    def forward(self, x, boxes, transformer):
        """
        Arguments:
            x (Tensor): the mask logits
            boxes (list[BoxList]): bounding boxes that are used as
                reference, one for each image
            transformer: the decoder module handed to greedy_decode

        Returns:
            results (list[BoxList]): one BoxList for each image, containing
                the extra field word_probs
        """
        # mask_prob = x.sigmoid()

        # [B, T, C]
        # word_probs = x.permute(1, 0, 2).softmax(2)

        # select masks corresponding to the predicted classes
        # num_words = word_probs.shape[0]
        # labels = [bbox.get_field("labels") for bbox in boxes]
        # labels = torch.cat(labels)
        # index = torch.arange(num_words, device=word_probs.device)
        # word_probs = word_probs[index][:, None]

        boxes_per_image = [len(box) for box in boxes]
        word_probs = x.split(boxes_per_image, dim=0)

        results = []
        for x_feature, box in zip(word_probs, boxes):
            bbox = RBoxList(box.bbox, box.size, mode="xywha")

            predict_prob = greedy_decode(transformer, x_feature, self.src_mask, self.max_step)

            for field in box.fields():
                bbox.add_field(field, box.get_field(field))
            bbox.add_field("word_probs", predict_prob)
            results.append(bbox)

        return results
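The split on boxes_per_image is what fans the batch-stacked features back out to their source images. A minimal sketch of that behavior (hypothetical feature size and box counts, not from this repo):

    import torch

    features = torch.randn(7, 256)   # 7 boxes pooled over the whole batch
    boxes_per_image = [3, 4]         # image 0 owns 3 boxes, image 1 owns 4

    per_image = features.split(boxes_per_image, dim=0)
    assert per_image[0].shape == (3, 256) and per_image[1].shape == (4, 256)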
Example 2
    def __getitem__(self, index):

        if _DEBUG:
            index = 0

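        # round-robin over the sub-datasets: index % database_num picks the
        # dataset, and index // database_num (modulo that dataset's length)
        # picks the sample inside it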
        anno = self.annobase[index % self.database_num][
            int(index / self.database_num) %
            len(self.annobase[index % self.database_num])]
        im_path = anno['image']
        img = Image.open(im_path).convert("RGB")
        # print('im_path:', im_path)
        text, text_len = self.wk_converter.encode(anno['gt_words'])

        text_label_split = []

        off_cnt = 0

        mx_len = np.max(text_len)
        word_num = len(text_len)

        for i in range(len(text_len)):
            text_label_split.append(text[off_cnt:off_cnt + text_len[i]])
            off_cnt += text_len[i]

        padding_words = np.zeros((word_num, mx_len))
        for i in range(word_num):
            padding_words[i][:text_len[i]] = text_label_split[i]

        if anno["boxes"].shape[0] > 0:
            target = RBoxList(torch.from_numpy(anno["boxes"]),
                              (anno['width'], anno['height']),
                              mode="xywha")
            target.add_field("labels", torch.from_numpy(anno["gt_classes"]))
            target.add_field(
                "difficult",
                torch.tensor([0 for i in range(len(anno["gt_classes"]))]))
            target.add_field("words", torch.from_numpy(padding_words))
            target.add_field("word_length", torch.tensor(text_len))
            target = target.clip_to_image(remove_empty=True)
        else:
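            # no boxes in this image: fall back to returning the padded
            # word tensor itself as the target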
            target = torch.from_numpy(padding_words)

        if self.transforms is not None:
            img, target = self.transforms(img, target)
        if _DEBUG:
            self.show_boxes(img, target)

        return img, target, index
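The loop above left-aligns each variable-length encoded word into a fixed (word_num, mx_len) array, zero-padding the tail. A small self-contained sketch of the same padding, with hypothetical encodings:

    import numpy as np

    # flat encoding of three words of unequal length (hypothetical values)
    text = np.array([5, 2, 9, 4, 4, 7, 1, 3, 8])
    text_len = [3, 2, 4]

    padded = np.zeros((len(text_len), max(text_len)), dtype=np.int64)
    off = 0
    for i, n in enumerate(text_len):
        padded[i, :n] = text[off:off + n]
        off += n
    # padded[1] -> [4, 4, 0, 0]; the trailing zeros act as padding tokens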
Example 3
    def forward(self, x, boxes):
        """
        Arguments:
            x (Tensor): the mask logits
            boxes (list[BoxList]): bounding boxes that are used as
                reference, one for each image

        Returns:
            results (list[BoxList]): one BoxList for each image, containing
                the extra field word_probs
        """
        # mask_prob = x.sigmoid()

        # [T, B, C] -> [B, T, C]
        word_probs = x.permute(1, 0, 2).softmax(2)
        # print('word_probs:', np.unique(word_probs.data.cpu().numpy()))
        # select masks corresponding to the predicted classes
        num_words = word_probs.shape[0]
        labels = [bbox.get_field("labels") for bbox in boxes]
        labels = torch.cat(labels)
        index = torch.arange(num_words, device=labels.device)
        word_probs = word_probs[index][:, None]

        boxes_per_image = [len(box) for box in boxes]
        word_probs = word_probs.split(boxes_per_image, dim=0)

        results = []
        for prob, box in zip(word_probs, boxes):
            bbox = RBoxList(box.bbox, box.size, mode="xywha")
            # print('prob:', prob)
            for field in box.fields():
                bbox.add_field(field, box.get_field(field))
            bbox.add_field("word_probs", prob)
            results.append(bbox)

        return results
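The permute/softmax pair converts the time-major decoder output into per-image, per-step character distributions. A minimal sketch with hypothetical dimensions:

    import torch

    T, B, C = 32, 4, 95                # hypothetical decoder dimensions
    logits = torch.randn(T, B, C)      # time-major decoder output

    probs = logits.permute(1, 0, 2).softmax(2)   # [B, T, C]
    assert torch.allclose(probs.sum(-1), torch.ones(B, T))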
Example 4
    def __getitem__(self, index):

        # if _DEBUG:
        # index = 1

        # img_id = self.ids[index]

        im_path = self.annobase[index]['image']
        img = Image.open(im_path).convert("RGB")
        # im = cv2.imread(im_path)
        anno = self.annobase[index]
        target = RBoxList(torch.from_numpy(anno["boxes"]),
                          (anno['width'], anno['height']),
                          mode="xywha")
        target.add_field("labels", torch.from_numpy(anno["gt_classes"]))
        target.add_field(
            "difficult",
            torch.tensor([0 for i in range(len(anno["gt_classes"]))]))

        masks = [
            np.array(mask).reshape(1, -1).tolist() for mask in anno["polys"]
        ]
        # print('masks data:', masks)
        masks = SegmentationMask(masks, img.size)
        target.add_field("masks", masks)

        # target.add_field("masks", torch.from_numpy(np.array(anno["polys"]).reshape(-1)))

        target = target.clip_to_image(remove_empty=True)
        # print('target:', target, im_path)
        if self.transforms is not None:
            # off = int(self.num_samples * np.random.rand())
            # mix_index = (off + index) % self.num_samples
            # img_mix = Image.open(self.annobase[mix_index]['image']).convert("RGB")
            # img, target = self.mixup(img, img_mix, target)
            img, target = self.transforms(img, target)
        if _DEBUG:
            if target is not None:
                # print('target:', target, im_path)
                self.show_boxes(img, target)

        return img, target, index
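SegmentationMask here appears to follow maskrcnn-benchmark's polygon mode: each instance is a list of flat polygons, [x1, y1, x2, y2, ...], which is exactly what the reshape(1, -1).tolist() above produces. A tiny sketch of the conversion:

    import numpy as np

    # a 4-point polygon as (x, y) pairs, flattened into one list
    poly = np.array([[10, 10], [50, 10], [50, 30], [10, 30]])
    flat = poly.reshape(1, -1).tolist()
    assert flat == [[10, 10, 50, 10, 50, 30, 10, 30]]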
Example 5
    def __getitem__(self, idx):
        img_name = self.id_to_img_map[idx]
        img = utils.pil_load_img(os.path.join(self.root, img_name))
        anno = utils.read_anno(self.annotations, img_name)

        # filter out annotations flagged as illegible
        anno = [obj for obj in anno if not obj['illegibility']]

        # bounding boxes
        boxes = [
            utils.generate_rbox(obj["points"],
                                np.array(img).shape[:2]) for obj in anno
        ]
        boxes = torch.as_tensor(boxes).reshape(-1, 5)  # guard against no boxes
        target = RBoxList(boxes, img.size, mode="xywha")

        # classes
        classes = [1] * len(anno)
        classes = torch.tensor(classes)
        target.add_field("labels", classes)
        target.add_field("difficult",
                         torch.tensor([0 for i in range(len(classes))]))

        # masks
        masks = [obj["points"].reshape((1, -1)).tolist() for obj in anno]
        masks = SegmentationMask(masks, img.size)
        target.add_field("masks", masks)

        # target = target.clip_to_image(remove_empty=True)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        assert target is not None, "{} target is None.".format(img_name)

        return img, target, idx
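The reshape(-1, 5) is the no-box guard the inline comment mentions: it keeps the tensor two-dimensional even when the annotation list is empty. A minimal sketch:

    import torch

    boxes = []                                   # an image with no boxes
    t = torch.as_tensor(boxes).reshape(-1, 5)
    assert t.shape == (0, 5)                     # still a valid (0, 5) tensor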
Example 6
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 5, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = objectness.permute(0, 2, 3, 1).reshape(N, -1)
        objectness = objectness.sigmoid()
        box_regression = box_regression.view(N, -1, 5, H,
                                             W).permute(0, 3, 4, 1, 2)
        box_regression = box_regression.reshape(N, -1, 5)

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 5)[batch_idx, topk_idx]

        # print('concat_anchors:', concat_anchors.size(), concat_anchors[:, 2:4])

        proposals = self.box_coder.decode(box_regression.view(-1, 5),
                                          concat_anchors.view(-1, 5))

        proposals = proposals.view(N, -1, 5)
        # print('outsider:', proposals.size(), proposals[:, 2:4], 'box_regression:', box_regression)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = RBoxList(proposal, im_shape, mode="xywha")

            # print('before nms:', boxlist.bbox.size(), boxlist.bbox[:, 2:4])

            boxlist.add_field("objectness", score)
            # boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )

            # print('after nms:', boxlist.bbox.size(), boxlist.bbox[:, 2:4])

            result.append(boxlist)
        return result
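The batch_idx/topk_idx pair is a standard batched gather: a column of batch indices broadcasts against the (N, K) top-k indices, selecting each image's own top proposals. A minimal sketch with hypothetical sizes:

    import torch

    N, K = 2, 3
    scores = torch.rand(N, 10)
    regress = torch.rand(N, 10, 5)

    topv, topi = scores.topk(K, dim=1)       # both (N, K)
    batch_idx = torch.arange(N)[:, None]     # (N, 1), broadcasts with topi
    picked = regress[batch_idx, topi]        # (N, K, 5)
    assert picked.shape == (N, K, 5)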
Example 7
    def rotate_boxes(self, target, angle):
        # def rotate_gt_bbox(iminfo, gt_boxes, gt_classes, angle):
        gt_boxes = target.bbox
        if isinstance(target.bbox, torch.Tensor):
            gt_boxes = target.bbox.data.cpu().numpy()

        gt_labels = target.get_field("labels")
        gt_masks = [
            gt_polygon.polygons[0].numpy().reshape(-1, 2)
            for gt_polygon in target.get_field("masks")
        ]

        rotated_gt_boxes = np.empty((len(gt_boxes), 5), dtype=np.float32)

        iminfo = target.size

        im_height = iminfo[1]
        im_width = iminfo[0]
        origin_gt_boxes = gt_boxes

        # convert the rotation angle from degrees to radians
        cos_cita = np.cos(np.pi / 180 * angle)
        sin_cita = np.sin(np.pi / 180 * angle)

        # clockwise matrix
        rotation_matrix = np.array([[cos_cita, sin_cita],
                                    [-sin_cita, cos_cita]])

        # rotate rbox
        pts_ctr = origin_gt_boxes[:, 0:2]
        pts_ctr = pts_ctr - np.tile((im_width / 2, im_height / 2),
                                    (gt_boxes.shape[0], 1))
        # np.dot of (N, 2) with (2, 2) already yields (N, 2); the squeeze
        # that was here would raise on that shape
        pts_ctr = np.array(np.dot(pts_ctr, rotation_matrix), dtype=np.int16)
        pts_ctr = pts_ctr + np.tile((im_width / 2, im_height / 2),
                                    (gt_boxes.shape[0], 1))

        # rotate masks
        rotated_gt_masks = []
        for polygon in gt_masks:
            polygon = polygon - np.tile((im_width / 2, im_height / 2),
                                        (polygon.shape[0], 1))
            polygon = np.array(np.dot(polygon, rotation_matrix),
                               dtype=np.int16)
            polygon = polygon + np.tile(
                (im_width / 2, im_height / 2), (polygon.shape[0], 1))
            rotated_gt_masks.append(polygon.astype(np.int32))

        # print('pts_ctr:', pts_ctr, np.tile((im_width / 2, im_height / 2), (gt_boxes.shape[0], 1)).shape)
        origin_gt_boxes[:, 0:2] = pts_ctr
        # print origin_gt_boxes[:, 0:2]

        len_of_gt = len(origin_gt_boxes)

        # normalize each box angle into the range (-45, 135]
        for idx in range(len_of_gt):
            ori_angle = origin_gt_boxes[idx, 4]
            height = origin_gt_boxes[idx, 3]
            width = origin_gt_boxes[idx, 2]

            # step 1: normalize gt (-45,135)
            if width < height:
                ori_angle += 90
                width, height = height, width

            # step 2: rotate (-45,495)
            rotated_angle = ori_angle + angle

            # step 3: normalize rotated_angle into (-45, 135]
            while rotated_angle > 135:
                rotated_angle = rotated_angle - 180

            rotated_gt_boxes[idx, 0] = origin_gt_boxes[idx, 0]
            rotated_gt_boxes[idx, 1] = origin_gt_boxes[idx, 1]
            # rotated_gt_boxes[idx, 3] = height * self.gt_margin
            # rotated_gt_boxes[idx, 2] = width * self.gt_margin
            rotated_gt_boxes[idx, 3] = height
            rotated_gt_boxes[idx, 2] = width
            rotated_gt_boxes[idx, 4] = rotated_angle

        x_inbound = np.logical_and(rotated_gt_boxes[:, 0] >= 0,
                                   rotated_gt_boxes[:, 0] < im_width)
        y_inbound = np.logical_and(rotated_gt_boxes[:, 1] >= 0,
                                   rotated_gt_boxes[:, 1] < im_height)

        inbound = np.logical_and(x_inbound, y_inbound)

        inbound_th = torch.tensor(np.where(inbound)).long().view(-1)

        rotated_gt_boxes_th = torch.tensor(rotated_gt_boxes[inbound]).to(
            target.bbox.device)
        # print('gt_labels before:', gt_labels.size(), inbound_th.size())
        gt_labels = gt_labels[inbound_th]
        # print('gt_labels after:', gt_labels.size())
        difficulty = target.get_field("difficult")
        difficulty = difficulty[inbound_th]

        target_cpy = RBoxList(rotated_gt_boxes_th, iminfo, mode='xywha')
        target_cpy.add_field('difficult', difficulty)
        target_cpy.add_field('labels', gt_labels)

        # add mask field
        masks = [
            polygon.reshape((1, -1)).tolist() for polygon in rotated_gt_masks
        ]
        masks = SegmentationMask(masks, iminfo)
        target_cpy.add_field("masks", masks)

        # print('has word:', target.has_field("words"), target.get_field("words"))
        if target.has_field("words"):
            words = target.get_field("words")[inbound_th]
            target_cpy.add_field('words', words)
        if target.has_field("word_length"):
            word_length = target.get_field("word_length")[inbound_th]
            target_cpy.add_field('word_length', word_length)
        # print('rotated_gt_boxes_th:', origin_gt_boxes[0], target_cpy.bbox[0])
        # print('rotated_gt_boxes_th:', target.bbox.size(), gt_boxes.shape)

        if target_cpy.bbox.size(0) == 0:
            return None

        return target_cpy
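The three steps inside the loop can be read as a standalone angle normalization. This sketch restates them under a hypothetical helper name (degree convention as above); it is not code from the repo:

    def normalize_rbox_angle(width, height, box_angle, rot_angle):
        # step 1: make the long side the width, folding into (-45, 135)
        if width < height:
            box_angle += 90
            width, height = height, width
        # step 2: apply the image-level rotation
        box_angle += rot_angle
        # step 3: wrap back down into (-45, 135]
        while box_angle > 135:
            box_angle -= 180
        return width, height, box_angle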
Example 8
    def forward_for_single_feature_map(self, anchors, objectness_,
                                       box_regression_, scale):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 5, H, W

        """
        device = objectness_.device
        N, A, H, W = objectness_.shape

        width, height = anchors[0].size
        # scale = width / W

        # put in the same format as anchors
        objectness = objectness_.permute(0, 2, 3, 1)
        objectness = objectness.reshape(N, -1)
        # get the first 5 channels
        box_regression = box_regression_[:, :5].view(N, -1, 5, H,
                                                     W).permute(0, 3, 4, 1, 2)
        box_regression = box_regression.reshape(N, -1, 5)

        all_proposals = eastbox2rbox(box_regression, self.base_size, (H, W),
                                     scale)

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        proposals = all_proposals.view(N, -1, 5)[batch_idx, topk_idx]
        image_shapes = [box.size for box in anchors]

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):

            if not self.training:
                # print("score:", score.shape)
                # print("proposal:", proposal.shape)

                keep = score > self.score_thresh
                proposal = proposal[keep]
                score = score[keep]

                # print("score:", score.shape, score)
                # print("proposal:", proposal.shape)
            # print("score:", score)
            boxlist = RBoxList(proposal, im_shape, mode="xywha")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = self.nms_fn(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
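At inference time the score threshold prunes proposals before NMS; a single boolean mask keeps boxes and scores aligned. A minimal sketch with a hypothetical threshold:

    import torch

    scores = torch.tensor([0.9, 0.2, 0.7])
    boxes = torch.rand(3, 5)

    keep = scores > 0.5                # hypothetical score_thresh
    boxes, scores = boxes[keep], scores[keep]
    assert len(scores) == 2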
Example 9
    def gt_crop(self, target, crop_portion, x_factor, y_factor):

        gt_boxes = target.bbox
        if isinstance(target.bbox, torch.Tensor):
            gt_boxes = target.bbox.data.cpu().numpy()

        gt_classes = target.get_field("labels")

        ow, oh = target.size
        dh = int(oh * crop_portion)
        dw = int(ow * crop_portion)
        th = int(oh * (1 - crop_portion))
        tw = int(ow * (1 - crop_portion))

        y0 = int((dh - 1) * y_factor)
        x0 = int((dw - 1) * x_factor)

        gt_boxes[:, 0] -= x0
        gt_boxes[:, 1] -= y0

        #####################

        outer_bound = 0.2

        polys = rbox2poly(gt_boxes).reshape(-1, 4, 2)

        # (b, 4)
        x_poly = polys[..., 0]
        y_poly = polys[..., 1]

        # allow each corner a slack of outer_bound times the box width/height
        outer_bound_x = np.tile(outer_bound * gt_boxes[:, 2:3],
                                (1, x_poly.shape[-1]))
        outer_bound_y = np.tile(outer_bound * gt_boxes[:, 3:4],
                                (1, x_poly.shape[-1]))

        # (b, 4)
        x_check = np.logical_and(x_poly >= 0 - outer_bound_x,
                                 x_poly < tw + outer_bound_x)
        y_check = np.logical_and(y_poly >= 0 - outer_bound_y,
                                 y_poly < th + outer_bound_y)

        x_sum = np.sum(x_check.astype(np.int32), axis=-1)
        y_sum = np.sum(y_check.astype(np.int32), axis=-1)

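        # keep a box only when all 8 corner checks (4 in x, 4 in y) pass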
        inbound = (x_sum + y_sum) > 7.

        #####################

        # x_inbound = np.logical_and(gt_boxes[:, 0] >= 0, gt_boxes[:, 0] < tw)
        # y_inbound = np.logical_and(gt_boxes[:, 1] >= 0, gt_boxes[:, 1] < th)

        #####################

        iminfo = (tw, th)

        # inbound = np.logical_and(x_inbound, y_inbound)

        inbound_th = torch.tensor(np.where(inbound)).long().view(-1)

        crop_gt_boxes_th = torch.tensor(gt_boxes[inbound]).to(
            target.bbox.device)
        # print('gt_labels before:', gt_labels.size(), inbound_th.size())
        gt_labels = gt_classes[inbound_th].to(target.bbox.device)
        # print('gt_labels after:', gt_labels.size())
        difficulty = target.get_field("difficult")
        difficulty = difficulty[inbound_th].to(target.bbox.device)

        target_cpy = RBoxList(crop_gt_boxes_th, iminfo, mode='xywha')
        target_cpy.add_field('difficult', difficulty)
        target_cpy.add_field('labels', gt_labels)
        # print('has word:', target.has_field("words"), target.get_field("words"))
        if target.has_field("words"):
            words = target.get_field("words")[inbound_th]
            target_cpy.add_field('words', words)
        if target.has_field("word_length"):
            word_length = target.get_field("word_length")[inbound_th]
            target_cpy.add_field('word_length', word_length)
        if target.has_field("masks"):
            seg_masks = target.get_field("masks")[inbound_th]
            # print('seg_masks:', seg_masks)
            target_cpy.add_field('masks', seg_masks.shift(-x0, -y0, iminfo))

        # print('rotated_gt_boxes_th:', origin_gt_boxes[0], target_cpy.bbox[0])
        # print('rotated_gt_boxes_th:', target.bbox.size(), gt_boxes.shape)

        if target_cpy.bbox.size(0) == 0:
            # print("target has no boxes...")
            return None

        return target_cpy
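The corner test above keeps a box only when all eight per-corner checks pass, each with a slack proportional to the box size. A small numeric sketch (hypothetical crop size and corner coordinates):

    import numpy as np

    tw, th = 100, 80                           # hypothetical crop size
    slack = 4                                  # 0.2 * a hypothetical box side
    x_poly = np.array([[5, 40, 40, 5],         # box 0: inside the crop
                       [-90, -60, -60, -90]])  # box 1: far left of it
    y_poly = np.array([[5, 5, 30, 30],
                       [5, 5, 30, 30]])

    x_check = (x_poly >= -slack) & (x_poly < tw + slack)
    y_check = (y_poly >= -slack) & (y_poly < th + slack)
    inbound = (x_check.sum(-1) + y_check.sum(-1)) > 7
    # inbound -> [True, False]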