Example #1
    def forward_for_single_feature_map(self, anchors, objectness, box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 5, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = objectness.permute(0, 2, 3, 1).reshape(N, -1)
        objectness = objectness.sigmoid()
        box_regression = box_regression.view(N, -1, 5, H, W).permute(0, 3, 4, 1, 2)
        box_regression = box_regression.reshape(N, -1, 5)

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 5)[batch_idx, topk_idx]

        proposals = self.box_coder.decode(
            box_regression.view(-1, 5), concat_anchors.view(-1, 5)
        )

        proposals = proposals.view(N, -1, 5)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
            boxlist = RBoxList(proposal, im_shape, mode="xywha")

            boxlist.add_field("objectness", score)
            # boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )

            result.append(boxlist)
        return result
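A minimal, self-contained sketch (assuming only PyTorch and small invented shapes) of the layout trick used above: scores are permuted so the anchor index varies fastest, matching the anchor ordering, and a broadcasted batch index gathers the per-image top-k regressions.

import torch

N, A, H, W = 2, 3, 4, 4
objectness = torch.randn(N, A, H, W)
box_regression = torch.randn(N, A * 5, H, W)

# (N, A, H, W) -> (N, H*W*A): the anchor index now varies fastest
scores = objectness.permute(0, 2, 3, 1).reshape(N, -1)
# (N, A*5, H, W) -> (N, H*W*A, 5): one (x, y, w, h, angle) tuple per anchor
deltas = box_regression.view(N, A, 5, H, W).permute(0, 3, 4, 1, 2).reshape(N, -1, 5)

top_scores, topk_idx = scores.topk(5, dim=1, sorted=True)
batch_idx = torch.arange(N)[:, None]      # (N, 1), broadcasts against topk_idx
top_deltas = deltas[batch_idx, topk_idx]  # (N, 5, 5): deltas of the top-5 anchors
assert top_deltas.shape == (N, 5, 5)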
Example #2
def shrink_proposals(boxes, cfg):
    assert isinstance(boxes, (list, tuple))
    assert isinstance(boxes[0], RBoxList)
    new_boxes = []
    for boxes_per_image in boxes:
        # clone so the caller's boxlist is not mutated in place
        proposals = boxes_per_image.bbox.clone()
        im_info = boxes_per_image.size
        # undo the width/height margin applied to the ground-truth boxes
        proposals[:, 2:4] /= cfg.MODEL.RRPN.GT_BOX_MARGIN
        new_boxes_per_image = RBoxList(proposals, im_info, mode="xywha")
        new_boxes_per_image._copy_extra_fields(boxes_per_image)
        new_boxes.append(new_boxes_per_image)
    return new_boxes
Example #3
    def __getitem__(self, idx):
        img_name = self.id_to_img_map[idx]
        img = utils.pil_load_img(os.path.join(self.root, img_name))
        anno = utils.read_anno(self.annotations, img_name)

        # filter out illegible annotations
        anno = [obj for obj in anno if not obj['illegibility']]

        # bounding boxes
        boxes = [utils.generate_rbox(obj["points"], np.array(img).shape[:2]) for obj in anno]
        boxes = torch.as_tensor(boxes).reshape(-1, 5)  # guard against no boxes
        target = RBoxList(boxes, img.size, mode="xywha")

        # classes
        classes = [1] * len(anno)
        classes = torch.tensor(classes)
        target.add_field("labels", classes)
        target.add_field("difficult", torch.tensor([0 for i in range(len(classes))]))

        # masks
        masks = [obj["points"].reshape((1, -1)).tolist() for obj in anno]
        masks = SegmentationMask(masks, img.size)
        target.add_field("masks", masks)

        # target = target.clip_to_image(remove_empty=True)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        assert target is not None, "{} target is None.".format(img_name)

        return img, target, idx
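A tiny sketch (PyTorch only) of the "guard against no boxes" pattern above: reshape(-1, 5) turns an empty annotation list into a well-formed (0, 5) box tensor, so downstream code can always rely on a 2-D tensor.

import torch

boxes = torch.as_tensor([]).reshape(-1, 5)
print(boxes.shape)  # torch.Size([0, 5])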
Example #4
    def __getitem__(self, index):

        anno = self.annobase[index]
        im_path = anno['image']
        img = Image.open(im_path).convert("RGB")
        target = RBoxList(torch.from_numpy(anno["boxes"]), (anno['width'], anno['height']), mode="xywha")
        target.add_field("labels", torch.from_numpy(anno["gt_classes"]))
        target.add_field("difficult", torch.Tensor([0 for i in range(len(anno["gt_classes"]))]))

        target = target.clip_to_image(remove_empty=True)
        if self.transforms is not None:
            # off = int(self.num_samples * np.random.rand())
            # mix_index = (off + index) % self.num_samples
            # img_mix = Image.open(self.annobase[mix_index]['image']).convert("RGB")
            # img, target = self.mixup(img, img_mix, target)
            img, target = self.transforms(img, target)
        if _DEBUG and target is not None:
            self.show_boxes(img, target)

        return img, target, index
Example #5
    def prepare_boxlist(self, boxes, scores, image_shape):
        """
        Returns an RBoxList from `boxes` and adds probability scores
        as an extra field.
        `boxes` has shape (#detections, 5 * #classes), where each row represents
        a list of predicted rotated bounding boxes for each of the object classes
        in the dataset (including the background class). The detections in each
        row originate from the same object proposal.
        `scores` has shape (#detections, #classes), where each row represents a
        list of object detection confidence scores for each of the object classes
        in the dataset (including the background class). `scores[i, j]` corresponds
        to the box at `boxes[i, j * 5:(j + 1) * 5]`.
        """
        boxes = boxes.reshape(-1, 5)
        scores = scores.reshape(-1)
        boxlist = RBoxList(boxes, image_shape, mode="xywha")
        boxlist.add_field("scores", scores)
        return boxlist
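A minimal sketch (PyTorch only, shapes invented for illustration) of the layout the docstring describes: flattening is class-major, so row i * num_classes + j of the flat boxes pairs with the score of detection i for class j.

import torch

num_det, num_classes = 4, 3
boxes = torch.randn(num_det, num_classes * 5)
scores = torch.rand(num_det, num_classes)

flat_boxes = boxes.reshape(-1, 5)   # (num_det * num_classes, 5)
flat_scores = scores.reshape(-1)    # (num_det * num_classes,)
i, j = 1, 2
assert torch.equal(flat_boxes[i * num_classes + j], boxes[i, j * 5:(j + 1) * 5])
assert flat_scores[i * num_classes + j] == scores[i, j]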
Example #6
    def forward(self, x, boxes):
        """
        Arguments:
            x (Tensor): the mask logits
            boxes (list[BoxList]): bounding boxes that are used as
                reference, one for each image

        Returns:
            results (list[BoxList]): one BoxList for each image, containing
                the extra field mask
        """
        mask_prob = x.sigmoid()

        # select masks corresponding to the predicted classes
        num_masks = x.shape[0]
        labels = [bbox.get_field("labels") for bbox in boxes]
        labels = torch.cat(labels)
        index = torch.arange(num_masks, device=labels.device)
        mask_prob = mask_prob[index, labels][:, None]

        boxes_per_image = [len(box) for box in boxes]
        mask_prob = mask_prob.split(boxes_per_image, dim=0)

        if self.masker:
            mask_prob = self.masker(mask_prob, boxes)

        results = []
        for prob, box in zip(mask_prob, boxes):
            bbox = RBoxList(box.bbox, box.size, mode="xywha")
            for field in box.fields():
                bbox.add_field(field, box.get_field(field))
            bbox.add_field("mask", prob)
            results.append(bbox)

        return results
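A self-contained sketch (PyTorch only, dummy shapes) of the per-class selection above: each detection keeps only the mask channel of its predicted label, and the flat batch is then regrouped per image.

import torch

num_masks, num_classes, M = 5, 3, 7
mask_logits = torch.randn(num_masks, num_classes, M, M)
labels = torch.tensor([2, 0, 1, 2, 1])          # predicted class per detection

index = torch.arange(num_masks)
mask_prob = mask_logits.sigmoid()[index, labels][:, None]  # (5, 1, 7, 7)

boxes_per_image = [2, 3]                        # detections in image 0 and 1
chunks = mask_prob.split(boxes_per_image, dim=0)
print([tuple(c.shape) for c in chunks])         # [(2, 1, 7, 7), (3, 1, 7, 7)]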
Example #7
    def filter_results(self, boxlist, num_classes, num_of_fwd_left):
        """Returns bounding-box detection results by thresholding on scores and
        applying non-maximum suppression (NMS).
        """
        # unwrap the boxlist to avoid additional overhead.
        # if we had multi-class NMS, we could perform this directly on the boxlist
        boxes = boxlist.bbox.reshape(-1, num_classes * 5)
        scores = boxlist.get_field("scores").reshape(-1, num_classes)

        device = scores.device
        result = []
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        inds_all = scores > self.score_thresh
        for j in range(1, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)
            scores_j = scores[inds, j]

            # print("scores_j:", np.unique(scores_j.data.cpu().numpy())[-10:])

            boxes_j = boxes[inds, j * 5 : (j + 1) * 5]
            boxlist_for_class = RBoxList(boxes_j, boxlist.size, mode="xywha")
            boxlist_for_class.add_field("scores", scores_j)

            if num_of_fwd_left == 0:
                boxlist_for_class.rescale(1. / self.shrink_margin)
                boxlist_for_class = self.nms_fn(
                    boxlist_for_class, self.nms, score_field="scores"
                )
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels", torch.full((num_labels,), j, dtype=torch.int64, device=device)
            )
            result.append(boxlist_for_class)

        result = cat_boxlist(result)
        number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.detections_per_img > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(), number_of_detections - self.detections_per_img + 1
            )
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
        return result
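A short sketch (PyTorch only) of the kthvalue trick above for capping detections over all classes: thresholding at the (n - k + 1)-th smallest score keeps exactly the k highest scores.

import torch

scores = torch.tensor([0.9, 0.2, 0.7, 0.4, 0.8])
detections_per_img = 3
n = scores.numel()

image_thresh, _ = torch.kthvalue(scores, n - detections_per_img + 1)
keep = torch.nonzero(scores >= image_thresh.item()).squeeze(1)
print(keep)  # tensor([0, 2, 4]): the three highest scores survive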
Example #8
    def forward(self, image_list, feature_maps):
        grid_sizes = [feature_map.shape[-2:] for feature_map in feature_maps]
        anchors_over_all_feature_maps = self.grid_anchors(grid_sizes)
        anchors = []
        for image_height, image_width in image_list.image_sizes:
            anchors_in_image = []
            for anchors_per_feature_map in anchors_over_all_feature_maps:
                boxlist = RBoxList(
                    anchors_per_feature_map, (image_width, image_height), mode="xywha"
                )
                self.add_visibility_to(boxlist)
                anchors_in_image.append(boxlist)
            anchors.append(anchors_in_image)
        return anchors
Example #9
    def forward(self, boxes, pred_maskiou, labels):
        num_masks = pred_maskiou.shape[0]
        index = torch.arange(num_masks, device=labels.device)
        maskious = pred_maskiou[index, labels]
        # regroup the flat maskiou vector per image; the original wrapped the
        # whole tensor in a one-element list, which only works for batch size 1
        boxes_per_image = [len(box) for box in boxes]
        maskious = maskious.split(boxes_per_image, dim=0)
        results = []
        for maskiou, box in zip(maskious, boxes):
            bbox = RBoxList(box.bbox, box.size, mode="xywha")
            for field in box.fields():
                bbox.add_field(field, box.get_field(field))
            bbox_scores = bbox.get_field("scores")
            mask_scores = bbox_scores * maskiou
            bbox.add_field("mask_scores", mask_scores)
            results.append(bbox)

        return results
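A sketch (PyTorch only, values invented) of the rescoring above: fancy indexing picks each detection's own-class mask IoU, which then scales the box score.

import torch

pred_maskiou = torch.tensor([[0.1, 0.8, 0.3],
                             [0.6, 0.2, 0.9]])
labels = torch.tensor([1, 2])
index = torch.arange(pred_maskiou.shape[0])

maskious = pred_maskiou[index, labels]   # tensor([0.8, 0.9])
bbox_scores = torch.tensor([0.95, 0.5])
mask_scores = bbox_scores * maskious     # tensor([0.7600, 0.4500])
print(mask_scores)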
Example #10
    def forward(self, x, boxes, transformer):
        """
        Arguments:
            x (Tensor): the word recognition features, one row per word box
            boxes (list[BoxList]): bounding boxes that are used as
                reference, one for each image

        Returns:
            results (list[BoxList]): one BoxList for each image, containing
                the extra field word_probs
        """

        boxes_per_image = [len(box) for box in boxes]
        word_probs = x.split(boxes_per_image, dim=0)

        results = []
        for x_feature, box in zip(word_probs, boxes):
            bbox = RBoxList(box.bbox, box.size, mode="xywha")

            predict_prob = greedy_decode(transformer, x_feature, self.src_mask, self.max_step)

            for field in box.fields():
                bbox.add_field(field, box.get_field(field))
            bbox.add_field("word_probs", predict_prob)
            results.append(bbox)

        return results
Example #11
    def forward(self, x, boxes):
        """
        Arguments:
            x (Tensor): the mask logits
            boxes (list[BoxList]): bounding boxes that are used as
                reference, one for ech image

        Returns:
            results (list[BoxList]): one BoxList for each image, containing
                the extra field mask
        """
        # mask_prob = x.sigmoid()

        # [T, B, C] -> [B, T, C]
        word_probs = x.permute(1, 0, 2).softmax(2)
        # print('word_probs:', np.unique(word_probs.data.cpu().numpy()))
        # select masks coresponding to the predicted classes
        num_words = word_probs.shape[0]
        labels = [bbox.get_field("labels") for bbox in boxes]
        labels = torch.cat(labels)
        index = torch.arange(num_words, device=labels.device)
        word_probs = word_probs[index][:, None]

        boxes_per_image = [len(box) for box in boxes]
        word_probs = word_probs.split(boxes_per_image, dim=0)

        results = []
        for prob, box in zip(word_probs, boxes):
            bbox = RBoxList(box.bbox, box.size, mode="xywha")
            for field in box.fields():
                bbox.add_field(field, box.get_field(field))
            bbox.add_field("word_probs", prob)
            results.append(bbox)

        return results
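A minimal sketch (PyTorch only) of the recognition-head reshaping above: time-major logits (T, B, C) become per-word, per-step character distributions (B, T, C) that sum to one over the charset.

import torch

T, B, C = 6, 2, 10   # decode steps, word instances, charset size
logits = torch.randn(T, B, C)
word_probs = logits.permute(1, 0, 2).softmax(2)
assert torch.allclose(word_probs.sum(2), torch.ones(B, T))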
Example #12
    def __getitem__(self, index):

        if _DEBUG:
            index = 0

        # pick a database round-robin by index, then cycle through its entries
        anno = self.annobase[index % self.database_num][
            int(index / self.database_num) %
            len(self.annobase[index % self.database_num])]
        im_path = anno['image']
        img = Image.open(im_path).convert("RGB")
        # print('im_path:', im_path)
        text, text_len = self.wk_converter.encode(anno['gt_words'])

        text_label_split = []

        off_cnt = 0

        mx_len = np.max(text_len)
        word_num = len(text_len)

        for i in range(len(text_len)):
            text_label_split.append(text[off_cnt:off_cnt + text_len[i]])
            off_cnt += text_len[i]

        padding_words = np.zeros((word_num, mx_len))
        for i in range(word_num):
            padding_words[i][:text_len[i]] = text_label_split[i]

        if anno["boxes"].shape[0] > 0:
            target = RBoxList(torch.from_numpy(anno["boxes"]),
                              (anno['width'], anno['height']),
                              mode="xywha")
            target.add_field("labels", torch.from_numpy(anno["gt_classes"]))
            target.add_field(
                "difficult",
                torch.tensor([0] * len(anno["gt_classes"])))
            target.add_field("words", torch.from_numpy(padding_words))
            target.add_field("word_length", torch.tensor(text_len))
            target = target.clip_to_image(remove_empty=True)
        else:
            target = torch.from_numpy(padding_words)

        if self.transforms is not None:
            img, target = self.transforms(img, target)
        if _DEBUG:
            self.show_boxes(img, target)

        return img, target, index
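A sketch (NumPy only, toy labels) of the word-label padding above: a flat label stream plus per-word lengths becomes a zero-padded (num_words, max_len) matrix.

import numpy as np

text = np.array([3, 7, 2, 9, 4, 4, 1])  # concatenated word labels
text_len = np.array([3, 2, 2])          # lengths of the three words

padding_words = np.zeros((len(text_len), text_len.max()), dtype=np.int64)
off = 0
for i, n in enumerate(text_len):
    padding_words[i, :n] = text[off:off + n]
    off += n
print(padding_words)
# [[3 7 2]
#  [9 4 0]
#  [4 1 0]]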
Example #13
    def rotate_boxes(self, target, angle):
        gt_boxes = target.bbox
        if isinstance(target.bbox, torch.Tensor):
            gt_boxes = target.bbox.data.cpu().numpy()

        gt_labels = target.get_field("labels")
        gt_masks = [
            gt_polygon.polygons[0].numpy().reshape(-1, 2)
            for gt_polygon in target.get_field("masks")
        ]

        rotated_gt_boxes = np.empty((len(gt_boxes), 5), dtype=np.float32)

        iminfo = target.size

        im_height = iminfo[1]
        im_width = iminfo[0]
        origin_gt_boxes = gt_boxes

        # angle in degrees -> radians for the rotation matrix
        cos_theta = np.cos(np.pi / 180 * angle)
        sin_theta = np.sin(np.pi / 180 * angle)

        # clockwise rotation matrix (the y axis points down in image coordinates)
        rotation_matrix = np.array([[cos_theta, sin_theta],
                                    [-sin_theta, cos_theta]])

        # rotate rbox
        pts_ctr = origin_gt_boxes[:, 0:2]
        pts_ctr = pts_ctr - np.tile((im_width / 2, im_height / 2),
                                    (gt_boxes.shape[0], 1))
        pts_ctr = np.array(np.dot(pts_ctr, rotation_matrix), dtype=np.int16)
        # np.dot already yields an (N, 2) array, so the original
        # np.squeeze(..., axis=-1) would raise and is dropped here
        pts_ctr = pts_ctr + np.tile(
            (im_width / 2, im_height / 2), (gt_boxes.shape[0], 1))

        # rotate masks
        rotated_gt_masks = []
        for polygon in gt_masks:
            polygon = polygon - np.tile((im_width / 2, im_height / 2),
                                        (polygon.shape[0], 1))
            polygon = np.array(np.dot(polygon, rotation_matrix),
                               dtype=np.int16)
            polygon = polygon + np.tile(
                (im_width / 2, im_height / 2), (polygon.shape[0], 1))
            rotated_gt_masks.append(polygon.astype(np.int32))

        origin_gt_boxes[:, 0:2] = pts_ctr

        len_of_gt = len(origin_gt_boxes)

        # rectify each angle into the range (-45, 135]
        for idx in range(len_of_gt):
            ori_angle = origin_gt_boxes[idx, 4]
            height = origin_gt_boxes[idx, 3]
            width = origin_gt_boxes[idx, 2]

            # step 1: normalize gt (-45,135)
            if width < height:
                ori_angle += 90
                width, height = height, width

            # step 2: rotate (-45,495)
            rotated_angle = ori_angle + angle

            # step 3: normalize rotated_angle into (-45, 135]
            while rotated_angle > 135:
                rotated_angle = rotated_angle - 180

            rotated_gt_boxes[idx, 0] = origin_gt_boxes[idx, 0]
            rotated_gt_boxes[idx, 1] = origin_gt_boxes[idx, 1]
            # rotated_gt_boxes[idx, 3] = height * self.gt_margin
            # rotated_gt_boxes[idx, 2] = width * self.gt_margin
            rotated_gt_boxes[idx, 3] = height
            rotated_gt_boxes[idx, 2] = width
            rotated_gt_boxes[idx, 4] = rotated_angle

        x_inbound = np.logical_and(rotated_gt_boxes[:, 0] >= 0,
                                   rotated_gt_boxes[:, 0] < im_width)
        y_inbound = np.logical_and(rotated_gt_boxes[:, 1] >= 0,
                                   rotated_gt_boxes[:, 1] < im_height)

        inbound = np.logical_and(x_inbound, y_inbound)

        inbound_th = torch.tensor(np.where(inbound)).long().view(-1)

        rotated_gt_boxes_th = torch.tensor(rotated_gt_boxes[inbound]).to(
            target.bbox.device)
        gt_labels = gt_labels[inbound_th]
        difficulty = target.get_field("difficult")
        difficulty = difficulty[inbound_th]

        target_cpy = RBoxList(rotated_gt_boxes_th, iminfo, mode='xywha')
        target_cpy.add_field('difficult', difficulty)
        target_cpy.add_field('labels', gt_labels)

        # add mask field
        masks = [
            polygon.reshape((1, -1)).tolist() for polygon in rotated_gt_masks
        ]
        masks = SegmentationMask(masks, iminfo)
        target_cpy.add_field("masks", masks)

        if target.has_field("words"):
            words = target.get_field("words")[inbound_th]
            target_cpy.add_field('words', words)
        if target.has_field("word_length"):
            word_length = target.get_field("word_length")[inbound_th]
            target_cpy.add_field('word_length', word_length)

        if target_cpy.bbox.size(0) == 0:
            # all boxes were rotated out of bounds
            return None

        return target_cpy
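A self-contained sketch (NumPy only, float coordinates instead of the int16 quantization above) of the center rotation: translate so the image center is the origin, apply the 2x2 clockwise matrix, translate back.

import numpy as np

def rotate_about_center(pts, angle_deg, im_width, im_height):
    # positive angle rotates clockwise in image coordinates (y axis down)
    c, s = np.cos(np.deg2rad(angle_deg)), np.sin(np.deg2rad(angle_deg))
    rotation_matrix = np.array([[c, s], [-s, c]])
    center = np.array([im_width / 2.0, im_height / 2.0])
    return (pts - center) @ rotation_matrix + center

pts = np.array([[150.0, 50.0]])  # a point right of the center (100, 50)
print(rotate_about_center(pts, 90, 200, 100))  # [[100. 100.]]: now below the center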
Example #14
    def forward_for_single_feature_map(self, anchors, objectness_,
                                       box_regression_, scale):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 5, H, W

        """
        device = objectness_.device
        N, A, H, W = objectness_.shape

        # put in the same format as anchors
        objectness = objectness_.permute(0, 2, 3, 1)
        objectness = objectness.reshape(N, -1)
        # get the first 5 channels
        box_regression = box_regression_[:, :5].view(N, -1, 5, H,
                                                     W).permute(0, 3, 4, 1, 2)
        box_regression = box_regression.reshape(N, -1, 5)

        all_proposals = eastbox2rbox(box_regression, self.base_size, (H, W),
                                     scale)

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        proposals = all_proposals.view(N, -1, 5)[batch_idx, topk_idx]
        image_shapes = [box.size for box in anchors]

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):

            if not self.training:
                # at inference time, drop proposals below the score threshold
                keep = score > self.score_thresh
                proposal = proposal[keep]
                score = score[keep]

            boxlist = RBoxList(proposal, im_shape, mode="xywha")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = self.nms_fn(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
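A tiny sketch (PyTorch only) of the inference-time filtering above: a boolean mask computed once drops proposals whose objectness is at or below score_thresh.

import torch

score = torch.tensor([0.9, 0.3, 0.7])
proposal = torch.randn(3, 5)
score_thresh = 0.5

keep = score > score_thresh
proposal, score = proposal[keep], score[keep]
print(proposal.shape, score)  # torch.Size([2, 5]) tensor([0.9000, 0.7000])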
Example #15
    def gt_crop(self, target, crop_portion, x_factor, y_factor):

        gt_boxes = target.bbox
        if isinstance(target.bbox, torch.Tensor):
            gt_boxes = target.bbox.data.cpu().numpy()

        gt_classes = target.get_field("labels")

        ow, oh = target.size
        # size of the cropped-away border and of the target (cropped) image
        dh = int(oh * crop_portion)
        dw = int(ow * crop_portion)
        th = int(oh * (1 - crop_portion))
        tw = int(ow * (1 - crop_portion))

        # crop origin, placed by the random factors in [0, 1)
        y0 = int((dh - 1) * y_factor)
        x0 = int((dw - 1) * x_factor)

        gt_boxes[:, 0] -= x0
        gt_boxes[:, 1] -= y0

        # keep a box only when its four corners all fall inside the crop,
        # allowing a small outer margin proportional to the box size
        outer_bound = 0.2

        polys = rbox2poly(gt_boxes).reshape(-1, 4, 2)

        # (b, 4)
        x_poly = polys[..., 0]
        y_poly = polys[..., 1]

        # bounding box with outer border on their heights and widths
        outer_bound_x = np.tile(outer_bound * gt_boxes[:, 2:3],
                                (1, x_poly.shape[-1]))
        outer_bound_y = np.tile(outer_bound * gt_boxes[:, 3:4],
                                (1, x_poly.shape[-1]))

        # (b, 4)
        x_check = np.logical_and(x_poly >= 0 - outer_bound_x,
                                 x_poly < tw + outer_bound_x)
        y_check = np.logical_and(y_poly >= 0 - outer_bound_y,
                                 y_poly < th + outer_bound_y)

        x_sum = np.sum(x_check.astype(np.int32), axis=-1)
        y_sum = np.sum(y_check.astype(np.int32), axis=-1)

        inbound = (x_sum + y_sum) > 7.

        iminfo = (tw, th)

        inbound_th = torch.tensor(np.where(inbound)).long().view(-1)

        crop_gt_boxes_th = torch.tensor(gt_boxes[inbound]).to(
            target.bbox.device)
        gt_labels = gt_classes[inbound_th].to(target.bbox.device)
        difficulty = target.get_field("difficult")
        difficulty = difficulty[inbound_th].to(target.bbox.device)

        target_cpy = RBoxList(crop_gt_boxes_th, iminfo, mode='xywha')
        target_cpy.add_field('difficult', difficulty)
        target_cpy.add_field('labels', gt_labels)
        if target.has_field("words"):
            words = target.get_field("words")[inbound_th]
            target_cpy.add_field('words', words)
        if target.has_field("word_length"):
            word_length = target.get_field("word_length")[inbound_th]
            target_cpy.add_field('word_length', word_length)
        if target.has_field("masks"):
            seg_masks = target.get_field("masks")[inbound_th]
            target_cpy.add_field('masks', seg_masks.shift(-x0, -y0, iminfo))

        if target_cpy.bbox.size(0) == 0:
            # all boxes were cropped out of bounds
            return None

        return target_cpy
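A sketch (NumPy only, toy boxes) of the corner-count test above: all eight coordinate checks (four in x, four in y) must pass, i.e. every corner lies inside the crop extended by a small margin.

import numpy as np

tw, th = 100, 80                             # crop width and height
x_poly = np.array([[10., 40., 40., 10.],     # box fully inside
                   [90., 130., 130., 90.]])  # box sticking out in x
y_poly = np.array([[10., 10., 30., 30.],
                   [10., 10., 30., 30.]])
outer_bound_x = np.full_like(x_poly, 5.0)    # margin per corner
outer_bound_y = np.full_like(y_poly, 5.0)

x_check = np.logical_and(x_poly >= -outer_bound_x, x_poly < tw + outer_bound_x)
y_check = np.logical_and(y_poly >= -outer_bound_y, y_poly < th + outer_bound_y)
inbound = (x_check.sum(-1) + y_check.sum(-1)) > 7
print(inbound)  # [ True False]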