Example 1
 def __call__(self, image, target, image_set='train'):
     image = torchvision.transforms.functional.normalize(image,
                                                         mean=self.mean,
                                                         std=self.std)
     if target is None:
         return image, None
     target = target.copy()
     if image_set in ['test']:
         return image, target
     h, w = image.shape[-2:]
     if "human_boxes" in target:
         boxes = target["human_boxes"]
         boxes = box_xyxy_to_cxcywh(boxes)
         boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32)
         target["human_boxes"] = boxes
     if "object_boxes" in target:
         boxes = target["object_boxes"]
         boxes = box_xyxy_to_cxcywh(boxes)
         boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32)
         target["object_boxes"] = boxes
     if "action_boxes" in target:
         boxes = target["action_boxes"]
         boxes = box_xyxy_to_cxcywh(boxes)
         boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32)
         target["action_boxes"] = boxes
     return image, target
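
Every example on this page funnels through the same helper. The sketch below follows the conventional DETR-style definition from util/box_ops.py; it is a reference implementation assumed for illustration, not necessarily the exact code in each repository above.

    import torch

    def box_xyxy_to_cxcywh(x):
        # Split the last dimension into the four corner coordinates.
        x0, y0, x1, y1 = x.unbind(-1)
        # The center is the corner midpoint; width/height are the extents.
        b = [(x0 + x1) / 2, (y0 + y1) / 2,
             (x1 - x0), (y1 - y0)]
        return torch.stack(b, dim=-1)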
Example 2
 def prepare_targets_for_tracking(self, targets):
     cur_targets = []
     pre_targets = []
     for paired_targets in targets:
         for i, targets_per_image in enumerate(paired_targets):
             h, w = targets_per_image.image_size
             image_size_xyxy = torch.as_tensor([w, h, w, h],
                                               dtype=torch.float,
                                               device=self.device)
             gt_classes = targets_per_image.gt_classes
             gt_boxes = targets_per_image.gt_boxes.tensor / image_size_xyxy
             gt_boxes = box_xyxy_to_cxcywh(gt_boxes)
             gt_tracks = targets_per_image.gt_tracks
             if i == 0:
                 # First image of the pair: the current frame.
                 cur_targets.append({
                     "labels": gt_classes,
                     "boxes": gt_boxes,
                     "tracks": gt_tracks
                 })
             elif i == 1:
                 # Second image of the pair: the previous (reference) frame.
                 pre_targets.append({
                     "labels": gt_classes,
                     "boxes": gt_boxes,
                     "tracks": gt_tracks
                 })
             else:
                 raise NotImplementedError
     return cur_targets, pre_targets
Example 3
 def __call__(self, image, target=None):
     image = F.normalize(image, mean=self.mean, std=self.std)
     if target is None:
         return image, None
     target = target.copy()
     h, w = image.shape[-2:]
     if "boxes" in target:
         boxes = target["boxes"]
         boxes = box_xyxy_to_cxcywh(boxes)
         boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32)
         target["boxes"] = boxes
     return image, target
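
For context, a hedged usage sketch of a transform like the one above: the Normalize class name and the ImageNet statistics here are assumptions for illustration, not taken from this snippet's repository.

    import torch

    # Hypothetical wrapper class exposing the __call__ shown above.
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])

    image = torch.rand(3, 480, 640)  # (C, H, W), values in [0, 1]
    target = {"boxes": torch.tensor([[10., 20., 110., 220.]])}  # absolute xyxy

    image, target = normalize(image, target)
    # target["boxes"] is now [[0.09375, 0.25, 0.15625, 0.41667]]:
    # cx, cy, w, h, each normalized by the image width or height.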
Example 4
 def prepare_targets(self, targets):
     new_targets = []
     for targets_per_image in targets:
         h, w = targets_per_image.image_size
         image_size_xyxy = torch.as_tensor([w, h, w, h],
                                           dtype=torch.float,
                                           device=self.device)
         gt_classes = targets_per_image.gt_classes
         gt_boxes = targets_per_image.gt_boxes.tensor / image_size_xyxy
         gt_boxes = box_xyxy_to_cxcywh(gt_boxes)
         new_targets.append({"labels": gt_classes, "boxes": gt_boxes})
     return new_targets
Example 5
 def prepare_targets(self, targets):
     new_targets = []
     for targets_per_image in targets:
         h, w = targets_per_image.image_size
         image_size_xyxy = torch.as_tensor([w, h, w, h], dtype=torch.float, device=self.device)
         gt_classes = targets_per_image.gt_classes
         gt_boxes = targets_per_image.gt_boxes.tensor / image_size_xyxy
         gt_boxes = box_xyxy_to_cxcywh(gt_boxes)
         new_targets.append({"labels": gt_classes, "boxes": gt_boxes})
         if self.mask_on and hasattr(targets_per_image, 'gt_masks'):
             gt_masks = targets_per_image.gt_masks
             gt_masks = convert_coco_poly_to_mask(gt_masks.polygons, h, w)
             new_targets[-1].update({'masks': gt_masks})
     return new_targets
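
convert_coco_poly_to_mask is not shown in the snippet above. The sketch below follows the common DETR-style definition built on pycocotools; treat it as an assumed reference rather than the exact function this repository ships.

    import torch
    from pycocotools import mask as coco_mask

    def convert_coco_poly_to_mask(segmentations, height, width):
        masks = []
        for polygons in segmentations:
            # Rasterize the polygon list to RLE, then decode to a bitmap.
            rles = coco_mask.frPyObjects(polygons, height, width)
            mask = coco_mask.decode(rles)
            if len(mask.shape) < 3:
                mask = mask[..., None]
            # Collapse per-polygon channels into one binary mask per instance.
            masks.append(torch.as_tensor(mask, dtype=torch.uint8).any(dim=2))
        if masks:
            return torch.stack(masks, dim=0)
        return torch.zeros((0, height, width), dtype=torch.uint8)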
Example 6
    def __call__(self, image, target=None):
        image = F.normalize(image, mean=self.mean, std=self.std)
        if target is None:
            return image, None
        target = target.copy()
        h, w = image.shape[-2:]
        if "boxes" in target:
            boxes = target["boxes"]
            boxes = box_xyxy_to_cxcywh(boxes)
            boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32)
            target["boxes"] = boxes

        # The backbone flag is stored as a string in the config; when no custom
        # backbone is used, tile the (presumably single-channel) image to three
        # channels so it matches an RGB backbone's expected input.
        if self.custom_backbone == 'False':
            image = image.repeat(3, 1, 1)

        return image, target
Example 7
    def __call__(self, image, target=None):
        if isinstance(image, list):
            # A clip given as a list of frames: normalize each frame and read
            # H, W from the first one.
            image = [
                F.normalize(i, mean=self.mean, std=self.std) for i in image
            ]
            h, w = image[0].shape[1:3]
        else:
            image = F.normalize(image, mean=self.mean, std=self.std)
            h, w = image.shape[1:3]  # image is (C, H, W)
        if target is None:
            return image, None
        target = target.copy()

        if "boxes" in target:
            boxes = target["boxes"]
            boxes = box_xyxy_to_cxcywh(boxes)
            boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32)
            target["boxes"] = boxes
        return image, target
Example 8
    def __call__(self, image, target=None):
        # (H, W, C) array -> scaled, standardized, CHW float tensor wrapped
        # as a Paddle dygraph variable.
        image = np.array(image).astype("float32")
        image = image / 255.
        image = (image - self.mean) / self.std
        image = image.transpose((2, 0, 1))
        image = dg.to_variable(image)
        if target is None:
            return image, None

        for k in target.keys():
            target[k] = dg.to_variable(target[k])
        h, w = image.shape[-2:]
        if "boxes" in target:
            boxes = target["boxes"]
            boxes = box_xyxy_to_cxcywh(boxes)
            boxes = boxes.numpy() / np.array([w, h, w, h]).astype("float32")
            boxes = dg.to_variable(boxes)
            target["boxes"] = boxes

        return image, target
Example 9
    def convert_anno_format(self, batched_inputs):
        targets = []
        for bi in batched_inputs:
            target = {}
            h, w = bi["image"].shape[-2:]
            boxes = box_ops.box_xyxy_to_cxcywh(
                bi["instances"].gt_boxes.tensor /
                torch.tensor([w, h, w, h], dtype=torch.float32))
            target["boxes"] = boxes.to(self.device)
            target["area"] = bi["instances"].gt_boxes.area().to(self.device)
            target["labels"] = bi["instances"].gt_classes.to(self.device)
            if hasattr(bi["instances"], "gt_masks"):
                target["masks"] = bi["instances"].gt_masks
            target["iscrowd"] = torch.zeros_like(target["labels"],
                                                 device=self.device)
            target["orig_size"] = torch.tensor([bi["height"], bi["width"]],
                                               device=self.device)
            target["size"] = torch.tensor([h, w], device=self.device)
            target["image_id"] = torch.tensor(bi["image_id"],
                                              device=self.device)
            targets.append(target)

        return targets
Example 10
    def predict_boxes(self, boxes):
        device = list(self.parameters())[0].device

        # transform boxes
        h, w = self.preprocessed_images.size()[-2:]
        boxes = boxes.to(device)
        boxes = resize_boxes(boxes, self.original_image_sizes[0], [h, w])
        boxes = box_xyxy_to_cxcywh(boxes)
        boxes = boxes / torch.tensor(
            [w, h, w, h], dtype=torch.float32, device=device)
        query_emb = [{"boxes": boxes}]
        del boxes

        outputs = self(self.preprocessed_images, query_emb)
        del query_emb

        out_logits = outputs["pred_logits"].detach()
        out_bbox = outputs["pred_boxes"].detach()
        del outputs

        prob = F.softmax(out_logits, -1)
        if out_logits.size()[-1] == 2:
            # Binary head: keep the max over both classes.
            scores, _ = prob.max(-1)
        else:
            # Multi-class head: drop the trailing no-object class before the max.
            scores, _ = prob[..., :-1].max(-1)

        boxes = box_cxcywh_to_xyxy(out_bbox)
        img_h, img_w = self.original_image_sizes[0]
        img_h, img_w = torch.tensor([img_h]), torch.tensor([img_w])
        scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1).to(device)
        boxes = boxes * scale_fct

        del scale_fct, img_h, img_w
        del prob
        del out_logits

        return boxes.squeeze(0).detach(), scores.squeeze(0).detach()
Example 11
 def test_box_cxcywh_to_xyxy(self):
     t = torch.rand(10, 4)
     r = box_ops.box_xyxy_to_cxcywh(box_ops.box_cxcywh_to_xyxy(t))
     self.assertLess((t - r).abs().max(), 1e-5)
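
The round-trip test above exercises both directions of the conversion. The inverse helper, box_cxcywh_to_xyxy, is conventionally defined as follows (again the standard DETR-style form, assumed here):

    import torch

    def box_cxcywh_to_xyxy(x):
        x_c, y_c, w, h = x.unbind(-1)
        # Corners sit half a width/height away from the center.
        b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
             (x_c + 0.5 * w), (y_c + 0.5 * h)]
        return torch.stack(b, dim=-1)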
Example 12
    def __call__(self, image, target=None):
        """image(C, H, W) normalize function
        
        target(optional to normalize): boxes coordinates [x_lt, y_lt, x_rd, y_rd]
            normalize to [normalized_cx, normalized_cy, normalized_w, normalized_h]

        Parameters
        ----------
        image : {float, tensor(3-dim)} of shape (channel, H, W) before normalize

        target : (optional) {dict_list}
                    {
                        "boxes" : {float, matrix} shape of (target_bbox_number, [x_lt, y_lt, x_rd, y_rd]), target bbox coordinate set
                        "labels" : {int, vector} shape of (target_bbox_number), target bbox label class
                        "image_id" : {int, scalar}, image id
                        "area" : {float, vector} shape of (target_bbox_number), every bbox area
                        "iscrowd" : {int, vector}, value is (0 or 1), shape of (target_bbox_number), 0: segmentation is polygon format, 1 : segmentation is RLE format
                        "orig_size" : {int} of [H, W]
                        "size" : {int} of [H, W]
                    }

        Returns
        -------
        image : {float, tensor(3-dim)} of shape (channel, H, W) after normalize

        target : (optional) {dict_list}
                    {
                        "boxes" : {float, matrix} shape of (target_bbox_number, [normalized_cx, normalized_cy, normalized_w, normalized_h]), target bbox coordinate set
                        "labels" : {int, vector} shape of (target_bbox_number), target bbox label class
                        "image_id" : {int, scalar}, image id
                        "area" : {float, vector} shape of (target_bbox_number), every bbox area
                        "iscrowd" : {int, vector}, value is (0 or 1), shape of (target_bbox_number), 0: segmentation is polygon format, 1 : segmentation is RLE format
                        "orig_size" : {int} of [H, W]
                        "size" : {int} of [H, W]
                    }
        
        """
        # ----------------------
        # Normalize the tensor image channel-wise with mean and std:
        #
        #     X' = (X - mean) / std
        #
        image = F.normalize(image, mean=self.mean, std=self.std)
        # ----------------------

        if target is None:
            return image, None

        target = target.copy()
        h, w = image.shape[-2:]

        # ----------------------
        # mapping image boxes H,W to [0, 1]
        #
        if "boxes" in target:
            boxes = target["boxes"]
            boxes = box_xyxy_to_cxcywh(boxes)
            boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32)
            target["boxes"] = boxes
        # ----------------------

        return image, target