def __call__(self, image, target, image_set='train'):
    """Normalize ``image`` and rescale the interaction boxes stored in ``target``.

    The image is standardized with ``self.mean`` / ``self.std``. Unless
    ``image_set`` is ``'test'``, each of the entries ``"human_boxes"``,
    ``"object_boxes"`` and ``"action_boxes"`` that is present in ``target`` is
    passed through ``box_xyxy_to_cxcywh`` and divided by ``[w, h, w, h]``,
    where ``(h, w)`` are the last two dimensions of the normalized image.

    Args:
        image: Image tensor accepted by ``torchvision``'s ``normalize``.
        target: Mapping of annotations, or ``None``.
        image_set: Split name. For ``'test'`` the boxes are left untouched.

    Returns:
        ``(image, target)``. ``target`` is ``None`` when ``None`` was passed,
        otherwise a shallow copy of the input mapping (the caller's mapping is
        never rebound).
    """
    image = torchvision.transforms.functional.normalize(
        image, mean=self.mean, std=self.std)
    if target is None:
        return image, None

    target = target.copy()
    if image_set == 'test':
        return image, target

    h, w = image.shape[-2:]
    # One loop replaces three copy-pasted stanzas; the keys are handled in the
    # same order as before.
    for key in ("human_boxes", "object_boxes", "action_boxes"):
        if key not in target:
            continue
        boxes = box_xyxy_to_cxcywh(target[key])
        # Build the divisor on the boxes' own device so the division also
        # works when the annotations are not on the CPU.
        # NOTE(review): assumes box_xyxy_to_cxcywh returns a torch.Tensor.
        scale = torch.tensor(
            [w, h, w, h], dtype=torch.float32, device=boxes.device)
        target[key] = boxes / scale
    return image, target
def prepare_targets_for_tracking(self, targets):
    """Split paired annotations into "current" and "previous" target lists.

    ``targets`` is iterated as a sequence of pairs. Inside each pair, the
    element at index 0 goes to the first returned list and the element at
    index 1 to the second. For every element the ground-truth boxes are
    divided by ``[w, h, w, h]`` (taken from ``image_size``) and then passed
    through ``box_xyxy_to_cxcywh``.

    Returns:
        ``(cur_targets, pre_targets)``: two lists of dicts with the keys
        ``"labels"``, ``"boxes"`` and ``"tracks"``.

    Raises:
        NotImplementedError: if a pair holds more than two elements.
    """
    cur_targets, pre_targets = [], []
    for pair in targets:
        for position, per_image in enumerate(pair):
            height, width = per_image.image_size
            whwh = torch.as_tensor(
                [width, height, width, height],
                dtype=torch.float,
                device=self.device,
            )
            entry = {
                "labels": per_image.gt_classes,
                "boxes": box_xyxy_to_cxcywh(per_image.gt_boxes.tensor / whwh),
                "tracks": per_image.gt_tracks,
            }
            # Only two slots per pair are supported.
            if position > 1:
                raise NotImplementedError
            destination = pre_targets if position == 1 else cur_targets
            destination.append(entry)
    return cur_targets, pre_targets
def __call__(self, image, target=None):
    """Standardize ``image`` and express ``target["boxes"]`` relative to it.

    The image is normalized with ``self.mean`` / ``self.std``. When a target
    is given, a shallow copy is made and, if it holds ``"boxes"``, they are
    passed through ``box_xyxy_to_cxcywh`` and divided by ``[w, h, w, h]``,
    with ``(h, w)`` read from the last two image dimensions.

    Returns:
        ``(image, target)``; ``target`` is ``None`` when none was supplied.
    """
    normalized = F.normalize(image, mean=self.mean, std=self.std)
    if target is None:
        return normalized, None

    annotations = target.copy()
    height, width = normalized.shape[-2:]
    if "boxes" in annotations:
        cxcywh = box_xyxy_to_cxcywh(annotations["boxes"])
        whwh = torch.tensor(
            [width, height, width, height], dtype=torch.float32)
        annotations["boxes"] = cxcywh / whwh
    return normalized, annotations
def prepare_targets(self, targets):
    """Convert per-image ground truth into a list of label/box dicts.

    For every element of ``targets`` the ground-truth boxes are divided by
    ``[w, h, w, h]`` (from ``image_size``, built on ``self.device``) and then
    passed through ``box_xyxy_to_cxcywh``.

    Returns:
        A list with one ``{"labels": ..., "boxes": ...}`` dict per input
        element, in input order.
    """

    def _convert(per_image):
        # Build the dict for a single image.
        height, width = per_image.image_size
        whwh = torch.as_tensor(
            [width, height, width, height],
            dtype=torch.float,
            device=self.device,
        )
        labels = per_image.gt_classes
        relative_xyxy = per_image.gt_boxes.tensor / whwh
        return {"labels": labels, "boxes": box_xyxy_to_cxcywh(relative_xyxy)}

    return [_convert(per_image) for per_image in targets]
def prepare_targets(self, targets):
    """Convert per-image ground truth into label/box (and optional mask) dicts.

    Boxes are divided by ``[w, h, w, h]`` (from ``image_size``, built on
    ``self.device``) and passed through ``box_xyxy_to_cxcywh``. When
    ``self.mask_on`` is truthy and the element has a ``gt_masks`` attribute,
    its polygons are rasterized with ``convert_coco_poly_to_mask`` at the
    image size and stored under ``"masks"``.

    Returns:
        A list with one dict per input element, in input order.
    """
    converted = []
    for per_image in targets:
        height, width = per_image.image_size
        whwh = torch.as_tensor(
            [width, height, width, height],
            dtype=torch.float,
            device=self.device,
        )
        labels = per_image.gt_classes
        cxcywh = box_xyxy_to_cxcywh(per_image.gt_boxes.tensor / whwh)
        entry = {"labels": labels, "boxes": cxcywh}
        converted.append(entry)

        # Masks are optional: skip unless enabled and actually present.
        if not (self.mask_on and hasattr(per_image, 'gt_masks')):
            continue
        polygons = per_image.gt_masks.polygons
        entry['masks'] = convert_coco_poly_to_mask(polygons, height, width)
    return converted
def __call__(self, image, target=None):
    """Normalize ``image``, rescale ``target["boxes"]`` and optionally tile channels.

    The image is standardized with ``self.mean`` / ``self.std``. With a
    target, a shallow copy is made and any ``"boxes"`` entry is passed through
    ``box_xyxy_to_cxcywh`` and divided by ``[w, h, w, h]``. Finally, when
    ``self.custom_backbone`` equals the string ``'False'``, the image is
    repeated three times along its first dimension.

    Returns:
        ``(image, target)``; ``(image, None)`` when no target was supplied.
    """
    image = F.normalize(image, mean=self.mean, std=self.std)
    # NOTE(review): this early exit happens before the channel repeat below,
    # so target-less calls never get the repeated image -- confirm intended.
    if target is None:
        return image, None

    target = target.copy()
    img_h, img_w = image.shape[-2:]
    if "boxes" in target:
        cxcywh = box_xyxy_to_cxcywh(target["boxes"])
        divisor = torch.tensor(
            [img_w, img_h, img_w, img_h], dtype=torch.float32)
        target["boxes"] = cxcywh / divisor

    # The flag is compared with the *string* 'False', not the boolean.
    uses_default_backbone = self.custom_backbone == 'False'
    if uses_default_backbone:
        image = image.repeat(3, 1, 1)
    return image, target
def __call__(self, image, target=None):
    """Normalize one image or a list of images and rescale ``target["boxes"]``.

    A list input is normalized element by element and the height/width are
    read from dimensions 1 and 2 of its first element; any other input is
    normalized directly. When ``target`` holds ``"boxes"``, they are passed
    through ``box_xyxy_to_cxcywh`` and divided by ``[w, h, w, h]``.

    Args:
        image: A single image tensor or a ``list`` (or list subclass) of them.
        target: Mapping of annotations, or ``None``.

    Returns:
        ``(image, target)``; ``image`` is a list when a list was passed in,
        and ``target`` is ``None`` or a shallow copy of the input mapping.
    """
    # isinstance (rather than an exact type comparison) also accepts list
    # subclasses.
    if isinstance(image, list):
        image = [
            F.normalize(frame, mean=self.mean, std=self.std)
            for frame in image
        ]
        h, w = image[0].shape[1:3]
    else:
        image = F.normalize(image, mean=self.mean, std=self.std)
        h, w = image.shape[1:3]

    if target is None:
        return image, None

    target = target.copy()
    if "boxes" in target:
        boxes = box_xyxy_to_cxcywh(target["boxes"])
        # Build the divisor on the boxes' own device so non-CPU annotations
        # do not trigger a device-mismatch error.
        # NOTE(review): assumes box_xyxy_to_cxcywh returns a torch.Tensor.
        scale = torch.tensor(
            [w, h, w, h], dtype=torch.float32, device=boxes.device)
        target["boxes"] = boxes / scale
    return image, target
def __call__(self, image, target=None):
    """Convert an image to a normalized CHW variable and rescale target boxes.

    The image is turned into a float32 array, divided by 255, standardized
    with ``self.mean`` / ``self.std``, transposed with axes ``(2, 0, 1)`` and
    wrapped with ``dg.to_variable``. Every value of ``target`` is wrapped with
    ``dg.to_variable`` as well; ``"boxes"``, if present, is passed through
    ``box_xyxy_to_cxcywh`` and divided by ``[w, h, w, h]``.

    Args:
        image: Anything ``np.array`` accepts.
            NOTE(review): presumably an HWC image -- confirm with callers.
        target: Mapping of annotations, or ``None``.

    Returns:
        ``(image, target)``. ``target`` is ``None`` when none was supplied,
        otherwise a NEW dict; the caller's mapping is left unmodified.
    """
    image = np.array(image).astype("float32")
    image = image / 255.
    image = (image - self.mean) / self.std
    image = image.transpose((2, 0, 1))
    image = dg.to_variable(image)
    if target is None:
        return image, None

    # Build a fresh dict instead of overwriting the caller's entries in place.
    target = {key: dg.to_variable(value) for key, value in target.items()}

    h, w = image.shape[-2:]
    if "boxes" in target:
        boxes = box_xyxy_to_cxcywh(target["boxes"])
        # The division is done in NumPy, then wrapped back into a variable.
        boxes = boxes.numpy() / np.array([w, h, w, h]).astype("float32")
        target["boxes"] = dg.to_variable(boxes)
    return image, target
def convert_anno_format(self, batched_inputs):
    """Build one target dict per batch element from its ``"instances"``.

    For each element, the ground-truth boxes are divided by ``[w, h, w, h]``
    (``(h, w)`` being the last two dimensions of ``bi["image"]``), passed
    through ``box_ops.box_xyxy_to_cxcywh`` and moved to ``self.device``.

    Args:
        batched_inputs: Iterable of dicts providing ``"image"``,
            ``"instances"``, ``"height"``, ``"width"`` and ``"image_id"``.

    Returns:
        A list of dicts with the keys ``"boxes"``, ``"area"``, ``"labels"``,
        optionally ``"masks"`` (only when the instances have ``gt_masks``;
        stored as-is, not moved to ``self.device``), ``"iscrowd"`` (all
        zeros), ``"orig_size"``, ``"size"`` and ``"image_id"``.
    """
    targets = []
    for bi in batched_inputs:
        instances = bi["instances"]
        h, w = bi["image"].shape[-2:]

        gt_xyxy = instances.gt_boxes.tensor
        # Create the divisor on the same device as the boxes; a CPU-only
        # divisor raises a device mismatch if the instances were already
        # moved off the CPU.
        whwh = torch.tensor(
            [w, h, w, h], dtype=torch.float32, device=gt_xyxy.device)

        target = {}
        boxes = box_ops.box_xyxy_to_cxcywh(gt_xyxy / whwh)
        target["boxes"] = boxes.to(self.device)
        target["area"] = instances.gt_boxes.area().to(self.device)
        target["labels"] = instances.gt_classes.to(self.device)
        if hasattr(instances, "gt_masks"):
            target["masks"] = instances.gt_masks
        target["iscrowd"] = torch.zeros_like(
            target["labels"], device=self.device)
        target["orig_size"] = torch.tensor(
            [bi["height"], bi["width"]], device=self.device)
        target["size"] = torch.tensor([h, w], device=self.device)
        target["image_id"] = torch.tensor(
            bi["image_id"], device=self.device)
        targets.append(target)
    return targets
def predict_boxes(self, boxes):
    """Run the model on ``self.preprocessed_images`` using ``boxes`` as queries.

    ``boxes`` are resized with ``resize_boxes`` from
    ``self.original_image_sizes[0]`` to the preprocessed image size, passed
    through ``box_xyxy_to_cxcywh``, divided by ``[w, h, w, h]`` and handed to
    the model as ``[{"boxes": boxes}]``. The predicted boxes are passed
    through ``box_cxcywh_to_xyxy`` and multiplied by the original image
    width/height.

    Args:
        boxes: Query boxes.
            NOTE(review): presumably xyxy in original-image pixels -- confirm.

    Returns:
        ``(boxes, scores)``, both detached and squeezed along dimension 0.
        ``scores`` is the maximum softmax probability; when the logits have
        more than two classes, the last class is excluded from the maximum.

    Raises:
        StopIteration: if the module has no parameters (the device is taken
            from the first parameter).
    """
    # Only the first parameter is needed, so avoid materializing all of them.
    device = next(self.parameters()).device

    # Transform the boxes into the model's normalized cxcywh frame.
    h, w = self.preprocessed_images.size()[-2:]
    boxes = boxes.to(device)
    boxes = resize_boxes(boxes, self.original_image_sizes[0], [h, w])
    boxes = box_xyxy_to_cxcywh(boxes)
    boxes = boxes / torch.tensor(
        [w, h, w, h], dtype=torch.float32, device=device)

    # Intermediate references are dropped as soon as they are no longer
    # needed, as in the original implementation.
    query_emb = [{"boxes": boxes}]
    del boxes
    outputs = self(self.preprocessed_images, query_emb)
    del query_emb
    out_logits = outputs["pred_logits"].detach()
    out_bbox = outputs["pred_boxes"].detach()
    del outputs

    prob = F.softmax(out_logits, -1)
    if out_logits.size()[-1] == 2:
        scores, _ = prob.max(-1)
    else:
        # NOTE(review): the last class is skipped here -- presumably a
        # "no object" slot; confirm against the model head.
        scores, _ = prob[..., :-1].max(-1)

    # Scale the predictions to the original image size.
    boxes = box_cxcywh_to_xyxy(out_bbox)
    img_h, img_w = self.original_image_sizes[0]
    img_h, img_w = torch.tensor([img_h]), torch.tensor([img_w])
    scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1).to(device)
    boxes = boxes * scale_fct
    del scale_fct, img_h, img_w
    del prob
    del out_logits
    return boxes.squeeze(0).detach(), scores.squeeze(0).detach()
def test_box_cxcywh_to_xyxy(self):
    """Round trip cxcywh -> xyxy -> cxcywh must reproduce the input boxes."""
    original = torch.rand(10, 4)
    as_xyxy = box_ops.box_cxcywh_to_xyxy(original)
    round_trip = box_ops.box_xyxy_to_cxcywh(as_xyxy)
    # Largest element-wise deviation introduced by the two conversions.
    worst_error = (original - round_trip).abs().max()
    self.assertLess(worst_error, 1e-5)
def __call__(self, image, target=None):
    """Normalize an image and, optionally, its bounding boxes.

    The image is standardized as ``X' = (X - mean) / std``. If a target is
    supplied, its ``"boxes"`` entry is converted from corner format
    ``[x_lt, y_lt, x_rd, y_rd]`` to
    ``[normalized_cx, normalized_cy, normalized_w, normalized_h]`` by calling
    ``box_xyxy_to_cxcywh`` and dividing by ``[w, h, w, h]``.

    Parameters
    ----------
    image : tensor of shape (channel, H, W)
        Image before normalization.
    target : dict, optional
        Annotation dict. Only ``"boxes"`` is read and rewritten here; every
        other entry is carried over unchanged. As described by the original
        documentation, the dict is expected to hold:

        - ``"boxes"``: float matrix of shape (target_bbox_number, 4) in
          ``[x_lt, y_lt, x_rd, y_rd]`` order
        - ``"labels"``: int vector of shape (target_bbox_number), class of
          each box
        - ``"image_id"``: int scalar
        - ``"area"``: float vector of shape (target_bbox_number), area of
          each box
        - ``"iscrowd"``: int vector of 0/1 of shape (target_bbox_number);
          0 means the segmentation is in polygon format, 1 means RLE format
        - ``"orig_size"``: ``[H, W]``
        - ``"size"``: ``[H, W]``

    Returns
    -------
    image : tensor of shape (channel, H, W)
        Image after normalization.
    target : dict or None
        ``None`` when no target was given; otherwise a shallow copy of the
        input whose ``"boxes"`` (if present) are in normalized
        ``[cx, cy, w, h]`` form.
    """
    # Standardize the tensor image with the configured mean and standard
    # deviation: X' = (X - mean) / std
    normalized = F.normalize(image, mean=self.mean, std=self.std)

    if target is None:
        return normalized, None

    annotations = target.copy()
    height, width = normalized.shape[-2:]

    # Map box coordinates from pixel units to the [0, 1] range.
    if "boxes" in annotations:
        cxcywh = box_xyxy_to_cxcywh(annotations["boxes"])
        image_whwh = torch.tensor(
            [width, height, width, height], dtype=torch.float32)
        annotations["boxes"] = cxcywh / image_whwh

    return normalized, annotations