def loss_boxes(self, outputs, targets, indices, num_boxes):
    """Compute the losses related to the bounding boxes: the L1 regression loss and the GIoU loss.

    The targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4].
    The target boxes are expected in format (center_x, center_y, w, h), normalized by the image size.
    """
    idx = self._get_src_permutation_idx(indices)
    src_boxes = outputs["pred_boxes"][idx]
    target_boxes = torch.cat([t["boxes"][i] for t, (_, i) in zip(targets, indices)], dim=0)

    loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction="none")

    losses = {}
    losses["loss_bbox"] = loss_bbox.sum() / num_boxes

    # torchvision's box_convert replaces the original box_cxcywh_to_xyxy helper.
    loss_giou = 1 - torch.diag(
        generalized_box_iou(
            box_convert(src_boxes, in_fmt="cxcywh", out_fmt="xyxy"),
            box_convert(target_boxes, in_fmt="cxcywh", out_fmt="xyxy")))
    losses["loss_giou"] = loss_giou.sum() / num_boxes
    return losses
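# A minimal, self-contained sketch (not from the original repo) of the same L1 + GIoU box loss
# on dummy, already-matched boxes; `num_boxes` is assumed to be the number of matched
# ground-truth boxes used for normalization.
import torch
import torch.nn.functional as F
from torchvision.ops import box_convert, generalized_box_iou

src_boxes = torch.tensor([[0.5, 0.5, 0.2, 0.2]])       # predicted, normalized cxcywh
target_boxes = torch.tensor([[0.5, 0.5, 0.25, 0.25]])  # ground truth, normalized cxcywh
num_boxes = 1

loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction="none").sum() / num_boxes
loss_giou = (1 - torch.diag(generalized_box_iou(
    box_convert(src_boxes, in_fmt="cxcywh", out_fmt="xyxy"),
    box_convert(target_boxes, in_fmt="cxcywh", out_fmt="xyxy")))).sum() / num_boxes
print(loss_bbox.item(), loss_giou.item())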
def test_bbox_same(self): box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float) exp_xyxy = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float) box_same = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xyxy") self.assertEqual(exp_xyxy.size(), torch.Size([4, 4])) self.assertEqual(exp_xyxy.dtype, box_tensor.dtype) assert torch.all(torch.eq(box_same, exp_xyxy)).item() box_same = ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="xywh") self.assertEqual(exp_xyxy.size(), torch.Size([4, 4])) self.assertEqual(exp_xyxy.dtype, box_tensor.dtype) assert torch.all(torch.eq(box_same, exp_xyxy)).item() box_same = ops.box_convert(box_tensor, in_fmt="cxcywh", out_fmt="cxcywh") self.assertEqual(exp_xyxy.size(), torch.Size([4, 4])) self.assertEqual(exp_xyxy.dtype, box_tensor.dtype) assert torch.all(torch.eq(box_same, exp_xyxy)).item()
def update(self, preds: List[Dict[str, Tensor]], target: List[Dict[str, Tensor]]) -> None:  # type: ignore
    """Add detections and ground truth to the metric.

    Args:
        preds: A list consisting of dictionaries each containing the key-values
            (each dictionary corresponds to a single image):

            - ``boxes``: ``torch.FloatTensor`` of shape [num_boxes, 4] containing `num_boxes` detection
              boxes of the format specified in the constructor. By default, this method expects
              [xmin, ymin, xmax, ymax] in absolute image coordinates.
            - ``scores``: ``torch.FloatTensor`` of shape [num_boxes] containing detection scores for the boxes.
            - ``labels``: ``torch.IntTensor`` of shape [num_boxes] containing 0-indexed detection classes
              for the boxes.

        target: A list consisting of dictionaries each containing the key-values
            (each dictionary corresponds to a single image):

            - ``boxes``: ``torch.FloatTensor`` of shape [num_boxes, 4] containing `num_boxes` ground truth
              boxes of the format specified in the constructor. By default, this method expects
              [xmin, ymin, xmax, ymax] in absolute image coordinates.
            - ``labels``: ``torch.IntTensor`` of shape [num_boxes] containing 1-indexed ground truth classes
              for the boxes.

    Raises:
        ValueError: If ``preds`` is not of type List[Dict[str, Tensor]]
        ValueError: If ``target`` is not of type List[Dict[str, Tensor]]
        ValueError: If ``preds`` and ``target`` are not of the same length
        ValueError: If any of ``preds.boxes``, ``preds.scores`` and ``preds.labels`` are not of the same length
        ValueError: If any of ``target.boxes`` and ``target.labels`` are not of the same length
        ValueError: If any box is not type float and of length 4
        ValueError: If any class is not type int and of length 1
        ValueError: If any score is not type float and of length 1
    """
    _input_validator(preds, target)

    for item in preds:
        boxes = _fix_empty_tensors(item["boxes"])
        boxes = box_convert(boxes, in_fmt=self.box_format, out_fmt="xyxy")
        self.detection_boxes.append(boxes)
        self.detection_labels.append(item["labels"])
        self.detection_scores.append(item["scores"])

    for item in target:
        boxes = _fix_empty_tensors(item["boxes"])
        boxes = box_convert(boxes, in_fmt=self.box_format, out_fmt="xyxy")
        self.groundtruth_boxes.append(boxes)
        self.groundtruth_labels.append(item["labels"])
def forward(self, outputs, targets):
    """Performs the matching.

    Params:
        outputs: This is a dict that contains at least these entries:
            "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
            "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates
        targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
            "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth
                objects in the target) containing the class labels
            "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates

    Returns:
        A list of size batch_size, containing tuples of (index_i, index_j) where:
            - index_i is the indices of the selected predictions (in order)
            - index_j is the indices of the corresponding selected targets (in order)
        For each batch element, it holds: len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
    """
    bs, num_queries = outputs["pred_logits"].shape[:2]

    # We flatten to compute the cost matrices in a batch
    out_prob = outputs["pred_logits"].flatten(0, 1).softmax(-1)  # [batch_size * num_queries, num_classes]
    out_bbox = outputs["pred_boxes"].flatten(0, 1)  # [batch_size * num_queries, 4]

    # Also concat the target labels and boxes
    tgt_ids = torch.cat([v["labels"] for v in targets])
    tgt_bbox = torch.cat([v["boxes"] for v in targets])

    # Compute the classification cost. Contrary to the loss, we don't use the NLL,
    # but approximate it by 1 - proba[target class].
    # The 1 is a constant that doesn't change the matching, so it can be omitted.
    cost_class = -out_prob[:, tgt_ids]

    # Compute the L1 cost between boxes
    cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1)

    # Compute the GIoU cost between boxes (box_convert replaces the original box_cxcywh_to_xyxy helper)
    cost_giou = -generalized_box_iou(
        box_convert(out_bbox, in_fmt="cxcywh", out_fmt="xyxy"),
        box_convert(tgt_bbox, in_fmt="cxcywh", out_fmt="xyxy"))

    # Final cost matrix
    C = (self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou)
    C = C.view(bs, num_queries, -1).cpu()

    sizes = [len(v["boxes"]) for v in targets]
    indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))]
    return [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64))
            for i, j in indices]
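# A minimal sketch (not from the original repo) of the Hungarian step above: build a toy cost
# matrix for 3 queries and 2 targets and let scipy.optimize.linear_sum_assignment pick the
# lowest-cost one-to-one matching, exactly as done per batch element in the matcher.
import torch
from scipy.optimize import linear_sum_assignment

cost = torch.tensor([[0.9, 0.1],
                     [0.4, 0.8],
                     [0.3, 0.7]])  # [num_queries, num_target_boxes]
row_ind, col_ind = linear_sum_assignment(cost.numpy())
# query 0 -> target 1 and query 2 -> target 0 give the lowest total cost (0.4)
print(row_ind, col_ind)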
def test_bbox_same(self): box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float) exp_xyxy = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float) assert exp_xyxy.size() == torch.Size([4, 4]) assert_equal(ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xyxy"), exp_xyxy) assert_equal(ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="xywh"), exp_xyxy) assert_equal(ops.box_convert(box_tensor, in_fmt="cxcywh", out_fmt="cxcywh"), exp_xyxy)
def test_bbox_convert_jit(self): box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float) scripted_fn = torch.jit.script(ops.box_convert) TOLERANCE = 1e-3 box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh") scripted_xywh = scripted_fn(box_tensor, 'xyxy', 'xywh') self.assertTrue((scripted_xywh - box_xywh).abs().max() < TOLERANCE) box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh") scripted_cxcywh = scripted_fn(box_tensor, 'xyxy', 'cxcywh') self.assertTrue((scripted_cxcywh - box_cxcywh).abs().max() < TOLERANCE)
def test_bbox_convert_jit(self): box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float) scripted_fn = torch.jit.script(ops.box_convert) TOLERANCE = 1e-3 box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh") scripted_xywh = scripted_fn(box_tensor, 'xyxy', 'xywh') torch.testing.assert_close(scripted_xywh, box_xywh, rtol=0.0, atol=TOLERANCE) box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh") scripted_cxcywh = scripted_fn(box_tensor, 'xyxy', 'cxcywh') torch.testing.assert_close(scripted_cxcywh, box_cxcywh, rtol=0.0, atol=TOLERANCE)
def test_bbox_xyxy_cxcywh(self):
    # Simple test converting boxes to cxcywh and back. Make sure they are the same.
    # box_tensor is in x1 y1 x2 y2 format.
    box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                               [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
    exp_cxcywh = torch.tensor([[50, 50, 100, 100], [0, 0, 0, 0],
                               [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float)

    assert exp_cxcywh.size() == torch.Size([4, 4])
    box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
    assert_equal(box_cxcywh, exp_cxcywh)

    # Reverse conversion
    box_xyxy = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xyxy")
    assert_equal(box_xyxy, box_tensor)
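# Worked example of the xyxy <-> cxcywh arithmetic exercised by the test above:
# for [x1, y1, x2, y2] = [10, 15, 30, 35], the center is ((10+30)/2, (15+35)/2) = (20, 25)
# and the size is (30-10, 35-15) = (20, 20), i.e. cxcywh = [20, 25, 20, 20].
import torch
from torchvision import ops

xyxy = torch.tensor([[10., 15., 30., 35.]])
cxcywh = ops.box_convert(xyxy, in_fmt="xyxy", out_fmt="cxcywh")
assert torch.equal(cxcywh, torch.tensor([[20., 25., 20., 20.]]))
assert torch.equal(ops.box_convert(cxcywh, in_fmt="cxcywh", out_fmt="xyxy"), xyxy)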
def test_bbox_xywh_cxcywh(self):
    # box_tensor is in x y w h format.
    box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                               [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)
    exp_cxcywh = torch.tensor([[50, 50, 100, 100], [0, 0, 0, 0],
                               [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float)

    assert exp_cxcywh.size() == torch.Size([4, 4])
    box_cxcywh = ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="cxcywh")
    assert_equal(box_cxcywh, exp_cxcywh)

    # Reverse conversion
    box_xywh = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xywh")
    assert_equal(box_xywh, box_tensor)
def __getitem__(self, i): image_path, image_meta, annotation = self.data_list[i] # read image image = np.array(Image.open(image_path)) # read meta image_meta = { 'image_id': image_meta['id'], 'height': image_meta['height'], 'width': image_meta['width'] } # read bboxes & labels bboxes = [] labels = [] for anno in annotation: bboxes.append(anno['bbox']) labels.append(anno['category_id']) # transform image, image_meta, bboxes, labels = self.transform( image=image, image_meta=image_meta, bboxes=bboxes, labels=labels) bboxes = box_convert(torch.tensor(bboxes), in_fmt='xywh', out_fmt=self.fmt) if self.norm: bboxes = bboxes.div(image.size(-1)).float() labels = torch.tensor(labels) return image, image_meta, bboxes, labels
def __getitem__(self, index): img, target = tools.load_img_target(self, index) cls_labels = [obj['category_id'] for obj in target] bbox_labels = [obj['bbox'] for obj in target] transformed = self.transform(image=img, bboxes=bbox_labels, class_labels=cls_labels) img = transformed['image'] cls_labels = torch.as_tensor(transformed['class_labels']) bbox_labels = cv_ops.box_convert(torch.as_tensor( transformed['bboxes']), in_fmt='xywh', out_fmt='xyxy') all_level_points, class_targets, distance_targets = self._encode_targets( cls_labels, bbox_labels) centerness_targets = self._encode_centerness_targets(distance_targets) return img, { 'points': all_level_points, 'class_targets': class_targets, 'distance_targets': distance_targets, 'centerness_targets': centerness_targets }
def forward(self, outputs, target_sizes):
    """Perform the computation.

    Parameters:
        outputs: raw outputs of the model
        target_sizes: tensor of dimension [batch_size x 2] containing the size of each image of the batch.
            For evaluation, this must be the original image size (before any data augmentation).
            For visualization, this should be the image size after data augmentation, but before padding.
    """
    out_logits, out_bbox = outputs['pred_logits'], outputs['pred_boxes']

    assert len(out_logits) == len(target_sizes)
    assert target_sizes.shape[1] == 2

    prob = F.softmax(out_logits, -1)
    scores, labels = prob[..., :-1].max(-1)

    # convert to [x0, y0, x1, y1] format (box_convert replaces the original box_ops.box_cxcywh_to_xyxy)
    boxes = box_convert(out_bbox, in_fmt="cxcywh", out_fmt="xyxy")
    # and from relative [0, 1] to absolute [0, height] coordinates
    img_h, img_w = target_sizes.unbind(1)
    scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1)
    boxes = boxes * scale_fct[:, None, :]

    results = [{'scores': s, 'labels': l, 'boxes': b} for s, l, b in zip(scores, labels, boxes)]
    return results
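# A minimal sketch (not from the original repo) of the rescaling step above: normalized cxcywh
# predictions are converted to xyxy and scaled to a hypothetical 480x640 (height x width)
# original image size.
import torch
from torchvision.ops import box_convert

out_bbox = torch.tensor([[[0.5, 0.5, 0.2, 0.4]]])  # [batch, queries, 4], normalized cxcywh
target_sizes = torch.tensor([[480, 640]])          # [batch, 2] as (height, width)

boxes = box_convert(out_bbox, in_fmt="cxcywh", out_fmt="xyxy")
img_h, img_w = target_sizes.unbind(1)
scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1)
boxes = boxes * scale_fct[:, None, :]
print(boxes)  # [[[256., 144., 384., 336.]]] in absolute pixel coordinates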
def build_reg_and_cls_targets(self, boxes):
    boxes_xyxy = ops.box_convert(boxes, 'cxcywh', 'xyxy')  # [B, 4]
    iou_dist = ops.box_iou(self.anchors_xyxy, boxes_xyxy)  # [A, B]
    closest_box_indices = torch.argmax(iou_dist, dim=1)  # [A,]
    target_boxes = boxes[closest_box_indices]  # [A, 4]

    # Both [A, 2]
    xy_targets = (target_boxes[..., :2] - self.anchors[..., :2]) / self.anchors[..., 2:]
    wh_targets = torch.log(target_boxes[..., 2:] / self.anchors[..., 2:])
    reg_target = torch.hstack((xy_targets, wh_targets))  # [A, 4]

    pos_selector = torch.any(iou_dist > self.pos_thresh, dim=1)  # [A,]
    neg_selector = torch.all(iou_dist < self.neg_thresh, dim=1)  # [A,]

    valid_pos_selector = pos_selector & self.valid_anchors_selector  # [A,]
    valid_neg_selector = neg_selector & self.valid_anchors_selector  # [A,]

    cls_target = torch.full((len(self.anchors),), INVALID_ANCHOR_LABEL, device=boxes.device)  # [A,]
    cls_target[valid_pos_selector] = POS_ANCHOR_LABEL  # [A,]
    cls_target[valid_neg_selector] = NEG_ANCHOR_LABEL  # [A,]

    return reg_target, cls_target
def test_bbox_xywh_cxcywh(self):
    # box_tensor is in x y w h format.
    box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                               [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)
    exp_cxcywh = torch.tensor([[50, 50, 100, 100], [0, 0, 0, 0],
                               [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float)

    box_cxcywh = ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="cxcywh")
    self.assertEqual(exp_cxcywh.size(), torch.Size([4, 4]))
    self.assertEqual(exp_cxcywh.dtype, box_tensor.dtype)
    assert torch.all(torch.eq(box_cxcywh, exp_cxcywh)).item()

    # Reverse conversion
    box_xywh = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xywh")
    self.assertEqual(box_xywh.size(), torch.Size([4, 4]))
    self.assertEqual(box_xywh.dtype, box_tensor.dtype)
    assert torch.all(torch.eq(box_xywh, box_tensor)).item()
def test_bbox_xyxy_xywh(self): # Simple test convert boxes to xywh and back. Make sure they are same. # box_tensor is in x1 y1 x2 y2 format. box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float) exp_xywh = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float) box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh") self.assertEqual(exp_xywh.size(), torch.Size([4, 4])) self.assertEqual(exp_xywh.dtype, box_tensor.dtype) assert torch.all(torch.eq(box_xywh, exp_xywh)).item() # Reverse conversion box_xyxy = ops.box_convert(box_xywh, in_fmt="xywh", out_fmt="xyxy") self.assertEqual(box_xyxy.size(), torch.Size([4, 4])) self.assertEqual(box_xyxy.dtype, box_tensor.dtype) assert torch.all(torch.eq(box_xyxy, box_tensor)).item()
def pre_predict(self, outputs: tuple, conf_thresh: float = 0.01, top_k: int = 200) -> tuple:
    """Convert the raw model outputs into prediction data.

    Args:
        outputs (tuple): Model outputs: (predicted offsets, predicted objectness, predicted class confidences).
        conf_thresh (float): Confidence threshold.
        top_k (int): Maximum number of detections to keep per class.

    Returns:
        tuple: (predicted bboxes, predicted confidences, predicted class ids)
            - predicted bboxes      : [N, 8732, 4] (coord fmt: [xmin, ymin, xmax, ymax], 0 ~ 1)
            - predicted confidences : [N, 8732]
            - predicted class ids   : [N, 8732]
    """
    out_locs, out_objs, out_confs = outputs
    out_locs[..., :2] = out_locs[..., :2].sigmoid()
    out_objs = out_objs.sigmoid()
    out_confs = out_confs.sigmoid()
    out_confs = out_confs * out_objs[..., None]

    # to CPU
    out_locs = out_locs.detach().cpu()
    out_objs = out_objs.detach().cpu()
    out_confs = out_confs.detach().cpu()

    pred_bboxes = []
    pred_scores = []
    pred_class_ids = []

    for locs, objs, confs in zip(out_locs, out_objs, out_confs):
        bboxes = []
        scores = []
        class_ids = []
        for class_id in range(confs.size(1)):
            pos_mask = (confs[:, class_id] > conf_thresh) * (
                confs[:, class_id].argsort(descending=True).argsort() < top_k)
            scores_ = confs[pos_mask, class_id]
            class_ids_ = torch.full_like(scores_, class_id + 1, dtype=torch.long)
            bboxes_ = self._calc_coord(locs[pos_mask], self.pboxes[pos_mask])
            bboxes_ = box_convert(bboxes_, in_fmt='xywh', out_fmt='xyxy')
            bboxes.append(bboxes_)
            scores.append(scores_)
            class_ids.append(class_ids_)

        pred_bboxes.append(torch.cat(bboxes))
        pred_scores.append(torch.cat(scores))
        pred_class_ids.append(torch.cat(class_ids))

    return pred_bboxes, pred_scores, pred_class_ids
def _decode_pred_logits(pred_logits: Tensor):
    """
    Decode the prediction logits from the PostProcess.
    """
    # Compute conf = box_conf x class_conf, with shape: num_anchors x num_classes
    scores = pred_logits[:, 5:] * pred_logits[:, 4:5]
    boxes = box_convert(pred_logits[:, :4], in_fmt="cxcywh", out_fmt="xyxy")
    return boxes, scores
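# A minimal sketch (not tied to the original model) of the decoding above: a single fake
# prediction row laid out as [cx, cy, w, h, objectness, class scores...], already sigmoid-ed.
import torch
from torchvision.ops import box_convert

pred_logits = torch.tensor([[0.5, 0.5, 0.2, 0.2, 0.9, 0.8, 0.1]])  # 2 classes
scores = pred_logits[:, 5:] * pred_logits[:, 4:5]  # class conf x objectness
boxes = box_convert(pred_logits[:, :4], in_fmt="cxcywh", out_fmt="xyxy")
print(boxes, scores)  # boxes -> [[0.4, 0.4, 0.6, 0.6]], scores -> [[0.72, 0.09]]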
def __getitem__(self, index):
    img, target = tools.load_img_target(self, index)
    img_info = self.coco.loadImgs(self.ids[index])[0]
    iw, ih = img_info['width'], img_info['height']

    class_labels, bbox_labels, mask_labels = [], [], []
    for obj in target:
        if not tools.is_correct_instance(obj, self.cat_idx_list, iw, ih):
            continue
        class_labels.append(self.cat_to_label_map[obj['category_id']])
        bbox_labels.append(obj['bbox'])
        # rle = coco_mask.frPyObjects(obj['segmentation'], ih, iw)
        # if obj['iscrowd'] == 0:
        #     rle = coco_mask.merge(rle)
        # mask = coco_mask.decode(rle)
        # mask_labels.append(mask)

    transformed = self.img_transform(image=img, bboxes=bbox_labels, class_labels=class_labels)
    # transformed = self.img_transform(image=img, masks=mask_labels, bboxes=bbox_labels, class_labels=class_labels)
    img = tools.TENSOR_TRANSFORM(transformed['image'])
    # mask_labels = transformed['masks']
    class_labels = transformed['class_labels']
    bbox_labels = transformed['bboxes']

    if len(bbox_labels) == 0:
        # For any instance with classification label 0 (background), only the classification loss is
        # computed, without mask, centerness and bbox losses. When there are no instances in an image,
        # the value of the added bbox therefore does not matter.
        mask_labels = [np.zeros((self.h, self.w))]
        bbox_labels = [[0., 0., 10., 10.]]
        class_labels = [0]

    class_labels = torch.as_tensor(class_labels)
    # instance_mask_labels = self._generate_instance_mask_labels(mask_labels, bbox_labels)
    # instance_mask_labels = torch.as_tensor(np.array(instance_mask_labels)).float()
    bbox_labels = cv_ops.box_convert(torch.as_tensor(bbox_labels, dtype=torch.float32),
                                     in_fmt='xywh', out_fmt='xyxy')
    bbox_labels = cv_ops.clip_boxes_to_image(bbox_labels, (ih, iw))

    class_targets, distance_targets = self._encode_targets(class_labels, bbox_labels, None)
    centerness_targets = tools.encode_centerness_targets(distance_targets)

    return img, self.points, {
        'class': class_targets,
        'distance': distance_targets,
        'centerness': centerness_targets
    }
def __call__(self, image, target=None): image = F.normalize(image, mean=self.mean, std=self.std) if target is None: return image, None target = target.copy() h, w = image.shape[-2:] if "boxes" in target: boxes = target["boxes"] boxes = box_convert(boxes, in_fmt="xyxy", out_fmt="cxcywh") boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32) target["boxes"] = boxes return image, target
def stats_dataset(dataset: ObjectDetectionDataSet, rcnn_transform: GeneralizedRCNNTransform = False): """ Iterates over the dataset and returns some stats. Can be useful to pick the right anchor box sizes. """ from torchvision.ops import box_convert, box_area stats = { 'image_height': [], 'image_width': [], 'image_mean': [], 'image_std': [], 'boxes_height': [], 'boxes_width': [], 'boxes_num': [], 'boxes_area': [] } for batch in dataset: # Batch x, y, x_name, y_name = batch['x'], batch['y'], batch['x_name'], batch[ 'y_name'] # Transform if rcnn_transform: x, y = rcnn_transform([x], [y]) x, y = x.tensors, y[0] # Image stats['image_height'].append(x.shape[-2]) stats['image_width'].append(x.shape[-1]) stats['image_mean'].append(x.mean().item()) stats['image_std'].append(x.std().item()) # Target wh = box_convert(y['boxes'], 'xyxy', 'xywh')[:, -2:] stats['boxes_height'].append(wh[:, -2]) stats['boxes_width'].append(wh[:, -1]) stats['boxes_num'].append(len(wh)) stats['boxes_area'].append(box_area(y['boxes'])) stats['image_height'] = torch.tensor(stats['image_height'], dtype=torch.float) stats['image_width'] = torch.tensor(stats['image_width'], dtype=torch.float) stats['image_mean'] = torch.tensor(stats['image_mean'], dtype=torch.float) stats['image_std'] = torch.tensor(stats['image_std'], dtype=torch.float) stats['boxes_height'] = torch.cat(stats['boxes_height']) stats['boxes_width'] = torch.cat(stats['boxes_width']) stats['boxes_area'] = torch.cat(stats['boxes_area']) stats['boxes_num'] = torch.tensor(stats['boxes_num'], dtype=torch.float) return stats
def __init__( self, anchors, img_width, img_height, pos_thresh=0.5, neg_thresh=0.2): self.pos_thresh = pos_thresh self.neg_thresh = neg_thresh self.anchors = anchors self.anchors_xyxy = ops.box_convert(anchors, 'cxcywh', 'xyxy') # [A, 4] self.valid_anchors_selector = ( (self.anchors_xyxy[:, 0] >= 0) & (self.anchors_xyxy[:, 1] >= 0) & (self.anchors_xyxy[:, 2] < img_width) & (self.anchors_xyxy[:, 3] < img_height))
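# A small sketch (with made-up anchors) of the validity mask computed above: anchors given as
# cxcywh are converted to xyxy and kept only if they lie fully inside a hypothetical 100x100 image.
import torch
from torchvision import ops

anchors = torch.tensor([[50., 50., 20., 20.],   # fully inside
                        [5., 5., 20., 20.]])    # spills over the top-left corner
anchors_xyxy = ops.box_convert(anchors, 'cxcywh', 'xyxy')
valid = ((anchors_xyxy[:, 0] >= 0) & (anchors_xyxy[:, 1] >= 0) &
         (anchors_xyxy[:, 2] < 100) & (anchors_xyxy[:, 3] < 100))
print(valid)  # tensor([ True, False])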
def update(self, img: ImageT) -> np.ndarray: self.model.eval() side_size = int(round(self.curr_instance_side_size)) bbox = BBox.build_from_center_and_size( self.target_bbox.center, np.asarray((side_size, side_size))) instance_img = center_crop_and_resize( img, bbox, (self.cfg.instance_size, self.cfg.instance_size)) if self.on_instance_img_extract: self.on_instance_img_extract(instance_img) instance_img = pil_to_tensor(instance_img).to(self.device) pred_reg, pred_cls = self.model.inference(instance_img, self.kernel_reg, self.kernel_cls) pred_reg = pred_reg.squeeze() pred_cls = pred_cls.squeeze() pred_cls = F.softmax(pred_cls, dim=1) pred_cls_max = pred_cls.argmax(dim=1) # TODO Store the range somewhere as it may be faster. scores = pred_cls[list(range(len(pred_cls))), pred_cls_max] scores[pred_cls_max == 0] = 0 # The 0-th position is the background. # TODO Think of modifying the regression predictions in place. xy_vals = pred_reg[:, :2] * self.anchors[:, 2:] + self.anchors[:, :2] wh_vals = torch.exp(pred_reg[:, 2:]) * self.anchors[:, 2:] boxes = torch.hstack((xy_vals, wh_vals)) boxes = ops.box_convert(boxes, 'cxcywh', 'xyxy') boxes = ops.clip_boxes_to_image( boxes, (self.cfg.instance_size, self.cfg.instance_size)) response = (1 - self.cfg.cosine_win_influence) * response + \ self.cfg.cosine_win_influence * self.cosine_win # The assumption is that the peak response value is in the center of the # response map. Thus, we compute the change with respect to the center # and convert it back to the pixel coordinates in the image. peak_response_pos = np.asarray( np.unravel_index(response.argmax(), response.shape)) # Update target scale. self.curr_instance_side_size *= new_scale # Change from [row, col] to [x, y] coordinates. self.target_bbox.shift(disp_in_image[::-1]) self.target_bbox.rescale(new_scale, new_scale) return self.target_bbox.as_xywh()
def visualize_test(root, select_from): dataset = datautils.GroZiTestSet(root) if select_from == 'min': idxset = dataset.least_annotated() print(f'There are {len(idxset)} least-annotated images') elif select_from == 'max': idxset = dataset.most_annotated() print(f'There are {len(idxset)} most-annotated images') else: idxset = range(len(dataset)) print(f'There are {len(dataset)} images') img, anns, boxes = dataset[random.choice(idxset)] print(f'Annotations in image: {len(anns)}') utils.show(img, groundtruth=tvops.box_convert(boxes, 'xyxy', 'xywh'), groundtruth_labels=anns)
def detect(conf_thresh, save, state_file, image_file): ''' Detect products and visualize the detections. ''' state_dict = torch.load(state_file)[ proposals_training.MODEL_STATE_DICT_KEY] model = proposals.gln().cuda() model.load_state_dict(state_dict) model.eval() generator = ProposalGenerator(model, confidence_threshold=conf_thresh) img = ttf.to_tensor(pil.Image.open(image_file)) with torch.no_grad(): detections = generator.generate_proposals(img) utils.show( img, utils.recall_tensor(tvops.box_convert(detections, 'xyxy', 'xywh'))) if save is not None: utils.save(img, save, groundtruth=utils.recall_tensor( tvops.box_convert(detections, 'xyxy', 'xywh')))
def matching_box(pbs: Tensor, gbs: Tensor, pb_format='cxcywh', gb_format='cxcywh', threshold=0.5) -> Tensor:
    """Match the prior (default) boxes to the ground truth boxes of a category.

    Args:
        pbs - Tensor[num_prior, 4], prior boxes in `pb_format`
        gbs - Tensor[num_obj, 4], ground truth boxes in `gb_format`
    Return:
        positive_map, positive_set
    """
    xy_pbs = box_convert(pbs, pb_format, 'xyxy')
    xy_gbs = box_convert(gbs, gb_format, 'xyxy')
    overlaps = box_iou(xy_pbs, xy_gbs)  # [N, M]

    # Two cases can leave an object without any prior box in the positive set:
    # 1. It is not the best match for any prior box.
    # 2. All of its overlaps are below the threshold.
    best_p4g_ind = torch.argmax(overlaps, dim=0)  # [M]
    assert best_p4g_ind.size(0) == gbs.size(0)

    best_g4p_overlap, best_g4p_ind = torch.max(overlaps, dim=1)
    # Force each ground truth to keep its best prior box (resolves case 1).
    best_g4p_ind[best_p4g_ind] = torch.LongTensor(range(best_p4g_ind.size(0))).to(best_g4p_ind.device)
    # Make sure those priors pass the threshold check (resolves case 2).
    best_g4p_overlap[best_p4g_ind] = 1.

    # Then match default boxes to any ground truth with jaccard overlap higher than a threshold (0.5).
    positive_map = best_g4p_overlap > threshold
    positive_set = best_g4p_ind[positive_map]
    return positive_map, positive_set
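# A minimal sketch (toy boxes, not from the original code) of the IoU matching above:
# two priors and one ground-truth box, all in normalized cxcywh, matched via box_iou on xyxy.
import torch
from torchvision.ops import box_convert, box_iou

priors = torch.tensor([[0.5, 0.5, 0.4, 0.4],
                       [0.2, 0.2, 0.1, 0.1]])
gt = torch.tensor([[0.5, 0.5, 0.5, 0.5]])
overlaps = box_iou(box_convert(priors, 'cxcywh', 'xyxy'),
                   box_convert(gt, 'cxcywh', 'xyxy'))  # [num_prior, num_obj]
best_gt_overlap, best_gt_ind = overlaps.max(dim=1)
positive_map = best_gt_overlap > 0.5
print(positive_map, best_gt_ind)  # first prior is positive (IoU = 0.64), second is not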
def __getitem__(self, idx: int): img = torch.rand(self.img_shape) boxes = torch.tensor( [self._random_bbox() for _ in range(self.num_boxes)], dtype=torch.float32) boxes = ops.clip_boxes_to_image(boxes, (self.img_shape[1], self.img_shape[2])) # No problems if we pass same in_fmt and out_fmt, it is covered by box_convert converted_boxes = ops.box_convert(boxes, in_fmt="xyxy", out_fmt=self.box_fmt) labels = torch.randint(self.num_classes, (self.num_boxes, ), dtype=torch.long) return img, {"boxes": converted_boxes, "labels": labels}
def calculate_metrics(pred: Tensor, gold: Tensor, mask: Optional[Tensor] = None) -> Dict[str, Tensor]: if mask is None: mask = pred.new_ones(pred.shape[:-1]).unsqueeze(-1) pred_center = box_convert(pred, in_fmt='xyxy', out_fmt='cxcywh')[..., :2] gold_center = box_convert(gold, in_fmt='xyxy', out_fmt='cxcywh')[..., :2] sum_ade = ((pred_center - gold_center)**2 * mask.float()).sum(dim=-1).sqrt().sum() sum_fde = ((pred_center[:, -1] - gold_center[:, -1])**2 * mask.float()[:, -1]).sum(dim=-1).sqrt().sum() num_ade = mask.float().sum() num_fde = mask.float()[:, -1].sum() sum_fiou = box_iou(pred[:, -1].reshape(-1, 4).contiguous(), gold[:, -1].reshape(-1, 4).contiguous()).diag() sum_fiou = sum_fiou[~sum_fiou.isnan()].sum() return { "sum_ade": sum_ade, "sum_fde": sum_fde, "num_ade": num_ade, "num_fde": num_fde, "sum_fiou": sum_fiou }
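# A small usage sketch for calculate_metrics above, assuming predictions and ground truth are
# xyxy box trajectories of shape [batch, time, 4]; the reported ADE/FDE are mean center
# displacements over all steps and at the final step, respectively.
import torch

pred = torch.tensor([[[0., 0., 10., 10.], [10., 10., 20., 20.]]])  # 1 track, 2 time steps
gold = torch.tensor([[[0., 0., 10., 10.], [12., 12., 22., 22.]]])
metrics = calculate_metrics(pred, gold)
ade = metrics["sum_ade"] / metrics["num_ade"]
fde = metrics["sum_fde"] / metrics["num_fde"]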
def normalize_boxes(boxes: Tensor, original_size: List[int]) -> Tensor: height = torch.tensor(original_size[0], dtype=torch.float32, device=boxes.device) width = torch.tensor(original_size[1], dtype=torch.float32, device=boxes.device) xmin, ymin, xmax, ymax = boxes.unbind(1) xmin = xmin / width xmax = xmax / width ymin = ymin / height ymax = ymax / height boxes = torch.stack((xmin, ymin, xmax, ymax), dim=1) # Convert xyxy to cxcywh return box_convert(boxes, in_fmt="xyxy", out_fmt="cxcywh")
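# A small usage sketch for normalize_boxes above: absolute xyxy boxes from a hypothetical
# 480x640 (height x width) image become normalized cxcywh.
import torch

boxes = torch.tensor([[64., 48., 192., 144.]])
normalized = normalize_boxes(boxes, [480, 640])
print(normalized)  # [[0.2, 0.2, 0.2, 0.2]]: center (128/640, 96/480), size (128/640, 96/480)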
def forward(self, x): b, _, h, w = x.shape # (b, c, h, w) => (b, h * w * num_anchors, coord + num_classes) x = x.permute(0, 2, 3, 1).contiguous().view(b, h * w * len(self.anchors), 5 + self.num_classes) # activate x = torch.cat([ torch.sigmoid(x[:, :, 0:2]), torch.exp(x[:, :, 2:4]), torch.sigmoid(x[:, :, 4:5]), torch.softmax(x[:, :, 5:], dim=2) ], dim=-1) # restore cx, cy = torch.meshgrid(torch.arange(w), torch.arange(h)) cx = cx.t().contiguous().view( -1, 1) # transpose because anchors to be organized in H x W order cy = cy.t().contiguous().view(-1, 1) centers = torch.cat([cx, cy], axis=1).float() anchors = torch.as_tensor(self.anchors) anchors[:, 0] = anchors[:, 0] * w anchors[:, 1] = anchors[:, 1] * h all_anchors = torch.cat( [ centers.view(-1, 1, 2).expand(-1, len(self.anchors), 2), anchors.view(1, -1, 2).expand(h * w, -1, 2) ], axis=2).view(-1, 4) # (h * w * num_anchors, [cx, cy, w, h]) all_anchors = all_anchors.to(x.device) x[:, :, 0:2] = x[:, :, 0:2] + all_anchors[:, 0:2] x[:, :, 2:4] = x[:, :, 2:4] * all_anchors[:, 2:4] x = torch.cat([ box_convert(x[:, :, 0:4], in_fmt='cxcywh', out_fmt='xyxy'), x[:, :, 4:] ], dim=-1) return x
def overlay_boxes(detections, path, time_consume, args): img = cv2.imread(path) if args.save_img else None for i, pred in enumerate(detections): # detections per image det_logs = '' save_path = Path(args.output_dir).joinpath(Path(path).name) txt_path = Path(args.output_dir).joinpath(Path(path).stem) if pred is not None and len(pred) > 0: # Rescale boxes from img_size to im0 size boxes, scores, labels = pred['boxes'].round( ), pred['scores'], pred['labels'] # Print results for c in labels.unique(): n = (labels == c).sum() # detections per class det_logs += '%g %ss, ' % (n, args.names[int(c)] ) # add to string # Write results for xyxy, conf, cls_name in zip(boxes, scores, labels): if args.save_txt: # Write to file # normalized cxcywh cxcywh = box_convert(xyxy, in_fmt="xyxy", out_fmt="cxcywh").tolist() with open(f'{txt_path}.txt', 'a') as f: f.write(('%g ' * 5 + '\n') % (cls_name, *cxcywh)) # label format if args.save_img: # Add bbox to image label = '%s %.2f' % (args.names[int(cls_name)], conf) plot_one_box( xyxy, img, label=label, color=args.colors[int(cls_name) % len(args.colors)], line_thickness=3, ) # Print inference time print('%sDone. (%.3fs)' % (det_logs, time_consume)) # Save results (image with detections) if args.save_img: cv2.imwrite(str(save_path), img) return (boxes.tolist(), scores.tolist(), labels.tolist())