def encode(target, anchor_boxes, params):
    """Build per-anchor training targets from a set of ground-truth boxes.

    Args:
        target: object exposing ``bbox`` (ground-truth boxes, compared
            directly against point-form anchors) and ``label`` tensors.
        anchor_boxes: anchor tensor; converted with ``box.point_form``
            before the IoU computation.
        params: settings object; ``top_anchors``, ``match_thresholds`` and
            ``location_loss`` are read.

    Returns:
        A ``struct`` with fields ``location`` (one row per anchor) and
        ``classification`` (one class target per anchor).
    """
    num_anchors = anchor_boxes.size(0)
    num_targets = target.bbox.size(0)

    if num_targets == 0:
        # Nothing to match against: all-zero locations and class targets.
        zero_location = target.bbox.new_zeros(num_anchors, 4)
        zero_classes = target.bbox.new_zeros(num_anchors, dtype=torch.long)
        return struct(location=zero_location, classification=zero_classes)

    overlaps = box.iou_matrix(box.point_form(anchor_boxes), target.bbox)

    if params.top_anchors > 0:
        # Double the IoU of the top-k anchors for each target so those
        # entries dominate the per-anchor maximum taken below. Note the
        # doubled values are also what encode_classes receives.
        best_vals, best_rows = overlaps.topk(params.top_anchors, dim=0)
        overlaps = overlaps.scatter(0, best_rows, best_vals * 2)

    # For every anchor: its highest (possibly boosted) IoU and which target
    # produced it.
    best_iou, best_target = overlaps.max(1)

    class_target = encode_classes(
        target.label,
        best_iou,
        best_target,
        match_thresholds=params.match_thresholds,
    )

    matched_boxes = target.bbox[best_target]
    # Only the "l1" loss uses anchor-relative encoding; any other setting
    # keeps the matched ground-truth boxes as they are.
    location = (
        encode_boxes(matched_boxes, anchor_boxes)
        if params.location_loss == "l1"
        else matched_boxes
    )

    return struct(location=location, classification=class_target)
def random_target():
    """Generate a random collection of boxes with random class labels.

    Uses ``dim`` and ``num_classes``, which are defined outside this
    function.

    Returns:
        Tuple ``(boxes, label)``: the output of ``box.point_form`` applied
        to between 1 and 50 random boxes, and a ``LongTensor`` holding one
        label in ``[0, num_classes)`` per box.
    """
    count = random.randint(1, 50)

    raw_boxes = [random_box(dim, num_classes) for _ in range(count)]
    boxes = box.point_form(torch.Tensor(raw_boxes))

    label = torch.LongTensor(count).random_(0, num_classes)
    return boxes, label
def decode(prediction, anchor_boxes):
    '''Decode an encoded box prediction against its anchor boxes.

    Args:
        prediction: (tensor) box prediction in encoded form; the last
            dimension is split into an offset part and a log-size part.
        anchor_boxes: (tensor) anchor boxes in extents form, with exactly
            the same shape as ``prediction``.

    Returns:
        boxes: (tensor) decoded boxes in point form.
    '''
    assert prediction.shape == anchor_boxes.shape

    pred_offset, pred_log_size = box.split(prediction)
    anchor_centre, anchor_extent = box.split(anchor_boxes)

    # Offsets are expressed in units of the anchor size; sizes are predicted
    # as the log of a scale factor applied to the anchor size.
    centre = pred_offset * anchor_extent + anchor_centre
    extent = pred_log_size.exp() * anchor_extent

    last_dim = centre.dim() - 1
    return box.point_form(torch.cat([centre, extent], last_dim))
def add_image_noise(image): nonlocal totals n = image.target._size centre, size = box.split(box.extents_form(image.target.bbox)) centre.add_(offset * size) if image.category == 'train': centre.add_(torch.randn(n, 2) * noise * size) size.mul_(torch.randn(n, 2) * noise + 1) noisy = box.point_form(torch.cat([centre, size], 1)) if image.category == 'train': totals += struct(iou=box.iou_matrix_matched( noisy, image.target.bbox).sum(), n=n) return image._extend(target=image.target._extend(bbox=noisy))
def crop_anchors(boxes, image_dim):
    """Clamp boxes to the image bounds.

    The boxes are converted to point form, clamped between ``(0, 0)`` and
    ``image_dim``, and converted back to extents form.
    """
    corners = box.point_form(boxes)
    clipped = clamp(corners, (0, 0), image_dim)
    return box.extents_form(clipped)