def encode(self, boxes, labels):
    '''Build per-anchor regression and classification targets.

    Each anchor is paired with the ground-truth box it overlaps the most
    (highest IoU), and the offset to that box is encoded with the
    Faster RCNN box coder:
      tx = (x - anchor_x) / anchor_w
      ty = (y - anchor_y) / anchor_h
      tw = log(w / anchor_w)
      th = log(h / anchor_h)

    Args:
      boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
      labels: (tensor) object class labels, sized [#obj,].

    Returns:
      loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
      cls_targets: (tensor) class label of the best-matching object plus one,
        sized [#anchors,].
    '''
    anchors_xyxy = self.anchor_boxes

    # For every anchor, the index of the object with the highest IoU.
    _, best_obj = box_iou(anchors_xyxy, boxes).max(1)

    # Switch both sides to (cx, cy, w, h) so offsets are relative to the
    # anchor centre and size.
    matched = change_box_order(boxes[best_obj], 'xyxy2xywh')
    anchors = change_box_order(anchors_xyxy, 'xyxy2xywh')
    anchor_wh = anchors[:, 2:]

    centre_offsets = (matched[:, :2] - anchors[:, :2]) / anchor_wh
    log_scales = torch.log(matched[:, 2:] / anchor_wh)
    loc_targets = torch.cat([centre_offsets, log_scales], 1)

    # NOTE: no IoU thresholding is applied here (it was disabled in this
    # version), so no anchor is marked as background (0) or ignored (-1);
    # every anchor receives the shifted label of its best match.
    cls_targets = labels[best_obj] + 1
    return loc_targets, cls_targets
def encode(self, boxes, labels):
    '''Encode target bounding boxes and class labels.

    SSD coding rules:
      tx = (x - anchor_x) / (variance[0]*anchor_w)
      ty = (y - anchor_y) / (variance[0]*anchor_h)
      tw = log(w / anchor_w) / variance[1]
      th = log(h / anchor_h) / variance[1]

    Matching is done in two stages: a greedy one-to-one assignment of each
    object to its best remaining default box, then every still-unmatched
    default box whose best IoU is >= 0.5 is assigned to that best object.

    Args:
      boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
      labels: (tensor) object class labels, sized [#obj,].

    Returns:
      loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
      cls_targets: (tensor) encoded class labels, sized [#anchors,];
        0 for unmatched default boxes, 1 + label for matched ones.

    Reference:
      https://github.com/chainer/chainercv/blob/master/chainercv/links/model/ssd/multibox_coder.py
    '''
    def argmax(x):
        '''Find the max value index (row & col) of a 2D tensor.'''
        v, i = x.max(0)
        # v.max(0)[1] is a 0-dim tensor on PyTorch >= 0.4; indexing it with
        # [0] raises, so convert it to a Python int instead.
        j = v.max(0)[1].item()
        return (i[j], j)

    default_boxes = self.default_boxes  # xywh
    default_boxes = change_box_order(default_boxes, 'xywh2xyxy')

    ious = box_iou(default_boxes, boxes)  # [#anchors, #obj]
    # -1 marks a default box that has no object assigned.
    index = torch.full((len(default_boxes),), -1, dtype=torch.long)

    # Stage 1: repeatedly take the globally best (box, object) pair and
    # remove its row and column, until only ~zero overlaps remain.
    masked_ious = ious.clone()
    while True:
        i, j = argmax(masked_ious)
        if masked_ious[i, j] < 1e-6:
            break
        index[i] = j
        masked_ious[i, :] = 0
        masked_ious[:, j] = 0

    # Stage 2: unmatched boxes with a good overlap take their best object.
    # Boolean-mask indexing keeps the result 2D even when a single box is
    # selected, so the per-row argmax is always well defined.
    mask = (index < 0) & (ious.max(1)[0] >= 0.5)
    if mask.any():
        index[mask] = ious[mask].max(1)[1]

    # Unmatched boxes temporarily borrow object 0; their class target is
    # reset to 0 below.
    boxes = boxes[index.clamp(min=0)]  # negative index not supported
    boxes = change_box_order(boxes, 'xyxy2xywh')
    default_boxes = change_box_order(default_boxes, 'xyxy2xywh')

    variances = (0.1, 0.2)
    loc_xy = (boxes[:, :2] - default_boxes[:, :2]) / default_boxes[:, 2:] / variances[0]
    loc_wh = torch.log(boxes[:, 2:] / default_boxes[:, 2:]) / variances[1]
    loc_targets = torch.cat([loc_xy, loc_wh], 1)

    # Shift labels by one so that 0 can mark unmatched default boxes.
    cls_targets = 1 + labels[index.clamp(min=0)]
    cls_targets[index < 0] = 0
    return loc_targets, cls_targets
def encode(self, boxes, labels):
    '''Match ground-truth objects to anchors and build training targets.

    Offsets are encoded relative to the matched anchor; this coder applies
    no variance scaling:
      tx = (x - anchor_x) / anchor_w
      ty = (y - anchor_y) / anchor_h
      tw = log(w / anchor_w)
      th = log(h / anchor_h)

    Args:
      boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj,4].
      labels: (tensor) object class labels, sized [#obj,].

    Returns:
      loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
      cls_targets: (tensor) encoded class labels, sized [#anchors,];
        0 for unmatched anchors, 1 + label for matched ones.

    Reference:
      https://github.com/chainer/chainercv/blob/master/chainercv/links/model/ssd/multibox_coder.py
    '''
    def argmax(x):
        '''Return the (row, col) position of the largest entry of a 2D tensor.'''
        col_best, row_of_col_best = x.max(0)
        col = col_best.max(0)[1].item()
        return (row_of_col_best[col], col)

    anchors_xyxy = self.anchor_boxes
    ious = box_iou(anchors_xyxy, boxes)  # [#anchors, #obj]

    # -1 means "no object assigned to this anchor".
    num_anchors = anchors_xyxy.size(0)
    index = torch.empty(num_anchors, dtype=torch.long).fill_(-1)

    # Greedy one-to-one matching: take the best remaining (anchor, object)
    # pair, then zero its row and column so neither is picked again.
    remaining = ious.clone()
    row, col = argmax(remaining)
    while not remaining[row, col] < 1e-6:
        index[row] = col
        remaining[row, :] = 0
        remaining[:, col] = 0
        row, col = argmax(remaining)

    # Anchors left unmatched but overlapping some object by at least 0.5
    # are assigned to the object they overlap the most.
    leftover = (index < 0) & (ious.max(1)[0] >= 0.5)
    if leftover.any():
        index[leftover] = ious[leftover].max(1)[1]

    # Negative indices are not supported, so unmatched anchors borrow
    # object 0 here and get their class target reset to 0 at the end.
    assigned = index.clamp(min=0)
    matched = change_box_order(boxes[assigned], 'xyxy2xywh')
    anchors = change_box_order(anchors_xyxy, 'xyxy2xywh')
    anchor_wh = anchors[:, 2:]

    centre_offsets = (matched[:, :2] - anchors[:, :2]) / anchor_wh
    log_scales = torch.log(matched[:, 2:] / anchor_wh)
    loc_targets = torch.cat([centre_offsets, log_scales], 1)

    cls_targets = 1 + labels[assigned]
    cls_targets[index < 0] = 0
    return loc_targets, cls_targets
def random_crop(img, boxes, labels, min_scale=0.3, max_aspect_ratio=2.):
    '''Randomly crop a PIL image.

    One candidate crop is searched for each minimum-IoU level
    (0, 0.1, 0.3, 0.5, 0.7, 0.9), with up to 100 attempts per level; the
    uncropped image is always a candidate too. One candidate is then picked
    at random. Only objects whose centre falls inside the chosen crop are
    kept, and their boxes are shifted and clamped to the crop.

    Args:
      img: (PIL.Image) image.
      boxes: (tensor) bounding boxes, sized [#obj, 4].
      labels: (tensor) bounding box labels, sized [#obj,].
      min_scale: (float) minimal image width/height scale.
      max_aspect_ratio: (float) maximum width/height aspect ratio.

    Returns:
      img: (PIL.Image) cropped image.
      boxes: (tensor) object boxes; a single all-zero box if no object
        centre lies inside the crop.
      labels: (tensor) object labels; a single 0 label in that same case.
    '''
    img = img.copy()
    boxes = boxes.clone()
    labels = labels.clone()

    imw, imh = img.size
    params = [(0, 0, imw, imh)]  # crop roi (x,y,w,h) out
    for min_iou in (0, 0.1, 0.3, 0.5, 0.7, 0.9):
        for _ in range(100):
            scale = random.uniform(min_scale, 1)
            aspect_ratio = random.uniform(
                max(1 / max_aspect_ratio, scale * scale),
                min(max_aspect_ratio, 1 / (scale * scale)))
            w = int(imw * scale * math.sqrt(aspect_ratio))
            h = int(imh * scale / math.sqrt(aspect_ratio))

            # A crop that spans the full width/height has exactly one valid
            # offset (0). random.randrange(0) would raise ValueError, so
            # that case is handled explicitly.
            x = random.randrange(imw - w) if w != imw else 0
            y = random.randrange(imh - h) if h != imh else 0

            roi = torch.tensor([[x, y, x + w, y + h]], dtype=torch.float)
            ious = box_iou(boxes, roi)
            # Accept the crop only if every object overlaps it enough.
            if ious.min() >= min_iou:
                params.append((x, y, w, h))
                break

    x, y, w, h = random.choice(params)
    img = img.crop((x, y, x + w, y + h))

    # Keep only the objects whose centre lies inside the crop.
    center = (boxes[:, :2] + boxes[:, 2:]) / 2
    mask = (center[:, 0] >= x) & (center[:, 0] <= x + w) \
        & (center[:, 1] >= y) & (center[:, 1] <= y + h)
    if mask.any():
        # Move the boxes into crop coordinates and clip them to the crop.
        boxes = boxes[mask] - torch.tensor([x, y, x, y], dtype=torch.float)
        boxes = box_clamp(boxes, 0, 0, w, h)
        labels = labels[mask]
    else:
        boxes = torch.tensor([[0, 0, 0, 0]], dtype=torch.float)
        labels = torch.tensor([0], dtype=torch.long)
    return img, boxes, labels
def nonlocal_matching(self):
    '''Greedily match decoded box predictions to boxes by IoU.

    NOTE(review): this function looks unfinished. It reads `loc_preds`,
    `anchor_boxes`, `boxes` and `argmax`, none of which are defined in this
    body, so they must come from an enclosing scope or the call raises
    NameError. It has no return statement, and the final `mask` is not used.
    '''
    # NOTE(review): the result is bound to `anchors_boxes`, but the code
    # below reads `anchor_boxes` -- possibly a typo; confirm which is meant.
    anchors_boxes = change_box_order(self.anchors_boxes, 'xyxy2xywh')

    # Decode predicted offsets into centre (xy) and size (wh), then into
    # corner-form boxes.
    xy = loc_preds[:, :2] * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
    wh = loc_preds[:, 2:].exp() * anchor_boxes[:, 2:]
    box_preds = torch.cat([xy - wh / 2, xy + wh / 2], 1)

    ious = box_iou(box_preds, boxes)
    # -1 marks a prediction with no box assigned.
    index = torch.empty(anchor_boxes.size(0), dtype=torch.long).fill_(-1)
    masked_ious = ious.clone()
    # pdb.set_trace()
    # Greedy one-to-one matching: take the best remaining pair, then zero
    # its row and column so neither is picked again.
    while True:
        i, j = argmax(masked_ious)
        if masked_ious[i, j] < 1e-6:
            break
        index[i] = j
        masked_ious[i, :] = 0
        masked_ious[:, j] = 0

    # True for the entries that were left unmatched by the loop above.
    mask = (index < 0)  # chose the activated bbox
def encode(self, boxes, labels):
    '''Encode target bounding boxes and class labels.

    SSD coding rules:
      tx = (x - anchor_x) / (variance[0]*anchor_w)
      ty = (y - anchor_y) / (variance[0]*anchor_h)
      tw = log(w / anchor_w) / variance[1]
      th = log(h / anchor_h) / variance[1]

    Args:
      boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
      labels: (tensor) object class labels, sized [#obj,].

    Returns:
      loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
      cls_targets: (tensor) encoded class labels, sized [#anchors,];
        0 for unmatched default boxes, 1 + label for matched ones.

    Reference:
      https://github.com/chainer/chainercv/blob/master/chainercv/links/model/ssd/multibox_coder.py
    '''
    def argmax(x):
        '''Find the max value index (row & col) of a 2D tensor.'''
        v, i = x.max(0)
        # v.max(0)[1] is a 0-dim tensor on PyTorch >= 0.4; indexing it with
        # [0] raises, so convert it to a Python int instead.
        j = v.max(0)[1].item()
        return (i[j], j)

    # NOTE(review): "default boxes" here seem to play the role that other
    # coders call "anchors" -- confirm the intended terminology.
    default_boxes = self.default_boxes  # xywh
    default_boxes = change_box_order(default_boxes, 'xywh2xyxy')

    ious = box_iou(default_boxes, boxes)  # [#anchors, #obj]
    # -1 marks a default box that has no ground truth box assigned.
    index = torch.full((len(default_boxes),), -1, dtype=torch.long)
    masked_ious = ious.clone()

    # Match ground truth boxes with default boxes based on IoU: repeatedly
    # take the best remaining pair and zero its row and column.
    while True:
        i, j = argmax(masked_ious)
        if masked_ious[i, j] < 1e-6:
            break
        index[i] = j
        masked_ious[i, :] = 0
        masked_ious[:, j] = 0

    # Assign ground truth boxes to unmatched default boxes if the overlap is
    # good enough.
    # Consequence: some ground truth boxes are matched with multiple default
    # boxes.
    # Clarification: each default box has at most one ground truth box
    # matched with it, and some default boxes stay unmatched.
    # Boolean-mask indexing keeps the selection 2D even when exactly one
    # default box qualifies, so the per-row argmax is always valid.
    mask = (index < 0) & (ious.max(1)[0] >= 0.5)
    if mask.any():
        index[mask] = ious[mask].max(1)[1]

    # Shape: (num_default_boxes, 4).
    # Each default box is replaced with the ground truth box it was matched
    # with. Unmatched default boxes (index -1) are given the first ground
    # truth box as a placeholder; they are marked as negative examples by
    # the class target of 0 assigned below.
    boxes = boxes[index.clamp(min=0)]  # negative index not supported
    boxes = change_box_order(boxes, 'xyxy2xywh')
    default_boxes = change_box_order(default_boxes, 'xyxy2xywh')

    variances = (0.1, 0.2)
    loc_xy = (boxes[:, :2] - default_boxes[:, :2]) / default_boxes[:, 2:] / variances[0]
    loc_wh = torch.log(boxes[:, 2:] / default_boxes[:, 2:]) / variances[1]
    loc_targets = torch.cat([loc_xy, loc_wh], 1)

    # Add one to the label ID of every matched default box to make room for
    # the "unassigned" class at 0. A negative ID such as -1 is not used for
    # that class because class-index losses like F.cross_entropy expect
    # non-negative targets.
    cls_targets = 1 + labels[index.clamp(min=0)]  # Positive examples

    # Assign a class ID of 0 to unmatched default boxes (negative examples).
    # NOTE(review): presumably consumed by the SSD loss -- see SSDLoss.
    cls_targets[index < 0] = 0  # Negative examples
    return loc_targets, cls_targets