def encode(self, boxes, labels):
    '''Build per-anchor regression and classification targets.

    Every anchor is paired with the ground-truth box it overlaps most, and
    the offsets follow the Faster R-CNN parameterisation:
      tx = (x - anchor_x) / anchor_w
      ty = (y - anchor_y) / anchor_h
      tw = log(w / anchor_w)
      th = log(h / anchor_h)

    Args:
      boxes: (tensor) ground-truth boxes as (xmin,ymin,xmax,ymax), sized [#obj, 4].
      labels: (tensor) object class labels, sized [#obj,].

    Returns:
      loc_targets: (tensor) encoded box offsets, sized [#anchors,4].
      cls_targets: (tensor) class labels shifted up by one, sized [#anchors,].
    '''
    anchors_xyxy = self.anchor_boxes

    # Index of the best-overlapping object for each anchor; the IoU values
    # themselves are not needed because no thresholding is applied below.
    best_obj = box_iou(anchors_xyxy, boxes).max(1)[1]

    matched = change_box_order(boxes[best_obj], 'xyxy2xywh')
    anchors = change_box_order(anchors_xyxy, 'xyxy2xywh')
    anchor_wh = anchors[:, 2:]

    offsets_xy = (matched[:, :2] - anchors[:, :2]) / anchor_wh
    offsets_wh = torch.log(matched[:, 2:] / anchor_wh)
    loc_targets = torch.cat([offsets_xy, offsets_wh], 1)

    # IoU-based background (< 0.5) and ignore (0.4-0.5) marking is disabled
    # here, so every anchor keeps the (shifted) label of its best match.
    cls_targets = 1 + labels[best_obj]
    return loc_targets, cls_targets
def encode(self, boxes, labels):
    '''Encode target bounding boxes and class labels.

    SSD coding rules:
      tx = (x - anchor_x) / (variance[0]*anchor_w)
      ty = (y - anchor_y) / (variance[0]*anchor_h)
      tw = log(w / anchor_w) / variance[1]
      th = log(h / anchor_h) / variance[1]

    Matching is done in two passes:
      1. Greedy one-to-one matching: repeatedly take the largest remaining
         IoU and pair that default box with that object.
      2. Every default box still unmatched whose best IoU is >= 0.5 is
         assigned to its best-overlapping object.

    Args:
      boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
      labels: (tensor) object class labels, sized [#obj,].

    Returns:
      loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
      cls_targets: (tensor) encoded class labels, sized [#anchors,].
        0 marks an unmatched default box; matched ones carry label + 1.

    Reference:
      https://github.com/chainer/chainercv/blob/master/chainercv/links/model/ssd/multibox_coder.py
    '''
    def argmax(x):
        '''Find the (row, col) position of the max value of a 2D tensor.'''
        v, i = x.max(0)
        # The reduced index is a 0-dim tensor; it cannot be subscripted
        # with [0], so convert it to a Python int instead.
        j = v.max(0)[1].item()
        return (i[j], j)

    default_boxes = self.default_boxes  # xywh
    default_boxes = change_box_order(default_boxes, 'xywh2xyxy')

    ious = box_iou(default_boxes, boxes)  # [#anchors, #obj]
    # -1 means "no object assigned to this default box".
    index = torch.full((len(default_boxes),), -1, dtype=torch.long)

    masked_ious = ious.clone()
    while True:
        i, j = argmax(masked_ious)
        if masked_ious[i, j] < 1e-6:
            break
        index[i] = j
        # Remove the matched default box and object from further matching.
        masked_ious[i, :] = 0
        masked_ious[:, j] = 0

    mask = (index < 0) & (ious.max(1)[0] >= 0.5)
    if mask.any():
        # Boolean indexing keeps the result 2D even when a single row is
        # selected, so the per-row argmax is always an object index.
        index[mask] = ious[mask].max(1)[1]

    matched = index.clamp(min=0)  # negative index not supported
    boxes = boxes[matched]
    boxes = change_box_order(boxes, 'xyxy2xywh')
    default_boxes = change_box_order(default_boxes, 'xyxy2xywh')

    variances = (0.1, 0.2)
    loc_xy = (boxes[:, :2] - default_boxes[:, :2]) / default_boxes[:, 2:] / variances[0]
    loc_wh = torch.log(boxes[:, 2:] / default_boxes[:, 2:]) / variances[1]
    loc_targets = torch.cat([loc_xy, loc_wh], 1)

    cls_targets = 1 + labels[matched]
    cls_targets[index < 0] = 0
    return loc_targets, cls_targets
def encode(self, boxes, labels):
    '''Turn ground-truth boxes and labels into per-anchor training targets.

    Coding rules (no variance scaling is applied in this variant):
      tx = (x - anchor_x) / anchor_w
      ty = (y - anchor_y) / anchor_h
      tw = log(w / anchor_w)
      th = log(h / anchor_h)

    Args:
      boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj,4].
      labels: (tensor) object class labels, sized [#obj,].

    Returns:
      loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
      cls_targets: (tensor) encoded class labels, sized [#anchors,].
        0 marks an anchor without an object; matched anchors carry label + 1.

    Reference:
      https://github.com/chainer/chainercv/blob/master/chainercv/links/model/ssd/multibox_coder.py
    '''
    def argmax(x):
        '''Locate the (row, col) of the largest entry in a 2D tensor.'''
        col_max, row_of_col_max = x.max(0)
        col = col_max.max(0)[1].item()
        return (row_of_col_max[col], col)

    anchors_xyxy = self.anchor_boxes
    ious = box_iou(anchors_xyxy, boxes)  # [#anchors, #obj]

    # assigned[a] is the object index given to anchor a, or -1 for none.
    assigned = torch.full((anchors_xyxy.size(0),), -1, dtype=torch.long)

    # Greedy one-to-one pass: take the largest remaining IoU, record the
    # pair, then blank out that anchor's row and that object's column.
    remaining = ious.clone()
    row, col = argmax(remaining)
    while not remaining[row, col] < 1e-6:
        assigned[row] = col
        remaining[row, :] = 0
        remaining[:, col] = 0
        row, col = argmax(remaining)

    # Second pass: anchors left over but overlapping some object by at
    # least 0.5 are given their best-overlapping object.
    unassigned = assigned < 0
    overlaps_enough = ious.max(1)[0] >= 0.5
    fallback = unassigned & overlaps_enough
    if fallback.any():
        assigned[fallback] = ious[fallback].max(1)[1]

    # Negative indices are not supported, so unassigned anchors temporarily
    # point at object 0; their class target is reset to 0 below.
    safe_ids = assigned.clamp(min=0)
    matched = change_box_order(boxes[safe_ids], 'xyxy2xywh')
    anchors = change_box_order(anchors_xyxy, 'xyxy2xywh')
    anchor_wh = anchors[:, 2:]

    offsets_xy = (matched[:, :2] - anchors[:, :2]) / anchor_wh
    offsets_wh = torch.log(matched[:, 2:] / anchor_wh)
    loc_targets = torch.cat([offsets_xy, offsets_wh], 1)

    cls_targets = 1 + labels[safe_ids]
    cls_targets[assigned < 0] = 0
    return loc_targets, cls_targets
def decode(self, loc_preds, cls_preds, score_thresh=0.5, nms_thresh=0.45):
    '''Decode predicted loc/cls back to real box locations and class labels.

    Args:
      loc_preds: (tensor) predicted loc, sized [#anchors,4].
      cls_preds: (tensor) predicted conf, sized [#anchors,#classes].
      score_thresh: (float) threshold for object confidence score.
      nms_thresh: (float) threshold for box nms.

    Returns:
      boxes: (tensor) bbox locations as (xmin,ymin,xmax,ymax), sized [#obj,4].
      labels: (tensor) class labels, sized [#obj,].
      scores: (tensor) confidence of each kept box, sized [#obj,].
      All three are empty (#obj == 0) when no score exceeds score_thresh.
    '''
    # Follow the predictions' device instead of forcing CUDA, so decoding
    # also works on CPU-only machines.
    anchor_boxes = change_box_order(
        self.anchor_boxes, 'xyxy2xywh').to(loc_preds.device)

    # Invert the encoding: offsets are relative to the anchor centre and
    # scaled by the anchor size; width/height are predicted in log space.
    xy = loc_preds[:, :2] * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
    wh = loc_preds[:, 2:].exp() * anchor_boxes[:, 2:]
    box_preds = torch.cat([xy - wh / 2, xy + wh / 2], 1)

    boxes = []
    labels = []
    scores = []
    num_classes = cls_preds.size(1)
    for i in range(num_classes - 1):
        score = cls_preds[:, i + 1]  # class i corresponds to (i+1) column
        mask = score > score_thresh
        if not mask.any():
            continue
        box = box_preds[mask]
        score = score[mask]

        keep = box_nms(box, score, nms_thresh)
        boxes.append(box[keep])
        labels.append(torch.empty_like(keep).fill_(i))
        scores.append(score[keep])

    if not boxes:
        # torch.cat rejects an empty list, so return empty results
        # explicitly when nothing passed the score threshold.
        # NOTE(review): labels use torch.long here, assuming box_nms
        # returns long indices as in the non-empty path - confirm.
        return (box_preds.new_zeros((0, 4)),
                torch.zeros(0, dtype=torch.long, device=box_preds.device),
                cls_preds.new_zeros(0))

    boxes = torch.cat(boxes, 0)
    labels = torch.cat(labels, 0)
    scores = torch.cat(scores, 0)
    return boxes, labels, scores
def decode(self, loc_preds, cls_preds, score_thresh=0.6, nms_thresh=0.45):
    '''Decode predicted loc/cls back to real box locations and class labels.

    Decoding rule (applied to every coordinate):
      coord = loc * variance[0] * (default_w or default_h) + default_coord

    Args:
      loc_preds: (tensor) predicted loc, sized [#anchors,8]. The arithmetic
        below needs 8 columns, assuming self.default_boxes is [#anchors,4].
      cls_preds: (tensor) predicted conf, sized [#anchors,#classes].
      score_thresh: (float) threshold for object confidence score.
      nms_thresh: (float) threshold for box nms.

    Returns:
      boxes: (tensor) decoded box coordinates, one row per kept detection.
      labels: (tensor) class labels, sized [#obj,].
      scores: (tensor) confidence of each kept box, sized [#obj,].
      All three are empty (#obj == 0) when no score exceeds score_thresh.
    '''
    variances = (1, 0.2)  # only variances[0] is used by this decoding

    # NOTE(review): 'xywh2xyxyxyxy' presumably expands each default box to
    # its four corner points - confirm against change_box_order.
    default_boxes = change_box_order(self.default_boxes, 'xywh2xyxyxyxy')
    # (w, h) repeated four times lines up with the 8 coordinates, so x
    # offsets are scaled by width and y offsets by height.
    box_preds = loc_preds * variances[0] * self.default_boxes[:, 2:].repeat(
        1, 4) + default_boxes

    boxes = []
    labels = []
    scores = []
    num_classes = cls_preds.size(1)
    for i in range(num_classes - 1):
        score = cls_preds[:, i + 1]  # class i corresponds to (i+1) column
        mask = score > score_thresh
        if not mask.any():
            continue
        # Boolean indexing keeps the row dimension even when only one box
        # passes; nonzero().squeeze() would collapse it to a 0-dim index.
        box = box_preds[mask]
        score = score[mask]

        keep = box_nms(box, score, nms_thresh)
        kept_boxes = box[keep]
        boxes.append(kept_boxes)
        labels.append(torch.full((len(kept_boxes),), i, dtype=torch.long))
        scores.append(score[keep])

    if not boxes:
        # torch.cat rejects an empty list, so return empty results
        # explicitly when nothing passed the score threshold.
        return (box_preds.new_zeros((0, box_preds.size(1))),
                torch.zeros(0, dtype=torch.long),
                cls_preds.new_zeros(0))

    boxes = torch.cat(boxes, 0)
    labels = torch.cat(labels, 0)
    scores = torch.cat(scores, 0)
    return boxes, labels, scores
def nonlocal_matching(self):
    '''Greedily match decoded box predictions to target boxes by IoU.

    NOTE(review): this method looks unfinished. `loc_preds`, `boxes` and
    `argmax` are neither parameters nor locals, so they must exist at module
    level for this to run, and the computed `mask` is never returned.
    '''
    # The converted anchors must be bound to the name used below; the
    # previous spelling (`anchors_boxes`) left `anchor_boxes` undefined.
    # NOTE(review): sibling methods read `self.anchor_boxes`; confirm that
    # `self.anchors_boxes` is the intended attribute.
    anchor_boxes = change_box_order(self.anchors_boxes, 'xyxy2xywh')

    # Decode predictions: centre offsets are scaled by the anchor size and
    # width/height are predicted in log space.
    xy = loc_preds[:, :2] * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
    wh = loc_preds[:, 2:].exp() * anchor_boxes[:, 2:]
    box_preds = torch.cat([xy - wh / 2, xy + wh / 2], 1)

    ious = box_iou(box_preds, boxes)
    # -1 means "no target box matched to this prediction".
    index = torch.empty(anchor_boxes.size(0), dtype=torch.long).fill_(-1)

    masked_ious = ious.clone()
    while True:
        i, j = argmax(masked_ious)
        if masked_ious[i, j] < 1e-6:
            break
        index[i] = j
        # Remove the matched pair from further matching.
        masked_ious[i, :] = 0
        masked_ious[:, j] = 0

    mask = (index < 0)  # chose the activated bbox
def encode(self, boxes, labels):
    '''Encode target bounding boxes and class labels.

    SSD coding rules:
      tx = (x - anchor_x) / (variance[0]*anchor_w)
      ty = (y - anchor_y) / (variance[0]*anchor_h)
      tw = log(w / anchor_w) / variance[1]
      th = log(h / anchor_h) / variance[1]

    Args:
      boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
      labels: (tensor) object class labels, sized [#obj,].

    Returns:
      loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
      cls_targets: (tensor) encoded class labels, sized [#anchors,].
        0 marks an unmatched default box; matched ones carry label + 1.

    Reference:
      https://github.com/chainer/chainercv/blob/master/chainercv/links/model/ssd/multibox_coder.py
    '''
    def argmax(x):
        '''Find the (row, col) position of the max value of a 2D tensor.'''
        v, i = x.max(0)
        # The reduced index is a 0-dim tensor; it cannot be subscripted
        # with [0], so convert it to a Python int instead.
        j = v.max(0)[1].item()
        return (i[j], j)

    default_boxes = self.default_boxes  # xywh
    default_boxes = change_box_order(default_boxes, 'xywh2xyxy')

    ious = box_iou(default_boxes, boxes)  # [#anchors, #obj]
    # -1 means "no ground truth box matched to this default box".
    index = torch.full((len(default_boxes),), -1, dtype=torch.long)

    # Match ground truth boxes with default boxes based on IoU: take the
    # largest remaining IoU, record the pair, then remove both from play.
    masked_ious = ious.clone()
    while True:
        i, j = argmax(masked_ious)
        if masked_ious[i, j] < 1e-6:
            break
        index[i] = j
        masked_ious[i, :] = 0
        masked_ious[:, j] = 0

    # Assign ground truth boxes to unmatched default boxes if the overlap is
    # good enough. Consequence: some ground truth boxes are matched with
    # multiple default boxes, but each default box has at most one ground
    # truth box, and some default boxes stay unmatched.
    mask = (index < 0) & (ious.max(1)[0] >= 0.5)
    if mask.any():
        # Boolean indexing keeps the selection 2D even when exactly one
        # default box qualifies, so the per-row argmax is always valid.
        index[mask] = ious[mask].max(1)[1]

    # Shape: (num_default_boxes, 4). Each default box is replaced with the
    # ground truth box it was matched with. Unmatched default boxes are given
    # the first ground truth box (negative indices are not supported); they
    # are still tracked as negative examples through index == -1.
    matched = index.clamp(min=0)
    boxes = boxes[matched]
    boxes = change_box_order(boxes, 'xyxy2xywh')
    default_boxes = change_box_order(default_boxes, 'xyxy2xywh')

    variances = (0.1, 0.2)
    loc_xy = (boxes[:, :2] - default_boxes[:, :2]) / default_boxes[:, 2:] / variances[0]
    loc_wh = torch.log(boxes[:, 2:] / default_boxes[:, 2:]) / variances[1]
    loc_targets = torch.cat([loc_xy, loc_wh], 1)

    # Shift every label up by one to make room for the "unassigned" class.
    # F.cross_entropy doesn't allow negative class numbers, so -1 cannot be
    # used for that class; 0 is used instead.
    cls_targets = 1 + labels[matched]  # Positive examples
    # Unmatched default boxes get class 0. These are treated as negative
    # examples in the loss. See SSDLoss.
    cls_targets[index < 0] = 0  # Negative examples
    return loc_targets, cls_targets
def encode(self, boxes, labels):
    '''Encode target 8-coordinate boxes and class labels.

    Coding rule (applied to each of the 8 coordinates):
      t = (coord - default_coord) / (variance[0] * (default_w or default_h))
    x coordinates are normalised by the default box width and y coordinates
    by its height.

    Args:
      boxes: (tensor) bounding boxes of (x1,y1,x2,y2,x3,y3,x4,y4), sized [#obj, 8].
      labels: (tensor) object class labels, sized [#obj,].

    Returns:
      loc_targets: (tensor) encoded bounding boxes, sized [#anchors,8].
      cls_targets: (tensor) encoded class labels, sized [#anchors,].
        0 marks an unmatched default box; matched ones carry label + 1.

    Reference:
      https://github.com/chainer/chainercv/blob/master/chainercv/links/model/ssd/multibox_coder.py
    '''
    def argmax(x):
        '''Find the (row, col) position of the max value of a 2D tensor.'''
        v, i = x.max(0)
        # The reduced index is a 0-dim tensor; it cannot be subscripted
        # with [0], so convert it to a Python int instead.
        j = v.max(0)[1].item()
        return (i[j], j)

    default_boxes = self.default_boxes  # xywh
    # NOTE(review): 'xywh2xyxyxyxy' presumably expands each default box to
    # its four corner points - confirm against change_box_order.
    default_boxes = change_box_order(default_boxes, 'xywh2xyxyxyxy')

    # Overlap is computed against the default boxes in xyxy form.
    ious = simplified_iou(
        change_box_order(self.default_boxes, 'xywh2xyxy'),
        boxes)  # [#anchors, #obj]
    # -1 means "no object assigned to this default box".
    index = torch.full((len(default_boxes),), -1, dtype=torch.long)

    # Greedy one-to-one matching on the largest remaining IoU.
    masked_ious = ious.clone()
    while True:
        i, j = argmax(masked_ious)
        if masked_ious[i, j] < 1e-6:
            break
        index[i] = j
        masked_ious[i, :] = 0
        masked_ious[:, j] = 0

    mask = (index < 0) & (ious.max(1)[0] >= 0.5)
    if mask.any():
        # Boolean indexing keeps the selection 2D even when exactly one
        # default box qualifies, so the per-row argmax is always valid.
        index[mask] = ious[mask].max(1)[1]

    matched = index.clamp(min=0)  # negative index not supported
    boxes = boxes[matched]

    variances = (1, 0.2)  # only variances[0] is used by this encoding
    # (w, h) repeated four times lines up with the 8 coordinates.
    loc_targets = (boxes - default_boxes) / self.default_boxes[:, 2:].repeat(
        1, 4) / variances[0]

    cls_targets = 1 + labels[matched]
    cls_targets[index < 0] = 0
    return loc_targets, cls_targets