def encode(self, boxes, labels):
    '''Build per-anchor regression and classification targets.

    Each anchor is paired with the ground-truth box it overlaps most, and
    the pair is encoded with the Faster RCNN box coder:
      tx = (x - anchor_x) / anchor_w
      ty = (y - anchor_y) / anchor_h
      tw = log(w / anchor_w)
      th = log(h / anchor_h)

    Args:
      boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
      labels: (tensor) object class labels, sized [#obj,].

    Returns:
      loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
      cls_targets: (tensor) label of the best-matching object plus one,
        sized [#anchors,].
    '''
    anchors_xyxy = self.anchor_boxes

    # For every anchor, the index of the object with the highest IoU.
    _, best_obj = box_iou(anchors_xyxy, boxes).max(1)

    matched = change_box_order(boxes[best_obj], 'xyxy2xywh')
    anchors = change_box_order(anchors_xyxy, 'xyxy2xywh')
    anchor_wh = anchors[:, 2:]

    offsets_xy = (matched[:, :2] - anchors[:, :2]) / anchor_wh
    offsets_wh = torch.log(matched[:, 2:] / anchor_wh)
    loc_targets = torch.cat([offsets_xy, offsets_wh], 1)

    # No IoU thresholding happens here: every anchor keeps the shifted label
    # of its best match; nothing is marked as background (0) or ignored (-1).
    cls_targets = 1 + labels[best_obj]
    return loc_targets, cls_targets
def encode(self, boxes, labels):
    '''Encode target bounding boxes and class labels.

    SSD coding rules:
      tx = (x - anchor_x) / (variance[0]*anchor_w)
      ty = (y - anchor_y) / (variance[0]*anchor_h)
      tw = log(w / anchor_w) / variance[1]
      th = log(h / anchor_h) / variance[1]

    Matching runs in two passes:
      1. Greedy bipartite matching: repeatedly take the largest remaining
         IoU and bind that default box to that object, so each object gets
         at most one default box in this pass.
      2. Every still-unmatched default box whose best IoU is >= 0.5 is
         bound to its best-overlapping object.

    Args:
      boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
      labels: (tensor) object class labels, sized [#obj,].

    Returns:
      loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
        Rows of unmatched default boxes are encoded against object 0 and
        carry no meaning.
      cls_targets: (tensor) encoded class labels, sized [#anchors,];
        label + 1 for matched default boxes, 0 for unmatched ones.

    Reference:
      https://github.com/chainer/chainercv/blob/master/chainercv/links/model/ssd/multibox_coder.py
    '''
    def argmax(x):
        '''Find the max value index(row & col) of a 2D tensor.'''
        v, i = x.max(0)
        j = v.max(0)[1].item()
        return (i[j], j)

    default_boxes = self.default_boxes_new  # xywh
    default_boxes = change_box_order(default_boxes, 'xywh2xyxy')

    ious = box_iou(default_boxes, boxes)  # [#anchors, #obj]
    # index[a] is the object matched to default box a, or -1 if unmatched.
    index = torch.LongTensor(len(default_boxes)).fill_(-1)

    # Pass 1: greedy one-to-one matching on a scratch copy of the IoU table.
    masked_ious = ious.clone()
    while True:
        i, j = argmax(masked_ious)
        if masked_ious[i, j] < 1e-6:
            break
        index[i] = j
        # Zero the used row and column so neither is picked again.
        masked_ious[i, :] = 0
        masked_ious[:, j] = 0

    # Pass 2: unmatched default boxes with a sufficiently high overlap.
    mask = (index < 0) & (ious.max(1)[0] >= 0.5)
    if mask.any():
        # Index with the boolean mask directly. Going through
        # mask.nonzero().squeeze() collapses to a 0-d index when exactly one
        # default box is selected, which turns the row lookup into a 1-D
        # tensor and makes .max(1) fail.
        index[mask] = ious[mask].max(1)[1]

    boxes = boxes[index.clamp(min=0)]  # negative index not supported
    boxes = change_box_order(boxes, 'xyxy2xywh')
    default_boxes = change_box_order(default_boxes, 'xyxy2xywh')

    variances = (0.1, 0.2)
    loc_xy = (boxes[:, :2] - default_boxes[:, :2]) / default_boxes[:, 2:] / variances[0]
    loc_wh = torch.log(boxes[:, 2:] / default_boxes[:, 2:]) / variances[1]
    loc_targets = torch.cat([loc_xy, loc_wh], 1)
    cls_targets = 1 + labels[index.clamp(min=0)]
    cls_targets[index < 0] = 0  # unmatched default boxes are background
    return loc_targets, cls_targets
def encode(self, boxes, labels):
    '''Match objects to default boxes and build the training targets.

    Matching is done in two passes: a greedy one-to-one pass that always
    takes the largest remaining IoU, followed by a pass that binds every
    still-unmatched default box with best IoU >= 0.5 to its best object.
    Offsets are divided by the variances (0.1 for xy, 0.2 for wh).

    Args:
      boxes: (tensor) object boxes, converted here with 'xyxy2xywh',
        sized [#obj, 4].
      labels: (tensor) object class labels, sized [#obj,].

    Returns:
      loc_targets: (tensor) encoded boxes, sized [#anchors,4]; rows whose
        class target is 0 hold no meaningful value.
      cls_targets: (tensor) label + 1 for matched default boxes (positive
        samples), 0 for all others, sized [#anchors,].
    '''
    def best_pair(scores):
        # Row (default box) and column (object) of the largest entry.
        col_best, col_rows = scores.max(0)
        col = col_best.max(0)[1].item()
        return (col_rows[col], col)

    priors_xyxy = change_box_order(self.default_boxes, 'xywh2xyxy')
    ious = box_iou(priors_xyxy, boxes)  # [#anchors, #obj]

    # assigned[a] holds the object index matched to default box a, -1 if none.
    assigned = torch.LongTensor(len(priors_xyxy)).fill_(-1)

    # First pass: greedy matching on a scratch copy of the IoU table.
    remaining = ious.clone()
    row, col = best_pair(remaining)
    while not remaining[row, col] < 1e-6:
        assigned[row] = col
        # Zeroing the row and column marks both as consumed; once only
        # zeros are left the loop condition ends the search.
        remaining[row, :] = 0
        remaining[:, col] = 0
        row, col = best_pair(remaining)

    # Second pass: default boxes missed above that overlap some object by
    # at least 0.5 take the index of their best-overlapping object.
    unmatched = assigned < 0
    overlapping = ious.max(1)[0] >= 0.5
    second_pass = unmatched & overlapping
    if second_pass.any():
        assigned[second_pass] = ious[second_pass].max(1)[1]

    safe_idx = assigned.clamp(min=0)  # negative index not supported
    matched = change_box_order(boxes[safe_idx], 'xyxy2xywh')
    priors = change_box_order(priors_xyxy, 'xyxy2xywh')

    variances = (0.1, 0.2)
    loc_xy = (matched[:, :2] - priors[:, :2]) / priors[:, 2:] / variances[0]
    loc_wh = torch.log(matched[:, 2:] / priors[:, 2:]) / variances[1]
    loc_targets = torch.cat([loc_xy, loc_wh], 1)

    cls_targets = 1 + labels[safe_idx]
    cls_targets[assigned < 0] = 0
    return loc_targets, cls_targets
def encode_(self, image, boxes, labels):
    '''Encode target bounding boxes and class labels.

    Each default box is paired with the ground-truth box it overlaps most
    and encoded with the variance-scaled Faster RCNN box coder:
      tx = (x - anchor_x) / anchor_w / variances[0]
      ty = (y - anchor_y) / anchor_h / variances[0]
      tw = log(w / anchor_w) / variances[1]
      th = log(h / anchor_h) / variances[1]
    with variances = (0.1, 0.2).

    Args:
      image: not used by this method; kept in the signature for callers.
      boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
      labels: (tensor) object class labels, sized [#obj,].

    Returns:
      loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
      cls_targets: (tensor) encoded class labels, sized [#anchors,]:
        label + 1 where the best IoU is >= 0.5, -1 (ignored) where it lies
        strictly between 0.4 and 0.5, and 0 (background) otherwise.
    '''
    anchor_boxes = self.default_boxes  # xywh
    anchor_boxes = change_box_order(anchor_boxes, 'xywh2xyxy')

    ious = box_iou(anchor_boxes, boxes)
    max_ious, max_ids = ious.max(1)  # best overlap and its object, per anchor
    boxes = boxes[max_ids]

    boxes = change_box_order(boxes, 'xyxy2xywh')
    anchor_boxes = change_box_order(anchor_boxes, 'xyxy2xywh')

    variances = (0.1, 0.2)
    loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:] / variances[0]
    loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:]) / variances[1]
    loc_targets = torch.cat([loc_xy, loc_wh], 1)

    cls_targets = 1 + labels[max_ids]
    cls_targets[max_ious < 0.5] = 0
    # Overrides part of the background assignment above: anchors in the open
    # interval (0.4, 0.5) are marked -1 instead of 0.
    ignore = (max_ious > 0.4) & (max_ious < 0.5)
    cls_targets[ignore] = -1
    return loc_targets, cls_targets
def encode(self, boxes, labels, input_size):
    """Build per-anchor regression and classification targets.

    Each anchor is paired with the ground-truth box it overlaps most, and
    the pair is encoded with the Faster RCNN box coder:
      tx = (x - anchor_x) / anchor_w
      ty = (y - anchor_y) / anchor_h
      tw = log(w / anchor_w)
      th = log(h / anchor_h)

    Args:
      boxes: (tensor) bounding boxes of (xmin, ymin, xmax, ymax), sized [#obj, 4].
      labels: (tensor) object class labels, sized [#obj, ].
      input_size: (int/tuple) model input size of (w, h), should be the same.

    Returns:
      loc_trues: (tensor) encoded bounding boxes, sized [#anchors, 4].
      cls_trues: (tensor) float32 class targets, sized [#anchors, ]: the
        matched label where the best IoU is >= 0.5, -1 (ignored) where it
        lies strictly between 0.4 and 0.5, and 0 otherwise.
    """
    input_size = _make_list_input_size(input_size)
    gt_boxes = tf.reshape(boxes, [-1, 4])
    anchors = self._get_anchor_boxes(input_size)

    gt_boxes = change_box_order(gt_boxes, 'xyxy2xywh')
    # Multiply (x, y, w, h) by input_size repeated twice to scale the boxes
    # back to the original size.
    gt_boxes = gt_boxes * tf.tile(input_size, [2])

    overlaps = box_iou(anchors, gt_boxes, order='xywh')
    best_ids = tf.argmax(overlaps, axis=1)
    best_ious = tf.reduce_max(overlaps, axis=1)

    # One matched ground-truth row per anchor: [#anchors, 4].
    matched = tf.gather(gt_boxes, best_ids)
    anchor_xy = anchors[:, :2]
    anchor_wh = anchors[:, 2:]
    loc_trues = tf.concat(
        [
            (matched[:, :2] - anchor_xy) / anchor_wh,
            tf.log(matched[:, 2:] / anchor_wh),
        ],
        1,
    )

    # TODO: check if needs add 1 here
    cls_trues = tf.gather(labels, best_ids)
    below_pos = best_ious < 0.5
    cls_trues = tf.where(below_pos, tf.zeros_like(cls_trues), cls_trues)

    # Anchors with IoU in the open interval (0.4, 0.5) are marked -1.
    in_ignore_band = tf.logical_and(best_ious > 0.4, below_pos)
    cls_trues = tf.where(in_ignore_band, tf.ones_like(cls_trues) * -1, cls_trues)
    return loc_trues, tf.cast(cls_trues, tf.float32)
def decode(self, loc_preds, cls_preds, score_thresh=0.6, nms_thresh=0.45):
    '''Decode predicted loc/cls back to real box locations and class labels.

    The predicted offsets are applied to the anchors, then each foreground
    class is thresholded on its score and filtered with NMS independently.

    Args:
      loc_preds: (tensor) predicted loc, sized [#anchors,4].
      cls_preds: (tensor) predicted conf, sized [#anchors,#classes].
        Column 0 is skipped; column i+1 holds the score of class i.
      score_thresh: (float) threshold for object confidence score.
      nms_thresh: (float) threshold for box nms.

    Returns:
      boxes: (tensor) bbox locations as (xmin,ymin,xmax,ymax), sized [#obj,4].
      labels: (tensor) class labels, sized [#obj,].
      scores: (tensor) confidence scores, sized [#obj,].
      All three are empty (zero rows) when no score exceeds score_thresh.
    '''
    anchor_boxes = change_box_order(self.anchor_boxes, 'xyxy2xywh')
    xy = loc_preds[:, :2] * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
    wh = loc_preds[:, 2:].exp() * anchor_boxes[:, 2:]
    # Centre/size back to corner form.
    box_preds = torch.cat([xy - wh / 2, xy + wh / 2], 1)

    boxes = []
    labels = []
    scores = []
    num_classes = cls_preds.size(1)
    for i in range(num_classes - 1):
        score = cls_preds[:, i + 1]  # class i corresponds to (i+1) column
        mask = score > score_thresh
        if not mask.any():
            continue
        box = box_preds[mask]
        score = score[mask]

        keep = box_nms(box, score, nms_thresh)
        boxes.append(box[keep])
        labels.append(torch.empty_like(keep).fill_(i))
        scores.append(score[keep])

    if not boxes:
        # torch.cat rejects an empty list, so hand back empty results instead
        # of raising when nothing clears the score threshold.
        # NOTE(review): labels normally inherit the dtype of box_nms's
        # output; long is assumed for the empty case -- confirm.
        return (
            box_preds.new_zeros((0, 4)),
            box_preds.new_zeros((0,), dtype=torch.long),
            cls_preds.new_zeros((0,)),
        )

    boxes = torch.cat(boxes, 0)
    labels = torch.cat(labels, 0)
    scores = torch.cat(scores, 0)
    return boxes, labels, scores
def encode(self, boxes, labels, input_size, pos_iou_threshold=0.5,
           neg_iou_threshold=0.4):
    """Build per-anchor regression and classification targets.

    Each anchor is paired with the ground-truth box it overlaps most, and
    the pair is encoded with the Faster RCNN box coder:
      tx = (x - anchor_x) / anchor_w
      ty = (y - anchor_y) / anchor_h
      tw = log(w / anchor_w)
      th = log(h / anchor_h)

    Args:
      boxes: (tensor) bounding boxes of (xmin, ymin, xmax, ymax), sized [#obj, 4].
      labels: (tensor) object class labels, sized [#obj, ].
      input_size: (int/tuple) model input size of (w, h), should be the same.
      pos_iou_threshold: (float) anchors whose best IoU is below this value
        do not keep their matched label.
      neg_iou_threshold: (float) anchors whose best IoU lies strictly between
        this value and pos_iou_threshold are marked as ignored (-1).

    Returns:
      loc_trues: (tensor) encoded bounding boxes, sized [#anchors, 4]. When
        conf.use_secondbig_loss_constrain is set, the encoding of a second
        box per anchor is appended, giving [#anchors, 8].
      cls_trues: (tensor) float32 class targets, sized [#anchors, ]: the
        matched label, 0 below pos_iou_threshold, or -1 when ignored.
    """
    input_size = _make_list_input_size(input_size)
    boxes = tf.reshape(boxes, [-1, 4])
    anchor_boxes = self._get_anchor_boxes(input_size)
    boxes = change_box_order(boxes, 'xyxy2xywh')
    # Scale back to the original size.
    # NOTE(review): the original author asked whether this line and the
    # conversion above should be swapped -- still unresolved.
    boxes = boxes * tf.tile(input_size, [2])

    anchor_xy = anchor_boxes[:, :2]
    anchor_wh = anchor_boxes[:, 2:]

    def box_offsets(matched):
        # Encode one matched box per anchor against the anchors: [#anchors, 4].
        delta_xy = (matched[:, :2] - anchor_xy) / anchor_wh
        delta_wh = tf.log(matched[:, 2:] / anchor_wh)
        return tf.concat([delta_xy, delta_wh], 1)

    ious = box_iou(anchor_boxes, boxes, order='xywh')  # [#anchor, num_bboxes]
    max_ids = tf.argmax(ious, axis=1)  # [#anchor,]
    max_ious = tf.reduce_max(ious, axis=1)  # [#anchor,]

    loc_trues = box_offsets(tf.gather(boxes, max_ids))

    # TODO: check if needs add 1 here
    cls_trues = tf.gather(labels, max_ids)
    below_pos = max_ious < pos_iou_threshold
    cls_trues = tf.where(below_pos, tf.zeros_like(cls_trues), cls_trues)
    # IoUs in the open interval (neg_iou_threshold, pos_iou_threshold) are
    # ignored and marked as -1.
    ignore = tf.logical_and(max_ious > neg_iou_threshold, below_pos)
    cls_trues = tf.where(ignore, tf.ones_like(cls_trues) * -1, cls_trues)
    cls_trues = tf.cast(cls_trues, tf.float32)

    # Second biggest IoU.
    if conf.use_secondbig_loss_constrain:
        # Subtract 1 from each anchor's best entry so that argmax now lands
        # on another column (presumably IoUs lie in [0, 1]; with a single
        # object the same column is selected again).
        num_boxes = tf.shape(ious, out_type=tf.int32)[1]
        demoted_ious = ious - tf.one_hot(max_ids, num_boxes)
        second_max_ids = tf.argmax(demoted_ious, axis=1)  # [#anchor,]
        sec_loc_trues = box_offsets(tf.gather(boxes, second_max_ids))
        loc_trues = tf.concat([loc_trues, sec_loc_trues], 1)

    return loc_trues, cls_trues