def __create_label(self, bboxes):
    """
    Build the per-detector training labels for one image.

    Anchors quoted by the original documentation (the values actually used are
    read from ``self.__anchors``; they are compared against boxes that have been
    divided by the detector stride):
        (1.25, 1.625), (2.0, 3.75), (4.125, 2.875)             small_detector, small objects
        (1.875, 3.8125), (3.875, 2.8125), (3.6875, 7.4375)     medium_detector, medium objects
        (3.625, 2.8125), (4.875, 6.1875), (11.65625, 10.1875)  big_detector, large objects

    Assignment rule implemented here:
      * For every detector, each anchor whose IoU with the (stride-scaled) bbox
        exceeds 0.3 becomes a positive sample for that bbox.
      * If no anchor on any detector exceeds 0.3, the single anchor with the
        highest IoU over all detectors (the "best anchor") is used instead.
    A detector that received at least one positive for a bbox also records the
    bbox in its own box list (sbboxes / mbboxes / lbboxes).

    Index convention: small_detector is index 0, medium_detector is index 1,
    big_detector is index 2.

    :param bboxes: all boxes of one image, one row per box laid out as
        (xmin, ymin, xmax, ymax, class_ind).
    :return: (label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes)
        label_*bbox: array of shape
            (train_output_size, train_output_size, anchor_per_scale, 5 + num_classes);
            the original documentation gives the grid sizes as input_size / 8,
            input_size / 16 and input_size / 32 respectively.
            Positive slots hold (x, y, w, h, 1, one-hot classes) where
            (x, y, w, h) is the bbox at its unscaled size (not divided by the
            stride); every other slot is all zeros.
        sbboxes / mbboxes / lbboxes: arrays of shape (max_bbox_per_scale, 4)
            holding (x, y, w, h) at the unscaled size. Per the original
            documentation they are used to compute the IoU between a detector's
            predictions and all boxes that detector is responsible for.
    """
    num_detectors = 3
    label = [
        np.zeros((self.__train_output_sizes[i], self.__train_output_sizes[i],
                  self.__anchor_per_scale, 5 + self.__num_classes))
        for i in range(num_detectors)
    ]
    bboxes_xywh = [np.zeros((self.__max_bbox_per_scale, 4)) for _ in range(num_detectors)]
    # Number of boxes recorded so far for each detector.
    bbox_count = [0] * num_detectors

    for bbox in bboxes:
        bbox_coor = bbox[:4]
        # Cast so that a floating-point box array can still be used as an index.
        bbox_class_ind = int(bbox[4])

        # Hard one-hot class target. np.float64 replaces the np.float alias,
        # which no longer exists in NumPy >= 1.24.
        # NOTE: label smoothing is not applied; the stored target is the plain one-hot.
        onehot = np.zeros(self.__num_classes, dtype=np.float64)
        onehot[bbox_class_ind] = 1.0

        # (1) (xmin, ymin, xmax, ymax) -> (x_center, y_center, w, h)
        bbox_xywh = np.concatenate(
            [(bbox_coor[2:] + bbox_coor[:2]) * 0.5, bbox_coor[2:] - bbox_coor[:2]],
            axis=-1,
        )

        # (2) Divide by each detector's stride: row i is the bbox expressed in
        #     grid units of detector i.
        bbox_xywh_scaled = 1.0 * bbox_xywh[np.newaxis, :] / self.__strides[:, np.newaxis]

        # (3) IoU between the bbox and every anchor of every detector.
        iou = []
        exist_positive = False
        for i in range(num_detectors):
            # Anchors are centred on the grid cell that contains the bbox centre.
            cell_xy = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32)
            anchors_xywh = np.zeros((self.__anchor_per_scale, 4))
            anchors_xywh[:, 0:2] = cell_xy + 0.5
            anchors_xywh[:, 2:4] = self.__anchors[i]

            iou_scale = utils.iou_calc2(bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh)
            iou.append(iou_scale)
            iou_mask = iou_scale > 0.3

            if not np.any(iou_mask):
                continue

            xind, yind = cell_xy

            # (4) Mark every anchor with IoU > 0.3 as (x, y, w, h, 1, classes).
            #     The slot is zeroed first because one anchor may exceed the
            #     threshold for several boxes; without clearing it could end
            #     up labelled with more than one class.
            label[i][yind, xind, iou_mask, :] = 0
            label[i][yind, xind, iou_mask, 0:4] = bbox_xywh
            label[i][yind, xind, iou_mask, 4:5] = 1.0
            label[i][yind, xind, iou_mask, 5:] = onehot

            # The box list is a ring buffer: once full, old entries are overwritten.
            bbox_ind = bbox_count[i] % self.__max_bbox_per_scale
            bboxes_xywh[i][bbox_ind, :4] = bbox_xywh
            bbox_count[i] += 1

            exist_positive = True

        if exist_positive:
            continue

        # Fallback: no anchor passed the threshold, so use the single best one.
        best_anchor_ind = np.argmax(np.array(iou).reshape(-1), axis=-1)
        best_detect, best_anchor = divmod(int(best_anchor_ind), self.__anchor_per_scale)

        xind, yind = np.floor(bbox_xywh_scaled[best_detect, 0:2]).astype(np.int32)

        # (4) Mark the best anchor as (x, y, w, h, 1, classes). The slot is
        #     zeroed first because one anchor can be the best match for
        #     several boxes (the original author counts 248 such anchors for
        #     a 416 input); without clearing it could carry several classes.
        label[best_detect][yind, xind, best_anchor, :] = 0
        label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh
        label[best_detect][yind, xind, best_anchor, 4:5] = 1.0
        label[best_detect][yind, xind, best_anchor, 5:] = onehot

        bbox_ind = bbox_count[best_detect] % self.__max_bbox_per_scale
        bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh
        bbox_count[best_detect] += 1

    label_sbbox, label_mbbox, label_lbbox = label
    sbboxes, mbboxes, lbboxes = bboxes_xywh
    return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes
def __create_label(self, bboxes):
    """
    Build the per-detector training labels for one image (best-anchor rule).

    Anchors quoted by the original documentation (the values actually used are
    read from ``self.__anchors``; they are compared against boxes that have been
    divided by the detector stride):
        (1.25, 1.625), (2.0, 3.75), (4.125, 2.875)             small_detector, small objects
        (1.875, 3.8125), (3.875, 2.8125), (3.6875, 7.4375)     medium_detector, medium objects
        (3.625, 2.8125), (4.875, 6.1875), (11.65625, 10.1875)  big_detector, large objects

    The anchor with the highest IoU against a bbox is its "best anchor", and
    the detector owning that anchor is responsible for predicting the bbox.
    Each detector therefore collects its own set of boxes: small_detector in
    sbboxes, medium_detector in mbboxes, big_detector in lbboxes.

    Index convention: small_detector is index 0, medium_detector is index 1,
    big_detector is index 2.

    :param bboxes: all boxes of one image, one row per box laid out as
        (xmin, ymin, xmax, ymax, class_ind).
    :return: (label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes)
        label_*bbox: array of shape
            (train_output_size, train_output_size, anchor_per_scale, 5 + num_classes);
            the original documentation gives the grid sizes as input_size / 8,
            input_size / 16 and input_size / 32 respectively.
            Only the best-anchor slot holds (x, y, w, h, 1, one-hot classes),
            where (x, y, w, h) is the bbox at its unscaled size (not divided
            by the stride); every other slot is all zeros.
        sbboxes / mbboxes / lbboxes: arrays of shape (max_bbox_per_scale, 4)
            holding (x, y, w, h) at the unscaled size. Per the original
            documentation they are used to compute the IoU between a detector's
            predictions and all boxes that detector is responsible for.
    """
    num_detectors = 3
    label = [
        np.zeros((self.__train_output_sizes[i], self.__train_output_sizes[i],
                  self.__anchor_per_scale, 5 + self.__num_classes))
        for i in range(num_detectors)
    ]
    bboxes_xywh = [np.zeros((self.__max_bbox_per_scale, 4)) for _ in range(num_detectors)]
    # Number of boxes recorded so far for each detector.
    bbox_count = [0] * num_detectors

    for bbox in bboxes:
        bbox_coor = bbox[:4]
        # Cast so that a floating-point box array can still be used as an index.
        bbox_class_ind = int(bbox[4])

        # (1) (xmin, ymin, xmax, ymax) -> (x_center, y_center, w, h)
        bbox_xywh = np.concatenate(
            [(bbox_coor[2:] + bbox_coor[:2]) * 0.5, bbox_coor[2:] - bbox_coor[:2]],
            axis=-1,
        )

        # (2) Divide by each detector's stride: row i is the bbox expressed in
        #     grid units of detector i.
        bbox_xywh_scaled = 1.0 * bbox_xywh[np.newaxis, :] / self.__strides[:, np.newaxis]

        # (3) IoU between the bbox and every anchor of every detector; the
        #     anchors are centred on the grid cell containing the bbox centre.
        iou = []
        for i in range(num_detectors):
            anchors_xywh = np.zeros((self.__anchor_per_scale, 4))
            anchors_xywh[:, 0:2] = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5
            anchors_xywh[:, 2:4] = self.__anchors[i]
            iou.append(utils.iou_calc2(bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh))

        # Flattened index -> (detector index, anchor index within that detector).
        best_anchor_ind = np.argmax(np.array(iou).reshape(-1), axis=-1)
        best_detect, best_anchor = divmod(int(best_anchor_ind), self.__anchor_per_scale)

        xind, yind = np.floor(bbox_xywh_scaled[best_detect, 0:2]).astype(np.int32)

        # (4) Mark the best anchor as (x, y, w, h, 1, classes).
        #     The slot is zeroed first: the same anchor can be the best match
        #     for several boxes, and without clearing, the class bits of the
        #     earlier boxes would remain set next to the coordinates of the
        #     latest one, producing a multi-class label.
        label[best_detect][yind, xind, best_anchor, :] = 0
        label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh
        label[best_detect][yind, xind, best_anchor, 4:5] = 1.0
        label[best_detect][yind, xind, best_anchor, 5 + bbox_class_ind] = 1.0

        # The box list is a ring buffer: once full, old entries are overwritten.
        bbox_ind = bbox_count[best_detect] % self.__max_bbox_per_scale
        bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh
        bbox_count[best_detect] += 1

    label_sbbox, label_mbbox, label_lbbox = label
    sbboxes, mbboxes, lbboxes = bboxes_xywh
    return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes