Ejemplo n.º 1
0
    def creat_label(self, bboxes):
        """
        Assign every ground-truth box of one image to anchors and build the
        per-layer training targets.

        For each box:
        1. The corner coordinates ("xyxy") are converted to centre/size
           ("xywh") and divided by each stride, giving the box in grid units
           of every detection layer.
        2. On each layer the IoU between the box and that layer's anchors
           (centred on the cell that contains the box centre) is computed.
           Every anchor whose IoU is greater than 0.3 becomes responsible for
           the box.
        3. If no anchor on any layer exceeds 0.3, the single anchor with the
           highest IoU over all layers is used instead.

        Notes:
        - One box may be assigned to several anchors, on the same layer (even
          in the same cell) or on different layers, so the per-layer box lists
          may together hold more entries than there are boxes.
        - When several boxes map to the same anchor of the same cell, the box
          processed last overwrites the earlier ones.
        - Each per-layer box list has 150 rows; the write position wraps
          around, so a layer that receives more than 150 boxes overwrites its
          oldest entries.
        - Cell indices are clamped to the grid. A box whose centre lies on or
          outside the image border is therefore written to the nearest border
          cell instead of raising IndexError or wrapping to the opposite side
          through negative indexing.

        Args:
            bboxes: iterable of rows read as
                [xmin, ymin, xmax, ymax, class_index, mix_value].
                Assumes each row is a NumPy array (the corner arithmetic is
                element-wise) -- TODO confirm against the caller.

        Returns:
            Tuple (label_sbbox, label_mbbox, label_lbbox,
                   sbboxes, mbboxes, lbboxes).
            The three labels belong to detection layers 0, 1 and 2 (in the
            order of the configured strides); each has shape
            (grid, grid, anchors_per_scale, 6 + num_classes) with channels
            0:4 = box xywh in image units, 4 = objectness, 5 = mix value
            (1.0 by default) and 6: = smoothed one-hot class vector.
            The three box arrays have shape (150, 4) and hold the xywh of the
            boxes assigned to the corresponding layer, zero padded.
        """
        max_boxes = 150  # fixed capacity of each per-layer box list
        num_scales = 3  # the return value is unpacked into exactly 3 layers

        anchors = np.array(self.cfg_MODEL["ANCHORS"])
        strides = np.array(self.cfg_MODEL["STRIDES"])
        train_output_size = self.img_size / strides
        anchors_per_scale = self.cfg_MODEL["ANCHORS_PER_SCLAE"]
        grid_sizes = [int(train_output_size[i]) for i in range(num_scales)]

        label = [
            np.zeros((grid_sizes[i], grid_sizes[i], anchors_per_scale,
                      6 + self.num_classes))
            for i in range(num_scales)
        ]
        for layer_label in label:
            layer_label[..., 5] = 1.0  # mix value defaults to 1.0 everywhere

        bboxes_xywh = [np.zeros((max_boxes, 4)) for _ in range(num_scales)]
        bbox_count = [0] * num_scales

        def _assign(scale, anchor_sel, bbox_xywh, bbox_xywh_scaled, bbox_mix,
                    one_hot_smooth):
            # Write one box into the targets of `scale` for the anchors picked
            # by `anchor_sel` (boolean mask or single index) and record the
            # box in that layer's box list.
            cell = np.floor(bbox_xywh_scaled[scale, 0:2]).astype(np.int32)
            # Clamp so boxes centred on/outside the border stay inside the grid.
            xind, yind = np.clip(cell, 0, grid_sizes[scale] - 1)

            target = label[scale]
            target[yind, xind, anchor_sel, 0:4] = bbox_xywh
            target[yind, xind, anchor_sel, 4:5] = 1.0
            target[yind, xind, anchor_sel, 5:6] = bbox_mix
            target[yind, xind, anchor_sel, 6:] = one_hot_smooth

            # The box list is a ring buffer: the slot index wraps at capacity.
            bboxes_xywh[scale][bbox_count[scale] % max_boxes, :4] = bbox_xywh
            bbox_count[scale] += 1

        for bbox in bboxes:  # handle the ground-truth boxes one by one
            bbox_coor = bbox[:4]
            bbox_class_ind = int(bbox[4])
            bbox_mix = bbox[5]

            # Smoothed one-hot class vector.
            one_hot = np.zeros(self.num_classes, dtype=np.float32)
            one_hot[bbox_class_ind] = 1.0
            one_hot_smooth = dataAug.LabelSmooth()(one_hot, self.num_classes)

            # Convert "xyxy" to "xywh" (centre x, centre y, width, height).
            bbox_xywh = np.concatenate(
                [(bbox_coor[2:] + bbox_coor[:2]) * 0.5,
                 bbox_coor[2:] - bbox_coor[:2]],
                axis=-1)

            # One row per stride: the same box expressed in grid units.
            bbox_xywh_scaled = (
                1.0 * bbox_xywh[np.newaxis, :] / strides[:, np.newaxis])

            iou = []
            exist_positive = False
            for i in range(num_scales):
                # Anchors share the centre of the cell holding the box centre.
                anchors_xywh = np.zeros((anchors_per_scale, 4))
                anchors_xywh[:, 0:2] = np.floor(
                    bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5
                anchors_xywh[:, 2:4] = anchors[i]

                iou_scale = tools.iou_xywh_numpy(
                    bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh)
                iou.append(iou_scale)
                iou_mask = iou_scale > 0.3

                if np.any(iou_mask):
                    # Every anchor above the threshold is made responsible.
                    _assign(i, iou_mask, bbox_xywh, bbox_xywh_scaled,
                            bbox_mix, one_hot_smooth)
                    exist_positive = True

            if not exist_positive:
                # No anchor passed the threshold: use the best one overall.
                best_anchor_ind = int(
                    np.argmax(np.array(iou).reshape(-1), axis=-1))
                best_detect, best_anchor = divmod(best_anchor_ind,
                                                  anchors_per_scale)
                _assign(best_detect, best_anchor, bbox_xywh,
                        bbox_xywh_scaled, bbox_mix, one_hot_smooth)

        label_sbbox, label_mbbox, label_lbbox = label
        sbboxes, mbboxes, lbboxes = bboxes_xywh

        return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes
Ejemplo n.º 2
0
    def __creat_label(self, bboxes):
        """
        Assign every ground-truth box of one image to anchors and build the
        per-layer training targets.

        For each box:
        1. The corner coordinates ("xyxy") are converted to centre/size
           ("xywh") and divided by each stride, giving the box in grid units
           of every detection layer.
        2. On each layer the IoU between the box and that layer's anchors
           (centred on the cell that contains the box centre) is computed.
           Every anchor whose IoU is greater than 0.3 becomes responsible for
           the box.
        3. If no anchor on any layer exceeds 0.3, the single anchor with the
           highest IoU over all layers is used instead.

        Notes:
        - One box may be assigned to several anchors, on the same or on
          different layers, so the per-layer box lists may together hold more
          entries than there are boxes.
        - When several boxes map to the same anchor of the same cell, the box
          processed last overwrites the earlier ones.
        - Each per-layer box list has 150 rows and is written as a ring
          buffer: a layer receiving more than 150 boxes overwrites its oldest
          entries.
        - Cell indices are clamped to the grid, so a box whose centre lies on
          or outside the image border goes to the nearest border cell instead
          of raising IndexError or wrapping around through negative indexing.

        Args:
            bboxes: iterable of rows read as
                [xmin, ymin, xmax, ymax, class_index, mix_value].
                Assumes each row is a NumPy array (the corner arithmetic is
                element-wise) -- TODO confirm against the caller.

        Returns:
            Tuple (label_sbbox, label_mbbox, label_lbbox,
                   sbboxes, mbboxes, lbboxes).
            The labels belong to detection layers 0, 1 and 2 (in the order of
            the configured strides); each has shape
            (grid, grid, anchors_per_scale, 6 + num_classes) with channels
            0:4 = box xywh in image units, 4 = objectness, 5 = mix value
            (1.0 by default) and 6: = smoothed one-hot class vector.
            The box arrays have shape (150, 4) and hold the xywh of the boxes
            assigned to the corresponding layer, zero padded.
        """
        box_capacity = 150  # fixed number of rows in each per-layer box list
        num_layers = 3  # the return value is unpacked into exactly 3 layers

        anchors = np.array(cfg.MODEL["ANCHORS"])
        strides = np.array(cfg.MODEL["STRIDES"])
        train_output_size = self.img_size / strides
        anchors_per_scale = cfg.MODEL["ANCHORS_PER_SCLAE"]
        grid_sizes = [int(train_output_size[i]) for i in range(num_layers)]

        label = [
            np.zeros(
                (
                    grid_sizes[i],
                    grid_sizes[i],
                    anchors_per_scale,
                    6 + self.num_classes,
                )
            )
            for i in range(num_layers)
        ]
        for layer_label in label:
            layer_label[..., 5] = 1.0  # mix value defaults to 1.0 everywhere

        bboxes_xywh = [np.zeros((box_capacity, 4)) for _ in range(num_layers)]
        bbox_count = [0] * num_layers

        def _write_target(
            layer, anchor_sel, bbox_xywh, bbox_xywh_scaled, bbox_mix, one_hot_smooth
        ):
            # Store one box in the targets of `layer` for the anchors selected
            # by `anchor_sel` (boolean mask or single index), then register the
            # box in that layer's box list.
            cell = np.floor(bbox_xywh_scaled[layer, 0:2]).astype(np.int32)
            # Clamp so boxes centred on/outside the border stay inside the grid.
            xind, yind = np.clip(cell, 0, grid_sizes[layer] - 1)

            # If several boxes hit the same anchor, the last one written wins.
            target = label[layer]
            target[yind, xind, anchor_sel, 0:4] = bbox_xywh
            target[yind, xind, anchor_sel, 4:5] = 1.0
            target[yind, xind, anchor_sel, 5:6] = bbox_mix
            target[yind, xind, anchor_sel, 6:] = one_hot_smooth

            # Ring-buffer write: the slot index wraps at the list capacity.
            slot = bbox_count[layer] % box_capacity
            bboxes_xywh[layer][slot, :4] = bbox_xywh
            bbox_count[layer] += 1

        for bbox in bboxes:
            bbox_coor = bbox[:4]
            bbox_class_ind = int(bbox[4])
            bbox_mix = bbox[5]

            # Smoothed one-hot class vector.
            one_hot = np.zeros(self.num_classes, dtype=np.float32)
            one_hot[bbox_class_ind] = 1.0
            one_hot_smooth = dataAug.LabelSmooth()(one_hot, self.num_classes)

            # Convert "xyxy" to "xywh" (centre x, centre y, width, height).
            bbox_xywh = np.concatenate(
                [
                    (bbox_coor[2:] + bbox_coor[:2]) * 0.5,
                    bbox_coor[2:] - bbox_coor[:2],
                ],
                axis=-1,
            )

            # One row per stride: the same box expressed in grid units.
            bbox_xywh_scaled = (
                1.0 * bbox_xywh[np.newaxis, :] / strides[:, np.newaxis]
            )

            iou = []
            exist_positive = False
            for i in range(num_layers):
                # Anchors share the centre of the cell holding the box centre.
                anchors_xywh = np.zeros((anchors_per_scale, 4))
                anchors_xywh[:, 0:2] = (
                    np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5
                )
                anchors_xywh[:, 2:4] = anchors[i]

                iou_scale = tools.iou_xywh_numpy(
                    bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh
                )
                iou.append(iou_scale)
                iou_mask = iou_scale > 0.3

                if np.any(iou_mask):
                    # Every anchor above the threshold is made responsible.
                    _write_target(
                        i,
                        iou_mask,
                        bbox_xywh,
                        bbox_xywh_scaled,
                        bbox_mix,
                        one_hot_smooth,
                    )
                    exist_positive = True

            if not exist_positive:
                # No anchor passed the threshold: use the best one overall.
                best_anchor_ind = int(
                    np.argmax(np.array(iou).reshape(-1), axis=-1)
                )
                best_detect, best_anchor = divmod(
                    best_anchor_ind, anchors_per_scale
                )
                _write_target(
                    best_detect,
                    best_anchor,
                    bbox_xywh,
                    bbox_xywh_scaled,
                    bbox_mix,
                    one_hot_smooth,
                )

        label_sbbox, label_mbbox, label_lbbox = label
        sbboxes, mbboxes, lbboxes = bboxes_xywh

        return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes
Ejemplo n.º 3
0
    def __creat_label(self, bboxes, iou_thresh=0.5):
        """
        Assign the ground-truth boxes of one image to anchors and build the
        per-feature-map training targets.

        Boxes are first clipped to the image. Each valid box is converted to
        centre/size form, expressed in grid units of every feature map, and
        compared (IoU) with that map's anchors centred on the cell holding the
        box centre. Every anchor with IoU >= ``iou_thresh`` is made responsible
        for the box; if none qualifies on any feature map, the single anchor
        with the highest IoU is used.

        Args:
            bboxes: 2-D array whose rows are read as
                [xmin, ymin, xmax, ymax, class_index, mix_value, ...].
            iou_thresh: minimum IoU for an anchor to be assigned to a box.

        Returns:
            Tuple (label_s, label_m, label_l, bboxes_s, bboxes_m, bboxes_l).
            Each label has shape (grid, grid, anchors_per_scale,
            6 + num_classes) with channels 0:4 = box xywh in image units,
            4 = objectness, 5 = mix value (1.0 by default) and 6: = smoothed
            one-hot class vector. Each box array has shape (100, 4) and lists
            the xywh of the boxes registered on that feature map, zero padded;
            the write position wraps around after 100 entries.
        """
        # Cut away the parts of the boxes that lie outside the image.
        #
        # Original author's note: in the COCO dataset many boxes touching the
        # border store xmax / ymax as the size of the box side, which can make
        # xind / yind land exactly one cell out of range. It is corrected here
        # while creating the labels; a similar correction can be found in
        # eval/evaluator.py.
        upper = self.img_size - 1
        top_left = np.maximum(bboxes[:, 0:2], [0, 0])
        bottom_right = np.minimum(bboxes[:, 2:4], [upper, upper])
        bboxes = np.concatenate([top_left, bottom_right, bboxes[:, 4:]],
                                axis=-1)

        num_maps = self.__num_feature_map
        per_scale = self.__anchors_per_scale
        train_output_size = self.img_size / self.__strides
        # Per anchor: (x, y, w, h, obj_mask, mix, class scores...).
        channels = 6 + self.num_classes

        label = []
        for fmap in range(num_maps):
            side = int(train_output_size[fmap])  # grid height == grid width
            target = np.zeros((side, side, per_scale, channels))
            target[..., 5] = 1.0  # mix defaults to 1.0
            label.append(target)

        # Original author's note: per the results of voc.py and coco.py an
        # image never holds more than 100 boxes.
        max_objects = 100
        # Registers which boxes each feature map is in charge of.
        bboxes_xywh = [np.zeros((max_objects, 4)) for _ in range(num_maps)]
        bbox_count = [0 for _ in range(num_maps)]

        def register(fmap, anchor_sel, box_xywh, box_scaled, mix, smoothed):
            # Write one box into the cell of `fmap` that holds its centre, for
            # the anchors chosen by `anchor_sel`, and log it in the box list.
            xind, yind = np.floor(box_scaled[fmap, 0:2]).astype(np.int32)
            cell = label[fmap][yind, xind]  # view onto that cell's anchors
            cell[anchor_sel, 0:4] = box_xywh
            cell[anchor_sel, 4:5] = 1.0
            cell[anchor_sel, 5:6] = mix
            cell[anchor_sel, 6:] = smoothed

            row = bbox_count[fmap] % max_objects
            bboxes_xywh[fmap][row, :4] = box_xywh
            bbox_count[fmap] += 1

        for bbox in bboxes:
            # (0) Skip invalid boxes: no area, or class index out of range.
            degenerate = bbox[0] >= bbox[2] or bbox[1] >= bbox[3]
            bad_class = bbox[4] < 0 or bbox[4] >= self.num_classes
            if degenerate or bad_class:
                continue

            # (1) Build the targets of this box.
            # bbox: [xmin, ymin, xmax, ymax, class_ind, mix], in image units.
            corners = bbox[:4]
            class_ind = int(bbox[4])
            mix = bbox[5]
            # box_xywh: [x, y, w, h], in image units.
            centre = (corners[2:] + corners[:2]) * 0.5
            extent = corners[2:] - corners[:2]
            box_xywh = np.concatenate([centre, extent], axis=-1)
            # box_scaled: one row per stride, the box in that map's grid units.
            box_scaled = (1.0 * box_xywh[np.newaxis, :] /
                          self.__strides[:, np.newaxis])
            # One-hot class vector, then label smoothing.
            one_hot = np.zeros(self.num_classes, dtype=np.float32)
            one_hot[class_ind] = 1.0
            smoothed = LabelSmooth()(one_hot, self.num_classes)

            # (2) Look for anchors reaching iou_thresh on every feature map.
            iou = []  # keeps the IoUs of all feature maps for step (3)
            matched = False
            for fmap in range(num_maps):
                anchors_xywh = np.zeros((per_scale, 4))
                # Anchor centre = centre of the cell holding the box centre.
                anchors_xywh[:, 0:2] = np.floor(
                    box_scaled[fmap, 0:2]).astype(np.int32) + 0.5
                # Anchor sizes are the given priors (original note: they do
                # not change with multi-scale training).
                anchors_xywh[:, 2:4] = self.__anchors[fmap]

                iou_scale = tools.iou_xywh_numpy(
                    box_scaled[fmap][np.newaxis, :], anchors_xywh)
                iou.append(iou_scale)
                iou_mask = iou_scale >= iou_thresh

                if not np.any(iou_mask):
                    continue

                register(fmap, iou_mask, box_xywh, box_scaled, mix, smoothed)
                matched = True

            # (3) Nothing reached iou_thresh: take the anchor with largest IoU.
            if matched:
                continue
            # Flatten the (num_maps x per_scale) IoUs to locate the maximum.
            flat_best = np.argmax(np.array(iou).reshape(-1), axis=-1)
            best_fmap, best_anchor = map(int, divmod(int(flat_best), per_scale))
            register(best_fmap, best_anchor, box_xywh, box_scaled, mix,
                     smoothed)

        label_s, label_m, label_l = label
        bboxes_s, bboxes_m, bboxes_l = bboxes_xywh

        return label_s, label_m, label_l, bboxes_s, bboxes_m, bboxes_l