Exemple #1
0
    def encode(self, boxes, labels, threshold=0.5):
        '''
        Assign one ground-truth object to every anchor and build its targets.

        Takes the boxes of the objects in an image together with their labels
        and returns a pair ``(loc, conf)``:

        * ``loc`` (float32, same shape as ``self.default_boxes``) holds the
          encoded difference between each anchor and the box matched to it.
        * ``conf`` (int32, one entry per anchor) holds the matched label
          shifted up by one; 0 is reserved for background.

        ``boxes`` is read as (min corner, max corner) per row, while
        ``self.default_boxes`` is read as (center, size) per row.
        '''
        anchors = self.default_boxes

        # No objects at all: every anchor is background with zero offsets.
        if len(boxes) == 0:
            empty_loc = np.zeros(anchors.shape, dtype=np.float32)
            empty_conf = np.zeros(anchors.shape[:1], dtype=np.int32)
            return empty_loc, empty_conf

        # IoU between every anchor and every ground-truth box.
        overlaps = bbox_iou(point_form(anchors), boxes)

        # Match each anchor to the object it overlaps most. Many anchors have
        # IoU 0 with every object; they are matched anyway for now and are
        # turned into background further down.
        best_gt = overlaps.argmax(axis=1)
        best_iou = overlaps.max(axis=1)
        matched_boxes = boxes[best_gt]
        matched_labels = labels[best_gt]

        # The regression target is the gap between an anchor and its object,
        # so encode that gap: center shift scaled by anchor size, and the log
        # of the size ratio, each divided by its variance term.
        anchor_centers = anchors[:, :2]
        anchor_sizes = anchors[:, 2:]
        gt_centers = (matched_boxes[:, :2] + matched_boxes[:, 2:]) / 2
        center_offsets = (gt_centers - anchor_centers) / (
            self.variance[0] * anchor_sizes)
        gt_sizes = matched_boxes[:, 2:] - matched_boxes[:, :2]
        size_offsets = np.log(gt_sizes / anchor_sizes) / self.variance[1]
        loc = np.hstack((center_offsets, size_offsets))

        # Label 0 means background: anchors whose best IoU is below the
        # threshold get 0, so a non-zero entry marks a positive sample.
        conf = 1 + matched_labels
        conf[best_iou < threshold] = 0

        return loc.astype(np.float32), conf.astype(np.int32)
Exemple #2
0
def _crop(image, boxes, labels):
    height, width, _ = image.shape

    if len(boxes) == 0:
        return image, boxes, labels

    while True:
        mode = random.choice((
            None,
            (0.1, None),
            (0.3, None),
            (0.7, None),
            (0.9, None),
            (None, None),
        ))

        if mode is None:
            return image, boxes, labels

        min_iou, max_iou = mode
        if min_iou is None:
            min_iou = float('-inf')
        if max_iou is None:
            max_iou = float('inf')

        for _ in range(50):
            w = random.randrange(int(0.3 * width), width)
            h = random.randrange(int(0.3 * height), height)

            if h / w < 0.5 or 2 < h / w:
                continue

            l = random.randrange(width - w)
            t = random.randrange(height - h)
            roi = np.array((l, t, l + w, t + h))

            iou = bbox_iou(boxes, roi[np.newaxis])
            if not (min_iou <= iou.min() and iou.max() <= max_iou):
                continue

            image = image[roi[1]:roi[3], roi[0]:roi[2]]

            centers = (boxes[:, :2] + boxes[:, 2:]) / 2
            mask = np.logical_and(roi[:2] < centers, centers < roi[2:]) \
                     .all(axis=1)
            boxes = boxes[mask].copy()
            labels = labels[mask]

            boxes[:, :2] = np.maximum(boxes[:, :2], roi[:2])
            boxes[:, :2] -= roi[:2]
            boxes[:, 2:] = np.minimum(boxes[:, 2:], roi[2:])
            boxes[:, 2:] -= roi[:2]

            return image, boxes, labels
    def encode(self, boxes, labels, threshold=0.5):
        '''
        Take the positions of the objects in an image and their labels, and
        assign one object to each anchor.

        When ``boxes`` is empty, returns a float32 array of zeros shaped like
        ``self.default_boxes`` and an int32 array of zeros with one entry per
        anchor.
        '''
        if len(boxes) == 0:
            return (
                np.zeros(self.default_boxes.shape, dtype=np.float32),
                np.zeros(self.default_boxes.shape[:1], dtype=np.int32))

        # IoU between every anchor and every ground-truth box.
        iou = bbox_iou(point_form(self.default_boxes), boxes)
        # Match each anchor to the ground-truth object it overlaps most.
        # Many anchors have IoU 0 with every object; they are matched anyway
        # at this point.
        # NOTE(review): the next line is indented deeper than its siblings and
        # the method body ends right after it -- the rest of this definition
        # is not visible here; confirm against the full source.
           gt_idx = iou.argmax(axis=1)
Exemple #4
0
    def encode(self, boxes, labels, threshold=0.5):
        '''
        Build per-anchor localization and classification targets.

        Every anchor in ``self.default_boxes`` (rows read as center, size) is
        paired with the box in ``boxes`` (rows read as min corner, max corner)
        that it overlaps most. Returns ``(loc, conf)``: ``loc`` is a float32
        array of encoded anchor-to-box offsets and ``conf`` is an int32 array
        holding ``label + 1`` for anchors whose best IoU reaches ``threshold``
        and 0 (background) for the rest. With no boxes, both are all zeros.
        '''
        priors = self.default_boxes

        if len(boxes) == 0:
            # Without ground truth every anchor is background (label 0).
            return (np.zeros(priors.shape, dtype=np.float32),
                    np.zeros(priors.shape[:1], dtype=np.int32))

        pairwise_iou = bbox_iou(point_form(priors), boxes)

        # Best-overlapping ground-truth box for each anchor, and that overlap.
        nearest = pairwise_iou.argmax(axis=1)
        nearest_iou = pairwise_iou.max(axis=1)
        assigned_boxes, assigned_labels = boxes[nearest], labels[nearest]

        mins, maxs = assigned_boxes[:, :2], assigned_boxes[:, 2:]
        prior_centers, prior_sizes = priors[:, :2], priors[:, 2:]

        # Center shift relative to anchor size, and log size ratio, each
        # divided by its variance term.
        delta_center = ((mins + maxs) / 2 - prior_centers) / (
            self.variance[0] * prior_sizes)
        delta_size = np.log((maxs - mins) / prior_sizes) / self.variance[1]
        loc = np.hstack((delta_center, delta_size))

        # Shift labels by one so that 0 can mark poorly matched anchors.
        conf = 1 + assigned_labels
        conf[nearest_iou < threshold] = 0

        return loc.astype(np.float32), conf.astype(np.int32)