def encode(self, boxes, labels, threshold=0.5):
    '''
    Turn ground-truth boxes and labels into per-anchor training targets.

    Every anchor in ``self.default_boxes`` is paired with the ground-truth
    box it overlaps most, and the pair is encoded as regression offsets.

    Args:
        boxes: ground-truth boxes, one row per object. The arithmetic below
            treats each row as (min corner, max corner).
        labels: one label per row of ``boxes``; label ``k`` is stored as
            class ``k + 1`` because class 0 is reserved for background.
        threshold: anchors whose best IoU is below this value are marked
            as background.

    Returns:
        ``(loc, conf)``: ``loc`` is float32 with one row of four offsets
        per anchor, ``conf`` is int32 with one class id per anchor. When
        ``boxes`` is empty both are all zeros.
    '''
    anchors = self.default_boxes

    if len(boxes) == 0:
        empty_loc = np.zeros(anchors.shape, dtype=np.float32)
        empty_conf = np.zeros(anchors.shape[:1], dtype=np.int32)
        return empty_loc, empty_conf

    # IoU of every anchor against every ground-truth box.
    # NOTE(review): bbox_iou / point_form are defined elsewhere; presumably
    # point_form converts (center, size) anchors to corner form -- confirm.
    overlaps = bbox_iou(point_form(anchors), boxes)

    # Pair each anchor with its highest-IoU object. Many anchors overlap
    # nothing at all (IoU 0); they are still paired here and get relabelled
    # as background further down.
    best_gt = overlaps.argmax(axis=1)
    best_iou = overlaps.max(axis=1)
    matched_boxes = boxes[best_gt]
    matched_labels = labels[best_gt]

    # The regression target is the difference between an anchor and its
    # object: center shift scaled by the anchor size, and log size ratio.
    anchor_center = anchors[:, :2]
    anchor_size = anchors[:, 2:]
    gt_center = (matched_boxes[:, :2] + matched_boxes[:, 2:]) / 2
    gt_size = matched_boxes[:, 2:] - matched_boxes[:, :2]

    center_offset = (gt_center - anchor_center) / (
        self.variance[0] * anchor_size)
    size_offset = np.log(gt_size / anchor_size) / self.variance[1]
    loc = np.hstack((center_offset, size_offset))

    # Shift labels up by one so that 0 means background, then mark every
    # poorly-overlapping anchor as background. Non-zero entries of ``conf``
    # therefore identify the positive anchors.
    conf = 1 + matched_labels
    conf[best_iou < threshold] = 0

    return loc.astype(np.float32), conf.astype(np.int32)
def _crop(image, boxes, labels):
    '''
    Randomly crop ``image`` and adjust ``boxes`` / ``labels`` to the crop.

    A mode is drawn at random: either "no crop" (inputs returned as-is) or
    a minimum-IoU constraint (0.1, 0.3, 0.7, 0.9, or unconstrained). Up to
    50 candidate windows are tried per mode; if none satisfies the
    constraint a new mode is drawn.

    Args:
        image: 3-D array; the first two axes are height and width, the
            third is ignored here.
        boxes: array with one (x_min, y_min, x_max, y_max) row per object,
            in the same pixel coordinates as the crop window.
        labels: array with one entry per row of ``boxes``.

    Returns:
        ``(image, boxes, labels)``. After a crop, only boxes whose center
        lies strictly inside the window are kept; they are clipped to the
        window and shifted so the window's top-left corner is the origin.
        The result may contain zero boxes. Inputs are returned untouched
        when there are no boxes, when the image is too small to crop, or
        when the "no crop" mode is drawn.
    '''
    height, width, _ = image.shape

    if len(boxes) == 0:
        return image, boxes, labels

    # A crop window must be at least 1 pixel and strictly smaller than the
    # image on each side, which is impossible below 2 pixels. Without this
    # guard the window width could be 0 and ``h / w`` would divide by zero.
    if width < 2 or height < 2:
        return image, boxes, labels

    # Windows span at least 30% of each side, but never less than 1 pixel
    # (int(0.3 * side) is 0 for sides of 1-3 pixels).
    min_w = max(1, int(0.3 * width))
    min_h = max(1, int(0.3 * height))

    while True:
        mode = random.choice((
            None,
            (0.1, None),
            (0.3, None),
            (0.7, None),
            (0.9, None),
            (None, None),
        ))

        if mode is None:
            return image, boxes, labels

        # A missing bound means "unconstrained" on that side. Every mode
        # listed above leaves max_iou unset.
        min_iou, max_iou = mode
        if min_iou is None:
            min_iou = float('-inf')
        if max_iou is None:
            max_iou = float('inf')

        for _attempt in range(50):
            w = random.randrange(min_w, width)
            h = random.randrange(min_h, height)
            # Reject windows more elongated than 2:1 in either direction.
            if h / w < 0.5 or 2 < h / w:
                continue

            left = random.randrange(width - w)
            top = random.randrange(height - h)
            roi = np.array((left, top, left + w, top + h))

            # NOTE(review): bbox_iou is defined elsewhere; presumably it
            # returns the IoU of every box with the window -- confirm.
            iou = bbox_iou(boxes, roi[np.newaxis])
            if not (min_iou <= iou.min() and iou.max() <= max_iou):
                continue

            image = image[roi[1]:roi[3], roi[0]:roi[2]]

            # Keep only objects whose center is strictly inside the window.
            centers = (boxes[:, :2] + boxes[:, 2:]) / 2
            mask = np.logical_and(roi[:2] < centers, centers < roi[2:]) \
                .all(axis=1)
            # Copy so the caller's array is not modified by the clipping.
            boxes = boxes[mask].copy()
            labels = labels[mask]

            # Clip to the window, then move the origin to its top-left.
            boxes[:, :2] = np.maximum(boxes[:, :2], roi[:2])
            boxes[:, :2] -= roi[:2]
            boxes[:, 2:] = np.minimum(boxes[:, 2:], roi[2:])
            boxes[:, 2:] -= roi[:2]

            return image, boxes, labels
def encode(self, boxes, labels, threshold=0.5):
    '''
    Take the boxes of the objects in an image together with their labels,
    and assign one object to every anchor.

    With no boxes, returns a float32 zero array shaped like
    ``self.default_boxes`` and an int32 zero array with one entry per
    anchor.
    '''
    if len(boxes) == 0:
        return (
            np.zeros(self.default_boxes.shape, dtype=np.float32),
            np.zeros(self.default_boxes.shape[:1], dtype=np.int32))
    # Compute the IoU between the anchors and the ground-truth boxes.
    # NOTE(review): bbox_iou / point_form are defined elsewhere; presumably
    # the result has one row per anchor and one column per box -- confirm.
    iou = bbox_iou(point_form(self.default_boxes), boxes)
    # Match each anchor to the ground-truth object it overlaps most.
    # Many anchors have IoU 0 with every object; that does not matter at
    # this point, they are matched anyway.
    # NOTE(review): the definition ends here in this view; the encoding
    # and return steps that would use gt_idx are not visible.
    gt_idx = iou.argmax(axis=1)
def encode(self, boxes, labels, threshold=0.5):
    '''
    Build per-anchor localisation and class targets from ground truth.

    Args:
        boxes: ground-truth boxes, one row per object, handled below as
            (min corner, max corner).
        labels: one label per row of ``boxes``.
        threshold: minimum best-match IoU for an anchor to keep its
            object's class; anything lower becomes background (class 0).

    Returns:
        ``(loc, conf)``: float32 offsets (four per anchor) and int32 class
        ids (object label + 1, or 0 for background). Both are all zeros
        when ``boxes`` is empty.
    '''
    if len(boxes) > 0:
        # NOTE(review): bbox_iou / point_form come from elsewhere in the
        # project; presumably rows index anchors and columns index boxes.
        pairwise = bbox_iou(point_form(self.default_boxes), boxes)
        nearest = pairwise.argmax(axis=1)
        overlap = pairwise.max(axis=1)

        # Ground truth gathered per anchor (the best-overlapping object).
        gt_boxes, gt_labels = boxes[nearest], labels[nearest]
        corner_min, corner_max = gt_boxes[:, :2], gt_boxes[:, 2:]

        # Offsets of the object's midpoint from the anchor's first two
        # columns, normalised by the anchor's last two columns.
        d_center = ((corner_min + corner_max) / 2
                    - self.default_boxes[:, :2]) / (
                        self.variance[0] * self.default_boxes[:, 2:])
        # Log ratio of the object's extent to the anchor's extent.
        d_extent = np.log(
            (corner_max - corner_min) / self.default_boxes[:, 2:]
        ) / self.variance[1]
        loc = np.hstack((d_center, d_extent))

        # Class ids are shifted by one so 0 can stand for background.
        conf = 1 + gt_labels
        conf[overlap < threshold] = 0

        return loc.astype(np.float32), conf.astype(np.int32)

    # No objects in the image: every anchor is background with zero offsets.
    return (np.zeros(self.default_boxes.shape, dtype=np.float32),
            np.zeros(self.default_boxes.shape[:1], dtype=np.int32))